diff --git a/phase4/main.py b/phase4/main.py index 0101c99..cbcc880 100644 --- a/phase4/main.py +++ b/phase4/main.py @@ -4,7 +4,7 @@ import csv # get soup from url def get_html(url): - r = requests.get(url) + r = requests.get(url, headers = {'User-agent': 'yann@needsome.coffee'}) html = r.content soup = BeautifulSoup(html, 'html.parser') return soup @@ -165,7 +165,7 @@ def main(): print(len(product_url_list), " livres présents") processed_books += len(product_url_list) - # combine with phase 1 and write in csv for each url from product_url_list named with category + # PHASE 3 : combine with phase 1 and write in csv for each url from product_url_list named with category data = [] img_nb = 1 for page_url in product_url_list: @@ -175,7 +175,7 @@ def main(): # print(phase1.get_data(page_soup, page_url)) data.append(get_data(page_soup, page_url)) - # get img for every book and name it with category and incremental number + # PHASE 4 : get img for every book and name it with category and incremental number img_url = get_image_url(page_soup, page_url) with open(category + "-" + str(img_nb) + ".png", "wb") as img_file: img_file.write(requests.get(img_url).content)