From 73b302a2bc4612f93a261bf731c458c5c3848627 Mon Sep 17 00:00:00 2001
From: yann
Date: Tue, 19 Nov 2024 12:25:34 +0100
Subject: [PATCH] improve comments, indicate phases

---
 phase4/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/phase4/main.py b/phase4/main.py
index 0101c99..cbcc880 100644
--- a/phase4/main.py
+++ b/phase4/main.py
@@ -4,7 +4,7 @@ import csv
 
 # get soup from url
 def get_html(url):
-    r = requests.get(url)
+    r = requests.get(url, headers = {'User-agent': 'yann@needsome.coffee'})
     html = r.content
     soup = BeautifulSoup(html, 'html.parser')
     return soup
@@ -165,7 +165,7 @@ def main():
         print(len(product_url_list), " livres présents")
         processed_books += len(product_url_list)
 
-        # combine with phase 1 and write in csv for each url from product_url_list named with category
+        # PHASE 3 : combine with phase 1 and write in csv for each url from product_url_list named with category
         data = []
         img_nb = 1
         for page_url in product_url_list:
@@ -175,7 +175,7 @@ def main():
             # print(phase1.get_data(page_soup, page_url))
             data.append(get_data(page_soup, page_url))
 
-            # get img for every book and name it with category and incremental number
+            # PHASE 4 : get img for every book and name it with category and incremental number
             img_url = get_image_url(page_soup, page_url)
             with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
                 img_file.write(requests.get(img_url).content)