From 73b302a2bc4612f93a261bf731c458c5c3848627 Mon Sep 17 00:00:00 2001
From: yann <yann@needsome.coffee>
Date: Tue, 19 Nov 2024 12:25:34 +0100
Subject: [PATCH] improve comments, indicate phases, send User-agent header

---
 phase4/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/phase4/main.py b/phase4/main.py
index 0101c99..cbcc880 100644
--- a/phase4/main.py
+++ b/phase4/main.py
@@ -4,7 +4,7 @@ import csv
 
 # get soup from url
 def get_html(url):
-    r = requests.get(url)
+    r = requests.get(url, headers = {'User-agent': 'yann@needsome.coffee'})
     html = r.content
     soup = BeautifulSoup(html, 'html.parser')
     return soup
@@ -165,7 +165,7 @@ def main():
         print(len(product_url_list), " livres présents")
         processed_books += len(product_url_list)
 
-        # combine with phase 1 and write in csv for each url from product_url_list named with category
+        # PHASE 3: for each url in product_url_list, collect the phase 1 data and write it to a csv named after the category
         data = []
         img_nb = 1
         for page_url in product_url_list:
@@ -175,7 +175,7 @@ def main():
             #        print(phase1.get_data(page_soup, page_url))
             data.append(get_data(page_soup, page_url))
 
-            # get img for every book and name it with category and incremental number
+            # PHASE 4: download the image of every book and name it with the category and an incremental number
             img_url = get_image_url(page_soup, page_url)
             with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
                 img_file.write(requests.get(img_url).content)