improve comments, indicate phases

This commit is contained in:
yann 2024-11-19 12:25:34 +01:00
parent 90f3b22efb
commit 73b302a2bc

View File

@@ -4,7 +4,7 @@ import csv
# get soup from url # get soup from url
def get_html(url): def get_html(url):
r = requests.get(url) r = requests.get(url, headers = {'User-agent': 'yann@needsome.coffee'})
html = r.content html = r.content
soup = BeautifulSoup(html, 'html.parser') soup = BeautifulSoup(html, 'html.parser')
return soup return soup
@@ -165,7 +165,7 @@ def main():
print(len(product_url_list), " livres présents") print(len(product_url_list), " livres présents")
processed_books += len(product_url_list) processed_books += len(product_url_list)
# combine with phase 1 and write in csv for each url from product_url_list named with category # PHASE 3 : combine with phase 1 and write in csv for each url from product_url_list named with category
data = [] data = []
img_nb = 1 img_nb = 1
for page_url in product_url_list: for page_url in product_url_list:
@@ -175,7 +175,7 @@ def main():
# print(phase1.get_data(page_soup, page_url)) # print(phase1.get_data(page_soup, page_url))
data.append(get_data(page_soup, page_url)) data.append(get_data(page_soup, page_url))
# get img for every book and name it with category and incremental number # PHASE 4 : get img for every book and name it with category and incremental number
img_url = get_image_url(page_soup, page_url) img_url = get_image_url(page_soup, page_url)
with open(category + "-" + str(img_nb) + ".png", "wb") as img_file: with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
img_file.write(requests.get(img_url).content) img_file.write(requests.get(img_url).content)