Add a way to retrieve images: use requests and write the binary content to a file. Name each file with the category and an incremental number.

This commit is contained in:
yann 2024-11-14 15:03:30 +01:00
parent 22ccd97fa3
commit 4785b2e6d8

View File

@@ -167,6 +167,7 @@ def main():
# combine with phase 1 and write to a CSV, named after the category, for each URL in product_url_list
data = []
img_nb = 1
for page_url in product_url_list:
page_soup = get_html(page_url)
# print(page_soup)
@@ -174,6 +175,12 @@ def main():
# print(phase1.get_data(page_soup, page_url))
data.append(get_data(page_soup, page_url))
# get the image for every book and name the file with the category and an incremental number
img_url = get_image_url(page_soup, page_url)
with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
img_file.write(requests.get(img_url).content)
img_nb += 1
print(processed_books, " livres traités")
print(total_books - processed_books, " livres restants")
print(total_category, " catégories restantes")