Add a way to retrieve images: use requests and write the binary content to a file. Name each file with the category and an incremental number.
This commit is contained in:
parent
22ccd97fa3
commit
4785b2e6d8
@ -167,6 +167,7 @@ def main():
|
|||||||
|
|
||||||
# combine with phase 1 and write in csv for each url from product_url_list named with category
|
# combine with phase 1 and write in csv for each url from product_url_list named with category
|
||||||
data = []
|
data = []
|
||||||
|
img_nb = 1
|
||||||
for page_url in product_url_list:
|
for page_url in product_url_list:
|
||||||
page_soup = get_html(page_url)
|
page_soup = get_html(page_url)
|
||||||
# print(page_soup)
|
# print(page_soup)
|
||||||
@ -174,6 +175,12 @@ def main():
|
|||||||
# print(phase1.get_data(page_soup, page_url))
|
# print(phase1.get_data(page_soup, page_url))
|
||||||
data.append(get_data(page_soup, page_url))
|
data.append(get_data(page_soup, page_url))
|
||||||
|
|
||||||
|
# get img for every book and name it with category and incremental number
|
||||||
|
img_url = get_image_url(page_soup, page_url)
|
||||||
|
with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
|
||||||
|
img_file.write(requests.get(img_url).content)
|
||||||
|
img_nb += 1
|
||||||
|
|
||||||
print(processed_books, " livres traités")
|
print(processed_books, " livres traités")
|
||||||
print(total_books - processed_books, " livres restants")
|
print(total_books - processed_books, " livres restants")
|
||||||
print(total_category, " catégories restantes")
|
print(total_category, " catégories restantes")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user