improve comments, indicate phases
This commit is contained in:
parent
90f3b22efb
commit
73b302a2bc
@ -4,7 +4,7 @@ import csv
|
|||||||
|
|
||||||
# get soup from url
|
# get soup from url
|
||||||
def get_html(url):
|
def get_html(url):
|
||||||
r = requests.get(url)
|
r = requests.get(url, headers = {'User-agent': 'yann@needsome.coffee'})
|
||||||
html = r.content
|
html = r.content
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
return soup
|
return soup
|
||||||
@ -165,7 +165,7 @@ def main():
|
|||||||
print(len(product_url_list), " livres présents")
|
print(len(product_url_list), " livres présents")
|
||||||
processed_books += len(product_url_list)
|
processed_books += len(product_url_list)
|
||||||
|
|
||||||
# combine with phase 1 and write in csv for each url from product_url_list named with category
|
# PHASE 3 : reuse phase 1 on each url from product_url_list and write the results to a csv file named after the category
|
||||||
data = []
|
data = []
|
||||||
img_nb = 1
|
img_nb = 1
|
||||||
for page_url in product_url_list:
|
for page_url in product_url_list:
|
||||||
@ -175,7 +175,7 @@ def main():
|
|||||||
# print(phase1.get_data(page_soup, page_url))
|
# print(phase1.get_data(page_soup, page_url))
|
||||||
data.append(get_data(page_soup, page_url))
|
data.append(get_data(page_soup, page_url))
|
||||||
|
|
||||||
# get img for every book and name it with category and incremental number
|
# PHASE 4 : download the image of every book and name it with the category and an incremental number
|
||||||
img_url = get_image_url(page_soup, page_url)
|
img_url = get_image_url(page_soup, page_url)
|
||||||
with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
|
with open(category + "-" + str(img_nb) + ".png", "wb") as img_file:
|
||||||
img_file.write(requests.get(img_url).content)
|
img_file.write(requests.get(img_url).content)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user