Add a main "resultat" folder to hold the output files, replace spaces in category names with underscores, and fix the "/" issue in image file names
This commit is contained in:
parent
cece9d1874
commit
6cb7913af2
@ -140,6 +140,7 @@ def data_output(info, file):
|
||||
def main():
|
||||
# init
|
||||
url = "https://books.toscrape.com/"
|
||||
os.mkdir("resultat")
|
||||
|
||||
### EXTRACTION ###
|
||||
# get html from URL
|
||||
@ -154,10 +155,13 @@ def main():
|
||||
|
||||
# go ahead for each category
|
||||
for line in get_category_list(soup, url):
|
||||
category = line[0]
|
||||
category_url = line[1]
|
||||
# replace spaces in the category name with underscores, to prevent potential issues when creating the directory
|
||||
category = line[0].replace(' ', '_')
|
||||
|
||||
category_url = line[1]
|
||||
category_path = "resultat/" + category
|
||||
total_category -= 1
|
||||
|
||||
# display what category is processed
|
||||
print("\n -> Traitement de la catégorie : " + category)
|
||||
|
||||
@ -177,12 +181,13 @@ def main():
|
||||
data = []
|
||||
img_nb = 1
|
||||
|
||||
# go ahead for each product of category
|
||||
# EXTRACT data for each product page
|
||||
for page_url in product_url_list:
|
||||
|
||||
# create the category directory. If it exists already, just continue
|
||||
try:
|
||||
os.mkdir(category)
|
||||
os.mkdir(category_path)
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
@ -195,10 +200,13 @@ def main():
|
||||
# LOAD data in a list
|
||||
data.append(product_data)
|
||||
|
||||
# protect path creation by replacing "/" with "_" in the image file name built from the product name
|
||||
img_name = (product_data[2] + ".png").replace('/', '_')
|
||||
|
||||
# PHASE 4 : get img for every book and name it with category and incremental number
|
||||
# EXTRACT images data -url, title, binary content- and LOAD binary content in a file named with the title
|
||||
img_url = get_image_url(page_soup, page_url)
|
||||
with open(category + "/" + product_data[2] + ".png", "wb") as img_file:
|
||||
with open(category_path + "/" + img_name, "wb") as img_file:
|
||||
img_file.write(requests.get(img_url).content)
|
||||
img_nb += 1
|
||||
|
||||
@ -207,7 +215,7 @@ def main():
|
||||
print(total_category, " catégories restantes")
|
||||
|
||||
# LOAD : write the list in the CSV file
|
||||
print("Done.\n Fichier " + data_output(data, category))
|
||||
print("Done.\n Fichier " + data_output(data, category_path))
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user