Remove url_base, refactor the get_data list, fix comments and PEP8 issues

This commit is contained in:
yann 2024-11-13 11:03:25 +01:00
parent 1adcf0b224
commit 5d6a9bc263

View File

@ -36,10 +36,19 @@ def product_description(soup):
desc = soup.find("p", class_='').string desc = soup.find("p", class_='').string
return desc return desc
# Build a list holding every scraped field of one product, in a fixed order.
# NOTE(review): the original author was unsure a flat list is the best
# container for this; the positional layout is kept so callers still work.
def get_data(soup, url):
    """Collect the scraped fields of a single product page into a list.

    Args:
        soup: Parsed product page, passed through to the helper functions.
        url: Address of the product page; stored as the first item and
            also passed to ``get_image_url``.

    Returns:
        A list of ten items, in this order: page url, UPC, title,
        price including tax, price excluding tax, availability,
        description, the literal placeholder ``"TODO"``, number of
        reviews, and image url.

    Raises:
        KeyError: If the mapping returned by ``product_information`` lacks
            one of the expected labels.
    """
    # Fetch the product information once and reuse it, instead of calling
    # product_information(soup) again for every single field.
    product_info = product_information(soup)
    return [
        url,
        product_info['UPC'],
        get_title(soup),
        product_info['Price (incl. tax)'],
        product_info['Price (excl. tax)'],
        product_info['Availability'],
        product_description(soup),
        # Placeholder for a field that is not implemented yet.
        "TODO",
        product_info['Number of reviews'],
        get_image_url(soup, url),
    ]
#write the file #write the file
@ -57,7 +66,7 @@ def data_output(info, file):
def main(): def main():
url_site="https://books.toscrape.com"
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html" url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
html = extract_web(url) html = extract_web(url)