Remove url_base, reformat the get_data list across multiple lines, fix a comment, and apply PEP 8 formatting
This commit is contained in:
parent
1adcf0b224
commit
3a6cf9b87e
@ -36,15 +36,34 @@ def product_description(soup):
|
|||||||
desc = soup.find("p", class_='').string
|
desc = soup.find("p", class_='').string
|
||||||
return desc
|
return desc
|
||||||
|
|
||||||
#create a dict with all information for writing loop later
|
#create a list with all information consecutively
|
||||||
# /!\ don't know if that's the best way
|
# /!\ don't know if that's the best way
|
||||||
def get_data(soup, url):
|
def get_data(soup, url):
|
||||||
info = [url, product_information(soup)['UPC'], get_title(soup), product_information(soup)['Price (incl. tax)'], product_information(soup)['Price (excl. tax)'], product_information(soup)['Availability'], product_description(soup), "TODO", product_information(soup)['Number of reviews'], get_image_url(soup, url)]
|
info = [url, product_information(soup)['UPC'],
|
||||||
|
get_title(soup),
|
||||||
|
product_information(soup)['Price (incl. tax)'],
|
||||||
|
product_information(soup)['Price (excl. tax)'],
|
||||||
|
product_information(soup)['Availability'],
|
||||||
|
product_description(soup),
|
||||||
|
"TODO",
|
||||||
|
product_information(soup)['Number of reviews'],
|
||||||
|
get_image_url(soup, url)
|
||||||
|
]
|
||||||
return info
|
return info
|
||||||
|
|
||||||
#write the file
|
#write the file
|
||||||
def data_output(info, file):
|
def data_output(info, file):
|
||||||
fieldnames = ['product_page_url', 'universal_ product_code (upc)', 'title', 'price_including_tax', 'price_excluding_tax', 'number_available', 'product_description', 'category', 'review_rating', 'image_url']
|
fieldnames = ['product_page_url',
|
||||||
|
'universal_ product_code (upc)',
|
||||||
|
'title',
|
||||||
|
'price_including_tax',
|
||||||
|
'price_excluding_tax',
|
||||||
|
'number_available',
|
||||||
|
'product_description',
|
||||||
|
'category',
|
||||||
|
'review_rating',
|
||||||
|
'image_url']
|
||||||
|
|
||||||
with open(file, 'w') as csv_file:
|
with open(file, 'w') as csv_file:
|
||||||
writer = csv.writer(csv_file, delimiter = ',')
|
writer = csv.writer(csv_file, delimiter = ',')
|
||||||
writer.writerow(fieldnames)
|
writer.writerow(fieldnames)
|
||||||
@ -57,7 +76,7 @@ def data_output(info, file):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
url_site="https://books.toscrape.com"
|
|
||||||
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
|
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
|
||||||
|
|
||||||
html = extract_web(url)
|
html = extract_web(url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user