Init, README and main with main functions
This commit is contained in:
parent
cffae25b0c
commit
c5b1114e70
@ -0,0 +1,3 @@
|
||||
Un dossier pour chaque phase du projet
|
||||
|
||||
Avec chacun un README contenant les instructions
|
19
phase1/README.md
Normal file
19
phase1/README.md
Normal file
@ -0,0 +1,19 @@
|
||||
# Phase 1
|
||||
|
||||
Choisissez n'importe quelle page Produit sur le site de Books to Scrape.
|
||||
|
||||
Écrivez un script Python qui visite cette page et en extrait les informations suivantes :
|
||||
|
||||
● product_page_url
|
||||
● universal_product_code (upc)
|
||||
● title
|
||||
● price_including_tax
|
||||
● price_excluding_tax
|
||||
● number_available
|
||||
● product_description
|
||||
● category
|
||||
● review_rating
|
||||
● image_url
|
||||
|
||||
Écrivez les données dans un fichier CSV qui utilise les champs ci-dessus comme
|
||||
en-têtes de colonnes.
|
43
phase1/main.py
Normal file
43
phase1/main.py
Normal file
@ -0,0 +1,43 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import csv
|
||||
|
||||
# Product page used as the scraping target; the same value is assigned again
# under the ``__main__`` guard further down.
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
|
||||
|
||||
def extract_web(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The undecoded body of the response (``response.content``, bytes).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of handing them to the HTML parser,
    # where they would only surface later as confusing attribute errors.
    response.raise_for_status()
    return response.content
|
||||
|
||||
def get_title(soup):
    """Return the product title of a parsed product page.

    Looks up the first ``<div>`` carrying the ``product_main`` class and
    returns the ``.string`` value of the ``<h1>`` found inside it.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``find``.
    """
    main_block = soup.find("div", class_="product_main")
    heading = main_block.h1
    return heading.string
|
||||
|
||||
|
||||
def product_information(soup):
    """Turn the rows of the page's first table into a dictionary.

    Every ``<tr>`` contributes one entry: the ``.string`` of its ``<th>``
    is the key and the ``.string`` of its ``<td>`` is the value. When two
    rows share a header, the later row wins.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``table``.

    Returns:
        A dict mapping header text to cell text (empty if there are no rows).
    """
    return {
        row.th.string: row.td.string
        for row in soup.table.find_all("tr")
    }
|
||||
|
||||
def get_image_url(soup, base_url="https://books.toscrape.com"):
    """Return the absolute URL of the first image on the page.

    The ``src`` of the first ``<img>`` is resolved against *base_url*.
    Leading ``../`` segments that climb above the site root are dropped,
    and a ``src`` that is already absolute is returned unchanged.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``img``.
        base_url: Root of the site the image path is resolved against.
            Passed explicitly so the function no longer depends on a
            global that only exists when the file is run as a script.

    Returns:
        The absolute image URL as a string.
    """
    from urllib.parse import urljoin

    link = soup.img.get('src')
    # Normalise to exactly one trailing slash so the join is rooted at the
    # site root whatever form the caller passed.
    return urljoin(base_url.rstrip("/") + "/", link)
|
||||
|
||||
def product_description(soup):
    """Return the description text of a parsed product page.

    Asks the document for the first ``<p>`` matched by an empty class
    filter and returns that paragraph's ``.string`` value.

    Args:
        soup: Parsed document exposing a BeautifulSoup-style ``find``.
    """
    paragraph = soup.find("p", class_='')
    return paragraph.string
|
||||
|
||||
if __name__ == '__main__':
    # Root of the site being scraped and the product page to visit.
    url_site = "https://books.toscrape.com"
    url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"

    # Download the page, parse it, then show the product table as a dict.
    soup = BeautifulSoup(extract_web(url), "html.parser")
    product_info = product_information(soup)
    print(product_info)
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user