Init, README and main with main functions
This commit is contained in:
parent
cffae25b0c
commit
c5b1114e70
@ -0,0 +1,3 @@
|
|||||||
|
Un dossier pour chaque phase du projet
|
||||||
|
|
||||||
|
Avec chacun un README contenant les instructions
|
19
phase1/README.md
Normal file
19
phase1/README.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Phase 1
|
||||||
|
|
||||||
|
Choisissez n'importe quelle page Produit sur le site de Books to Scrape.
|
||||||
|
|
||||||
|
Écrivez un script Python qui visite cette page et en extrait les informations suivantes :
|
||||||
|
|
||||||
|
● product_page_url
|
||||||
|
● universal_product_code (upc)
|
||||||
|
● title
|
||||||
|
● price_including_tax
|
||||||
|
● price_excluding_tax
|
||||||
|
● number_available
|
||||||
|
● product_description
|
||||||
|
● category
|
||||||
|
● review_rating
|
||||||
|
● image_url
|
||||||
|
|
||||||
|
Écrivez les données dans un fichier CSV qui utilise les champs ci-dessus comme
|
||||||
|
en-têtes de colonnes.
|
43
phase1/main.py
Normal file
43
phase1/main.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Product page used by this script; the __main__ block assigns the same URL again.
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
|
||||||
|
|
||||||
|
def extract_web(url, timeout=10):
    """Download a page and return its raw body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the HTML parser as if it were the requested product page.
    response.raise_for_status()
    return response.content
|
||||||
|
|
||||||
|
def get_title(soup):
    """Return the product title found in the page's main product section.

    The title is the ``.string`` of the ``<h1>`` inside the first
    ``<div class="product_main">``.

    Args:
        soup: Parsed product page (object exposing the BeautifulSoup
            ``find`` API).

    Returns:
        The heading's ``.string`` value, or ``None`` when the product
        section or its ``<h1>`` is missing.
    """
    container = soup.find("div", class_="product_main")
    if container is None:
        return None
    heading = container.h1
    # Guard each hop: chaining ``.h1.string`` on a missing element would
    # raise AttributeError.
    if heading is None:
        return None
    return heading.string
|
||||||
|
|
||||||
|
|
||||||
|
def product_information(soup):
    """Collect the rows of the page's first table into a dictionary.

    Each ``<tr>`` contributes one entry: the ``.string`` of its ``<th>`` is
    the key and the ``.string`` of its ``<td>`` is the value.

    Args:
        soup: Parsed product page (object exposing the BeautifulSoup API).

    Returns:
        A dict mapping header text to cell text, in row order. The dict is
        empty when the page contains no table.
    """
    table = soup.table
    if table is None:
        return {}
    return {
        row.th.string: row.td.string
        for row in table.find_all("tr")
        # Rows lacking a header or a data cell carry no key/value pair.
        if row.th is not None and row.td is not None
    }
|
||||||
|
|
||||||
|
def get_image_url(soup, base_url="https://books.toscrape.com"):
    """Return the absolute URL of the first image on the page.

    Args:
        soup: Parsed product page (object exposing the BeautifulSoup API).
        base_url: Site root the image path is resolved against. It is a
            parameter so the function no longer depends on a global that is
            only assigned when the module runs as a script.

    Returns:
        The absolute image URL, or ``None`` when the page has no ``<img>``
        or the tag has no ``src``.
    """
    from urllib.parse import urljoin

    image = soup.img
    if image is None:
        return None
    source = image.get('src')
    if not source:
        return None
    # Drop the leading "../" hops so the remaining path is resolved from
    # the site root given by ``base_url``.
    while source.startswith('../'):
        source = source[3:]
    return urljoin(base_url.rstrip('/') + '/', source)
|
||||||
|
|
||||||
|
def product_description(soup):
    """Return the text of the product description paragraph.

    The paragraph is the first ``<p>`` matched by ``find("p", class_='')``.
    NOTE(review): presumably this selects the first ``<p>`` that has no
    class attribute; confirm against the installed BeautifulSoup version.

    Args:
        soup: Parsed product page (object exposing the BeautifulSoup
            ``find`` API).

    Returns:
        The paragraph's ``.string`` value, or ``None`` when no paragraph
        matches.
    """
    paragraph = soup.find("p", class_='')
    # ``find`` yields None when nothing matches; dereferencing ``.string``
    # on it would raise AttributeError.
    if paragraph is None:
        return None
    return paragraph.string
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Site root and the product page to scrape; both are assigned as
    # module-level names when the file is run as a script.
    url_site = "https://books.toscrape.com"
    url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"

    # Download the page, parse it, and print the product information table.
    html = extract_web(url)
    soup = BeautifulSoup(html, "html.parser")
    print(product_information(soup))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user