From c5b1114e70b2943aacdc173d26973cfd82ad21ff Mon Sep 17 00:00:00 2001 From: yann Date: Tue, 12 Nov 2024 17:56:24 +0100 Subject: [PATCH] Init, README and main with main functions --- README.md | 3 +++ phase1/README.md | 19 +++++++++++++++++++ phase1/main.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 phase1/README.md create mode 100644 phase1/main.py diff --git a/README.md b/README.md index e69de29..3fd57db 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,3 @@ +Un dossier pour chaque phase du projet + +Avec chacun un README contenant les instructions \ No newline at end of file diff --git a/phase1/README.md b/phase1/README.md new file mode 100644 index 0000000..0229516 --- /dev/null +++ b/phase1/README.md @@ -0,0 +1,19 @@ +# Phase 1 + +Choisissez n'importe quelle page Produit sur le site de Books to Scrape. + +Écrivez un script Python qui visite cette page et en extrait les informations suivantes : + + ● product_page_url + ● universal_product_code (upc) + ● title + ● price_including_tax + ● price_excluding_tax + ● number_available + ● product_description + ● category + ● review_rating + ● image_url + +Écrivez les données dans un fichier CSV qui utilise les champs ci-dessus comme +en-têtes de colonnes. 
"""Scrape one product page of Books to Scrape (contents of phase1/main.py).

Run as a script, the module downloads the page at ``url``, parses it and
prints the rows of its product information table.  The helper functions
take an already parsed page (a BeautifulSoup object) and each extract one
piece of information from it.
"""
# Not used yet; presumably meant for the CSV export the phase README asks
# for -- TODO confirm before removing.
import csv
from urllib.parse import urljoin

# Root of the site; prefixed to relative image links by get_image_url().
url_site = "https://books.toscrape.com"
# Product page scraped when the module is run as a script.
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"


def extract_web(url, timeout=10):
    """Download *url* and return the raw response body as bytes.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it a stalled connection would block the script forever.

    Returns:
        The undecoded body of the HTTP response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response status.
    """
    # Imported here because this is the only function that talks to the
    # network; the parsing helpers stay importable without the HTTP client.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were a product.
    response.raise_for_status()
    return response.content


def get_title(soup):
    """Return the text of the ``<h1>`` inside the ``product_main`` div.

    Returns None when the div or its heading is missing from the page.
    """
    main_block = soup.find("div", class_="product_main")
    if main_block is None or main_block.h1 is None:
        return None
    return main_block.h1.string


def product_information(soup):
    """Return the first table of the page as a ``{header: value}`` dict.

    Each ``<tr>`` contributes one entry: the text of its ``<th>`` is the
    key and the text of its ``<td>`` the value.  Rows lacking either cell
    are skipped, and a page without any table yields an empty dict.
    """
    table = soup.table
    if table is None:
        return {}
    return {
        row.th.string: row.td.string
        for row in table.find_all("tr")
        if row.th is not None and row.td is not None
    }


def get_image_url(soup, base_url=None):
    """Return the absolute URL of the first ``<img>`` of the page.

    Args:
        soup: Parsed page.
        base_url: Site root the image path is resolved against; defaults
            to the module-level ``url_site``.

    Returns:
        The absolute image URL, or None when the page has no image or the
        image has no ``src`` attribute.
    """
    image = soup.img
    link = image.get("src") if image is not None else None
    if not link:
        return None
    root = url_site if base_url is None else base_url
    # The link climbs out of the page directory with "../" segments;
    # dropping them leaves a path that is resolved from the site root.
    return urljoin(f"{root.rstrip('/')}/", link.replace("../", ""))


def product_description(soup):
    """Return the text of the first ``<p>`` matched by an empty class filter.

    Returns None when the page has no such paragraph.
    """
    paragraph = soup.find("p", class_="")
    if paragraph is None:
        return None
    text = paragraph.string
    # .string is None when the tag holds more than one child node; fall
    # back to the concatenated text in that case.
    return text if text is not None else paragraph.get_text()


def main():
    """Fetch the page at ``url`` and print its product information table."""
    # Only the entry point needs the HTML parser itself.
    from bs4 import BeautifulSoup

    html = extract_web(url)
    soup = BeautifulSoup(html, "html.parser")
    print(product_information(soup))


if __name__ == "__main__":
    main()