From c5b1114e70b2943aacdc173d26973cfd82ad21ff Mon Sep 17 00:00:00 2001
From: yann <yann@needsome.coffee>
Date: Tue, 12 Nov 2024 17:56:24 +0100
Subject: [PATCH] Init, README and main with main functions

---
 README.md        |  3 +++
 phase1/README.md | 19 +++++++++++++++++++
 phase1/main.py   | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 phase1/README.md
 create mode 100644 phase1/main.py

diff --git a/README.md b/README.md
index e69de29..3fd57db 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
+Un dossier pour chaque phase du projet
+
+Avec chacun un README contenant les instructions
\ No newline at end of file
diff --git a/phase1/README.md b/phase1/README.md
new file mode 100644
index 0000000..0229516
--- /dev/null
+++ b/phase1/README.md
@@ -0,0 +1,19 @@
+# Phase 1
+
+Choisissez n'importe quelle page Produit sur le site de Books to Scrape. 
+
+Écrivez un script Python qui visite cette page et en extrait les informations suivantes : 
+
+    ● product_page_url
+    ● universal_product_code (upc)
+    ● title
+    ● price_including_tax
+    ● price_excluding_tax
+    ● number_available
+    ● product_description
+    ● category
+    ● review_rating
+    ● image_url
+
+Écrivez les données dans un fichier CSV qui utilise les champs ci-dessus comme
+en-têtes de colonnes.
\ No newline at end of file
diff --git a/phase1/main.py b/phase1/main.py
new file mode 100644
index 0000000..53f4bf7
--- /dev/null
+++ b/phase1/main.py
@@ -0,0 +1,43 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+# Product page to scrape; the __main__ block below assigns this same URL again.
+url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
+
+def extract_web(url):
+    """Fetch *url* with an HTTP GET and return the raw response body (bytes)."""
+    # Without a timeout, requests can block forever on a stalled server.
+    return requests.get(url, timeout=30).content
+
+def get_title(soup):
+    """Return the <h1> string found inside the first ``product_main`` div."""
+    return soup.find("div", class_="product_main").h1.string
+
+
+def product_information(soup):
+    """Return {<th> string: <td> string} for each row of the first <table>."""
+    rows = soup.table.find_all("tr")
+    # Dict comprehension: a repeated header keeps the value of its last row.
+    return {row.th.string: row.td.string for row in rows}
+
+def get_image_url(soup, base_url="https://books.toscrape.com"):
+    """Return base_url + '/' + the first <img> src with '../' segments removed."""
+    link = soup.img.get('src')
+    return base_url + "/" + link.replace('../', '')
+
+def product_description(soup):
+    """Return the string of the first <p> matched by the ``class_=''`` filter."""
+    return soup.find("p", class_='').string
+
+if __name__ == '__main__':
+    # Site root and the single product page this script scrapes.
+    url_site = "https://books.toscrape.com"
+    url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
+
+    # Download the page, parse it, and print the product-information table.
+    soup = BeautifulSoup(extract_web(url), "html.parser")
+    print(product_information(soup))
+
+
+
+