44 lines
965 B
Python
44 lines
965 B
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
import csv
|
|
|
|
# Product page used as the sample input for the scraping helpers below.
# The same address is assigned again inside the ``__main__`` guard.
url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"
|
|
|
|
def extract_web(url, timeout=10):
    """Download the page at *url* and return its raw body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of handing an error page
    # to the HTML parser as if it were a product page.
    response.raise_for_status()
    return response.content
|
|
|
|
def get_title(soup):
    """Return the text of the ``<h1>`` inside the ``product_main`` div.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find`` API.

    Returns:
        The ``.string`` of the heading found in the first ``<div>`` whose
        class matches ``product_main``.
    """
    product_block = soup.find("div", class_="product_main")
    heading = product_block.h1
    return heading.string
|
|
|
|
|
|
def product_information(soup):
    """Collect the rows of the page's table into a dictionary.

    Args:
        soup: Parsed document; its ``table`` attribute must provide
            ``find_all``.

    Returns:
        A dict mapping each row's ``<th>`` string to that row's ``<td>``
        string, in row order. A later row with the same header overwrites
        an earlier one.
    """
    rows = soup.table.find_all("tr")
    return {row.th.string: row.td.string for row in rows}
|
|
|
|
def get_image_url(soup, base_url="https://books.toscrape.com"):
    """Build the absolute URL of the first image in the document.

    The function used to read a module global ``url_site`` that is only
    assigned inside the ``__main__`` guard, so calling it from an
    importing module raised ``NameError``. The site root is now an
    explicit parameter whose default is the value the script used.

    Args:
        soup: Parsed document; ``soup.img`` must expose ``get('src')``.
        base_url: Site root that the image path is appended to. A
            trailing slash is tolerated.

    Returns:
        ``base_url`` joined with the image ``src`` after every ``../``
        segment has been stripped from it.
    """
    link = soup.img.get('src')
    # The page links its image relatively (e.g. ``../../media/...``);
    # dropping the ``../`` hops leaves a path relative to the site root.
    relative_path = link.replace('../', '')
    return base_url.rstrip("/") + "/" + relative_path
|
|
|
|
def product_description(soup):
    """Return the string of the first ``<p>`` matched with an empty class.

    Args:
        soup: Parsed document exposing BeautifulSoup's ``find`` API.

    Returns:
        The ``.string`` of the element returned by
        ``soup.find("p", class_='')``.
    """
    # NOTE(review): the empty class filter presumably targets the one
    # paragraph without a CSS class (the description) - confirm against
    # the page markup and the installed BeautifulSoup version.
    paragraph = soup.find("p", class_='')
    return paragraph.string
|
|
|
|
if __name__ == '__main__':
    # Script entry point: fetch one product page and print its info table.

    # Site root; get_image_url() reads this module-level name.
    url_site = "https://books.toscrape.com"
    url = "https://books.toscrape.com/catalogue/set-me-free_988/index.html"

    # Download the page and parse it with the built-in HTML parser.
    soup = BeautifulSoup(extract_web(url), "html.parser")

    info = product_information(soup)
    print(info)
|
|
|
|
|
|
|
|
|