Use a single loop to fill the list: "extend" it with each page's product list
This commit is contained in:
parent
c92ce51aa0
commit
50ca4fccd8
@ -1,7 +1,7 @@
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
# récupère le soup depuis l'url
|
# get soup from url
|
||||||
def get_html(url):
|
def get_html(url):
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
html = r.content
|
html = r.content
|
||||||
@ -35,16 +35,18 @@ def check_for_pages(category_url):
|
|||||||
return url_list
|
return url_list
|
||||||
|
|
||||||
|
|
||||||
def get_product_list(url_category_page, url):
|
# get the list of product URLs from a given category page URL;
|
||||||
list = []
|
# extract each relative link and build the full product URL from the main URL (second arg)
|
||||||
|
def get_product_url_list(url_category_page, url):
|
||||||
|
liste = []
|
||||||
soup = get_html(url_category_page)
|
soup = get_html(url_category_page)
|
||||||
|
|
||||||
for i in soup.find_all("article"):
|
for i in soup.find_all("article"):
|
||||||
relative_url = i.h3.a.get('href')
|
relative_url = i.h3.a.get('href')
|
||||||
product_url = url + relative_url.split('../')[-1]
|
product_url = url + relative_url.split('../')[-1]
|
||||||
list.append(product_url)
|
liste.append(product_url)
|
||||||
|
|
||||||
return(list)
|
return liste
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# init
|
# init
|
||||||
@ -61,18 +63,18 @@ def main():
|
|||||||
|
|
||||||
# check if multiple page and get url list
|
# check if multiple page and get url list
|
||||||
url_list = check_for_pages(category_url)
|
url_list = check_for_pages(category_url)
|
||||||
|
|
||||||
print("Liste des URLs des pages: ", url_list)
|
print("Liste des URLs des pages: ", url_list)
|
||||||
# get product list for every url_list
|
|
||||||
product_list_url = []
|
# get the product URL list for each page in url_list and extend the main product URL list with it
|
||||||
|
product_url_list = []
|
||||||
for i in url_list:
|
for i in url_list:
|
||||||
get_list = get_product_list(i, url)
|
product_url_list.extend(get_product_url_list(i, url))
|
||||||
for j in get_list:
|
|
||||||
product_list_url.append(j)
|
|
||||||
|
|
||||||
|
|
||||||
print("Liste des URL des produits: ", product_list_url)
|
|
||||||
print("Longueur de la liste: ", len(product_list_url))
|
|
||||||
|
print("Liste des URL des produits: ", product_url_list)
|
||||||
|
print("Longueur de la liste: ", len(product_url_list))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user