From ebd5f5acd41100c786760c0e121a01958d04c94c Mon Sep 17 00:00:00 2001 From: yann Date: Thu, 14 Nov 2024 14:07:04 +0100 Subject: [PATCH] Works: display counters for processed and remaining books --- phase3/main.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/phase3/main.py b/phase3/main.py index b6492d1..5a16d30 100644 --- a/phase3/main.py +++ b/phase3/main.py @@ -139,13 +139,17 @@ def main(): # get html from URL soup = get_html(url) + # init counters + total_books = int(soup.form.strong.text) + processed_books = 0 + # go ahead for each category for line in get_category_list(soup, url): category = line[0] category_url = line[1] # display what category is processed - print("Traitement de la catégorie : " + category) + print("\n Traitement de la catégorie : " + category) # check if multiple pages and create a URL list url_list = check_for_pages(category_url) @@ -156,6 +160,7 @@ def main(): product_url_list.extend(get_product_url_list(i, url)) # print("Liste des URL des produits: ", product_url_list) print("Nombre de livres: ", len(product_url_list)) + processed_books += len(product_url_list) # combine with phase 1 and write in csv for each url from product_url_list named with category data = [] @@ -167,8 +172,11 @@ def main(): data.append(get_data(page_soup, page_url)) print("Done.\n Fichier " + data_output(data, category)) + print("Nombre total de livres traités : ", processed_books) + print("Livres restants à traiter : ", total_books - processed_books) - print("Traitement terminé.") + + print("\n Traitement terminé.") if __name__ == '__main__': main()