diff --git a/phase3/main.py b/phase3/main.py index b6492d1..5a16d30 100644 --- a/phase3/main.py +++ b/phase3/main.py @@ -139,13 +139,17 @@ def main(): # get html from URL soup = get_html(url) + # init counters + total_books = int(soup.form.strong.text) + processed_books = 0 + # go ahead for each category for line in get_category_list(soup, url): category = line[0] category_url = line[1] # display what category is processed - print("Traitement de la catégorie : " + category) + print("\n Traitement de la catégorie : " + category) # check if multiple pages and create a URL list url_list = check_for_pages(category_url) @@ -156,6 +160,7 @@ def main(): product_url_list.extend(get_product_url_list(i, url)) # print("Liste des URL des produits: ", product_url_list) print("Nombre de livres: ", len(product_url_list)) + processed_books += len(product_url_list) # combine with phase 1 and write in csv for each url from product_url_list named with category data = [] @@ -167,8 +172,11 @@ def main(): data.append(get_data(page_soup, page_url)) print("Done.\n Fichier " + data_output(data, category)) + print("Nombre total de livres traités : ", processed_books) + print("Livres restants à traiter : ", total_books - processed_books) - print("Traitement terminé.") + + print("\n Traitement terminé.") if __name__ == '__main__': main()