From fabf2cf56d96c87be4d6b1a7f472108fa295bd37 Mon Sep 17 00:00:00 2001 From: yann Date: Tue, 1 Apr 2025 12:58:16 +0200 Subject: [PATCH] working solution --- README.md | 87 +++++++++++++++++++++++++++++++++++++++++++++++ bruteforce.py | 94 +++++++++++++++++++++++++-------------------------- optimized.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 47 deletions(-) create mode 100644 README.md create mode 100644 optimized.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..c0c192d --- /dev/null +++ b/README.md @@ -0,0 +1,87 @@ +# AlgoInvest&Trade + +Choix optimal parmi une liste d'actions + +## Introduction + +Ces instructions vous permettent de : +- récupérer le programme, +- d'installer l'environnement nécessaire à son exécution, +- de l'exécuter, +- d'en connaitre le résultat + + +### Pré-requis + +``` +paquets : python 3.11, python3.11-venv, git +modules : python requests, BeautifulSoup, csv, os +``` + +### Installation + +Voici les étapes à suivre pour avoir un environnement d'exécution opérationnel : + +créer l'environnement virtuel + +``` +python3.11 -m venv env +source env/bin/activate +``` +cloner le dépôt, aller dans le bon dossier +``` +git clone https://mcstn.fr/gitea/Yann/Projet2.git +cd Projet2/rendu +``` +installer les modules +``` +pip install -r requirements.txt +``` + +## Exécution + +exécuter la commande : +``` +python3 main.py +``` + +## Résultat + +Les fichiers sont placés dans un répertoire "resultat" + +Le programme récupère les catégories sur la page d'accueil de l'URL, puis, pour chaque catégorie : +1. affiche la catégorie traitée, le nombre de catégories restantes, de livres présents, traités au total et restants +2. crée un dossier du nom de la catégorie, y enregistre les images des livres nommées en fonction du titre +3. 
def powerset(itemList):
    """
    Build the power set of the given items.

    Subsets are produced in "doubling" order: for every new item, each
    subset known so far is duplicated with that item appended, so the
    result for ``[a, b]`` is ``[[], [a], [b], [a, b]]``.

    :param itemList: an iterable of items
    :return: a list holding every combination (each one a list)
    """
    subsets = [[]]
    for element in itemList:
        # Iterate over a snapshot: the list grows while we append to it.
        for existing in list(subsets):
            subsets.append(existing + [element])
    return subsets
def selectActions(actionList, maximal_cost):
    """
    Pick the most profitable combination that fits in the budget.

    Each action is a sequence whose index 1 is its cost and index 2 its
    gain. A combination qualifies when its total cost is lower than or
    equal to the budget (spending exactly the budget is allowed, which is
    the same rule the dynamic-programming solver applies).

    :param actionList: an iterable of combinations (lists of actions)
    :param maximal_cost: the budget; a number or a numeric string
    :return: a tuple (gain, cost, combination) for the best combination;
             (0, 0, []) when no combination fits in the budget
    """
    # float() keeps a fractional budget intact and still accepts "500".
    budget = float(maximal_cost)

    best = None
    for combination in actionList:
        cost = 0
        gain = 0
        for action in combination:
            cost += action[1]
            gain += action[2]
        if cost > budget:
            continue
        # Strict '>' keeps the first combination met on equal gains, as a
        # stable descending sort would.
        if best is None or gain > best[0]:
            best = (gain, cost, combination)

    # Buying nothing is the neutral answer when nothing is affordable.
    return best if best is not None else (0, 0, [])
def sacADosFloat(actions, maximum_cost):
    """
    Solve the 0/1 knapsack problem with a dynamic-programming table.

    The table has one column per whole unit of budget. A fractional cost
    therefore consumes a whole number of columns (the remaining capacity
    is floored), so with non-integer costs the result is always affordable
    but may be slightly below the true optimum.

    :param actions: a list of dict with at least the keys 'cout' and 'gain'
    :param maximum_cost: the constraint, our max cost
    :return: (gain, selected items) where gain is the summed 'gain' of the
             returned items and selected items is a list of the chosen dicts
    """
    if maximum_cost < 0:
        # No budget at all: nothing can be bought.
        return 0.0, []

    n = len(actions)
    capacity = int(maximum_cost)
    # Row 0 and column 0 stay at 0.0: no item, or no budget, means no gain.
    table = [[0.0] * (capacity + 1) for _ in range(n + 1)]

    # Dynamic programming table
    for i in range(1, n + 1):
        cost = actions[i - 1]['cout']
        gain = actions[i - 1]['gain']
        previous = table[i - 1]
        current = table[i]
        for w in range(1, capacity + 1):
            if cost <= w:
                current[w] = max(gain + previous[int(w - cost)], previous[w])
            else:
                current[w] = previous[w]

    # Walk the table backwards to find which items were taken.
    # 'remaining' tracks the real (unfloored) budget left, so the selection
    # can never exceed maximum_cost.
    remaining = maximum_cost
    selected_actions = []
    for i in range(n, 0, -1):
        column = int(remaining)
        if table[i][column] != table[i - 1][column]:
            selected_actions.append(actions[i - 1])
            remaining -= actions[i - 1]['cout']

    # With fractional costs the walk above may pick up an extra item that
    # the floored table missed, so table[n][capacity] can understate the
    # selection. Report the gain of what is actually returned. Summing in
    # table order makes this identical to the table value for integer costs.
    total_gain = sum((a['gain'] for a in reversed(selected_actions)), 0.0)
    return total_gain, selected_actions
transformData(listFromFile("/home/b/Documents/OCR/projet7/ph3/dataset1_Python+P7.csv")) +actions2 = transformData(listFromFile("/home/b/Documents/OCR/projet7/ph3/dataset2_Python+P7.csv")) + + + +maximum_cost = 500 + +maximum_gain1, selection1 = sacADosFloat(actions, maximum_cost) +maximum_gain2, selection2 = sacADosFloat(actions2, maximum_cost) + +print("\nDATASET 1") +print(f"Cout: {sum(x['cout'] for x in selection1):.2f}") +#print(f"Rendement: {sum((x['cout']*x['rendement']/100)for x in actions_selectionnees):.2f}") +print("Gain: %.2f" % maximum_gain1) +print(f"Actions sélectionnées: {[x['nom'] for x in selection1]}") + +print("\nDATASET 2") +print(f"Cout: {sum(x['cout'] for x in selection2):.2f}") +#print(f"Rendement: {sum((x['cout']*x['rendement']/100)for x in actions_selectionnees2):.2f}") +print("Gain: %.2f" % maximum_gain2) +print(f"Actions sélectionnées: {[x['nom'] for x in selection2]}")