working solution

This commit is contained in:
yann 2025-04-01 12:58:16 +02:00
parent 01dd080a44
commit fabf2cf56d
3 changed files with 223 additions and 47 deletions

87
README.md Normal file
View File

@ -0,0 +1,87 @@
# AlgoInvest&Trade
Choix optimal parmi une liste d'actions
## Introduction
Ces instructions vous permettent de :
- récupérer le programme,
- installer l'environnement nécessaire à son exécution,
- l'exécuter,
- en connaître le résultat.
### Pré-requis
```
paquets : python 3.11, python3.11-venv, git
modules : python requests, BeautifulSoup, csv, os
```
### Installation
Voici les étapes à suivre pour avoir un environnement d'exécution opérationnel :
créer l'environnement virtuel
```
python3.11 -m venv env
source env/bin/activate
```
cloner le dépôt, aller dans le bon dossier
```
git clone https://mcstn.fr/gitea/Yann/Projet2.git
cd Projet2/rendu
```
installer les modules
```
pip install -r requirements.txt
```
## Exécution
exécuter la commande :
```
python3 main.py
```
## Résultat
Les fichiers sont placés dans un répertoire "resultat"
Le programme récupère les catégories sur la page d'accueil de l'URL, puis, pour chaque catégorie :
1. affiche la catégorie traitée, le nombre de catégories restantes, de livres présents, traités au total et restants
2. crée un dossier du nom de la catégorie, y enregistre les images des livres nommées en fonction du titre
3. crée un fichier csv au nom de la catégorie, avec :
- product_page_url
- universal_product_code (upc)
- title
- price_including_tax
- price_excluding_tax
- number_available
- product_description
- category
- review_rating
- image_url
```
$ time python3.11 main.py
1000 à traiter répartis en 50 catégories.
[ ... ]
Traitement terminé.
real 20m17,783s
user 4m30,695s
sys 0m3,172s
```
## Auteur
Yann <yann@needsome.coffee>
## License
N/A

View File

@ -1,13 +1,5 @@
import csv
def powerset(itemList):
    """
    Build every subset (combination) of the given items
    :param itemList: an iterable of items
    :return: a list of lists, starting with the empty subset
    """
    subsets = [[]]
    for element in itemList:
        # Only the subsets that existed before this element was reached are
        # extended, so the count is frozen before appending.
        existing = len(subsets)
        for index in range(existing):
            subsets.append(subsets[index] + [element])
    return subsets
def listFromFile(csv_file):
"""
get data from a csv file and :
@ -22,61 +14,69 @@ def listFromFile(csv_file):
liste.pop(0)
for item in liste:
item[1] = int(item[1])
item[2] = float(item[2].strip("%"))
item[2] = item[1] * float(item[2].strip("%")) / 100
return liste
def splitActions(actionList):
def powerset(itemList):
"""
split list in two parts, just in case we need to divide the operation for
more efficiency
returns a tuple with two lists
Generate every subset (combination) for a given list
:param itemList: a list of items
:return: a list of combinations(lists)
"""
liste1 = []
liste2 = []
for i in range(len(actionList)):
if (i < 10):
liste1.append(actionList[i])
if (i >= 10):
liste2.append(actionList[i])
return (liste1, liste2)
result = [[]]
for item in itemList:
newsubsets = [subset + [item] for subset in result]
result.extend(newsubsets)
return result
def selectActions(actionList, max):
def transformData(dataset):
    """
    Turn raw rows into dicts enriched with computed values (gain, ratios),
    ordered by decreasing gain
    :param dataset: list of rows shaped as [name, cost, return in percent]
    :return: a list of dict sorted by 'gain', highest first
    """
    records = []
    for row in dataset:
        # Rows whose cost or return is not strictly positive are left out.
        if not (row[1] > 0.0 and row[2] > 0.0):
            continue
        cost = row[1]
        rate = row[2]
        profit = cost * rate / 100
        records.append({'nom': row[0], 'cout': cost,
                        'rendement': rate,
                        'gain': profit,
                        'ratio1': rate / cost,
                        'ratio2': profit / cost})
    records.sort(key=lambda record: record['gain'], reverse=True)
    return records
def selectActions(actionList, maximal_cost):
"""
:param actionList: takes a list of combinations and a max
:return: a list of selected combinations where cost is under max
"""
best = []
best2 = []
for i in actionList:
cout = 0
rendement = 0
cost = 0
gain = 0
for action in i:
cout += action[1]
rendement += action[2]
if cout < int(max):
best.append((rendement, cout, i))
best2.append(i)
return best, best2
cost += action[1]
gain += action[2]
if cost < int(maximal_cost):
best.append((gain, cost, i))
sortedBest = sorted(best, key=lambda k: k[0], reverse=True)
return sortedBest.pop(0)
actions = listFromFile("/home/b/Documents/OCR/projet7/actions.csv")
powerActions = powerset(actions)
selectedActions, selected = selectActions(powerActions, 500)
power_actions = powerset(actions)
selected_actions = selectActions(power_actions, 500)
print("Longueur de la liste d'actions:", len(actions))
print("Nb de combinaisons:", len(powerActions))
print("Nb de combinaisons au cout inferieur à 500:", len(selectedActions))
print("Nombre d'actions:", len(actions))
print("Nb de combinaisons:", len(power_actions))
#tri des actions sur le rendement
best_sorted = sorted(selectedActions, key=lambda k: k[0], reverse=True)
best2 = sort(selected, key=lambda k:[])
#print("\nfive last sorted :")
#for i in range(len(best_sorted)-1, len(best_sorted)-10, -1):
# print("set", i, ":", best_sorted[i])
#print(f"Rendement: {sum(x[2][1] * x[2][2]/100 for x in best_sorted[0])}")
print(selected[1])
print("Meilleur rendement:", best_sorted[0][0], "%")
print("Actions sélectionnées:")
for action in best_sorted[0][2]:
print(f"Nom: {action[0]}, Cout: {action[1]}, Rendement: {action[2]}%")
print("Gain: %.2f" % selected_actions[0])
print("Cout:", selected_actions[1], "")
print("Actions sélectionnées:", selected_actions[2:])

89
optimized.py Normal file
View File

@ -0,0 +1,89 @@
import csv
def listFromFile(csv_file):
    """
    Extract and format data from file(csv)

    The first line is treated as a header and dropped. Blank lines are
    ignored, and an empty file yields an empty list instead of raising.
    :param csv_file: full path
    :return: a list of rows [name, cost, return], with cost and return
        converted to float
    :raises ValueError: if the second or third column is not a number
    """
    # newline="" lets the csv module handle line endings itself, as its
    # documentation recommends.
    with open(csv_file, newline="") as file:
        reader = csv.reader(file)
        # Skip the header; the default keeps an empty file from raising.
        next(reader, None)
        # csv.reader yields [] for blank lines, which cannot be converted.
        liste = [row for row in reader if row]
    for item in liste:
        item[1] = float(item[1])
        item[2] = float(item[2])
    return liste
def transformData(dataset):
    """
    Convert raw rows into a list of dict carrying computed values
    (gain and two ratios), ordered from highest to lowest gain
    :param dataset: list of rows shaped as [name, cost, return in percent]
    :return: a list of dict sorted by 'gain' in descending order
    """
    enriched = []
    for entry in dataset:
        # Keep only rows with a strictly positive cost and return.
        if not (entry[1] > 0.0 and entry[2] > 0.0):
            continue
        price = entry[1]
        percent = entry[2]
        earned = price * percent / 100
        enriched.append({'nom': entry[0], 'cout': price,
                         'rendement': percent,
                         'gain': earned,
                         'ratio1': percent / price,
                         'ratio2': earned / price})
    enriched.sort(key=lambda item: item['gain'], reverse=True)
    return enriched
def sacADosFloat(actions, maximum_cost):
    """
    Solve the 0/1 knapsack problem with a dynamic programming table

    The table has one column per whole unit of budget, so each cost is
    rounded up to the next whole unit. The same rounded weights are used when
    filling the table and when walking back through it, so the returned
    selection always adds up to the returned gain.
    :param actions: a list of dict with at least the keys 'cout' and 'gain'
    :param maximum_cost: the constraint, our max cost (truncated to an int)
    :return: maximum gain: float, selected items: list
    """
    import math

    capacity = int(maximum_cost)
    n = len(actions)
    # Whole-unit weight of each action, computed once and shared by both
    # passes. Subtracting raw float costs during back-tracking would drift
    # away from the columns the table was filled with.
    weights = [math.ceil(action['cout']) for action in actions]

    # table[i][w] = best gain using the first i actions within budget w.
    table = [[0.0] * (capacity + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        weight = weights[i - 1]
        gain = actions[i - 1]['gain']
        previous = table[i - 1]
        current = table[i]
        for w in range(1, capacity + 1):
            if weight <= w:
                current[w] = max(gain + previous[w - weight], previous[w])
            else:
                current[w] = previous[w]

    # Walk back from the last action: a value that differs from the row above
    # means action i was taken at this budget.
    remaining = capacity
    selected_actions = []
    for i in range(n, 0, -1):
        if table[i][remaining] != table[i - 1][remaining]:
            selected_actions.append(actions[i - 1])
            remaining -= weights[i - 1]
    return table[n][capacity], selected_actions
# Load both datasets and convert each one into the list-of-dicts shape that
# sacADosFloat expects (transformData also drops rows whose cost or return is
# not strictly positive).
# NOTE(review): the paths are absolute and machine-specific — confirm they
# exist on the machine running the script.
actions = transformData(listFromFile("/home/b/Documents/OCR/projet7/ph3/dataset1_Python+P7.csv"))
actions2 = transformData(listFromFile("/home/b/Documents/OCR/projet7/ph3/dataset2_Python+P7.csv"))
# Budget constraint applied to both datasets.
maximum_cost = 500
# Solve each dataset independently; each call returns (best gain, selection).
maximum_gain1, selection1 = sacADosFloat(actions, maximum_cost)
maximum_gain2, selection2 = sacADosFloat(actions2, maximum_cost)
# Report for dataset 1: real (unrounded) total cost, best gain, chosen names.
print("\nDATASET 1")
print(f"Cout: {sum(x['cout'] for x in selection1):.2f}")
# Disabled alternative that recomputes the gain from cost and return.
# NOTE(review): it refers to actions_selectionnees, which is not defined in
# the visible code.
#print(f"Rendement: {sum((x['cout']*x['rendement']/100)for x in actions_selectionnees):.2f}")
print("Gain: %.2f" % maximum_gain1)
print(f"Actions sélectionnées: {[x['nom'] for x in selection1]}")
# Same report for dataset 2.
print("\nDATASET 2")
print(f"Cout: {sum(x['cout'] for x in selection2):.2f}")
# Disabled alternative, as above.
# NOTE(review): actions_selectionnees2 is not defined in the visible code.
#print(f"Rendement: {sum((x['cout']*x['rendement']/100)for x in actions_selectionnees2):.2f}")
print("Gain: %.2f" % maximum_gain2)
print(f"Actions sélectionnées: {[x['nom'] for x in selection2]}")