diff --git a/phase2/phase1.py b/phase2/phase1.py index ae55d86..72161cd 100644 --- a/phase2/phase1.py +++ b/phase2/phase1.py @@ -32,8 +32,13 @@ def get_image_url(soup, url): # get full description as string # luckily this

was the only one without class +# and manage the case where there's no description def product_description(soup): - desc = soup.find("p", class_='').string + try: + desc = soup.find("p", class_='').string + except AttributeError: + desc = "None" + return desc # get category from breadcrumb @@ -44,16 +49,19 @@ def get_category(soup): # create a list with all information consecutively # /!\ don't know if that's the best way def get_data(soup, url): - info = [url, product_information(soup)['UPC'], - get_title(soup), - product_information(soup)['Price (incl. tax)'], - product_information(soup)['Price (excl. tax)'], - product_information(soup)['Availability'], - product_description(soup), - get_category(soup), - product_information(soup)['Number of reviews'], - get_image_url(soup, url) - ] + info = [ + url, product_information(soup)['UPC'], + get_title(soup), + product_information(soup)['Price (incl. tax)'], + product_information(soup)['Price (excl. tax)'], + product_information(soup)['Availability'], + product_description(soup), + get_category(soup), + product_information(soup)['Number of reviews'], + get_image_url(soup, url) + ] + + return info # write the file @@ -76,6 +84,7 @@ def data_output(info, file): for i in info: writer.writerow(i) + return file