Spaces:
Runtime error
Runtime error
| import pandas as pd | |
def load_data(catalog):
    """Read the product catalog spreadsheet into a DataFrame.

    NOTE(review): the ``catalog`` argument is accepted but never used; the
    function always reads the hard-coded ``catalog_1k.xlsx`` workbook.
    Confirm with callers before turning the argument into a path.

    Args:
        catalog: Ignored (kept so existing call sites keep working).

    Returns:
        The DataFrame produced by ``pd.read_excel('catalog_1k.xlsx')``.
    """
    return pd.read_excel('catalog_1k.xlsx')
def preprocess_data(catalog):
    """Return a cleaned copy of the raw catalog, ready for display/search.

    The input frame is left untouched: all work happens on a copy, so the
    function can safely be called more than once on the same raw frame
    (re-mapping already-mapped gender labels would otherwise turn every
    value into NaN and drop all rows).

    Steps, in order:
      * strip newline characters from ``Description``;
      * convert ``Id`` to nullable ``Int64`` (unparseable values become NA);
      * map ``Gender`` codes 1/2/3 to 'Women'/'Men'/'Unisex' (any other
        value becomes NaN);
      * drop the ``L3`` column;
      * drop rows whose gender could not be mapped;
      * keep only the last comma-separated entry of ``Image``;
      * add ``SimpleMetadata``: the ``L1``, ``L2``, ``Gender``,
        ``MaterialName``, ``BrandName`` and ``Name`` values joined by ', '.

    Args:
        catalog: DataFrame containing at least the columns named above.

    Returns:
        A new DataFrame with the transformations applied.
    """
    # Work on a copy so the caller's DataFrame is never modified.
    catalog = catalog.copy()

    # Clean description (literal replacement, independent of pandas' default
    # for the ``regex`` flag).
    catalog['Description'] = catalog['Description'].str.replace(
        '\n', '', regex=False
    )

    # Id column to (nullable) integer.
    catalog['Id'] = pd.to_numeric(catalog['Id'], errors='coerce').astype('Int64')

    # Map gender codes to labels; unknown codes become NaN.
    catalog['Gender'] = catalog['Gender'].map({1: 'Women', 2: 'Men', 3: 'Unisex'})

    # Drop sub-sub-categories, then items without gender. The trailing copy
    # gives the assignments below an independent frame to write into.
    catalog = catalog.drop(columns=['L3']).dropna(subset=['Gender']).copy()

    # Use the last link in the comma-separated list as the image.
    catalog['Image'] = catalog['Image'].str.split(',').str[-1]

    # Convert the columns to strings before joining them.
    metadata_columns = ["L1", "L2", "Gender", "MaterialName", "BrandName", "Name"]
    catalog["SimpleMetadata"] = (
        catalog[metadata_columns].astype(str).agg(', '.join, axis=1)
    )
    return catalog