Spaces:
Runtime error
Runtime error
| # tools/build_meta_only.py | |
| import os, json | |
| from pathlib import Path | |
# Paths probed, in this order, when looking for the dataset JSONL file.
DATA_CANDIDATES = [
    "data/cosmetics_full_en_zh.jsonl",
    "data/cosmetic_full_en_zh.jsonl",
    "cosmetics_full_en_zh.jsonl",
    "cosmetic_full_en_zh.jsonl",
]

# Output directory and the metadata file written inside it.
OUT_DIR = Path("indexes")
OUT_META = OUT_DIR.joinpath("cosmetics_meta.json")

# Created at import time so the output file can be opened without extra checks.
OUT_DIR.mkdir(parents=True, exist_ok=True)
def find_dataset():
    """Return the first entry of ``DATA_CANDIDATES`` that exists on disk.

    Candidates are checked in list order with ``os.path.exists``.

    Raises:
        FileNotFoundError: If none of the candidate paths exists.
    """
    existing = next(
        (candidate for candidate in DATA_CANDIDATES if os.path.exists(candidate)),
        None,
    )
    if existing is None:
        raise FileNotFoundError("Dataset JSONL not found in expected locations.")
    return existing
def main():
    """Build the metadata-only JSON file from the dataset JSONL.

    Locates the dataset with ``find_dataset()``, parses one JSON object per
    line, keeps a fixed set of fields from each record (a field absent from
    a record is stored as ``None``), and writes the resulting list to
    ``OUT_META`` as indented UTF-8 JSON with non-ASCII characters preserved.
    A one-line summary is printed when the file has been written.

    Raises:
        FileNotFoundError: Propagated from ``find_dataset()`` when no
            dataset file is found.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Fields copied from each record, in the order they appear in the output.
    fields = (
        "id",
        "brand_en",
        "brand_zh",
        "product_name_en",
        "product_name_zh",
        "category_en",
        "category_zh",
        "price_value",
        "price_currency",
        "source_url",
        "image_url",
    )

    ds = find_dataset()
    meta = []
    # The context manager guarantees the dataset handle is closed, including
    # when a line fails to parse.
    with open(ds, "r", encoding="utf-8") as src:
        for line in src:
            # Blank lines carry no record and would make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)
            meta.append({key: record.get(key) for key in fields})

    # Output is written only after every line has parsed successfully.
    with open(OUT_META, "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)
    print(f"✅ wrote {len(meta)} records -> {OUT_META}")
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()