import json
import os
import shutil
from pathlib import Path

import faiss
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer

# Paths and model ids are overridable via environment variables.
INDEX_PATH = os.environ.get("INDEX_PATH", "indexes/cosmetics_faiss_ip.index")
META_PATH = os.environ.get("META_PATH", "indexes/cosmetics_meta.json")
HUB_REPO_ID = os.environ.get("HUB_REPO_ID", os.environ.get("REPO_ID", "ColdSlim/DermalCare"))
HUB_REPO_TYPE = os.environ.get("HUB_REPO_TYPE", "space")
EMB_MODEL = os.environ.get("EMB_MODEL_ID", "intfloat/multilingual-e5-base")

# Lazily initialized singletons, loaded on first search.
_embedder = None
_index = None
_meta = None


def _load():
    """Load the embedder, FAISS index, and metadata once, then reuse them."""
    global _embedder, _index, _meta
    _ensure_index_files()
    if _embedder is None:
        _embedder = SentenceTransformer(EMB_MODEL)
    if _index is None:
        _index = faiss.read_index(INDEX_PATH)
    if _meta is None:
        with open(META_PATH, "r", encoding="utf-8") as f:
            _meta = json.load(f)
    return _embedder, _index, _meta


def _ensure_index_files():
    """Ensure the FAISS index and metadata exist locally; if missing, download them.

    Files are fetched from the Space repository's LFS storage using huggingface_hub.
    """
    index_path = Path(INDEX_PATH)
    meta_path = Path(META_PATH)
    index_path.parent.mkdir(parents=True, exist_ok=True)

    # Download index if missing.
    if not index_path.exists():
        cached = hf_hub_download(
            repo_id=HUB_REPO_ID,
            repo_type=HUB_REPO_TYPE,
            filename=f"indexes/{index_path.name}",
        )
        shutil.copy2(cached, index_path)

    # Download metadata if missing.
    if not meta_path.exists():
        cached_meta = hf_hub_download(
            repo_id=HUB_REPO_ID,
            repo_type=HUB_REPO_TYPE,
            filename=f"indexes/{meta_path.name}",
        )
        shutil.copy2(cached_meta, meta_path)


def search(query: str, k: int = 8):
    """Return the top-k metadata records for a query, each tagged with a "_score"."""
    emb, idx, meta = _load()
    # Normalized embeddings make inner-product search equivalent to cosine similarity.
    q = emb.encode([query], normalize_embeddings=True).astype("float32")
    D, I = idx.search(q, k)
    out = []
    for score, i in zip(D[0], I[0]):
        if int(i) < 0:  # FAISS pads with -1 when the index holds fewer than k entries
            continue
        m = dict(meta[int(i)])  # copy so scores do not leak into the cached metadata
        m["_score"] = float(score)
        out.append(m)
    return out
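
# --- Usage sketch (not part of the original module) ---
# A minimal smoke test, assuming the index and metadata files are present
# locally or downloadable from the Hub repo configured above; the query
# string here is purely illustrative.
if __name__ == "__main__":
    for hit in search("gentle cleanser for sensitive skin", k=3):
        print(f"{hit['_score']:.3f}  {hit}")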