# DermalCare / tools / retriever.py
# Manik Sheokand
# Download FAISS index/metadata from HF Hub at runtime if missing
# 7b2b305
import os, json, faiss, numpy as np, shutil
from pathlib import Path
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download
# Local locations of the FAISS index and its companion metadata JSON.
# Each value can be overridden through the environment variable named in the call.
INDEX_PATH = os.getenv("INDEX_PATH", "indexes/cosmetics_faiss_ip.index")
META_PATH = os.getenv("META_PATH", "indexes/cosmetics_meta.json")

# Hub repository used as the download source when the files are absent locally.
# HUB_REPO_ID wins over REPO_ID, which in turn wins over the built-in default.
HUB_REPO_ID = os.getenv(
    "HUB_REPO_ID",
    os.getenv("REPO_ID", "ColdSlim/DermalCare"),
)
HUB_REPO_TYPE = os.getenv("HUB_REPO_TYPE", "space")

# Sentence-embedding model identifier (read from EMB_MODEL_ID).
EMB_MODEL = os.getenv("EMB_MODEL_ID", "intfloat/multilingual-e5-base")

# Process-wide cache slots; they stay None until _load() fills them in.
_embedder = _index = _meta = None
def _load():
    """Return the lazily initialised ``(embedder, index, metadata)`` triple.

    Each component is created on first use and then kept in the module-level
    cache (``_embedder``, ``_index``, ``_meta``), so later calls are cheap.

    Returns:
        tuple: the ``SentenceTransformer`` built from ``EMB_MODEL``, the FAISS
        index read from ``INDEX_PATH``, and the object parsed from the JSON
        file at ``META_PATH``.
    """
    global _embedder, _index, _meta

    # The on-disk files only matter while something still has to be read from
    # them; once both are cached, skip the existence checks entirely.
    if _index is None or _meta is None:
        _ensure_index_files()

    if _embedder is None:
        _embedder = SentenceTransformer(EMB_MODEL)
    if _index is None:
        _index = faiss.read_index(INDEX_PATH)
    if _meta is None:
        # Context manager guarantees the handle is closed even if parsing fails.
        with open(META_PATH, "r", encoding="utf-8") as fh:
            _meta = json.load(fh)

    return _embedder, _index, _meta
def _ensure_index_files():
    """Ensure the FAISS index and metadata exist locally; fetch them if missing.

    For each of ``INDEX_PATH`` and ``META_PATH`` that does not exist, the file
    ``indexes/<basename>`` is downloaded from the Hub repository identified by
    ``HUB_REPO_ID`` / ``HUB_REPO_TYPE`` via ``hf_hub_download`` and copied to
    the configured local path. Files that already exist are left untouched.
    """
    # Index first, then metadata, handled identically.
    for local_path in (Path(INDEX_PATH), Path(META_PATH)):
        if local_path.exists():
            continue

        # Create the destination directory for *this* file: the two paths are
        # configured independently and need not share a parent.
        local_path.parent.mkdir(parents=True, exist_ok=True)

        cached = hf_hub_download(
            repo_id=HUB_REPO_ID,
            repo_type=HUB_REPO_TYPE,
            filename=f"indexes/{local_path.name}",
        )
        # hf_hub_download returns a path inside its own cache; copy the file to
        # the location the rest of this module reads from.
        shutil.copy2(cached, local_path)
def search(query: str, k: int = 8):
    """Return up to ``k`` metadata records nearest to ``query`` in the index.

    Args:
        query: Free-text query; it is embedded with normalised embeddings.
        k: Maximum number of hits to return. Values <= 0 yield an empty list.

    Returns:
        list: One dict per hit, in the order FAISS returned them. Each dict is
        a copy of the matching metadata entry with an added ``"_score"`` key
        holding the FAISS score as a Python ``float``.
    """
    if k <= 0:
        # Nothing was requested; avoid loading models or calling FAISS.
        return []

    emb, idx, meta = _load()

    # NOTE(review): E5-family models are documented to expect a "query: "
    # prefix on queries. Whether to add one depends on how the stored vectors
    # were built -- confirm against the indexing script before changing this.
    q = emb.encode([query], normalize_embeddings=True).astype("float32")
    scores, ids = idx.search(q, k)

    out = []
    for score, i in zip(scores[0], ids[0]):
        pos = int(i)
        if pos < 0:  # FAISS pads with -1 when fewer than k results exist
            continue
        # Build a fresh dict so the shared metadata cache is never mutated and
        # earlier results keep their own scores.
        out.append({**meta[pos], "_score": float(score)})
    return out