Spaces:

alrahrooh
/

cgt-llm-chatbot-v2

Runtime error

App Files Community

arahrooh committed 14 days ago

Commit

8531ebf

1 Parent(s): c9adae0

Fix: Always use Inference API on Spaces with better error handling

Browse files

Files changed (1) hide show

app.py +46 -9

app.py CHANGED Viewed

@@ -170,10 +170,17 @@ EXAMPLE_QUESTIONS = [
 class InferenceAPIBot:
     """Wrapper that uses Hugging Face Inference API instead of loading models locally"""
-    def __init__(self, bot: RAGBot, hf_token: str):
-        """Initialize with a RAGBot (for vector DB) and HF token for Inference API"""
         self.bot = bot  # Use bot for vector DB and formatting
-        self.client = InferenceClient(api_key=hf_token)
         self.current_model = bot.args.model
         # Don't set args as attribute - access via bot.args instead
         logger.info(f"InferenceAPIBot initialized with model: {self.current_model}")
@@ -304,15 +311,30 @@ class GradioRAGInterface:
     def __init__(self, initial_bot: RAGBot, use_inference_api: bool = False):
         # Check if we should use Inference API (on Spaces)
         if use_inference_api and HF_INFERENCE_AVAILABLE:
             hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
-            if hf_token:
                 self.bot = InferenceAPIBot(initial_bot, hf_token)
                 self.use_inference_api = True
-                logger.info("Using Hugging Face Inference API")
-            else:
-                logger.warning("HF_TOKEN not found, falling back to local model")
-                self.bot = initial_bot
-                self.use_inference_api = False
         else:
             self.bot = initial_bot
             self.use_inference_api = False
@@ -398,6 +420,21 @@ class GradioRAGInterface:
         if not question or not question.strip():
             return "Please enter a question.", "N/A", "", "", ""
         try:
             start_time = time.time()
             logger.info(f"Processing question: {question[:50]}...")

 class InferenceAPIBot:
     """Wrapper that uses Hugging Face Inference API instead of loading models locally"""
+    def __init__(self, bot: RAGBot, hf_token: Optional[str] = None):
+        """Initialize with a RAGBot (for vector DB) and optional HF token for Inference API"""
         self.bot = bot  # Use bot for vector DB and formatting
+        # Initialize client - token is optional for public models
+        if hf_token:
+            self.client = InferenceClient(api_key=hf_token)
+            logger.info("Using Inference API with provided token")
+        else:
+            # Try without token first (works for public models)
+            self.client = InferenceClient()
+            logger.info("Using Inference API without token (public models)")
         self.current_model = bot.args.model
         # Don't set args as attribute - access via bot.args instead
         logger.info(f"InferenceAPIBot initialized with model: {self.current_model}")
     def __init__(self, initial_bot: RAGBot, use_inference_api: bool = False):
         # Check if we should use Inference API (on Spaces)
         if use_inference_api and HF_INFERENCE_AVAILABLE:
+            # Try to get token, but it's optional for public models
+            # On Spaces, HF_TOKEN is automatically available
             hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
+            try:
                 self.bot = InferenceAPIBot(initial_bot, hf_token)
                 self.use_inference_api = True
+                if hf_token:
+                    logger.info("Using Hugging Face Inference API with token")
+                else:
+                    logger.info("Using Hugging Face Inference API without token (public models)")
+            except Exception as e:
+                logger.error(f"Failed to initialize Inference API: {e}")
+                # On Spaces, we MUST use Inference API - don't fall back to local
+                if IS_SPACES:
+                    logger.error("Cannot use local models on Spaces. Please configure HF_TOKEN.")
+                    raise RuntimeError(
+                        "Inference API initialization failed on Spaces. "
+                        "Please add HF_TOKEN as a secret in Space settings: "
+                        "https://huggingface.co/spaces/alrahrooh/cgt-llm-chatbot-v2/settings"
+                    )
+                else:
+                    logger.warning("Falling back to local model")
+                    self.bot = initial_bot
+                    self.use_inference_api = False
         else:
             self.bot = initial_bot
             self.use_inference_api = False
         if not question or not question.strip():
             return "Please enter a question.", "N/A", "", "", ""
+        # Check if we're on Spaces but not using Inference API
+        if IS_SPACES and not self.use_inference_api:
+            error_msg = """⚠️ **Configuration Error**
+This Space is not configured to use the Hugging Face Inference API.
+**To fix this:**
+1. Go to your Space settings: https://huggingface.co/spaces/alrahrooh/cgt-llm-chatbot-v2/settings
+2. Add a secret named `HF_TOKEN` with your Hugging Face token
+3. Get your token from: https://huggingface.co/settings/tokens
+4. Restart the Space
+**Note:** The Inference API is required on Spaces because we cannot load models locally."""
+            return error_msg, "N/A", "", "", ""
         try:
             start_time = time.time()
             logger.info(f"Processing question: {question[:50]}...")