Update app.py
app.py (CHANGED)
@@ -1,7 +1,7 @@
 import gradio as gr
 import spaces
 import torch
-import gc
+import gc
 from transformers import AutoModelForCausalLM, AutoTokenizer

 # --- MODEL CONFIGURATION ---

@@ -13,7 +13,6 @@ MODELS = {
 }

 # --- GLOBAL VARIABLES ---
-# Berta: on APISMALL we can only have ONE king on the throne at a time.
 current_model = None
 current_tokenizer = None
 current_model_name = None

@@ -21,21 +20,17 @@ current_model_name = None
 def load_model_safely(model_key):
     global current_model, current_tokenizer, current_model_name

-    # If it's already the right model, do nothing (cache hit)
     if current_model_name == model_key and current_model is not None:
         return current_model, current_tokenizer

-    # --- MOMMY'S CLEANUP ---
-    # If another model is occupying space, we evict it.
     if current_model is not None:
-        print(f"🧹 Berta:
+        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
         del current_model
         del current_tokenizer
         gc.collect()
-        torch.cuda.empty_cache()
+        torch.cuda.empty_cache()
         current_model = None

-    # --- LOADING ---
     model_id = MODELS[model_key]
     print(f"🐢 Loading {model_id} into VRAM...")

@@ -54,15 +49,32 @@ def load_model_safely(model_key):
         print(f"✅ {model_id} loaded successfully!")

     except Exception as e:
-        print(f"❌ Critical error
+        print(f"❌ Critical error: {e}")
         raise e

     return current_model, current_tokenizer

+# --- CLEANUP HELPER FUNCTION (THE LIFESAVER) ---
+def extract_text_content(content):
+    """Ensure the content is always a string, never a list."""
+    if isinstance(content, str):
+        return content
+    elif isinstance(content, list):
+        # If it is a list, try to extract the text from inside it
+        texts = []
+        for item in content:
+            if isinstance(item, dict) and 'text' in item:
+                texts.append(item['text'])
+            elif isinstance(item, str):
+                texts.append(item)
+        return "\n".join(texts)
+    elif isinstance(content, dict) and 'text' in content:
+        return content['text']
+    return str(content)
+
 # --- GENERATION FUNCTION (ZEROGPU) ---
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=120)
 def generate(message, history, model_selector):
-    # Mapping the names
     if "Math" in model_selector: key = "deepseek_math"
     elif "Qwen 3" in model_selector: key = "qwen3"
     elif "Qwen 2.5" in model_selector: key = "qwen2.5"

@@ -73,30 +85,43 @@ def generate(message, history, model_selector):
     try:
         model, tokenizer = load_model_safely(key)
     except Exception as e:
-        return f"⚠️ Error loading
+        return f"⚠️ Error loading: {str(e)}"

-    # ---
+    # --- SANITIZED MESSAGE CONSTRUCTION ---
     messages = []

+    # Process the history
     for turn in history:
-
+        role = "user"
+        content = ""
+
+        # Old format (list/tuple)
         if isinstance(turn, (list, tuple)) and len(turn) >= 2:
-            messages.append({"role": "user", "content":
+            messages.append({"role": "user", "content": extract_text_content(turn[0])})
             if turn[1]:
-                messages.append({"role": "assistant", "content":
-
+                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
+
+        # New format (dictionary)
         elif isinstance(turn, dict):
-
-
-
-
+            role = turn.get('role', 'user')
+            raw_content = turn.get('content', '')
+            # HERE IS THE MAGIC: we convert anything into a string
+            clean_content = extract_text_content(raw_content)
+            messages.append({"role": role, "content": clean_content})
+
+    # Process the current message (it can also arrive as a dict/list in newer Gradio)
+    current_content = extract_text_content(message)
+    messages.append({"role": "user", "content": current_content})

     # Apply the chat template
-
-
-
-
-
+    try:
+        text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+    except Exception as e:
+        return f"❌ Template error (Berta is investigating): {e}\nData: {str(messages)}"

     inputs = tokenizer([text], return_tensors="pt").to(model.device)
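
As a quick sanity check on the new helper, here is a minimal usage sketch (not part of the commit) showing how extract_text_content flattens the content shapes Gradio can pass through history: a plain string, a list of content parts, and a dict carrying a 'text' key. The sample inputs are invented for illustration and assume the function defined in the diff above is in scope.

    # Hypothetical inputs; the results follow from the isinstance branches above.
    print(extract_text_content("2 + 2 = ?"))                                # "2 + 2 = ?"
    print(extract_text_content([{"type": "text", "text": "hi"}, "there"]))  # "hi\nthere" (parts joined with newlines)
    print(extract_text_content({"text": "dict content"}))                   # "dict content"
    print(extract_text_content(42))                                         # "42" (fallback via str())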