# APISMALL / app.py
# Madras1's picture
# Update app.py
# b7166a7 verified
import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer
# --- MODEL CONFIGURATION ---
# Short internal key -> Hugging Face Hub repository id. The keys are what
# `load_model_safely` receives; the values are what it hands to
# `from_pretrained`.
MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
}

# --- GLOBAL STATE ---
# Single-slot cache for the model currently held in memory: the model object,
# its tokenizer, and the MODELS key it was loaded under. All three stay None
# until the first successful load.
current_model = current_tokenizer = current_model_name = None
def load_model_safely(model_key):
    """Return the ``(model, tokenizer)`` pair for ``model_key``, loading it if needed.

    Only one model is kept in memory at a time. If the requested key is the
    one already loaded, the cached pair is returned. Otherwise the current
    model (if any) is released, the CUDA cache is emptied, and the requested
    model is loaded in bfloat16 with ``device_map="cuda"``.

    Args:
        model_key: A key of the module-level ``MODELS`` mapping.

    Returns:
        A tuple ``(model, tokenizer)``.

    Raises:
        KeyError: If ``model_key`` is not a key of ``MODELS``. The currently
            loaded model is left untouched in that case.
        Exception: Whatever ``from_pretrained`` raises; it is logged and
            re-raised. The cache is left empty (all three globals are None).
    """
    global current_model, current_tokenizer, current_model_name

    # Cache hit: the requested model is already resident.
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Resolve the id before evicting anything, so an unknown key does not
    # throw away a perfectly good loaded model.
    model_id = MODELS[model_key]

    if current_model is not None:
        print(f"🧹 Berta: Limpando VRAM ({current_model_name})...")
        # Rebind to None instead of `del`: this drops the references just the
        # same, but keeps the module-level names defined and keeps the three
        # cache variables consistent even if the load below fails.
        current_model = None
        current_tokenizer = None
        current_model_name = None
        gc.collect()
        torch.cuda.empty_cache()

    print(f"🐢 Carregando {model_id} na VRAM...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True,
        )
    except Exception as e:
        print(f"❌ Erro crítico: {e}")
        # Bare `raise` re-raises the active exception with its original traceback.
        raise

    # Publish to the cache only after both pieces loaded successfully.
    current_model = model
    current_tokenizer = tokenizer
    current_model_name = model_key
    print(f"✅ {model_id} carregado com sucesso!")
    return current_model, current_tokenizer
# --- CONTENT NORMALISATION HELPER ---
def extract_text_content(content):
    """Flatten a chat message payload into a plain string.

    Chat history entries may arrive as a plain string, as a dict carrying a
    ``'text'`` entry, or as a list mixing strings and such dicts. This
    function always returns a ``str``:

    * ``None`` -> ``""``
    * ``str`` -> returned unchanged
    * dict with a ``'text'`` key -> the text, itself normalised
    * list -> the text of every string item and every dict item that has a
      ``'text'`` key, joined with newlines; other items are skipped
    * anything else -> ``str(content)``

    Args:
        content: The raw message payload.

    Returns:
        The textual content as a string (never a list, dict or None).
    """
    if content is None:
        # Avoid feeding the literal string "None" to the model.
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict) and 'text' in content:
        # Recurse so a non-string 'text' value (None, number, nested list)
        # still comes back as a string.
        return extract_text_content(content['text'])
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict) and 'text' in item:
                # Normalise each piece so "\n".join never sees a non-string.
                parts.append(extract_text_content(item['text']))
        return "\n".join(parts)
    return str(content)
# --- GENERATION FUNCTION (ZEROGPU) ---
def _resolve_model_key(model_selector):
    """Map a dropdown label to its ``MODELS`` key by substring match."""
    # A non-string selector (e.g. None from a direct API call) falls through
    # to the default instead of raising on the `in` test.
    label = model_selector if isinstance(model_selector, str) else ""
    if "Math" in label:
        return "deepseek_math"
    if "Qwen 3" in label:
        return "qwen3"
    if "Qwen 2.5" in label:
        return "qwen2.5"
    # Every other label, including the R1 entry, selects the R1 distill model.
    return "deepseek_r1"


def _build_messages(message, history):
    """Convert chat history plus the new message into role/content dicts of plain strings."""
    messages = []
    for turn in history or []:
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            # Pair format: (user_text, assistant_text).
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
        elif isinstance(turn, dict):
            # Dict format: {"role": ..., "content": ...}; content may be a
            # string, list or dict, so it is flattened to a string.
            messages.append({
                "role": turn.get('role', 'user'),
                "content": extract_text_content(turn.get('content', '')),
            })
    # The current message may also arrive as a dict or list.
    messages.append({"role": "user", "content": extract_text_content(message)})
    return messages


# `duration=120` is forwarded to `spaces.GPU`; presumably the GPU time budget
# per call in seconds -- confirm against the `spaces` documentation.
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    """Generate the assistant's reply for a chat turn.

    Args:
        message: The new user message (string, or a dict/list carrying text).
        history: Previous turns, either as (user, assistant) pairs or as
            ``{"role", "content"}`` dicts. ``None`` is treated as empty.
        model_selector: The dropdown label identifying the model to use.

    Returns:
        The decoded model completion, or a human-readable error string if the
        model could not be loaded or the chat template could not be applied.
    """
    key = _resolve_model_key(model_selector)
    print(f"🤖 Berta: Usando [{key}] no APISMALL.")

    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        # Report load failures in the chat instead of crashing the request.
        return f"⚠️ Erro ao carregar: {str(e)}"

    messages = _build_messages(message, history)

    # Render the conversation with the tokenizer's chat template.
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
    except Exception as e:
        return f"❌ Erro de Template (Berta está investigando): {e}\nDados: {str(messages)}"

    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9
    )

    # The output sequence starts with the prompt tokens; decode only what
    # comes after them.
    prompt_len = inputs.input_ids.shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response
# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Laboratório de IA (Small & Smart)")
    gr.Markdown("### Selecione o cérebro digital:")

    # Labels offered in the dropdown. `generate` chooses the model by looking
    # for substrings ("Math", "Qwen 3", "Qwen 2.5") in the selected label, so
    # keep the two in sync. The first entry is the default selection.
    _model_labels = [
        "🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
        "🐳 DeepSeek Math 7B (Especialista Antigo)",
        "🧪 Qwen 3 4B Instruct (Experimental)",
        "🌟 Qwen 2.5 7B Instruct (Clássico e Estável)",
    ]

    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Escolha o Modelo",
            choices=_model_labels,
            value=_model_labels[0],
            interactive=True,
        )

    # The dropdown value is passed to `generate` as its third argument.
    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown],
    )

if __name__ == "__main__":
    demo.launch()