import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer
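
# Multi-model chat Space: a Gradio ChatInterface backed by ZeroGPU that keeps at
# most one of the configured 4B-7B instruct models resident in VRAM at a time.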
# --- MODEL CONFIGURATION ---
MODELS = {
"deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
"qwen3": "Qwen/Qwen3-4B-Instruct-2507",
"qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
"deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
}
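# NOTE: these keys are selected in generate() by substring-matching the dropdown
# label ("Math", "Qwen 3", "Qwen 2.5"); any other label falls back to deepseek_r1.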
# --- GLOBAL VARIABLES ---
current_model = None
current_tokenizer = None
current_model_name = None
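
# Only one model lives on the GPU at a time: load_model_safely() reuses the cached
# model when the same key is requested and frees VRAM before swapping to a new one.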
def load_model_safely(model_key):
    global current_model, current_tokenizer, current_model_name

    # Reuse the model that is already resident in VRAM
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Evict the previous model before loading a new one
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True
        )
        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")
    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise e
    return current_model, current_tokenizer
# --- CLEANUP HELPER FUNCTION (THE LIFESAVER) ---
def extract_text_content(content):
"""Garante que o conteúdo seja sempre uma string, nunca uma lista."""
if isinstance(content, str):
return content
elif isinstance(content, list):
# Se for lista, tenta extrair o texto de dentro
texts = []
for item in content:
if isinstance(item, dict) and 'text' in item:
texts.append(item['text'])
elif isinstance(item, str):
texts.append(item)
return "\n".join(texts)
elif isinstance(content, dict) and 'text' in content:
return content['text']
return str(content)
# --- GENERATION FUNCTION (ZEROGPU) ---
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
if "Math" in model_selector: key = "deepseek_math"
elif "Qwen 3" in model_selector: key = "qwen3"
elif "Qwen 2.5" in model_selector: key = "qwen2.5"
else: key = "deepseek_r1"
print(f"🤖 Berta: Usando [{key}] no APISMALL.")
try:
model, tokenizer = load_model_safely(key)
except Exception as e:
return f"⚠️ Erro ao carregar: {str(e)}"
# --- CONSTRUÇÃO DE MENSAGENS SANITIZADA ---
messages = []
# Processa o histórico
for turn in history:
role = "user"
content = ""
# Formato Antigo (Lista/Tupla)
if isinstance(turn, (list, tuple)) and len(turn) >= 2:
messages.append({"role": "user", "content": extract_text_content(turn[0])})
if turn[1]:
messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
# Formato Novo (Dicionário)
elif isinstance(turn, dict):
role = turn.get('role', 'user')
raw_content = turn.get('content', '')
# AQUI ESTÁ A MÁGICA: Convertemos qualquer coisa para string
clean_content = extract_text_content(raw_content)
messages.append({"role": role, "content": clean_content})
# Processa a mensagem atual (que também pode vir como dicionário/lista no Gradio novo)
current_content = extract_text_content(message)
messages.append({"role": "user", "content": current_content})
# Aplica template
try:
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
except Exception as e:
return f"❌ Erro de Template (Berta está investigando): {e}\nDados: {str(messages)}"
inputs = tokenizer([text], return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=2048,
temperature=0.6,
do_sample=True,
top_p=0.9
)
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
return response
# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
gr.Markdown("# 🧪 Laboratório de IA (Small & Smart)")
gr.Markdown("### Selecione o cérebro digital:")
with gr.Row():
model_dropdown = gr.Dropdown(
choices=[
"🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
"🐳 DeepSeek Math 7B (Especialista Antigo)",
"🧪 Qwen 3 4B Instruct (Experimental)",
"🌟 Qwen 2.5 7B Instruct (Clássico e Estável)"
],
value="🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
label="Escolha o Modelo",
interactive=True
)
chat = gr.ChatInterface(
fn=generate,
additional_inputs=[model_dropdown]
)
if __name__ == "__main__":
    demo.launch()