import gc

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- MODEL CONFIGURATION ---
MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
}

# --- GLOBAL STATE ---
# Only one model lives in VRAM at a time; these globals act as the cache.
current_model = None
current_tokenizer = None
current_model_name = None


def load_model_safely(model_key):
    """Load the requested model, evicting whatever is currently in VRAM."""
    global current_model, current_tokenizer, current_model_name

    # Cache hit: the requested model is already loaded.
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Cache miss: free the previous model before loading the new one.
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None
        current_tokenizer = None
        current_model_name = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True,
        )
        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")
    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise

    return current_model, current_tokenizer


# --- SANITIZATION HELPER (THE LIFESAVER) ---
def extract_text_content(content):
    """Guarantee the content is always a string, never a list."""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        # If it is a list, try to pull the text out of each item.
        texts = []
        for item in content:
            if isinstance(item, dict) and "text" in item:
                texts.append(item["text"])
            elif isinstance(item, str):
                texts.append(item)
        return "\n".join(texts)
    elif isinstance(content, dict) and "text" in content:
        return content["text"]
    return str(content)


# --- GENERATION FUNCTION (ZEROGPU) ---
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    # Map the dropdown label to a model key.
    if "Math" in model_selector:
        key = "deepseek_math"
    elif "Qwen 3" in model_selector:
        key = "qwen3"
    elif "Qwen 2.5" in model_selector:
        key = "qwen2.5"
    else:
        key = "deepseek_r1"

    print(f"🤖 Berta: Using [{key}] on APISMALL.")

    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        return f"⚠️ Loading error: {str(e)}"

    # --- SANITIZED MESSAGE BUILDING ---
    messages = []

    # Process the history, which Gradio may deliver in two shapes.
    for turn in history:
        # Old format (list/tuple of [user, assistant]).
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
        # New format (dict with "role"/"content").
        elif isinstance(turn, dict):
            role = turn.get("role", "user")
            raw_content = turn.get("content", "")
            # Here is the trick: coerce whatever arrives into a plain string.
            clean_content = extract_text_content(raw_content)
            messages.append({"role": role, "content": clean_content})

    # Process the current message (newer Gradio may also send it as a dict/list).
    current_content = extract_text_content(message)
    messages.append({"role": "user", "content": current_content})

    # Apply the chat template.
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception as e:
        return f"❌ Template error (Berta is investigating): {e}\nData: {str(messages)}"

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9,
    )

    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response


# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 AI Lab (Small & Smart)")
    gr.Markdown("### Pick your digital brain:")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[
                "🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
                "🐳 DeepSeek Math 7B (The Old Specialist)",
                "🧪 Qwen 3 4B Instruct (Experimental)",
                "🌟 Qwen 2.5 7B Instruct (Classic and Stable)",
            ],
            value="🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
            label="Choose a Model",
            interactive=True,
        )

    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown],
    )

if __name__ == "__main__":
    demo.launch()
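
# --- LOCAL SMOKE TEST (optional sketch) ---
# A minimal, commented-out example of calling generate() directly, without the
# Gradio UI. It assumes a CUDA-capable machine with enough VRAM for a 7B model
# in bf16, and that @spaces.GPU behaves as a no-op outside HF Spaces.
# Uncomment to try:
#
# print(generate("What is 2 + 2?", [], "🌟 Qwen 2.5 7B Instruct (Classic and Stable)"))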