import gc

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- MODEL CONFIGURATION ---
MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
}

# --- GLOBAL STATE ---
# Only one model lives in VRAM at a time; these globals act as the cache.
current_model = None
current_tokenizer = None
current_model_name = None


def load_model_safely(model_key):
    """Load the requested model, evicting whatever is currently in VRAM."""
    global current_model, current_tokenizer, current_model_name

    # Cache hit: the requested model is already loaded.
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Cache miss: free the previous model before loading the new one.
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None
        current_tokenizer = None
        current_model_name = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True,
        )
        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")
    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise

    return current_model, current_tokenizer


# --- SANITIZATION HELPER (THE LIFESAVER) ---
def extract_text_content(content):
    """Guarantee the content is always a string, never a list."""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        # If it is a list, try to pull the text out of each item.
        texts = []
        for item in content:
            if isinstance(item, dict) and "text" in item:
                texts.append(item["text"])
            elif isinstance(item, str):
                texts.append(item)
        return "\n".join(texts)
    elif isinstance(content, dict) and "text" in content:
        return content["text"]
    return str(content)


# --- GENERATION FUNCTION (ZEROGPU) ---
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    # Map the dropdown label to a model key.
    if "Math" in model_selector:
        key = "deepseek_math"
    elif "Qwen 3" in model_selector:
        key = "qwen3"
    elif "Qwen 2.5" in model_selector:
        key = "qwen2.5"
    else:
        key = "deepseek_r1"

    print(f"🤖 Berta: Using [{key}] on APISMALL.")

    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        return f"⚠️ Loading error: {str(e)}"

    # --- SANITIZED MESSAGE BUILDING ---
    messages = []

    # Process the history, which Gradio may deliver in two shapes.
    for turn in history:
        # Old format (list/tuple of [user, assistant]).
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
        # New format (dict with "role"/"content").
        elif isinstance(turn, dict):
            role = turn.get("role", "user")
            raw_content = turn.get("content", "")
            # Here is the trick: coerce whatever arrives into a plain string.
            clean_content = extract_text_content(raw_content)
            messages.append({"role": role, "content": clean_content})

    # Process the current message (newer Gradio may also send it as a dict/list).
    current_content = extract_text_content(message)
    messages.append({"role": "user", "content": current_content})

    # Apply the chat template.
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception as e:
        return f"❌ Template error (Berta is investigating): {e}\nData: {str(messages)}"

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9,
    )

    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response


# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 AI Lab (Small & Smart)")
    gr.Markdown("### Pick your digital brain:")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[
                "🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
                "🐳 DeepSeek Math 7B (The Old Specialist)",
                "🧪 Qwen 3 4B Instruct (Experimental)",
                "🌟 Qwen 2.5 7B Instruct (Classic and Stable)",
            ],
            value="🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
            label="Choose a Model",
            interactive=True,
        )

    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown],
    )

if __name__ == "__main__":
    demo.launch()
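
# --- LOCAL SMOKE TEST (optional sketch) ---
# A minimal, commented-out example of calling generate() directly, without the
# Gradio UI. It assumes a CUDA-capable machine with enough VRAM for a 7B model
# in bf16, and that @spaces.GPU behaves as a no-op outside HF Spaces.
# Uncomment to try:
#
# print(generate("What is 2 + 2?", [], "🌟 Qwen 2.5 7B Instruct (Classic and Stable)"))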