import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer
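
# Multi-model chat Space: a Gradio ChatInterface backed by ZeroGPU that keeps at
# most one of the configured 4B-7B instruct models resident in VRAM at a time.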
# --- MODEL CONFIGURATION ---
MODELS = {
"deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
"qwen3": "Qwen/Qwen3-4B-Instruct-2507",
"qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
"deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
}
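# NOTE: these keys are selected in generate() by substring-matching the dropdown
# label ("Math", "Qwen 3", "Qwen 2.5"); any other label falls back to deepseek_r1.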
# --- GLOBAL VARIABLES ---
current_model = None
current_tokenizer = None
current_model_name = None
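
# Only one model lives on the GPU at a time: load_model_safely() reuses the cached
# model when the same key is requested and frees VRAM before swapping to a new one.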
def load_model_safely(model_key):
    global current_model, current_tokenizer, current_model_name

    # Reuse the model that is already resident in VRAM
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Evict the previous model before loading a new one
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True
        )
        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")
    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise e
    return current_model, current_tokenizer
# --- CLEANUP HELPER FUNCTION (THE LIFESAVER) ---
def extract_text_content(content):
"""Garante que o conteúdo seja sempre uma string, nunca uma lista."""
if isinstance(content, str):
return content
elif isinstance(content, list):
# Se for lista, tenta extrair o texto de dentro
texts = []
for item in content:
if isinstance(item, dict) and 'text' in item:
texts.append(item['text'])
elif isinstance(item, str):
texts.append(item)
return "\n".join(texts)
elif isinstance(content, dict) and 'text' in content:
return content['text']
return str(content)
# --- GENERATION FUNCTION (ZEROGPU) ---
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
if "Math" in model_selector: key = "deepseek_math"
elif "Qwen 3" in model_selector: key = "qwen3"
elif "Qwen 2.5" in model_selector: key = "qwen2.5"
else: key = "deepseek_r1"
print(f"🤖 Berta: Usando [{key}] no APISMALL.")
try:
model, tokenizer = load_model_safely(key)
except Exception as e:
return f"⚠️ Erro ao carregar: {str(e)}"
# --- CONSTRUÇÃO DE MENSAGENS SANITIZADA ---
messages = []
# Processa o histórico
for turn in history:
role = "user"
content = ""
# Formato Antigo (Lista/Tupla)
if isinstance(turn, (list, tuple)) and len(turn) >= 2:
messages.append({"role": "user", "content": extract_text_content(turn[0])})
if turn[1]:
messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
# Formato Novo (Dicionário)
elif isinstance(turn, dict):
role = turn.get('role', 'user')
raw_content = turn.get('content', '')
# AQUI ESTÁ A MÁGICA: Convertemos qualquer coisa para string
clean_content = extract_text_content(raw_content)
messages.append({"role": role, "content": clean_content})
# Processa a mensagem atual (que também pode vir como dicionário/lista no Gradio novo)
current_content = extract_text_content(message)
messages.append({"role": "user", "content": current_content})
# Aplica template
try:
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
except Exception as e:
return f"❌ Erro de Template (Berta está investigando): {e}\nDados: {str(messages)}"
inputs = tokenizer([text], return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=2048,
temperature=0.6,
do_sample=True,
top_p=0.9
)
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
return response
# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
gr.Markdown("# 🧪 Laboratório de IA (Small & Smart)")
gr.Markdown("### Selecione o cérebro digital:")
with gr.Row():
model_dropdown = gr.Dropdown(
choices=[
"🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
"🐳 DeepSeek Math 7B (Especialista Antigo)",
"🧪 Qwen 3 4B Instruct (Experimental)",
"🌟 Qwen 2.5 7B Instruct (Clássico e Estável)"
],
value="🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
label="Escolha o Modelo",
interactive=True
)
chat = gr.ChatInterface(
fn=generate,
additional_inputs=[model_dropdown]
)
if __name__ == "__main__":
    demo.launch()