import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
}
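# Module-level cache: only one model/tokenizer pair is kept resident at a time.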
current_model = None
current_tokenizer = None
current_model_name = None
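# Loading strategy: free whatever model is currently in VRAM before loading a
# different one, so two 7B checkpoints are never resident at the same time.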
def load_model_safely(model_key):
    global current_model, current_tokenizer, current_model_name

    # Reuse the cached model if it is already loaded.
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Release the previously loaded model before bringing in a new one.
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None
        current_tokenizer = None
        current_model_name = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True
        )

        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")

    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise e

    return current_model, current_tokenizer
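# Gradio chat history entries may arrive as plain strings, lists of message
# parts, or dicts with a 'text' field; normalize everything to a plain string.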
def extract_text_content(content):
    """Ensure the content is always a string, never a list."""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        # Collect the text from each item (dicts with a 'text' key or raw strings).
        texts = []
        for item in content:
            if isinstance(item, dict) and 'text' in item:
                texts.append(item['text'])
            elif isinstance(item, str):
                texts.append(item)
        return "\n".join(texts)
    elif isinstance(content, dict) and 'text' in content:
        return content['text']
    return str(content)
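# spaces.GPU requests a ZeroGPU slot for up to 120 seconds per call; model
# loading and text generation both have to finish inside that window.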
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    # Map the human-readable dropdown label to a MODELS key.
    if "Math" in model_selector:
        key = "deepseek_math"
    elif "Qwen 3" in model_selector:
        key = "qwen3"
    elif "Qwen 2.5" in model_selector:
        key = "qwen2.5"
    else:
        key = "deepseek_r1"

    print(f"🤖 Berta: Using [{key}] on APISMALL.")

    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        return f"⚠️ Error while loading the model: {str(e)}"

    # Rebuild the conversation in the chat-template format expected by the tokenizer.
    messages = []
    for turn in history:
        # Tuple-style history: (user_message, assistant_message).
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
        # Messages-style history: {"role": ..., "content": ...}.
        elif isinstance(turn, dict):
            role = turn.get('role', 'user')
            raw_content = turn.get('content', '')
            clean_content = extract_text_content(raw_content)
            messages.append({"role": role, "content": clean_content})

    current_content = extract_text_content(message)
    messages.append({"role": "user", "content": current_content})

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
    except Exception as e:
        return f"❌ Template error (Berta is investigating): {e}\nData: {str(messages)}"

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9
    )

    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response
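# UI: the dropdown choice is forwarded to generate() as an additional input on
# every chat turn, so the model can be switched between messages.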
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 AI Lab (Small & Smart)")
    gr.Markdown("### Select the digital brain:")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[
                "🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
                "🐳 DeepSeek Math 7B (Legacy Specialist)",
                "🧪 Qwen 3 4B Instruct (Experimental)",
                "🌟 Qwen 2.5 7B Instruct (Classic and Stable)"
            ],
            value="🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
            label="Choose the Model",
            interactive=True
        )

    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown]
    )
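# Standard entry point: launch the Gradio server when the script is run directly.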
if __name__ == "__main__":
    demo.launch()