import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer

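# Gradio chat demo for a Hugging Face ZeroGPU Space: four small instruct
# models share one GPU, and only a single model is kept in VRAM at a time.
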
# --- MODEL CONFIGURATION ---
MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct", 
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",             
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",              
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" 
}
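# All four checkpoints are in the 4B-7B range, so a single one in bf16
# (roughly 8-15 GB of weights) should fit on the Space's GPU; the loader
# below keeps at most one resident at a time.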

# --- GLOBAL VARIABLES ---
current_model = None
current_tokenizer = None
current_model_name = None

def load_model_safely(model_key):
    global current_model, current_tokenizer, current_model_name
    
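    # Cache hit: the requested model is already in VRAM, reuse it as-is.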
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer
    
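    # A different model was requested: drop references to the old one and
    # force CUDA to release its VRAM before loading the replacement.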
    if current_model is not None:
        print(f"🧹 Berta: Limpando VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None
        current_tokenizer = None
        current_model_name = None
    
    model_id = MODELS[model_key]
    print(f"🐢 Carregando {model_id} na VRAM...")
    
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True
        )
        
        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} carregado com sucesso!")
        
    except Exception as e:
        print(f"❌ Erro crítico: {e}")
        raise e
        
    return current_model, current_tokenizer

# --- SANITIZATION HELPER (THE LIFESAVER) ---
def extract_text_content(content):
    """Garante que o conteúdo seja sempre uma string, nunca uma lista."""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        # If it's a list, try to extract the text from inside
        texts = []
        for item in content:
            if isinstance(item, dict) and 'text' in item:
                texts.append(item['text'])
            elif isinstance(item, str):
                texts.append(item)
        return "\n".join(texts)
    elif isinstance(content, dict) and 'text' in content:
         return content['text']
    return str(content)
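
# Example: a multimodal payload such as {"text": "hi", "files": [...]} or a
# content list like [{"text": "hi"}] is flattened by the helper above to "hi".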

# --- GENERATION FUNCTION (ZEROGPU) ---
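# spaces.GPU requests a ZeroGPU slice only while this function runs, for at
# most `duration` seconds per call.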
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    if "Math" in model_selector: key = "deepseek_math"
    elif "Qwen 3" in model_selector: key = "qwen3"
    elif "Qwen 2.5" in model_selector: key = "qwen2.5"
    else: key = "deepseek_r1"
    
    print(f"🤖 Berta: Usando [{key}] no APISMALL.")
    
    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        return f"⚠️ Erro ao carregar: {str(e)}"
    
    # --- SANITIZED MESSAGE CONSTRUCTION ---
    messages = []
    
    # Process the chat history
    for turn in history:
        role = "user"
        content = ""
        
        # Legacy format (list/tuple pairs)
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
                
        # New format (message dictionaries)
        elif isinstance(turn, dict):
            role = turn.get('role', 'user')
            raw_content = turn.get('content', '')
            # THE MAGIC HAPPENS HERE: convert anything into a string
            clean_content = extract_text_content(raw_content)
            messages.append({"role": role, "content": clean_content})

    # Process the current message (which can also arrive as a dict/list in newer Gradio)
    current_content = extract_text_content(message)
    messages.append({"role": "user", "content": current_content})

    # Apply the chat template
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
    except Exception as e:
        return f"❌ Erro de Template (Berta está investigando): {e}\nDados: {str(messages)}"
    
    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs, 
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9
    )
    
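    # Slice off the prompt tokens so only the newly generated text is decoded.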
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response

# --- GRADIO INTERFACE ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Laboratório de IA (Small & Smart)")
    gr.Markdown("### Selecione o cérebro digital:")
    
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[
                "🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)",
                "🐳 DeepSeek Math 7B (Especialista Antigo)", 
                "🧪 Qwen 3 4B Instruct (Experimental)",
                "🌟 Qwen 2.5 7B Instruct (Clássico e Estável)"
            ],
            value="🚀 DeepSeek R1 Distill Qwen 7B (O Mais Inteligente - Novo!)", 
            label="Escolha o Modelo",
            interactive=True
        )
    
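    # ChatInterface forwards the dropdown value as the extra `model_selector`
    # argument to generate() on every submit.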
    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown]
    )

if __name__ == "__main__":
    demo.launch()