import gradio as gr
import spaces
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    "deepseek_math": "deepseek-ai/deepseek-math-7b-instruct",
    "qwen3": "Qwen/Qwen3-4B-Instruct-2507",
    "qwen2.5": "Qwen/Qwen2.5-7B-Instruct",
    "deepseek_r1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
}
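# Module-level cache: only one model/tokenizer pair is kept resident at a time.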
current_model = None
current_tokenizer = None
current_model_name = None
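# Loading strategy: free whatever model is currently in VRAM before loading a
# different one, so two 7B checkpoints are never resident at the same time.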
def load_model_safely(model_key):
    global current_model, current_tokenizer, current_model_name

    # Reuse the cached model if it is already loaded.
    if current_model_name == model_key and current_model is not None:
        return current_model, current_tokenizer

    # Release the previously loaded model before bringing in a new one.
    if current_model is not None:
        print(f"🧹 Berta: Clearing VRAM ({current_model_name})...")
        del current_model
        del current_tokenizer
        gc.collect()
        torch.cuda.empty_cache()
        current_model = None
        current_tokenizer = None
        current_model_name = None

    model_id = MODELS[model_key]
    print(f"🐢 Loading {model_id} into VRAM...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            trust_remote_code=True
        )

        current_model = model
        current_tokenizer = tokenizer
        current_model_name = model_key
        print(f"✅ {model_id} loaded successfully!")

    except Exception as e:
        print(f"❌ Critical error: {e}")
        raise e

    return current_model, current_tokenizer
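# Gradio chat history entries may arrive as plain strings, lists of message
# parts, or dicts with a 'text' field; normalize everything to a plain string.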
def extract_text_content(content):
    """Ensure the content is always a string, never a list."""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        # Collect the text from each item (dicts with a 'text' key or raw strings).
        texts = []
        for item in content:
            if isinstance(item, dict) and 'text' in item:
                texts.append(item['text'])
            elif isinstance(item, str):
                texts.append(item)
        return "\n".join(texts)
    elif isinstance(content, dict) and 'text' in content:
        return content['text']
    return str(content)
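# spaces.GPU requests a ZeroGPU slot for up to 120 seconds per call; model
# loading and text generation both have to finish inside that window.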
@spaces.GPU(duration=120)
def generate(message, history, model_selector):
    # Map the human-readable dropdown label to a MODELS key.
    if "Math" in model_selector:
        key = "deepseek_math"
    elif "Qwen 3" in model_selector:
        key = "qwen3"
    elif "Qwen 2.5" in model_selector:
        key = "qwen2.5"
    else:
        key = "deepseek_r1"

    print(f"🤖 Berta: Using [{key}] on APISMALL.")

    try:
        model, tokenizer = load_model_safely(key)
    except Exception as e:
        return f"⚠️ Error while loading the model: {str(e)}"

    # Rebuild the conversation in the chat-template format expected by the tokenizer.
    messages = []
    for turn in history:
        # Tuple-style history: (user_message, assistant_message).
        if isinstance(turn, (list, tuple)) and len(turn) >= 2:
            messages.append({"role": "user", "content": extract_text_content(turn[0])})
            if turn[1]:
                messages.append({"role": "assistant", "content": extract_text_content(turn[1])})
        # Messages-style history: {"role": ..., "content": ...}.
        elif isinstance(turn, dict):
            role = turn.get('role', 'user')
            raw_content = turn.get('content', '')
            clean_content = extract_text_content(raw_content)
            messages.append({"role": role, "content": clean_content})

    current_content = extract_text_content(message)
    messages.append({"role": "user", "content": current_content})

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
    except Exception as e:
        return f"❌ Template error (Berta is investigating): {e}\nData: {str(messages)}"

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.6,
        do_sample=True,
        top_p=0.9
    )

    # Decode only the newly generated tokens, skipping the prompt.
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response
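# UI: the dropdown choice is forwarded to generate() as an additional input on
# every chat turn, so the model can be switched between messages.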
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 AI Lab (Small & Smart)")
    gr.Markdown("### Select the digital brain:")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[
                "🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
                "🐳 DeepSeek Math 7B (Legacy Specialist)",
                "🧪 Qwen 3 4B Instruct (Experimental)",
                "🌟 Qwen 2.5 7B Instruct (Classic and Stable)"
            ],
            value="🚀 DeepSeek R1 Distill Qwen 7B (The Smartest - New!)",
            label="Choose the Model",
            interactive=True
        )

    chat = gr.ChatInterface(
        fn=generate,
        additional_inputs=[model_dropdown]
    )
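# Standard entry point: launch the Gradio server when the script is run directly.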
if __name__ == "__main__":
    demo.launch()