# chat-bots-test / app.py
import gradio as gr
from transformers import pipeline, AutoTokenizer
from typing import List, Dict, Any, Tuple
import torch
# Small, CPU-friendly, chat-ready models
MODELS = {
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
}
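# Any other small instruct model from the Hub could be added the same way,
# e.g. (untested suggestion, not part of the original app):
#   "TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",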
def load_model(model_key: str):
    """Lazily build a text-generation pipeline for the selected model."""
    model_id = MODELS[model_key]
    print(f"🚀 Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    print(f"✅ {model_id} loaded!")
    return pipe
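# Quick standalone sanity check (illustrative sketch only, not executed by the app):
#   pipe = load_model("Qwen2.5-0.5B")
#   print(pipe("Hello!", max_new_tokens=20)[0]["generated_text"])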
# Global cache
model_cache = {}
def respond(message: str,
            history: List[Dict[str, str]],
            model_key: str,
            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
    """Run one local chat turn through the cached pipeline."""
    try:
        if model_key not in model_cache:
            model_cache[model_key] = load_model(model_key)
        pipe = model_cache[model_key]
        print(f"🚀 Generating: {model_key}, msg='{message[:30]}...'")
        # Chat format: system prompt + prior history + new user message
        messages = []
        if system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        # Apply the model's chat template (required for instruct models)
        tokenizer = pipe.tokenizer
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Generate, then keep only the newly generated continuation
        outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
        bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"✅ Reply: {bot_reply[:50]}...")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_reply}
        ]
        return new_history, "", gr.update(value="")
    except Exception as e:
        error_msg = f"❌ {model_key}: {str(e)}"
        print(f"💥 {error_msg}")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": error_msg}
        ]
        return new_history, error_msg, gr.update(value="")
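# For reference, the "messages" history exchanged with gr.Chatbot(type="messages")
# is a list of role/content dicts, e.g. (illustrative values):
#   [{"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello! How can I help?"}]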
# UI
with gr.Blocks(title="🚀 Local HF Chat (even on a weak CPU!)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Local Inference (no API!)\n**Small models** answer in 1-3 s on CPU; larger ones take much longer. No rate limits or tokens. Provided as an example.")
    with gr.Row(variant="compact"):
        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
        system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
    chatbot = gr.Chatbot(type="messages", height=500)
    with gr.Row():
        msg_input = gr.Textbox(placeholder="Hi! (press Enter)", scale=7)
        send_btn = gr.Button("📤", variant="primary", scale=1)
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        retry_btn = gr.Button("🔄 Retry")
    status = gr.Textbox(label="Logs", interactive=False, lines=4)
    # Events
    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
    msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

    def clear():
        """Reset the chat history, status log, and input box."""
        return [], "", gr.update(value="")

    clear_btn.click(clear, outputs=[chatbot, status, msg_input])

    def retry(history):
        """Return the last user message so it can be resent."""
        if len(history) >= 2 and history[-2]["role"] == "user":
            return history[-2]["content"]
        return ""

    retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)
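# To run locally (assuming gradio, transformers, and torch are installed):
#   python app.py
# The first request for each model downloads its weights from the Hugging Face Hub.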