# chat-bots-test / app.py
import gradio as gr
from transformers import pipeline, AutoTokenizer
from typing import List, Dict, Any, Tuple
import torch
# Small, CPU-friendly, chat-ready models
MODELS = {
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
}
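# Any other small instruct model from the Hub could be added the same way,
# e.g. (untested suggestion, not part of the original app):
#   "TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",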
def load_model(model_key: str):
    """Lazily build a text-generation pipeline for the selected model."""
    model_id = MODELS[model_key]
    print(f"🚀 Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    print(f"✅ {model_id} loaded!")
    return pipe
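# Quick standalone sanity check (illustrative sketch only, not executed by the app):
#   pipe = load_model("Qwen2.5-0.5B")
#   print(pipe("Hello!", max_new_tokens=20)[0]["generated_text"])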
# Global cache
model_cache = {}
def respond(message: str,
            history: List[Dict[str, str]],
            model_key: str,
            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
    """Run one local chat turn through the cached pipeline."""
    try:
        if model_key not in model_cache:
            model_cache[model_key] = load_model(model_key)
        pipe = model_cache[model_key]
        print(f"🚀 Generating: {model_key}, msg='{message[:30]}...'")
        # Chat format: system prompt + prior history + new user message
        messages = []
        if system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        # Apply the model's chat template (required for instruct models)
        tokenizer = pipe.tokenizer
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Generate, then keep only the newly generated continuation
        outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
        bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"✅ Reply: {bot_reply[:50]}...")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_reply}
        ]
        return new_history, "", gr.update(value="")
    except Exception as e:
        error_msg = f"❌ {model_key}: {str(e)}"
        print(f"💥 {error_msg}")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": error_msg}
        ]
        return new_history, error_msg, gr.update(value="")
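# For reference, the "messages" history exchanged with gr.Chatbot(type="messages")
# is a list of role/content dicts, e.g. (illustrative values):
#   [{"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello! How can I help?"}]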
# UI
with gr.Blocks(title="🚀 Local HF Chat (even on a weak CPU!)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Local Inference (no API!)\n**Small models** answer in 1-3 s on CPU; larger ones take much longer. No rate limits or tokens. Provided as an example.")
    with gr.Row(variant="compact"):
        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
        system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
    chatbot = gr.Chatbot(type="messages", height=500)
    with gr.Row():
        msg_input = gr.Textbox(placeholder="Hi! (press Enter)", scale=7)
        send_btn = gr.Button("📤", variant="primary", scale=1)
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        retry_btn = gr.Button("🔄 Retry")
    status = gr.Textbox(label="Logs", interactive=False, lines=4)
    # Events
    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
    msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

    def clear():
        """Reset the chat history, status log, and input box."""
        return [], "", gr.update(value="")

    clear_btn.click(clear, outputs=[chatbot, status, msg_input])

    def retry(history):
        """Return the last user message so it can be resent."""
        if len(history) >= 2 and history[-2]["role"] == "user":
            return history[-2]["content"]
        return ""

    retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)
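# To run locally (assuming gradio, transformers, and torch are installed):
#   python app.py
# The first request for each model downloads its weights from the Hugging Face Hub.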