import gradio as gr
from transformers import pipeline, AutoTokenizer
from typing import List, Dict, Any, Tuple
import torch
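
# Dependencies: gradio, transformers, torch; `accelerate` is also required
# for device_map="auto" when a GPU is available.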
# CPU-friendly models (small, chat-ready)
MODELS = {
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct"
}
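# Phi-3-mini (~3.8B parameters) is by far the largest of the three and is
# noticeably slower on CPU than the 0.5B/1.5B Qwen models.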
def load_model(model_key: str):
    """Lazily load and configure a text-generation pipeline."""
    model_id = MODELS[model_key]
    print(f"🚀 Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Some models ship without a pad token; fall back to EOS
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        # fp16 on GPU, fp32 on CPU (fp16 is poorly supported on most CPUs)
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    print(f"✅ {model_id} loaded!")
    return pipe
# Global cache
model_cache = {}
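# Each cached pipeline stays in RAM, so loading several models on a small CPU
# machine can exhaust memory. Optionally (an assumption, not in the original
# flow), pre-load the default model here so the first request doesn't pay the
# load cost: model_cache["Qwen2.5-0.5B"] = load_model("Qwen2.5-0.5B")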
def respond(message: str,
            history: List[Dict[str, str]],
            model_key: str,
            system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
    """Local chat via a transformers pipeline."""
    try:
        if model_key not in model_cache:
            model_cache[model_key] = load_model(model_key)
        pipe = model_cache[model_key]
        print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")
        # Chat format (system + history + user)
        messages = []
        if system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        # Apply the model's chat template (required for instruct models)
        tokenizer = pipe.tokenizer
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Generate; generated_text echoes the prompt, so slice it off
        outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
        bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"✅ Reply: {bot_reply[:50]}...")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": bot_reply}
        ]
        return new_history, "", gr.update(value="")
    except Exception as e:
        error_msg = f"❌ {model_key}: {str(e)}"
        print(f"💥 {error_msg}")
        new_history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": error_msg}
        ]
        return new_history, error_msg, gr.update(value="")
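# Note: replies arrive all at once. For token-by-token output, transformers'
# TextIteratorStreamer could be wired into a generator version of respond().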
# UI
with gr.Blocks(title="🚀 Local HF Chat (even on a weak CPU!)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Local Inference (no API!)\n**Small models** reply in ~1-3 s on CPU; big ones take a very long time. No rate limits or tokens needed. For demo purposes.")
    with gr.Row(variant="compact"):
        model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
        system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)
    chatbot = gr.Chatbot(type="messages", height=500)
    with gr.Row():
        msg_input = gr.Textbox(placeholder="Hi! (Enter)", scale=7)
        send_btn = gr.Button("📤", variant="primary", scale=1)
    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear")
        retry_btn = gr.Button("🔄 Retry")
    status = gr.Textbox(label="Logs", interactive=False, lines=4)
    # Events
    send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
    msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

    def clear():
        return [], "", gr.update(value="")
    clear_btn.click(clear, outputs=[chatbot, status, msg_input])

    def retry(history):
        # Put the last user message back into the input box
        if len(history) >= 2 and history[-2]["role"] == "user":
            return history[-2]["content"]
        return ""
    retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])
if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)
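
# Launch note (assumptions, not part of the original script): on a Hugging Face
# Space this file is typically named app.py; locally, run `python app.py` and
# open the printed URL.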