import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the lightweight quantized GGUF model once at startup.
# gpu_layers=0 keeps all inference on the CPU.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF",
    model_file="TinyLlama-1.1B-Chat-v0.3.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=0,
)


def chat(prompt: str) -> str:
    """Generate a text completion for *prompt* using the loaded TinyLlama model.

    Args:
        prompt: Raw user text forwarded verbatim to the model.

    Returns:
        The generated completion as a string.
    """
    response = model(
        prompt,
        max_new_tokens=200,
        temperature=0.7,          # mild randomness
        repetition_penalty=1.1,   # discourage verbatim loops
    )
    return response


# Minimal Gradio UI: one input box, one output box, one submit button.
with gr.Blocks() as demo:
    gr.Markdown("# API IA pour Discord (GGUF)")
    inp = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Réponse")
    btn = gr.Button("Envoyer")
    btn.click(chat, inp, out)

# Bind to all interfaces so the app is reachable from outside the container.
demo.launch(server_name="0.0.0.0", server_port=7860)