RED_IA / app.py
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Load the lightweight GGUF model (CPU only, no GPU layers)
model = AutoModelForCausalLM.from_pretrained(
"TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF",
model_file="TinyLlama-1.1B-Chat-v0.3.Q4_K_M.gguf",
model_type="llama",
gpu_layers=0
)
def chat(prompt):
    # Generate a completion from the local GGUF model
    response = model(
        prompt,
        max_new_tokens=200,
        temperature=0.7,
        repetition_penalty=1.1
    )
    return response
with gr.Blocks() as demo:
    gr.Markdown("# AI API for Discord (GGUF)")
    inp = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Response")
    btn = gr.Button("Send")
    btn.click(chat, inp, out)
demo.launch(server_name="0.0.0.0", server_port=7860)
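Because the Blocks app wires chat() to the button click, Gradio also exposes it as a callable API endpoint, which is how a Discord bot could query this Space remotely. The snippet below is a minimal client-side sketch, assuming the Space is published as stresspasbg/RED_IA and that Gradio's default endpoint name /chat (derived from the function name) applies; both are assumptions, so adjust them to match the actual deployment.

# Client-side sketch (assumed Space id "stresspasbg/RED_IA", assumed endpoint "/chat")
from gradio_client import Client

client = Client("stresspasbg/RED_IA")                 # connect to the hosted Space
reply = client.predict("Hello!", api_name="/chat")    # send a prompt to the chat() endpoint
print(reply)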