ghosthets committed
Commit 4676ca6 · verified · 1 Parent(s): 9fd7b58

Update app.py

Files changed (1):
  app.py +52 -41
app.py CHANGED
@@ -1,58 +1,69 @@
- from flask import Flask, request, jsonify
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
-
- app = Flask(__name__)
-
  model_id = "HuggingFaceH4/zephyr-7b-beta"
-
- print("🚀 Loading model:", model_id)
-
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
- model = AutoModelForCausalLM.from_pretrained(model_id)
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model.to(device)
-
- print("✅ Model Loaded Successfully")
-
- @app.route('/chat', methods=['POST'])
- def chat():
-     try:
-         data = request.get_json()
-         message = data.get("message", "").strip()
-
-         if not message:
-             return jsonify({"error": "Empty message"}), 400
-
-         # Simple clean prompt
-         prompt = f"Human: {message}\nAssistant:"
-
-         inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-         output = model.generate(
-             inputs.input_ids,
-             max_length=300,
-             do_sample=True,
-             top_k=40,
-             top_p=0.92,
-             temperature=0.72,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-         full = tokenizer.decode(output[0], skip_special_tokens=True)
-         reply = full.split("Assistant:")[-1].strip()
-
-         return jsonify({"reply": reply})
-
-     except Exception as e:
-         return jsonify({"error": str(e)}), 500
-
- @app.route('/')
- def home():
-     return "LLM Space Active."
-
- if __name__ == "__main__":
-     app.run(host="0.0.0.0", port=7860)
+ import gradio as gr
+ from transformers import pipeline
+ from fastapi import FastAPI, Request
+ import uvicorn
+
+ # ===========================
+ # LOAD MODEL
+ # ===========================
  model_id = "HuggingFaceH4/zephyr-7b-beta"
+ ai = pipeline("text-generation", model=model_id, max_new_tokens=200)
+
+ # ===========================
+ # API ENDPOINT (VERY IMPORTANT)
+ # call.php expects: { "reply": "..." }
+ # ===========================
+ def chat_api(request):
+     try:
+         message = request["message"]
+     except (KeyError, TypeError):
+         return {"reply": "INVALID_REQUEST"}
+
+     # Run model
+     output = ai(message)[0]["generated_text"]
+
+     # Return exactly what call.php expects
+     return {"reply": output}
+
+ # ===========================
+ # NORMAL UI CHAT (optional)
+ # ===========================
+ def chat_ui(msg):
+     return ai(msg)[0]["generated_text"]
+
+ # ===========================
+ # GRADIO SETUP
+ # ===========================
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🚀 Space 1 — Ghost Model")
+
+     # UI for testing only
+     inp = gr.Textbox(label="Your Message")
+     out = gr.Textbox(label="Model Output")
+     btn = gr.Button("Send")
+     btn.click(chat_ui, inp, out)
+
+     # Real API needed for call.php
+     gr.JSON(
+         value={"message": "send JSON POST here to use"},
+         label="API Endpoint Information"
+     )
+
+ # ===========================
+ # API ENDPOINT FOR HF
+ # POST /chat serves call.php; the Gradio UI is
+ # mounted on the same FastAPI server at "/".
+ # ===========================
+ fastapi_app = FastAPI()
+
+ @fastapi_app.post("/chat")
+ async def chat_endpoint(request: Request):
+     data = await request.json()
+     return chat_api(data)
+
+ # Mount the Gradio UI onto the FastAPI app. Calling demo.launch()
+ # here instead would block the script before /chat is registered.
+ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)