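# Gradio demo for the alatlas_instruct_lora PEFT adapter (a Darija instruction model,
# judging from the example prompts below) served on top of its base causal LM.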
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
import torch
import os
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the PEFT adapter config; TOKEN must be set in the environment (e.g. as a Space secret)
MODEL_NAME = "abdeljalilELmajjodi/alatlas_instruct_lora"
print(f"bf16 available: {torch.cuda.is_bf16_supported()}")
config = PeftConfig.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    token=os.environ["TOKEN"],
    torch_dtype=torch.bfloat16,
)
# Attach the LoRA adapter; the token is passed through in case the adapter repo
# requires authentication (the config load above already uses it)
model = PeftModel.from_pretrained(
    model, MODEL_NAME, torch_dtype=torch.bfloat16, token=os.environ["TOKEN"]
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])
# Fall back to the EOS token for padding if the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
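# Request a GPU for the duration of each call (Hugging Face ZeroGPU Spaces)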
@spaces.GPU
def generate(prompt, temperature=0.7, top_k=50, repetition_penalty=1.2):
    # Wrap the raw prompt in the chat template expected by the instruct model
    messages = [{"role": "user", "content": prompt}]
    formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    ids = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **ids,
        max_new_tokens=50,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Keep only the newly generated tokens, then decode
    output_ids = output_ids[0][len(ids.input_ids[0]):]
    output = tokenizer.decode(output_ids, skip_special_tokens=True)
    # The decoded text can start with a leftover assistant header from the chat template; strip it
    assistant_marker = "ﭺassistant"
    if output.startswith(assistant_marker):
        output = output[len(assistant_marker):].strip()
    return output
# UI components; text boxes are right-to-left for Arabic-script input and output
prompt_input = gr.Textbox(label="Enter your prompt", lines=5, rtl=True)
model_response = gr.Textbox(label="Model Response", lines=5, interactive=False, rtl=True)
temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, label="Temperature")
top_k = gr.Slider(1, 10000, value=10, label="Top-k")
repetition_penalty = gr.Slider(0.1, 100.0, value=1.2, label="Repetition Penalty")
# Example prompts in Moroccan Darija (roughly: "I want to travel in Morocco this summer
# but don't know where to go, can you help?", "What are the best places to visit in
# Morocco in summer?", and "Explain artificial intelligence to me, please")
examples = [
    ["عافاك بغيت نسافر فالمغرب فالصيف ولكن معرفتش فين نمشي. ممكن تعاوني؟", 0.1, 90, 1.2],
    ["عافاك، بغيت نعرف شنو هي أحسن الأماكن لي نقدر نزورها فالمغرب فالصيف؟", 0.1, 100, 1.2],
    ["شرح ليا الذكاء الاصطناعي عفاك", 0.1, 1, 1.2],
]
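# Wire the components into the Interface; example outputs are cached (cache_examples=True)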
demo = gr.Interface(
    fn=generate,
    inputs=[prompt_input, temperature, top_k, repetition_penalty],
    outputs=model_response,
    flagging_mode="never",
    examples=examples,
    cache_examples=True,
)
demo.launch()