import os

import gradio as gr
import spaces
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the model
MODEL_NAME = "abdeljalilELmajjodi/alatlas_instruct_lora"
# Guard the bf16 probe so it doesn't crash during a CPU-only startup.
print(f"bf16 available: {torch.cuda.is_available() and torch.cuda.is_bf16_supported()}")
config = PeftConfig.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    token=os.environ["TOKEN"],
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(model, MODEL_NAME, torch_dtype=torch.bfloat16)
# Pass the token here too; loading the tokenizer from a gated repo fails without it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
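
# Assuming this Space runs on ZeroGPU hardware: CUDA work must happen inside a
# function decorated with @spaces.GPU (which is why `spaces` is imported above).
@spaces.GPU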
def generate(prompt, temperature=0.7, top_k=50, repetition_penalty=1.2):
    messages = [{"role": "user", "content": prompt}]
    # add_generation_prompt=True appends the assistant turn marker so the model
    # answers the user instead of continuing the user's turn.
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The chat template already inserts special tokens, so don't add them twice.
    ids = tokenizer(formatted_prompt, return_tensors="pt", add_special_tokens=False).to(device)
    output_ids = model.generate(
        **ids,
        max_new_tokens=50,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
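    # Keep only the newly generated tokens, dropping the echoed prompt.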
    output_ids = output_ids[0][len(ids.input_ids[0]):]
    output = tokenizer.decode(output_ids, skip_special_tokens=True)
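    # The decoded text sometimes starts with a leftover role marker from the
    # chat template; strip it if present.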
    assistant_marker = "ﭺassistant"
    if output.startswith(assistant_marker):
        output = output[len(assistant_marker):].strip()
    return output

prompt_input = gr.Textbox(label="Enter your prompt", lines=5, rtl=True)
model_response = gr.Textbox(label="Model Response", lines=5, interactive=False, rtl=True)
temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, label="Temperature")
top_k = gr.Slider(1, 10000, value=10, label="Top-k")
repetition_penalty = gr.Slider(0.1, 100.0, value=1.2, label="Repetition Penalty")
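
# Example prompts in Moroccan Darija (English translations in the comments).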
examples = [
    # "Please, I'd like to travel in Morocco this summer but I don't know where to go. Can you help me?"
    ["عافاك بغيت نسافر فالمغرب فالصيف ولكن معرفتش فين نمشي. ممكن تعاوني؟", 0.1, 90, 1.2],
    # "Please, I'd like to know the best places I can visit in Morocco in the summer."
    ["عافاك، بغيت نعرف شنو هي أحسن الأماكن لي نقدر نزورها فالمغرب فالصيف؟", 0.1, 100, 1.2],
    # "Explain artificial intelligence to me, please."
    ["شرح ليا الذكاء الاصطناعي عفاك", 0.1, 1, 1.2],
]
demo = gr.Interface(
    fn=generate,
    inputs=[prompt_input, temperature, top_k, repetition_penalty],
    outputs=model_response,
    flagging_mode="never",
    examples=examples,
    cache_examples=True,
)
demo.launch()