import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

from prompts.base_instruction import basic_instruction
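
# Load the fine-tuned tokenizer and model from the "checkpoints/model"
# subfolder of the Hub repository; device_map="auto" lets accelerate place
# the weights on the available GPU(s).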
tokenizer = AutoTokenizer.from_pretrained(
    "braindeck/text2text",
    trust_remote_code=True,
    subfolder="checkpoints/model",
)
model = AutoModelForCausalLM.from_pretrained(
    "braindeck/text2text",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    subfolder="checkpoints/model",
)
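

# @spaces.GPU requests ZeroGPU hardware on Spaces for the duration of each call.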
@spaces.GPU
def generate_response(prompt):
    """Generates a response from the model."""
    chat = basic_instruction(prompt, "braindeck/text2text")
    inputs = tokenizer.apply_chat_template(
        chat, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_length = inputs.shape[1]
    outputs = model.generate(inputs, max_new_tokens=512, do_sample=False)
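
    # Decode only the newly generated tokens, skipping the echoed prompt.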
    generated_tokens = outputs[0, input_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
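
    # Strip any chain-of-thought block ending in "</think>" from the output.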
    ix = generated_text.find("</think>")
    if ix != -1:
        generated_text = generated_text[ix + len("</think>"):].lstrip()

    return generated_text
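

# Minimal Gradio UI: a prompt box, a generate button, and a read-only output.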
with gr.Blocks() as demo:
    gr.Markdown("# Fine-tuned Text-to-Text Generation")
    gr.Markdown("Enter a prompt and the model will generate a response.")

    with gr.Row():
        prompt_input = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")

    with gr.Row():
        generate_button = gr.Button("Generate")

    with gr.Row():
        response_output = gr.Textbox(label="Response", lines=8, interactive=False)
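
    # Wire the button to the generation function.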
    generate_button.click(
        fn=generate_response,
        inputs=prompt_input,
        outputs=response_output,
    )


if __name__ == "__main__":
    demo.launch()