import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForSequenceClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Models available in the UI dropdown (display name -> Hugging Face model id)
llama_models = {
    "Chat-IPT 3.2": "meta-llama/Llama-3.2-1B-Instruct",
}

def load_model(model_name):
    # Load the causal LM and its tokenizer, and wrap them in a text-generation pipeline
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
    return generator
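
# Illustrative usage (not part of the app flow): a pipeline loaded this way can be
# called directly, e.g.
#   gen = load_model(llama_models["Chat-IPT 3.2"])
#   gen("Olá!", max_new_tokens=32)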

# Cache of already-loaded pipelines so each model is only loaded once per session
model_cache = {}

def predict(model, prompt, response=None):
    # Score a prompt (or a prompt/response pair) with the HarmAug-Guard classifier
    # loaded below; relies on the module-level `tokenizer` defined for that model.
    device = model.device
    if response is None:
        inputs = tokenizer(prompt, return_tensors="pt")
    else:
        inputs = tokenizer(prompt, response, return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Probability of the "unsafe" class (index 1) from the 2-way classification head
    unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]
    return unsafe_prob.item()

# Safety classifier (HarmAug-Guard) used to screen each prompt/response pair
tokenizer = AutoTokenizer.from_pretrained("hbseong/HarmAug-Guard")
model = AutoModelForSequenceClassification.from_pretrained("hbseong/HarmAug-Guard")
model = model.to(device)
model.eval()
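
# Sanity check (illustrative only, not executed by the app): a clearly benign pair
# should score close to 0, e.g.
#   predict(model, "What is the capital of France?", "The capital of France is Paris.")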

def generate_chat(user_input, history, model_choice):
    # Lazily load and cache the selected model
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    # gr.Chatbot(type="messages") passes an empty list on the first turn
    if not history:
        history = []
    history.append({"role": "user", "content": user_input})

    # Keep only the most recent turns to bound the prompt length
    if len(history) > 5:
        history = history[-5:]

    # The system prompt is prepended for generation only; gr.Chatbot(type="messages")
    # expects "user"/"assistant" roles, so it is not stored in the displayed history
    system_prompt = {"role": "system", "content": "Você é um ótimo assistente"}
    messages = [system_prompt] + history

    # Passing the chat messages makes the pipeline apply the model's chat template;
    # the last message of the returned conversation is the new assistant reply
    response = generator(
        messages,
        max_new_tokens=1024,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.55,
        top_p=0.7
    )[0]["generated_text"][-1]["content"]

    # Replace the reply with a refusal when the guard flags the exchange as unsafe
    unsafe_score = predict(model, user_input, response)
    safety_threshold = 0.85
    if unsafe_score > safety_threshold:
        response = "Desculpe, mas não posso fornecer ajuda com essa solicitação devido a questões de segurança ou ética."

    history.append({"role": "assistant", "content": response})
    return history
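
# The returned history is already in the "messages" format that
# gr.Chatbot(type="messages") renders, e.g.
#   [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]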

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1><center>Teste</center></h1>")
    model_choice = gr.Dropdown(list(llama_models.keys()), label="Selecione o Modelo.")
    chatbot = gr.Chatbot(label=" ", type="messages")
    txt_input = gr.Textbox(show_label=False, placeholder="Escreva a sua mensagem aqui...")
    submit_btn = gr.Button("Enviar")

    def respond(user_input, chat_history, model_choice):
        # Default to the first model when nothing is selected in the dropdown
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        # Clear the textbox and push the updated history to the chatbot
        return "", updated_history

    # Send on Enter or via the button
    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])
    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

demo.launch(debug=False, show_error=True, share=True)
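
# Note: share=True asks Gradio for a temporary public share link in addition to the
# local server; drop it to keep the demo local-only.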