import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForSequenceClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Models available in the UI dropdown (display name -> Hugging Face model id)
llama_models = {
    "Chat-IPT 3.2": "meta-llama/Llama-3.2-1B-Instruct",
}

def load_model(model_name):
    # Load the causal LM and its tokenizer, and wrap them in a text-generation pipeline
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
    return generator
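
# Illustrative usage (not part of the app flow): a pipeline loaded this way can be
# called directly, e.g.
#   gen = load_model(llama_models["Chat-IPT 3.2"])
#   gen("Olá!", max_new_tokens=32)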

# Cache of already-loaded pipelines so each model is only loaded once per session
model_cache = {}

def predict(model, prompt, response=None):
    # Score a prompt (or a prompt/response pair) with the HarmAug-Guard classifier
    # loaded below; relies on the module-level `tokenizer` defined for that model.
    device = model.device
    if response is None:
        inputs = tokenizer(prompt, return_tensors="pt")
    else:
        inputs = tokenizer(prompt, response, return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Probability of the "unsafe" class (index 1) from the 2-way classification head
    unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]
    return unsafe_prob.item()

# Safety classifier (HarmAug-Guard) used to screen each prompt/response pair
tokenizer = AutoTokenizer.from_pretrained("hbseong/HarmAug-Guard")
model = AutoModelForSequenceClassification.from_pretrained("hbseong/HarmAug-Guard")
model = model.to(device)
model.eval()
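
# Sanity check (illustrative only, not executed by the app): a clearly benign pair
# should score close to 0, e.g.
#   predict(model, "What is the capital of France?", "The capital of France is Paris.")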

def generate_chat(user_input, history, model_choice):
    # Lazily load and cache the selected model
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    # gr.Chatbot(type="messages") passes an empty list on the first turn
    if not history:
        history = []
    history.append({"role": "user", "content": user_input})

    # Keep only the most recent turns to bound the prompt length
    if len(history) > 5:
        history = history[-5:]

    # The system prompt is prepended for generation only; gr.Chatbot(type="messages")
    # expects "user"/"assistant" roles, so it is not stored in the displayed history
    system_prompt = {"role": "system", "content": "Você é um ótimo assistente"}
    messages = [system_prompt] + history

    # Passing the chat messages makes the pipeline apply the model's chat template;
    # the last message of the returned conversation is the new assistant reply
    response = generator(
        messages,
        max_new_tokens=1024,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.55,
        top_p=0.7
    )[0]["generated_text"][-1]["content"]

    # Replace the reply with a refusal when the guard flags the exchange as unsafe
    unsafe_score = predict(model, user_input, response)
    safety_threshold = 0.85
    if unsafe_score > safety_threshold:
        response = "Desculpe, mas não posso fornecer ajuda com essa solicitação devido a questões de segurança ou ética."

    history.append({"role": "assistant", "content": response})
    return history
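
# The returned history is already in the "messages" format that
# gr.Chatbot(type="messages") renders, e.g.
#   [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]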

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1><center>Teste</center></h1>")
    model_choice = gr.Dropdown(list(llama_models.keys()), label="Selecione o Modelo.")
    chatbot = gr.Chatbot(label=" ", type="messages")
    txt_input = gr.Textbox(show_label=False, placeholder="Escreva a sua mensagem aqui...")
    submit_btn = gr.Button("Enviar")

    def respond(user_input, chat_history, model_choice):
        # Default to the first model when nothing is selected in the dropdown
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        # Clear the textbox and push the updated history to the chatbot
        return "", updated_history

    # Send on Enter or via the button
    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])
    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

demo.launch(debug=False, show_error=True, share=True)
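
# Note: share=True asks Gradio for a temporary public share link in addition to the
# local server; drop it to keep the demo local-only.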