Spaces:

Muhammadidrees
/

Conversational

Sleeping

App Files Files Community

Conversational / app.py

Muhammadidrees

Update app.py

f825143 verified 3 months ago

raw

history blame contribute delete

2.82 kB

	# app.py
	import gradio as gr
	import torch
	from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq

	# -------------------
	# Speech-to-text: Whisper pipeline from the Hugging Face Hub
	# -------------------
	# The pipeline gets device index 0 when CUDA is available and -1 otherwise.
	stt_pipe = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-small",  # swap in your own uploaded Whisper repo here
	    device=-1 if not torch.cuda.is_available() else 0,
	)

	# -------------------
	# ChatDOC processor and model
	# -------------------
	chatdoc_model_id = "Muhammadidrees/RaiyaChatDoc"  # point this at your own uploaded repo
	# Half precision is only selected together with CUDA; CPU runs in float32.
	if torch.cuda.is_available():
	    device, dtype = "cuda", torch.float16
	else:
	    device, dtype = "cpu", torch.float32

	processor = AutoProcessor.from_pretrained(chatdoc_model_id, trust_remote_code=True)
	chatdoc_model = AutoModelForVision2Seq.from_pretrained(chatdoc_model_id, torch_dtype=dtype)
	chatdoc_model = chatdoc_model.to(device)

	# -------------------
	# Chat function
	# -------------------
	def chat_with_doc(audio, message, history=None):
	    """Generate the doctor's reply for one patient turn.

	    Typed text takes priority. When ``message`` is missing or blank and
	    ``audio`` is provided, the audio is transcribed with ``stt_pipe`` and the
	    transcript is used as the patient's message instead.

	    Args:
	        audio: Input passed straight to ``stt_pipe`` (the UI supplies a file
	            path), or ``None`` when nothing was recorded.
	        message: Typed patient text; may be ``None`` or blank.
	        history: List of ``[patient_text, doctor_text]`` pairs. It is extended
	            in place with the new turn. A fresh list is created when omitted.

	    Returns:
	        A ``(history, response)`` tuple. When neither text nor speech yields
	        any content, ``history`` is returned unchanged and ``response`` is
	        ``"No input detected."``.
	    """
	    # A fresh list per call: a `[]` default would be shared by every caller.
	    if history is None:
	        history = []

	    # Strip before choosing, so a whitespace-only textbox does not mask audio.
	    user_msg = (message or "").strip()
	    if not user_msg and audio is not None:
	        # Only transcribe when the transcript will actually be used.
	        user_msg = (stt_pipe(audio)["text"] or "").strip()
	    if not user_msg:
	        return history, "No input detected."

	    # System prompt (simplified for demo)
	    system_prompt = "You are a medical doctor interviewing a patient. Respond helpfully."
	    # Only completed turns (both sides present) are replayed to the model.
	    dialogue = "\n".join(
	        f"Patient: {u}\nDoctor: {b}" for u, b in history if u and b
	    )
	    prompt = f"{system_prompt}\n\nConversation:\n{dialogue}\nPatient: {user_msg}\nDoctor:"

	    inputs = processor(text=prompt, images=None, return_tensors="pt").to(device)
	    with torch.inference_mode():
	        outputs = chatdoc_model.generate(
	            **inputs,
	            max_new_tokens=200,
	            do_sample=True,
	            temperature=0.7,
	        )

	    # Keep only the tokens that follow the prompt-length prefix of the output.
	    input_len = inputs["input_ids"].shape[1]
	    gen_tokens = outputs[:, input_len:]
	    response = processor.batch_decode(gen_tokens, skip_special_tokens=True)[0].strip()

	    # Record the turn only after generation succeeded, so a failure above
	    # never leaves a half-filled [user, None] entry in the caller's history.
	    history.append([user_msg, response])
	    return history, response

	# -------------------
	# Gradio UI
	# -------------------
	with gr.Blocks(title="ChatDOC") as demo:
	    gr.Markdown("# 🩺 ChatDOC + Whisper\nTalk or type your symptoms.")

	    chatbot = gr.Chatbot(height=400)
	    msg = gr.Textbox(placeholder="Type your symptoms...")
	    mic = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak your symptoms")

	    clear_btn = gr.Button("Clear Chat")

	    # Per-session conversation history as a list of [user, bot] pairs.
	    state = gr.State([])

	    def respond(audio, text, history):
	        """Run one chat turn and return values for (chatbot, msg, state).

	        The listeners below declare three outputs, so exactly three values
	        are returned: the history for the chatbot, an empty string to clear
	        the textbox, and the history again for the session state.
	        """
	        updated_history, _reply = chat_with_doc(audio, text, history)
	        # The reply is already part of the history shown in the chatbot, so
	        # it is not written into the textbox.
	        return updated_history, "", updated_history

	    def respond_to_audio(audio, text, history):
	        """Handle a microphone change event.

	        The change event also fires when the recording is cleared; in that
	        case nothing is sent and all three outputs keep their current values.
	        """
	        if audio is None:
	            return history, text, history
	        return respond(audio, text, history)

	    msg.submit(respond, [mic, msg, state], [chatbot, msg, state])
	    mic.change(respond_to_audio, [mic, msg, state], [chatbot, msg, state])
	    clear_btn.click(lambda: ([], "", []), None, [chatbot, msg, state])

	# Start the Gradio app only when this file is executed directly as a script.
	if __name__ == "__main__":
	    demo.launch()