Spaces:

Engr-Saeed
/

VoiceToVoiceChatBot

Running

App Files Files Community

VoiceToVoiceChatBot / app.py

Engr-Saeed

Update app.py

e1a0aa9 verified over 1 year ago

raw

history blame contribute delete

2.79 kB

	import os
	import tempfile

	import gradio as gr
	import whisper
	from groq import Groq
	from gtts import gTTS

	# Read the Groq credential from the environment and refuse to start without
	# it, so a misconfigured deployment fails at import time, not on first request.
	api_key = os.environ.get("GROQ_API_KEY")
	if api_key is None or api_key == "":
	    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

	# Module-level singletons shared by every request: the Whisper "base"
	# speech-to-text model and the Groq API client.
	whisper_model = whisper.load_model("base")
	client = Groq(api_key=api_key)

	def _synthesize_speech(text):
	    """Render *text* to an MP3 with gTTS and return the path of the new file.

	    Each call writes to its own temporary file so that concurrent requests
	    never overwrite one another's audio. The file is left on disk for the
	    caller (it is created with ``delete=False``); it is removed here only
	    if synthesis itself fails.
	    """
	    # Reserve a unique name, then close the handle before gTTS writes to it.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
	        output_path = tmp.name
	    try:
	        gTTS(text).save(output_path)
	    except Exception:
	        # Do not leave an empty placeholder behind when synthesis fails.
	        if os.path.exists(output_path):
	            os.remove(output_path)
	        raise
	    return output_path


	def chatbot(audio_input):
	    """Transcribe a spoken prompt, ask the LLM for a reply, and speak it back.

	    Args:
	        audio_input: Path to the recorded audio file, or None when no audio
	            was provided.

	    Returns:
	        A 3-tuple ``(reply_audio_path, transcription, reply_text)``. On any
	        failure the audio path is None, the transcription is whatever had
	        been produced before the failure (possibly None), and the third
	        element is an ``"Error: ..."`` message instead of the reply.
	    """
	    # Bound up front so the error path can report it without probing locals().
	    transcription = None
	    try:
	        # Debug: log what the audio component actually delivered.
	        print(f"Audio input type: {type(audio_input)}")

	        if audio_input is None:
	            raise ValueError("Audio input is None. Please provide a valid audio file.")

	        if not os.path.exists(audio_input):
	            raise FileNotFoundError(f"Audio file {audio_input} not found.")

	        # Step 1: Load and transcribe the audio with Whisper.
	        audio = whisper.load_audio(audio_input)
	        transcription_result = whisper_model.transcribe(audio)
	        raw_text = (transcription_result or {}).get("text")
	        # Whitespace-only output counts as empty: there is nothing to send.
	        if not raw_text or not raw_text.strip():
	            raise ValueError("Whisper transcription failed or returned empty text.")
	        transcription = raw_text.strip()

	        # Step 2: Generate a response via the Groq chat-completions API.
	        # NOTE(review): confirm this model id is still served by Groq.
	        chat_completion = client.chat.completions.create(
	            messages=[
	                {
	                    "role": "user",
	                    "content": transcription,
	                }
	            ],
	            model="llama3-8b-8192",
	        )
	        if not chat_completion or not chat_completion.choices:
	            raise ValueError("Invalid response from Groq API")
	        response_text = chat_completion.choices[0].message.content

	        # The content may be None or blank; check before calling str methods.
	        if not response_text or not response_text.strip():
	            raise ValueError("Response text is empty or invalid")

	        # Step 3: Convert the text response to speech in a per-request file.
	        response_audio = _synthesize_speech(response_text)

	        # Step 4: Return the response audio, the transcription and the reply.
	        return response_audio, transcription, response_text

	    except Exception as e:
	        # UI boundary: report the failure in the text output instead of
	        # raising, and echo it to the server log for debugging.
	        print(f"Chatbot error: {e!r}")
	        return None, transcription, f"Error: {e}"

	# Build the web UI: a single audio input (delivered as a file path) feeds
	# chatbot(), whose three return values populate an audio output and two
	# text outputs, in that order.
	interface = gr.Interface(
	    title="Voice-to-Voice Chatbot",
	    description="Speak to the chatbot and get a real-time response.",
	    fn=chatbot,
	    inputs=gr.Audio(type="filepath"),
	    outputs=[
	        gr.Audio(type="filepath"),
	        "text",
	        "text",
	    ],
	    # Live mode: input is processed automatically, with no button click.
	    live=True,
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()