Spaces:
Running
Running
| import os | |
| import gradio as gr | |
| import whisper | |
| from gtts import gTTS | |
| from groq import Groq | |
# Credentials come from the environment so the key never lives in the source.
# Fail fast at import time when it is missing or empty.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise ValueError(
        "No API key found. "
        "Please set the GROQ_API_KEY environment variable."
    )

# Shared, module-level handles: the Whisper "base" speech-to-text model and
# the Groq API client, each created once and reused by every request.
whisper_model = whisper.load_model("base")
client = Groq(api_key=api_key)
def chatbot(audio_input):
    """Run one voice-to-voice turn: transcribe, query the LLM, speak the reply.

    Args:
        audio_input: Filesystem path of the audio clip to process (the
            interface is configured with ``gr.Audio(type="filepath")``), or
            ``None`` when no audio was supplied.

    Returns:
        A 3-tuple ``(audio_path, transcription, message)``.

        * On success: path of the synthesized MP3 reply, the Whisper
          transcription, and the model's reply text.
        * On failure: ``None``, the transcription if it had already been
          obtained (otherwise ``None``), and a string ``"Error: <details>"``.

    Any ``Exception`` raised along the way is caught and reported through the
    third element of the tuple rather than propagated to the caller.
    """
    import tempfile  # local import: only this function needs it

    # Initialised up front so the error path can report whatever was
    # transcribed before the failure without inspecting locals().
    transcription = None
    try:
        # Debug: show what kind of value the UI handed over.
        print(f"Audio input type: {type(audio_input)}")
        if audio_input is None:
            raise ValueError("Audio input is None. Please provide a valid audio file.")
        if not os.path.exists(audio_input):
            raise FileNotFoundError(f"Audio file {audio_input} not found.")

        # Step 1: load the clip and transcribe it with Whisper.
        audio = whisper.load_audio(audio_input)
        transcription_result = whisper_model.transcribe(audio)
        text = transcription_result.get("text") if transcription_result else None
        # Whitespace-only output carries no prompt, so treat it as empty too.
        if not text or not text.strip():
            raise ValueError("Whisper transcription failed or returned empty text.")
        transcription = text

        # Step 2: ask the LLaMA 8B model (via the Groq API) for a reply.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": transcription,
                }
            ],
            model="llama3-8b-8192",
        )
        if not (chat_completion and chat_completion.choices):
            raise ValueError("Invalid response from Groq API")
        response_text = chat_completion.choices[0].message.content
        # The content may be None as well as blank; guard before .strip().
        if not response_text or not response_text.strip():
            raise ValueError("Response text is empty or invalid")

        # Step 3: synthesize the reply with gTTS into a file unique to this
        # request, so simultaneous sessions cannot overwrite each other's
        # audio (a single shared "response.mp3" could be clobbered).
        fd, reply_path = tempfile.mkstemp(prefix="response_", suffix=".mp3")
        os.close(fd)  # gTTS reopens the file by name
        try:
            gTTS(response_text).save(reply_path)
        except Exception:
            # Do not leave an empty placeholder behind when synthesis fails.
            os.remove(reply_path)
            raise

        # Step 4: hand back the reply audio plus both pieces of text.
        return reply_path, transcription, response_text
    except Exception as e:
        # Surface the failure in the UI instead of crashing the handler.
        return None, transcription, f"Error: {e}"
# Gradio front end: one audio input (delivered to `chatbot` as a file path)
# and three outputs - the spoken reply, then two text fields fed by the
# second and third values that `chatbot` returns.
interface = gr.Interface(
    title="Voice-to-Voice Chatbot",
    description="Speak to the chatbot and get a real-time response.",
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Audio(type="filepath"), "text", "text"],
    # live mode: input is processed automatically, no button click required.
    live=True,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()