import whisper as openai_whisper
from transformers import AutoModelForCausalLM, AutoTokenizer
from TTS.api import TTS
import gradio as gr
import torch
import os
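# Assumed dependencies (the original does not list them; these are the standard
# PyPI names for the imported libraries):
#   pip install openai-whisper transformers TTS gradio torch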
# 1. Speech-to-Text (STT) Implementation

def setup_stt():
    model = openai_whisper.load_model("base")  # Explicit OpenAI Whisper
    return model

def transcribe_audio(model, audio_file):
    result = model.transcribe(audio_file)
    print("Transcription:", result['text'])
    return result['text']
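# Note: Whisper decodes input files through ffmpeg, so ffmpeg must be installed
# on the host for transcribe_audio() to handle arbitrary audio formats.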
# 2. Natural Language Processing (NLP) Implementation

def setup_nlp():
    model_name = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

def generate_response(tokenizer, model, input_text):
    prompt = f"User: {input_text}\nAssistant:"
    # Keep inputs on the same device as the model (it may have been moved to GPU)
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    response = model.generate(
        input_ids,
        max_length=150,
        num_return_sequences=1,
        do_sample=True,  # sampling must be enabled for temperature/top_p to apply
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
        no_repeat_ngram_size=2
    )
    full_text = tokenizer.decode(response[0], skip_special_tokens=True)
    # generate() returns the prompt plus the completion; strip the echoed prompt
    return full_text[len(prompt):].strip()
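# Design note: base GPT-2 is not instruction-tuned, so replies can drift off
# topic; any chat-tuned causal LM that loads via AutoModelForCausalLM could be
# swapped in through model_name above without changing the surrounding code.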
# 3. Text-to-Speech (TTS) Implementation

def setup_tts():
    tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
    return tts

def generate_speech(tts, text, file_path="output.wav"):
    tts.tts_to_file(text=text, file_path=file_path)
    return file_path
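# Note: tacotron2-DDC trained on LJSpeech is a single-speaker English voice;
# Coqui TTS downloads the model weights on first use, so the first call is slow.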
# 4. Voice AI System Class

class VoiceAISystem:
    def __init__(self):
        print("Initializing Voice AI System...")
        print("Loading STT model...")
        self.stt_model = setup_stt()
        print("Loading NLP model...")
        self.tokenizer, self.nlp_model = setup_nlp()
        print("Loading TTS model...")
        self.tts_model = setup_tts()
        # GPU optimization: move the language model to the GPU when available
        # (Whisper already picks its own device at load time)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        self.nlp_model = self.nlp_model.to(self.device)
        print("System initialization complete!")
    def process_audio(self, audio_file):
        try:
            os.makedirs("tmp", exist_ok=True)
            print("Transcribing audio...")
            text = transcribe_audio(self.stt_model, audio_file)
            print("Generating response...")
            # Run generation under autocast for mixed-precision inference on GPU
            with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                response = generate_response(self.tokenizer, self.nlp_model, text)
            print("Converting response to speech...")
            output_path = os.path.join("tmp", "response.wav")
            audio_response = generate_speech(self.tts_model, response, output_path)
            return audio_response, text, response
        except Exception as e:
            print(f"Error during processing: {e}")
            return None, f"Error: {e}", "Error processing request"
# 5. Gradio UI Integration

def create_voice_ai_interface():
    system = VoiceAISystem()

    def chat(audio):
        if audio is None:
            return None, "No audio provided", "No response generated"
        return system.process_audio(audio)

    interface = gr.Interface(
        fn=chat,
        inputs=[
            gr.Audio(
                type="filepath",
                label="Speak here"
            )
        ],
        outputs=[
            gr.Audio(label="AI Response"),
            gr.Textbox(label="Transcribed Text"),
            gr.Textbox(label="AI Response Text")
        ],
        title="Voice AI System",
        description="Click to record your voice and interact with the AI"
    )
    return interface
# Launch the interface
if __name__ == "__main__":
    iface = create_voice_ai_interface()
    iface.launch(share=True)
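# Quick headless smoke test, kept commented out so the module still launches the
# UI by default ("sample.wav" is an illustrative path, not part of the original):
#
#     system = VoiceAISystem()
#     audio_out, transcript, reply = system.process_audio("sample.wav")
#     print(transcript, reply, audio_out)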