jam-f5

Running on Zero

File size: 3,401 Bytes

import spaces
import gradio as gr
from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
from f5_tts.api import F5TTS
import tempfile
import os

# Stand-alone model factory. It is wrapped in @spaces.GPU so that building the
# model (and the CUDA initialisation that comes with it) stays out of the main
# process. Nothing in the visible part of this file calls it.
@spaces.GPU
def initialize_f5tts():
    """Construct a new ``F5TTS`` instance with default arguments and return it."""
    model = F5TTS()
    return model

# Module-level cache for the F5TTS model. It starts empty and is filled in by
# the first call to run_tts, so the model is never built at import time.
f5tts = None

@spaces.GPU
def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
    """Generate speech for ``gen_text`` using the reference clip as the voice.

    Args:
        ref_audio: Path to the reference audio file (the UI passes a filepath).
        ref_text: Text spoken in the reference audio.
        gen_text: Text to synthesize.
        remove_silence: Forwarded unchanged to ``F5TTS.infer``.

    Returns:
        Path of the ``.wav`` file that ``F5TTS.infer`` was asked to write.
    """
    global f5tts
    if f5tts is None:
        # Lazy construction: only build the model inside the GPU-decorated call.
        f5tts = F5TTS()

    # tempfile.mktemp() only invents a name, leaving a window in which another
    # process can create that path first. NamedTemporaryFile creates the file
    # atomically; delete=False keeps it on disk after the handle is closed so
    # the model can overwrite it and the caller can read it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        output_wav_path = tmp_wav.name

    # The return value of infer() is not needed here: the audio is consumed
    # through the file path passed as file_wave.
    f5tts.infer(
        ref_file=ref_audio,
        ref_text=ref_text,
        gen_text=gen_text,
        file_wave=output_wav_path,
        remove_silence=remove_silence,
    )

    return output_wav_path

# Build the Gradio Blocks UI: a two-column layout with the inputs and the
# "Generate Speech" button on the left and the generated audio on the right.
with gr.Blocks(
    title="🗣️ F5-TTS | Integrated By Muhammad Zameer ul Hassan",
    theme=gr.themes.Default(),
    # Custom CSS: hides the element matched by `footer`, styles a fixed
    # `.custom-footer` bar, and reserves bottom margin so the bar does not
    # cover page content.
    # NOTE(review): the `.gradio-container .prose` rule hides every element
    # with class `prose` inside the container. If this Gradio version renders
    # gr.Markdown / gr.HTML with that class, the instructions and the custom
    # footer below would be hidden too — confirm in a browser.
    css="""
    footer {visibility: hidden}
    .gradio-container .prose {display: none !important}
    
    /* Custom Footer */
    .custom-footer {
        position: fixed;
        bottom: 0;
        left: 0;
        right: 0;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        text-align: center;
        padding: 10px 0;
        font-size: 14px;
        z-index: 1000;
        box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
    }
    
    .custom-footer a {
        color: #ffffff;
        text-decoration: none;
        font-weight: bold;
    }
    
    .custom-footer a:hover {
        color: #f0f0f0;
        text-decoration: underline;
    }
    
    /* Add margin to prevent content overlap */
    .gradio-container {
        margin-bottom: 60px;
    }
    """
) as demo:
    # Page heading and usage instructions.
    gr.Markdown("""
    # 🗣️ F5-TTS Text-to-Speech
    
    Upload a reference voice, give reference and generation text, and hear it in the same voice!
    
    **Instructions:**
    1. Upload a reference audio file (preferably 3-10 seconds)
    2. Enter the text that corresponds to your reference audio
    3. Enter the text you want to generate in the same voice
    4. Optionally enable silence removal for cleaner output
    """)
    
    with gr.Row():
        # Left column: all inputs plus the trigger button.
        with gr.Column():
            # type="filepath" makes the component hand run_tts a path string.
            ref_audio = gr.Audio(label="Reference Audio", type="filepath")
            ref_text = gr.Textbox(
                label="Reference Text", 
                placeholder="Enter the text spoken in the reference audio...",
                lines=2
            )
            gen_text = gr.Textbox(
                label="Generation Text", 
                placeholder="Enter the text you want to generate...",
                lines=3
            )
            remove_silence = gr.Checkbox(label="Remove Silence from Output", value=False)
            
            generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
        
        # Right column: player for the synthesized result.
        with gr.Column():
            output_audio = gr.Audio(label="Generated Speech")
    
    # Wire the button to run_tts. The inputs list is in the same order as
    # run_tts's parameters; the returned file path feeds the output player.
    generate_btn.click(
        fn=run_tts,
        inputs=[ref_audio, ref_text, gen_text, remove_silence],
        outputs=output_audio
    )
    
    # Custom Footer
    # NOTE(review): the footer text ends with a dangling " | " separator, and
    # the CSS above styles `.custom-footer a` although this markup contains no
    # link — presumably a link was removed; confirm the intended content.
    gr.HTML("""
    <div class="custom-footer">
        <p>🗣️ F5-TTS by Muhammad Zameer ul Hassan | 
        Powered by F5-TTS | 
        </p>
    </div>
    """)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(show_api=False)