import gradio as gr
from huggingface_hub import InferenceClient
import whisper
import torch
import numpy as np


# Whisper models that have already been loaded, keyed by model name. Loading
# reads the weights into memory, so it is done once per process instead of
# once per transcription request.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called ``name``, loading it only on first use."""
    model = _WHISPER_MODELS.get(name)
    if model is None:
        model = whisper.load_model(name)
        _WHISPER_MODELS[name] = model
    return model


def transcribe_audio(audio):
    """Transcribe audio using the Whisper "base" model.

    Args:
        audio: The audio to transcribe, passed straight to Whisper's
            ``transcribe``; ``None`` means there is nothing to transcribe.

    Returns:
        The transcribed text, or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        return ""

    # Reuse the cached model; only the first call pays the loading cost.
    result = _get_whisper_model("base").transcribe(audio)
    return result["text"]

def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """
    Stream a chat completion for the newest user message.

    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: The newest user message. Either a plain string, or a dict
            whose "text" entry holds the typed text (the shape a multimodal
            chat interface passes to its callback).
        history: Earlier conversation turns as role/content dicts; they are
            sent to the model unchanged.
        system_message: Content of the leading "system" turn.
        max_tokens: Forwarded as ``max_tokens`` to the completion request.
        temperature: Forwarded as ``temperature`` to the completion request.
        top_p: Forwarded as ``top_p`` to the completion request.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the inference client.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    # A multimodal chat interface passes {"text": ..., "files": [...]} instead
    # of a string. The completion request needs plain text, so only the text
    # part is used; any attached files are ignored.
    if isinstance(message, dict):
        user_text = message.get("text") or ""
    else:
        user_text = message

    messages = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": user_text},
    ]

    response = ""

    # `chunk` (not `message`) so the loop does not shadow the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        # Some chunks carry no choices or an empty delta; treat them as "".
        if choices and choices[0].delta.content:
            token = choices[0].delta.content

        response += token
        yield response


# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface

# Extra controls shown in the parameters accordion, listed in the same order
# as the extra parameters of `respond` (system message, max tokens,
# temperature, top-p).
_generation_controls = [
    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    ),
]

# Example prompts offered by the chat interface.
_example_prompts = [
    "Hello! How are you?",
    "Can you help me with something?",
    "Tell me a joke",
]

# The chat UI itself; it is built here and rendered later inside the page layout.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=_generation_controls,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(
        placeholder="Type your message here or use voice input...",
        container=False,
        scale=7,
    ),
    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
    examples=_example_prompts,
    multimodal=True,
)

# Page layout: a sidebar holding the login button and the voice-input controls,
# with the chat interface rendered next to it.
with gr.Blocks() as demo:
    with gr.Sidebar():
        # Login button; `respond` takes a `gr.OAuthToken` parameter
        # (presumably supplied once the user has signed in — confirm against
        # the Gradio OAuth docs).
        gr.LoginButton()
        gr.Markdown("## Voice Input Settings")
        # Microphone-only recorder. type="filepath" is assumed to hand the
        # recording to the handler as a file path — confirm in the gr.Audio docs.
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Record your message",
            interactive=True
        )
        transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
        # Read-only box that shows the transcription result.
        transcribed_text = gr.Textbox(label="Transcribed Text", interactive=False)
        
        # Run `transcribe_audio` on the recording and show its text output.
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=transcribed_text
        )
        
        # Copy the transcribed text into the chat's input box. The handler is an
        # identity function whose only output is the textbox, so this fills the
        # box but does not submit the message.
        use_transcribed_btn = gr.Button("Send to Chat", variant="secondary")
        use_transcribed_btn.click(
            fn=lambda x: x,
            inputs=transcribed_text,
            outputs=chatbot.textbox
        )
    
    # Place the chat interface in this page, outside the sidebar.
    chatbot.render()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()