Spaces:

torusvektor
/

offline-speech-recognition-synthesis

Running

File size: 18,341 Bytes

24678c3

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Offline Speech Recognition & Synthesis</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
    <script>
        tailwind.config = {
            theme: {
                extend: {
                    colors: {
                        primary: '#3b82f6',
                        secondary: '#1e40af',
                        dark: '#0f172a',
                        light: '#f8fafc'
                    }
                }
            }
        }
    </script>
    <style>
        .speech-bubble {
            position: relative;
            background: #e0f2ff;
            border-radius: 12px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        /* Tail of the speech bubble: a zero-width box whose only coloured border is the top one,
           which renders as a downward-pointing triangle hanging below the bubble. */
        .speech-bubble:after {
            content: '';
            position: absolute;
            bottom: -15px; /* same magnitude as the 15px top border below */
            left: 50px;
            border-width: 15px 15px 0; /* top and sides only; no bottom border */
            border-style: solid;
            border-color: #e0f2ff transparent transparent; /* top matches the .speech-bubble background */
            display: block;
            width: 0;
        }
        /* Repeating halo effect: runs the "pulse" keyframes below every 1.5s, forever. */
        .pulse {
            animation: pulse 1.5s infinite;
        }
        /* The halo is a box-shadow in rgb(59, 130, 246) (the same value as the #3b82f6 primary
           colour) that spreads from 0 to 10px while fading to fully transparent, then resets. */
        @keyframes pulse {
            0% {
                box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.4);
            }
            70% {
                box-shadow: 0 0 0 10px rgba(59, 130, 246, 0);
            }
            100% {
                box-shadow: 0 0 0 0 rgba(59, 130, 246, 0);
            }
        }
        .mic-icon {
            transition: all 0.3s ease;
        }
        .mic-active {
            color: #ef4444;
            transform: scale(1.1);
        }
        .mic-inactive {
            color: #94a3b8;
        }
        .history-item {
            transition: all 0.3s ease;
        }
        .history-item:hover {
            background-color: #f1f5f9;
        }
        .scrollbar-thin::-webkit-scrollbar {
            width: 6px;
        }
        .scrollbar-thin::-webkit-scrollbar-track {
            background: #f1f5f9;
            border-radius: 10px;
        }
        .scrollbar-thin::-webkit-scrollbar-thumb {
            background: #cbd5e1;
            border-radius: 10px;
        }
        .scrollbar-thin::-webkit-scrollbar-thumb:hover {
            background: #94a3b8;
        }
    </style>
</head>
<body class="bg-gradient-to-br from-blue-50 to-indigo-100 min-h-screen p-4 md:p-8">
    <div class="max-w-4xl mx-auto">
        <!-- Header -->
        <header class="text-center py-6 mb-8">
            <h1 class="text-3xl md:text-4xl font-bold text-dark">Offline Speech Recognition & Synthesis</h1>
            <p class="text-gray-600 mt-2">Convert speech to text and text to speech directly in your browser</p>
        </header>

        <!-- Main Container -->
        <div class="bg-white rounded-2xl shadow-xl overflow-hidden">
            <!-- Tabs -->
            <div class="flex border-b">
                <button id="speechToTextTab" class="flex-1 py-4 px-6 text-center font-medium bg-primary text-white">
                    <i class="fas fa-microphone mr-2"></i>Speech to Text
                </button>
                <button id="textToSpeechTab" class="flex-1 py-4 px-6 text-center font-medium bg-gray-100 text-gray-600 hover:bg-gray-200">
                    <i class="fas fa-volume-up mr-2"></i>Text to Speech
                </button>
            </div>

            <!-- Speech to Text Panel -->
            <div id="speechToTextPanel" class="p-6">
                <div class="flex flex-col items-center justify-center py-8">
                    <div class="relative">
                        <!-- Microphone toggle. The button contains only an icon, so aria-label supplies
                             its accessible name and the icon itself is hidden from assistive technology. -->
                        <button id="startRecognition" type="button" class="mic-icon pulse bg-primary rounded-full p-6 text-white hover:bg-secondary transition-all" aria-label="Start or stop speech recognition">
                            <i class="fas fa-microphone text-4xl" aria-hidden="true"></i>
                        </button>
                        <div id="listeningIndicator" class="absolute -top-2 -right-2 bg-red-500 text-white text-xs px-2 py-1 rounded-full hidden">
                            Listening...
                        </div>
                    </div>
                    <p class="mt-4 text-gray-600 text-center">Click the microphone to start speech recognition</p>
                    
                    <div class="w-full mt-8">
                        <div class="speech-bubble">
                            <p id="recognizedText" class="text-lg text-gray-800 min-h-24">
                                Your speech will appear here...
                            </p>
                        </div>
                    </div>
                    
                    <div class="w-full mt-6">
                        <h3 class="font-semibold text-gray-700 mb-2">Recognition History</h3>
                        <div id="historyContainer" class="bg-gray-50 rounded-lg p-4 h-40 overflow-y-auto scrollbar-thin">
                            <p class="text-gray-500 text-sm">No history yet...</p>
                        </div>
                    </div>
                </div>
            </div>

            <!-- Text to Speech Panel -->
            <div id="textToSpeechPanel" class="hidden p-6">
                <div class="flex flex-col md:flex-row gap-6">
                    <div class="flex-1">
                        <label for="textInput" class="block text-gray-700 font-medium mb-2">Enter text to speak:</label>
                        <textarea id="textInput" class="w-full h-40 p-4 border border-gray-300 rounded-lg focus:ring-2 focus:ring-primary focus:border-transparent" placeholder="Type something here...">Hello! This is a demonstration of text to speech conversion. You can type anything here and click the play button to hear it spoken aloud.</textarea>
                        <div class="mt-4 flex items-center">
                            <label for="voiceSelect" class="mr-2 text-gray-700">Voice:</label>
                            <select id="voiceSelect" class="border border-gray-300 rounded p-2 flex-1"></select>
                            <button id="playText" class="ml-4 bg-primary hover:bg-secondary text-white px-6 py-2 rounded-lg transition">
                                <i class="fas fa-play mr-2"></i>Play
                            </button>
                        </div>
                    </div>
                    <div class="flex-1">
                        <div class="bg-blue-50 rounded-xl p-6 h-64 flex flex-col justify-center items-center">
                            <div class="text-center">
                                <i class="fas fa-volume-up text-primary text-5xl mb-4"></i>
                                <h3 class="text-xl font-semibold text-gray-800">Text to Speech</h3>
                                <p class="text-gray-600 mt-2">Enter text and click play to hear it spoken</p>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <!-- Info Section -->
        <div class="mt-8 bg-white rounded-2xl shadow-lg p-6">
            <h2 class="text-xl font-bold text-dark mb-4">How It Works</h2>
            <div class="grid grid-cols-1 md:grid-cols-3 gap-4">
                <div class="bg-blue-50 p-4 rounded-lg">
                    <div class="text-primary text-2xl mb-2"><i class="fas fa-microphone"></i></div>
                    <h3 class="font-semibold text-gray-800">Speech Recognition</h3>
                    <p class="text-gray-600 text-sm mt-1">Speak into your microphone and see your words appear in real time. For offline Czech support, ensure you have the Czech language pack installed in your device settings.</p>
                </div>
                <div class="bg-indigo-50 p-4 rounded-lg">
                    <div class="text-primary text-2xl mb-2"><i class="fas fa-volume-up"></i></div>
                    <h3 class="font-semibold text-gray-800">Text to Speech</h3>
                    <p class="text-gray-600 text-sm mt-1">Convert any text to speech with natural sounding voices. Czech voice support depends on your system's available voices.</p>
                </div>
                <div class="bg-blue-50 p-4 rounded-lg">
                    <div class="text-primary text-2xl mb-2"><i class="fas fa-shield-alt"></i></div>
                    <h3 class="font-semibold text-gray-800">Privacy Focused</h3>
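                    <p class="text-gray-600 text-sm mt-1">Speech is processed by your browser's built-in speech engines, not by this page. Depending on the browser, audio may be sent to an online recognition service unless offline language packs are installed. Works offline when language packs are installed and your browser supports on-device recognition.</p>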
                </div>
            </div>
        </div>

        <!-- Footer -->
        <footer class="mt-8 text-center text-gray-600 text-sm">
            <p>Offline Speech Recognition & Synthesis | Works directly in your browser</p>
            <p class="mt-2 text-xs">For offline Czech language support, install the Czech language pack in your device settings or use a keyboard app with offline voice input capabilities.</p>
        </footer>
    </div>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // Tab switching
            const speechToTextTab = document.getElementById('speechToTextTab');
            const textToSpeechTab = document.getElementById('textToSpeechTab');
            const speechToTextPanel = document.getElementById('speechToTextPanel');
            const textToSpeechPanel = document.getElementById('textToSpeechPanel');
            
            // Activate the "Speech to Text" tab: give it the highlighted colours, return the
            // "Text to Speech" tab to its idle colours, and show only the speech-to-text panel.
            speechToTextTab.addEventListener('click', function() {
                const activeTabClasses = speechToTextTab.classList;
                const idleTabClasses = textToSpeechTab.classList;

                activeTabClasses.remove('bg-gray-100', 'text-gray-600');
                activeTabClasses.add('bg-primary', 'text-white');

                idleTabClasses.remove('bg-primary', 'text-white');
                idleTabClasses.add('bg-gray-100', 'text-gray-600');

                textToSpeechPanel.classList.add('hidden');
                speechToTextPanel.classList.remove('hidden');
            });
            
            // Activate the "Text to Speech" tab: give it the highlighted colours, return the
            // "Speech to Text" tab to its idle colours, and show only the text-to-speech panel.
            textToSpeechTab.addEventListener('click', () => {
                textToSpeechTab.classList.add('bg-primary', 'text-white');
                textToSpeechTab.classList.remove('bg-gray-100', 'text-gray-600');
                speechToTextTab.classList.add('bg-gray-100', 'text-gray-600');
                // The highlight has to be stripped from the *other* tab. Removing it from
                // textToSpeechTab here would undo the add() above and leave speechToTextTab
                // carrying both the active and the idle colour classes.
                speechToTextTab.classList.remove('bg-primary', 'text-white');
                textToSpeechPanel.classList.remove('hidden');
                speechToTextPanel.classList.add('hidden');
            });
            
            // Speech Recognition
            const startRecognition = document.getElementById('startRecognition');
            const recognizedText = document.getElementById('recognizedText');
            const listeningIndicator = document.getElementById('listeningIndicator');
            const historyContainer = document.getElementById('historyContainer');
            
            let recognition;
            let isListening = false;
            
            // Check if SpeechRecognition is available
            if ('webkitSpeechRecognition' in window || 'SpeechRecognition' in window) {
                // Build the recognizer from whichever constructor this browser exposes
                // (standard name first, then the webkit-prefixed one).
                const RecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
                recognition = new RecognitionCtor();
                Object.assign(recognition, {
                    continuous: true,
                    interimResults: true,
                    lang: 'cs-CZ' // Default recognition language is Czech
                });
                
                // Recognition has started: show the "Listening..." prompt and badge, switch the
                // mic button to its active look, and record the listening state.
                recognition.onstart = function() {
                    recognizedText.textContent = "Listening...";
                    listeningIndicator.classList.remove('hidden');

                    const micClasses = startRecognition.classList;
                    micClasses.remove('mic-inactive');
                    micClasses.add('mic-active');

                    isListening = true;
                };
                
                // Handle a result event: gather the final and interim transcripts it carries,
                // display them, and record any final text in the history list.
                recognition.onresult = function(event) {
                    const collected = { final: '', interim: '' };

                    // Walk the results starting at event.resultIndex
                    for (let idx = event.resultIndex; idx < event.results.length; idx += 1) {
                        const result = event.results[idx];
                        const spoken = result[0].transcript;
                        if (result.isFinal) {
                            collected.final += spoken + ' ';
                        } else {
                            collected.interim += spoken;
                        }
                    }

                    // Prefer final text, then interim text, then the idle prompt
                    recognizedText.textContent = collected.final || collected.interim || "Listening...";

                    if (collected.final) {
                        addToHistory(collected.final);
                    }
                };
                
                // Report a recognition error in the console and in the transcript area, then
                // reset the microphone UI to its idle state.
                recognition.onerror = function(event) {
                    const reason = event.error;
                    console.error('Speech recognition error', reason);
                    recognizedText.textContent = `Error: ${reason}`;
                    stopListening();
                };
                
                // Recognition has ended: reset the listening flag and the microphone UI.
                // stopListening() performs exactly these resets.
                recognition.onend = function() {
                    stopListening();
                };
                
                // Microphone button: stop recognition if it is running, otherwise try to start it.
                startRecognition.addEventListener('click', function() {
                    if (isListening) {
                        recognition.stop();
                        return;
                    }

                    try {
                        recognition.start();
                    } catch (startError) {
                        // Surface a failed start both in the console and to the user
                        console.error('Recognition error:', startError);
                        recognizedText.textContent = "Error starting recognition: " + startError.message;
                    }
                });
            } else {
                // Speech recognition not supported: tell the user and make the mic button inert.
                recognizedText.textContent = "Speech recognition is not supported in this browser.";
                startRecognition.disabled = true;
                // Strip the active-state utilities first. Left in place, bg-primary competes with
                // bg-gray-400 and the pulse animation keeps inviting clicks on a disabled button.
                startRecognition.classList.remove('bg-primary', 'hover:bg-secondary', 'pulse');
                startRecognition.classList.add('bg-gray-400', 'cursor-not-allowed');
            }
            
            // Reset the listening flag and return the microphone UI to its idle look:
            // inactive mic styling and a hidden "Listening..." badge.
            function stopListening() {
                isListening = false;

                const micClasses = startRecognition.classList;
                micClasses.toggle('mic-active', false);
                micClasses.toggle('mic-inactive', true);

                listeningIndicator.classList.toggle('hidden', true);
            }
            
            // Prepends a recognized phrase, stamped with the current local time, to the history
            // list and trims the list to the five most recent entries.
            //
            // text: the phrase to record; blank or empty input is ignored.
            function addToHistory(text) {
                const MAX_HISTORY_ITEMS = 5;

                const phrase = String(text == null ? '' : text).trim();
                if (!phrase) {
                    return;
                }

                // Remove the static "No history yet..." placeholder. It is the only <p> that is a
                // direct child of the container; history entries are <div> elements.
                Array.from(historyContainer.children).forEach((child) => {
                    if (child.tagName === 'P') {
                        child.remove();
                    }
                });

                const historyItem = document.createElement('div');
                historyItem.className = 'history-item p-3 mb-2 bg-white rounded-lg shadow-sm';

                const row = document.createElement('div');
                row.className = 'flex items-start';

                const icon = document.createElement('i');
                icon.className = 'fas fa-comment text-primary mt-1 mr-2';
                icon.setAttribute('aria-hidden', 'true');

                // textContent (not innerHTML) so characters such as "<" or "&" in a transcript are
                // shown literally instead of being parsed as markup.
                const phraseLine = document.createElement('p');
                phraseLine.className = 'text-gray-800';
                phraseLine.textContent = phrase;

                const timeLine = document.createElement('p');
                timeLine.className = 'text-xs text-gray-500 mt-1';
                timeLine.textContent = new Date().toLocaleTimeString();

                const details = document.createElement('div');
                details.append(phraseLine, timeLine);
                row.append(icon, details);
                historyItem.appendChild(row);
                historyContainer.prepend(historyItem);

                // Trim by element, not by node: lastChild may be a whitespace text node, in which
                // case removing it would leave every entry in place and the list would keep growing.
                while (historyContainer.children.length > MAX_HISTORY_ITEMS) {
                    historyContainer.lastElementChild.remove();
                }
            }
            
            // Text to Speech
            const textInput = document.getElementById('textInput');
            const voiceSelect = document.getElementById('voiceSelect');
            const playText = document.getElementById('playText');
            
            let voices = [];
            
            // Rebuilds the voice <select> from the voices the browser currently reports.
            // Option order mirrors the `voices` array, so voices[voiceSelect.selectedIndex] is
            // always the voice the user picked.
            function populateVoiceList() {
                // Remember the current choice. This function runs again whenever it is installed
                // as the voiceschanged handler, and a rebuild must not silently reset the user's
                // selection to the first voice.
                const previousOption = voiceSelect.options[voiceSelect.selectedIndex];
                const previousName = previousOption ? previousOption.getAttribute('data-name') : null;

                voices = window.speechSynthesis.getVoices();
                voiceSelect.innerHTML = '';

                voices.forEach((voice) => {
                    const option = document.createElement('option');
                    option.textContent = voice.name + ' (' + voice.lang + ')';
                    option.setAttribute('data-lang', voice.lang);
                    option.setAttribute('data-name', voice.name);
                    voiceSelect.appendChild(option);
                });

                // Restore the previous voice when it is still available; otherwise fall back to
                // the first entry, as before.
                const restoredIndex = voices.findIndex((voice) => voice.name === previousName);
                voiceSelect.selectedIndex = restoredIndex >= 0 ? restoredIndex : 0;
            }
            
            // Speech synthesis is optional. Without this guard, a browser lacking the API would
            // throw on the first window.speechSynthesis access and abort the rest of this
            // initializer.
            if ('speechSynthesis' in window) {
                populateVoiceList();
                // The voice list may be filled in later; when the browser exposes the
                // voiceschanged hook, rebuild the list each time it fires.
                if (window.speechSynthesis.onvoiceschanged !== undefined) {
                    window.speechSynthesis.onvoiceschanged = populateVoiceList;
                }
            } else {
                // Nothing can be spoken, so make the controls inert rather than failing on click
                voiceSelect.disabled = true;
                playText.disabled = true;
            }
            
            // Play button: speak the textarea contents with the currently selected voice.
            playText.addEventListener('click', function() {
                const toSpeak = textInput.value;
                if (!toSpeak) {
                    // Nothing typed, nothing to say
                    return;
                }

                const spokenRequest = new SpeechSynthesisUtterance(toSpeak);
                spokenRequest.voice = voices[voiceSelect.selectedIndex];
                window.speechSynthesis.speak(spokenRequest);
            });
            
            // Seed the history list with two demo entries one second after the page is ready
            setTimeout(function() {
                const samplePhrases = [
                    "This is a sample history item",
                    "Another example of speech recognition"
                ];
                samplePhrases.forEach((phrase) => addToHistory(phrase));
            }, 1000);
        });
    </script>
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=torusvektor/offline-speech-recognition-synthesis" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
</html>