Spaces:

torusvektor
/

offline-speech-recognition-synthesis

Running

offline-speech-recognition-synthesis / index.html

potrebuji aby se to dalo pouzit offline, abych bud mohl pozadovany jazyk CS-čestina pridat do offline reci v mobilu pri volbe klavesnice a hlasoveho zadavani google offline nebo mel moznost nainstalovat novou klavesnici s podporou offline hlasoveho zadavani s moznosti i českeho jazyka cs-CZ - Initial Deployment

24678c3 verified 4 months ago

raw

history blame contribute delete

18.3 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Offline Speech Recognition & Synthesis</title>
	<script src="https://cdn.tailwindcss.com"></script>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
	<script>
	// Extend the Tailwind CDN build with the page's custom color names
	// (referenced in the markup as bg-primary, hover:bg-secondary, text-dark, ...).
	tailwind.config = {
	  theme: {
	    extend: {
	      colors: { primary: '#3b82f6', secondary: '#1e40af', dark: '#0f172a', light: '#f8fafc' }
	    }
	  }
	}
	</script>
	<style>
	/* Rounded card that displays the recognized text. */
	.speech-bubble {
	position: relative;
	background: #e0f2ff;
	border-radius: 12px;
	padding: 20px;
	margin: 20px 0;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	}
	/* Bubble "tail": a zero-width box whose only colored border is the top one,
	   producing a downward-pointing triangle just below the bubble. */
	.speech-bubble:after {
	content: '';
	position: absolute;
	bottom: -15px;
	left: 50px;
	border-width: 15px 15px 0;
	border-style: solid;
	border-color: #e0f2ff transparent transparent;
	display: block;
	width: 0;
	}
	/* Endlessly repeating ring that grows to 10px while fading out. */
	.pulse {
	animation: pulse 1.5s infinite;
	}
	@keyframes pulse {
	0% {
	box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.4);
	}
	70% {
	box-shadow: 0 0 0 10px rgba(59, 130, 246, 0);
	}
	100% {
	box-shadow: 0 0 0 0 rgba(59, 130, 246, 0);
	}
	}
	/* Microphone button states; mic-active / mic-inactive are toggled by the
	   speech recognition script. */
	.mic-icon {
	transition: all 0.3s ease;
	}
	.mic-active {
	color: #ef4444;
	transform: scale(1.1);
	}
	.mic-inactive {
	color: #94a3b8;
	}
	/* Rows created by addToHistory(); highlighted on hover. */
	.history-item {
	transition: all 0.3s ease;
	}
	.history-item:hover {
	background-color: #f1f5f9;
	}
	/* Slim custom scrollbar for the history list; these pseudo-elements carry
	   the -webkit- prefix, so other engines keep their default scrollbar. */
	.scrollbar-thin::-webkit-scrollbar {
	width: 6px;
	}
	.scrollbar-thin::-webkit-scrollbar-track {
	background: #f1f5f9;
	border-radius: 10px;
	}
	.scrollbar-thin::-webkit-scrollbar-thumb {
	background: #cbd5e1;
	border-radius: 10px;
	}
	.scrollbar-thin::-webkit-scrollbar-thumb:hover {
	background: #94a3b8;
	}
	</style>
	</head>
	<body class="bg-gradient-to-br from-blue-50 to-indigo-100 min-h-screen p-4 md:p-8">
	<div class="max-w-4xl mx-auto">
	<!-- Header -->
	<header class="text-center py-6 mb-8">
	<h1 class="text-3xl md:text-4xl font-bold text-dark">Offline Speech Recognition & Synthesis</h1>
	<p class="text-gray-600 mt-2">Convert speech to text and text to speech directly in your browser</p>
	</header>

	<!-- Main Container -->
	<div class="bg-white rounded-2xl shadow-xl overflow-hidden">
	<!-- Tabs -->
	<div class="flex border-b">
	<button id="speechToTextTab" class="flex-1 py-4 px-6 text-center font-medium bg-primary text-white">
	<i class="fas fa-microphone mr-2"></i>Speech to Text
	</button>
	<button id="textToSpeechTab" class="flex-1 py-4 px-6 text-center font-medium bg-gray-100 text-gray-600 hover:bg-gray-200">
	<i class="fas fa-volume-up mr-2"></i>Text to Speech
	</button>
	</div>

	<!-- Speech to Text Panel -->
	<div id="speechToTextPanel" class="p-6">
	<div class="flex flex-col items-center justify-center py-8">
	<div class="relative">
	<!-- Icon-only toggle button: the aria-label supplies its accessible name and the decorative icon is hidden from assistive technology. -->
	<button id="startRecognition" class="mic-icon pulse bg-primary rounded-full p-6 text-white hover:bg-secondary transition-all" type="button" aria-label="Toggle speech recognition">
	<i class="fas fa-microphone text-4xl" aria-hidden="true"></i>
	</button>
	<div id="listeningIndicator" class="absolute -top-2 -right-2 bg-red-500 text-white text-xs px-2 py-1 rounded-full hidden">
	Listening...
	</div>
	</div>
	<p class="mt-4 text-gray-600 text-center">Click the microphone to start speech recognition</p>

	<div class="w-full mt-8">
	<div class="speech-bubble">
	<p id="recognizedText" class="text-lg text-gray-800 min-h-24">
	Your speech will appear here...
	</p>
	</div>
	</div>

	<div class="w-full mt-6">
	<h3 class="font-semibold text-gray-700 mb-2">Recognition History</h3>
	<div id="historyContainer" class="bg-gray-50 rounded-lg p-4 h-40 overflow-y-auto scrollbar-thin">
	<p class="text-gray-500 text-sm">No history yet...</p>
	</div>
	</div>
	</div>
	</div>

	<!-- Text to Speech Panel -->
	<div id="textToSpeechPanel" class="hidden p-6">
	<div class="flex flex-col md:flex-row gap-6">
	<div class="flex-1">
	<label for="textInput" class="block text-gray-700 font-medium mb-2">Enter text to speak:</label>
	<textarea id="textInput" class="w-full h-40 p-4 border border-gray-300 rounded-lg focus:ring-2 focus:ring-primary focus:border-transparent" placeholder="Type something here...">Hello! This is a demonstration of text to speech conversion. You can type anything here and click the play button to hear it spoken aloud.</textarea>
	<div class="mt-4 flex items-center">
	<label for="voiceSelect" class="mr-2 text-gray-700">Voice:</label>
	<select id="voiceSelect" class="border border-gray-300 rounded p-2 flex-1"></select>
	<button id="playText" class="ml-4 bg-primary hover:bg-secondary text-white px-6 py-2 rounded-lg transition">
	<i class="fas fa-play mr-2"></i>Play
	</button>
	</div>
	</div>
	<div class="flex-1">
	<div class="bg-blue-50 rounded-xl p-6 h-64 flex flex-col justify-center items-center">
	<div class="text-center">
	<i class="fas fa-volume-up text-primary text-5xl mb-4"></i>
	<h3 class="text-xl font-semibold text-gray-800">Text to Speech</h3>
	<p class="text-gray-600 mt-2">Enter text and click play to hear it spoken</p>
	</div>
	</div>
	</div>
	</div>
	</div>
	</div>

	<!-- Info Section -->
	<div class="mt-8 bg-white rounded-2xl shadow-lg p-6">
	<h2 class="text-xl font-bold text-dark mb-4">How It Works</h2>
	<div class="grid grid-cols-1 md:grid-cols-3 gap-4">
	<div class="bg-blue-50 p-4 rounded-lg">
	<div class="text-primary text-2xl mb-2"><i class="fas fa-microphone"></i></div>
	<h3 class="font-semibold text-gray-800">Speech Recognition</h3>
	<p class="text-gray-600 text-sm mt-1">Speak into your microphone and see your words appear in real-time. For offline Czech support, ensure you have Czech language pack installed in your device settings.</p>
	</div>
	<div class="bg-indigo-50 p-4 rounded-lg">
	<div class="text-primary text-2xl mb-2"><i class="fas fa-volume-up"></i></div>
	<h3 class="font-semibold text-gray-800">Text to Speech</h3>
	<p class="text-gray-600 text-sm mt-1">Convert any text to speech with natural sounding voices. Czech voice support depends on your system's available voices.</p>
	</div>
	<div class="bg-blue-50 p-4 rounded-lg">
	<div class="text-primary text-2xl mb-2"><i class="fas fa-shield-alt"></i></div>
	<h3 class="font-semibold text-gray-800">Privacy Focused</h3>
	<p class="text-gray-600 text-sm mt-1">All processing happens locally in your browser - no data leaves your device. Works offline when language packs are installed.</p>
	</div>
	</div>
	</div>

	<!-- Footer -->
	<footer class="mt-8 text-center text-gray-600 text-sm">
	<p>Offline Speech Recognition & Synthesis | Works directly in your browser</p>
	<p class="mt-2 text-xs">For Czech language offline support: Install Czech language pack in your device settings or use a keyboard app with offline voice input capabilities.</p>
	</footer>
	</div>

	<script>
	document.addEventListener('DOMContentLoaded', function() {
	// Tab switching
	const speechToTextTab = document.getElementById('speechToTextTab');
	const textToSpeechTab = document.getElementById('textToSpeechTab');
	const speechToTextPanel = document.getElementById('speechToTextPanel');
	const textToSpeechPanel = document.getElementById('textToSpeechPanel');

	// Utility classes that distinguish the selected tab from the unselected one.
	// The hover class belongs to the inactive look only; leaving it on the
	// active tab would turn its background light gray under white text.
	const ACTIVE_TAB_CLASSES = ['bg-primary', 'text-white'];
	const INACTIVE_TAB_CLASSES = ['bg-gray-100', 'text-gray-600', 'hover:bg-gray-200'];

	/**
	 * Selects one tab/panel pair and deselects the other.
	 *
	 * @param {Element} activeTab     Tab button to highlight.
	 * @param {Element} activePanel   Panel to reveal.
	 * @param {Element} inactiveTab   Tab button to reset to the inactive look.
	 * @param {Element} inactivePanel Panel to hide.
	 */
	function showTab(activeTab, activePanel, inactiveTab, inactivePanel) {
	  activeTab.classList.remove(...INACTIVE_TAB_CLASSES);
	  activeTab.classList.add(...ACTIVE_TAB_CLASSES);
	  // The classes must come off the *other* tab; removing them from the tab
	  // that was just activated leaves both tabs looking inactive.
	  inactiveTab.classList.remove(...ACTIVE_TAB_CLASSES);
	  inactiveTab.classList.add(...INACTIVE_TAB_CLASSES);
	  activePanel.classList.remove('hidden');
	  inactivePanel.classList.add('hidden');
	}

	speechToTextTab.addEventListener('click', () => {
	  showTab(speechToTextTab, speechToTextPanel, textToSpeechTab, textToSpeechPanel);
	});

	textToSpeechTab.addEventListener('click', () => {
	  showTab(textToSpeechTab, textToSpeechPanel, speechToTextTab, speechToTextPanel);
	});

	// Speech Recognition
	const startRecognition = document.getElementById('startRecognition');
	const recognizedText = document.getElementById('recognizedText');
	const listeningIndicator = document.getElementById('listeningIndicator');
	const historyContainer = document.getElementById('historyContainer');

	let recognition;
	let isListening = false;

	// The constructor is looked up under both its standard and its
	// webkit-prefixed name; the result is undefined when neither exists.
	const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;

	if (SpeechRecognitionCtor) {
	  // Initialize speech recognition
	  recognition = new SpeechRecognitionCtor();
	  recognition.continuous = true;
	  recognition.interimResults = true;
	  recognition.lang = 'cs-CZ'; // Set default language to Czech

	  // Session started: switch the UI into its "listening" state.
	  recognition.onstart = function() {
	    isListening = true;
	    startRecognition.classList.add('mic-active');
	    startRecognition.classList.remove('mic-inactive');
	    listeningIndicator.classList.remove('hidden');
	    recognizedText.textContent = "Listening...";
	  };

	  // Split the results delivered by this event into finalized and interim
	  // text, show whichever is available, and log finalized text to history.
	  recognition.onresult = function(event) {
	    let interimTranscript = '';
	    let finalTranscript = '';

	    // Only results from event.resultIndex onward are read here.
	    for (let i = event.resultIndex; i < event.results.length; i++) {
	      const transcript = event.results[i][0].transcript;
	      if (event.results[i].isFinal) {
	        finalTranscript += transcript + ' ';
	      } else {
	        interimTranscript += transcript;
	      }
	    }

	    recognizedText.textContent = finalTranscript || interimTranscript || "Listening...";

	    if (finalTranscript) {
	      // Add to history (without the trailing separator space)
	      addToHistory(finalTranscript.trim());
	    }
	  };

	  // Surface the error code to the user and reset the microphone UI.
	  recognition.onerror = function(event) {
	    console.error('Speech recognition error', event.error);
	    recognizedText.textContent = "Error: " + event.error;
	    stopListening();
	  };

	  // Session finished (stopped by the user or by the engine): reset the UI.
	  recognition.onend = function() {
	    isListening = false;
	    startRecognition.classList.remove('mic-active');
	    startRecognition.classList.add('mic-inactive');
	    listeningIndicator.classList.add('hidden');
	  };

	  // The microphone button toggles between starting and stopping a session.
	  startRecognition.addEventListener('click', function() {
	    if (isListening) {
	      recognition.stop();
	    } else {
	      try {
	        recognition.start();
	      } catch (e) {
	        console.error('Recognition error:', e);
	        recognizedText.textContent = "Error starting recognition: " + e.message;
	      }
	    }
	  });
	} else {
	  // Speech recognition not supported
	  recognizedText.textContent = "Speech recognition is not supported in this browser.";
	  startRecognition.disabled = true;
	  // Remove the active styling first; otherwise bg-gray-400 competes with
	  // bg-primary and the disabled button keeps pulsing.
	  startRecognition.classList.remove('bg-primary', 'hover:bg-secondary', 'pulse');
	  startRecognition.classList.add('bg-gray-400');
	}

	/**
	 * Puts the microphone UI back into its idle state: swaps the button's
	 * active class for the inactive one, hides the "Listening..." badge and
	 * clears the listening flag.
	 */
	function stopListening() {
	  const micClasses = startRecognition.classList;
	  micClasses.remove('mic-active');
	  micClasses.add('mic-inactive');
	  listeningIndicator.classList.add('hidden');
	  isListening = false;
	}

	/**
	 * Prepends an entry to the recognition history list, stamped with the
	 * current local time, and trims the list to the five newest entries.
	 *
	 * The entry is built from DOM nodes and the text is assigned through
	 * textContent, so markup characters in a transcript are shown literally
	 * rather than parsed as HTML.
	 *
	 * @param {string} text Phrase to record.
	 */
	function addToHistory(text) {
	  const MAX_HISTORY_ITEMS = 5;

	  // The static "No history yet..." placeholder is the only <p> that is a
	  // direct child of the container; drop it so it neither lingers next to
	  // real entries nor counts toward the item limit.
	  const placeholder = historyContainer.querySelector(':scope > p');
	  if (placeholder) {
	    placeholder.remove();
	  }

	  const historyItem = document.createElement('div');
	  historyItem.className = 'history-item p-3 mb-2 bg-white rounded-lg shadow-sm';

	  const row = document.createElement('div');
	  row.className = 'flex items-start';

	  const icon = document.createElement('i');
	  icon.className = 'fas fa-comment text-primary mt-1 mr-2';

	  const body = document.createElement('div');

	  const phrase = document.createElement('p');
	  phrase.className = 'text-gray-800';
	  phrase.textContent = text;

	  const timestamp = document.createElement('p');
	  timestamp.className = 'text-xs text-gray-500 mt-1';
	  timestamp.textContent = new Date().toLocaleTimeString();

	  body.append(phrase, timestamp);
	  row.append(icon, body);
	  historyItem.appendChild(row);
	  historyContainer.prepend(historyItem);

	  // Keep only the newest entries. lastElementChild is used because
	  // lastChild may be a whitespace text node, which would be removed
	  // instead of an actual entry.
	  while (historyContainer.children.length > MAX_HISTORY_ITEMS) {
	    historyContainer.removeChild(historyContainer.lastElementChild);
	  }
	}

	// Text to Speech
	const textInput = document.getElementById('textInput');
	const voiceSelect = document.getElementById('voiceSelect');
	const playText = document.getElementById('playText');

	let voices = [];

	// Without this check, a browser lacking speechSynthesis would throw on the
	// first getVoices() call and abort the rest of this DOMContentLoaded handler.
	const speechSynthesisSupported = 'speechSynthesis' in window;

	/**
	 * Rebuilds the voice dropdown from the voices the browser currently reports
	 * and stores them in `voices`, in the same order as the <option> elements.
	 * A previously selected voice stays selected when it is still available;
	 * otherwise the first entry is selected.
	 */
	function populateVoiceList() {
	  if (!speechSynthesisSupported) {
	    voices = [];
	    return;
	  }

	  // Remember the current choice before the options are thrown away, so a
	  // later voiceschanged event does not reset the user's selection.
	  const previousOption = voiceSelect.options[voiceSelect.selectedIndex];
	  const previousName = previousOption ? previousOption.getAttribute('data-name') : null;

	  voices = window.speechSynthesis.getVoices();
	  voiceSelect.innerHTML = '';

	  voices.forEach((voice) => {
	    const option = document.createElement('option');
	    option.textContent = voice.name + ' (' + voice.lang + ')';
	    option.setAttribute('data-lang', voice.lang);
	    option.setAttribute('data-name', voice.name);
	    voiceSelect.appendChild(option);
	  });

	  // Restore the earlier selection if possible, else select a default voice
	  const restoredIndex = voices.findIndex((voice) => voice.name === previousName);
	  voiceSelect.selectedIndex = restoredIndex >= 0 ? restoredIndex : 0;
	}

	populateVoiceList();
	if (speechSynthesisSupported) {
	  // Repopulate whenever the browser signals that its voice list changed.
	  if (window.speechSynthesis.onvoiceschanged !== undefined) {
	    window.speechSynthesis.onvoiceschanged = populateVoiceList;
	  }
	} else {
	  // Nothing can be spoken: make the controls visibly unavailable.
	  voiceSelect.disabled = true;
	  playText.disabled = true;
	}

	// Speak the textarea contents using the voice currently chosen in the
	// dropdown; an empty textarea is ignored.
	playText.addEventListener('click', function() {
	  const phrase = textInput.value;
	  if (!phrase) {
	    return;
	  }

	  const speechRequest = new SpeechSynthesisUtterance(phrase);
	  speechRequest.voice = voices[voiceSelect.selectedIndex];
	  window.speechSynthesis.speak(speechRequest);
	});

	// Seed the history list with two demo entries one second after the page
	// is ready; they are added in array order, so the second ends up on top.
	const sampleHistory = [
	  "This is a sample history item",
	  "Another example of speech recognition"
	];
	setTimeout(() => {
	  sampleHistory.forEach((entry) => addToHistory(entry));
	}, 1000);
	});
	</script>
	<!-- DeepSite attribution badge. Links that open a new tab carry rel="noopener noreferrer" so the opened page gets no window.opener handle. -->
	<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=torusvektor/offline-speech-recognition-synthesis" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p></body>
	</html>