torusvektor committed on
Commit
24678c3
·
verified ·
1 Parent(s): 0253f5f

Potřebuji, aby se to dalo použít offline, abych buď mohl požadovaný jazyk CS-čeština přidat do offline řeči v mobilu při volbě klávesnice a hlasového zadávání Google offline, nebo měl možnost nainstalovat novou klávesnici s podporou offline hlasového zadávání s možností i českého jazyka cs-CZ - Initial Deployment

Browse files
Files changed (2) hide show
  1. README.md +7 -5
  2. index.html +372 -19
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Offline Speech Recognition Synthesis
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: purple
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: offline-speech-recognition-synthesis
3
+ emoji: 🐳
4
+ colorFrom: pink
5
+ colorTo: yellow
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,372 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Offline Speech Recognition & Synthesis</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <script>
10
+ tailwind.config = {
11
+ theme: {
12
+ extend: {
13
+ colors: {
14
+ primary: '#3b82f6',
15
+ secondary: '#1e40af',
16
+ dark: '#0f172a',
17
+ light: '#f8fafc'
18
+ }
19
+ }
20
+ }
21
+ }
22
+ </script>
23
+ <style>
24
+ .speech-bubble {
25
+ position: relative;
26
+ background: #e0f2ff;
27
+ border-radius: 12px;
28
+ padding: 20px;
29
+ margin: 20px 0;
30
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
31
+ }
32
+ .speech-bubble:after {
33
+ content: '';
34
+ position: absolute;
35
+ bottom: -15px;
36
+ left: 50px;
37
+ border-width: 15px 15px 0;
38
+ border-style: solid;
39
+ border-color: #e0f2ff transparent transparent;
40
+ display: block;
41
+ width: 0;
42
+ }
43
+ .pulse {
44
+ animation: pulse 1.5s infinite;
45
+ }
46
+ @keyframes pulse {
47
+ 0% {
48
+ box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.4);
49
+ }
50
+ 70% {
51
+ box-shadow: 0 0 0 10px rgba(59, 130, 246, 0);
52
+ }
53
+ 100% {
54
+ box-shadow: 0 0 0 0 rgba(59, 130, 246, 0);
55
+ }
56
+ }
57
+ .mic-icon {
58
+ transition: all 0.3s ease;
59
+ }
60
+ .mic-active {
61
+ color: #ef4444;
62
+ transform: scale(1.1);
63
+ }
64
+ .mic-inactive {
65
+ color: #94a3b8;
66
+ }
67
+ .history-item {
68
+ transition: all 0.3s ease;
69
+ }
70
+ .history-item:hover {
71
+ background-color: #f1f5f9;
72
+ }
73
+ .scrollbar-thin::-webkit-scrollbar {
74
+ width: 6px;
75
+ }
76
+ .scrollbar-thin::-webkit-scrollbar-track {
77
+ background: #f1f5f9;
78
+ border-radius: 10px;
79
+ }
80
+ .scrollbar-thin::-webkit-scrollbar-thumb {
81
+ background: #cbd5e1;
82
+ border-radius: 10px;
83
+ }
84
+ .scrollbar-thin::-webkit-scrollbar-thumb:hover {
85
+ background: #94a3b8;
86
+ }
87
+ </style>
88
+ </head>
89
+ <body class="bg-gradient-to-br from-blue-50 to-indigo-100 min-h-screen p-4 md:p-8">
90
+ <div class="max-w-4xl mx-auto">
91
+ <!-- Header -->
92
+ <header class="text-center py-6 mb-8">
93
+ <h1 class="text-3xl md:text-4xl font-bold text-dark">Offline Speech Recognition & Synthesis</h1>
94
+ <p class="text-gray-600 mt-2">Convert speech to text and text to speech directly in your browser</p>
95
+ </header>
96
+
97
+ <!-- Main Container -->
98
+ <div class="bg-white rounded-2xl shadow-xl overflow-hidden">
99
+ <!-- Tabs -->
100
+ <div class="flex border-b">
101
+ <button id="speechToTextTab" class="flex-1 py-4 px-6 text-center font-medium bg-primary text-white">
102
+ <i class="fas fa-microphone mr-2"></i>Speech to Text
103
+ </button>
104
+ <button id="textToSpeechTab" class="flex-1 py-4 px-6 text-center font-medium bg-gray-100 text-gray-600 hover:bg-gray-200">
105
+ <i class="fas fa-volume-up mr-2"></i>Text to Speech
106
+ </button>
107
+ </div>
108
+
109
+ <!-- Speech to Text Panel -->
110
+ <div id="speechToTextPanel" class="p-6">
111
+ <div class="flex flex-col items-center justify-center py-8">
112
+ <div class="relative">
113
+ <button id="startRecognition" class="mic-icon pulse bg-primary rounded-full p-6 text-white hover:bg-secondary transition-all">
114
+ <i class="fas fa-microphone text-4xl"></i>
115
+ </button>
116
+ <div id="listeningIndicator" class="absolute -top-2 -right-2 bg-red-500 text-white text-xs px-2 py-1 rounded-full hidden">
117
+ Listening...
118
+ </div>
119
+ </div>
120
+ <p class="mt-4 text-gray-600 text-center">Click the microphone to start speech recognition</p>
121
+
122
+ <div class="w-full mt-8">
123
+ <div class="speech-bubble">
124
+ <p id="recognizedText" class="text-lg text-gray-800 min-h-24">
125
+ Your speech will appear here...
126
+ </p>
127
+ </div>
128
+ </div>
129
+
130
+ <div class="w-full mt-6">
131
+ <h3 class="font-semibold text-gray-700 mb-2">Recognition History</h3>
132
+ <div id="historyContainer" class="bg-gray-50 rounded-lg p-4 h-40 overflow-y-auto scrollbar-thin">
133
+ <p class="text-gray-500 text-sm">No history yet...</p>
134
+ </div>
135
+ </div>
136
+ </div>
137
+ </div>
138
+
139
+ <!-- Text to Speech Panel -->
140
+ <div id="textToSpeechPanel" class="hidden p-6">
141
+ <div class="flex flex-col md:flex-row gap-6">
142
+ <div class="flex-1">
143
+ <label for="textInput" class="block text-gray-700 font-medium mb-2">Enter text to speak:</label>
144
+ <textarea id="textInput" class="w-full h-40 p-4 border border-gray-300 rounded-lg focus:ring-2 focus:ring-primary focus:border-transparent" placeholder="Type something here...">Hello! This is a demonstration of text to speech conversion. You can type anything here and click the play button to hear it spoken aloud.</textarea>
145
+ <div class="mt-4 flex items-center">
146
+ <label for="voiceSelect" class="mr-2 text-gray-700">Voice:</label>
147
+ <select id="voiceSelect" class="border border-gray-300 rounded p-2 flex-1"></select>
148
+ <button id="playText" class="ml-4 bg-primary hover:bg-secondary text-white px-6 py-2 rounded-lg transition">
149
+ <i class="fas fa-play mr-2"></i>Play
150
+ </button>
151
+ </div>
152
+ </div>
153
+ <div class="flex-1">
154
+ <div class="bg-blue-50 rounded-xl p-6 h-64 flex flex-col justify-center items-center">
155
+ <div class="text-center">
156
+ <i class="fas fa-volume-up text-primary text-5xl mb-4"></i>
157
+ <h3 class="text-xl font-semibold text-gray-800">Text to Speech</h3>
158
+ <p class="text-gray-600 mt-2">Enter text and click play to hear it spoken</p>
159
+ </div>
160
+ </div>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ </div>
165
+
166
+ <!-- Info Section -->
167
+ <div class="mt-8 bg-white rounded-2xl shadow-lg p-6">
168
+ <h2 class="text-xl font-bold text-dark mb-4">How It Works</h2>
169
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-4">
170
+ <div class="bg-blue-50 p-4 rounded-lg">
171
+ <div class="text-primary text-2xl mb-2"><i class="fas fa-microphone"></i></div>
172
+ <h3 class="font-semibold text-gray-800">Speech Recognition</h3>
173
+ <p class="text-gray-600 text-sm mt-1">Speak into your microphone and see your words appear in real-time. For offline Czech support, ensure you have Czech language pack installed in your device settings.</p>
174
+ </div>
175
+ <div class="bg-indigo-50 p-4 rounded-lg">
176
+ <div class="text-primary text-2xl mb-2"><i class="fas fa-volume-up"></i></div>
177
+ <h3 class="font-semibold text-gray-800">Text to Speech</h3>
178
+ <p class="text-gray-600 text-sm mt-1">Convert any text to speech with natural sounding voices. Czech voice support depends on your system's available voices.</p>
179
+ </div>
180
<!-- Third "How It Works" card. The wording avoids promising fully local processing,
     because that depends on the browser's speech engine rather than on this page. -->
<div class="bg-blue-50 p-4 rounded-lg">
  <div class="text-primary text-2xl mb-2"><i class="fas fa-shield-alt"></i></div>
  <h3 class="font-semibold text-gray-800">Privacy &amp; Offline Use</h3>
  <p class="text-gray-600 text-sm mt-1">Speech is handled by your browser's built-in speech engine. Some browsers (for example Chrome) send audio to a server-based recognizer, so recognition is only private and available offline where the browser supports on-device recognition and the required language pack is installed.</p>
</div>
185
+ </div>
186
+ </div>
187
+
188
+ <!-- Footer -->
189
+ <footer class="mt-8 text-center text-gray-600 text-sm">
190
+ <p>Offline Speech Recognition & Synthesis | Works directly in your browser</p>
191
+ <p class="mt-2 text-xs">For Czech language offline support: Install Czech language pack in your device settings or use a keyboard app with offline voice input capabilities.</p>
192
+ </footer>
193
+ </div>
194
+
195
+ <script>
196
+ document.addEventListener('DOMContentLoaded', function() {
197
+ // Tab switching
198
+ const speechToTextTab = document.getElementById('speechToTextTab');
199
+ const textToSpeechTab = document.getElementById('textToSpeechTab');
200
+ const speechToTextPanel = document.getElementById('speechToTextPanel');
201
+ const textToSpeechPanel = document.getElementById('textToSpeechPanel');
202
+
203
/**
 * Highlights one tab and shows its panel while resetting the other pair.
 *
 * @param {HTMLElement} activeTab - Tab button that becomes highlighted.
 * @param {HTMLElement} activePanel - Panel that becomes visible.
 * @param {HTMLElement} inactiveTab - Tab button that returns to the muted style.
 * @param {HTMLElement} inactivePanel - Panel that gets hidden.
 */
function activateTab(activeTab, activePanel, inactiveTab, inactivePanel) {
  activeTab.classList.add('bg-primary', 'text-white');
  activeTab.classList.remove('bg-gray-100', 'text-gray-600');
  inactiveTab.classList.add('bg-gray-100', 'text-gray-600');
  // The highlight must be removed from the *other* tab; removing it from the
  // tab that was just activated would leave both tabs styled incorrectly.
  inactiveTab.classList.remove('bg-primary', 'text-white');
  activePanel.classList.remove('hidden');
  inactivePanel.classList.add('hidden');
}

speechToTextTab.addEventListener('click', () => {
  activateTab(speechToTextTab, speechToTextPanel, textToSpeechTab, textToSpeechPanel);
});

textToSpeechTab.addEventListener('click', () => {
  activateTab(textToSpeechTab, textToSpeechPanel, speechToTextTab, speechToTextPanel);
});
220
+
221
+ // Speech Recognition
222
+ const startRecognition = document.getElementById('startRecognition');
223
+ const recognizedText = document.getElementById('recognizedText');
224
+ const listeningIndicator = document.getElementById('listeningIndicator');
225
+ const historyContainer = document.getElementById('historyContainer');
226
+
227
+ let recognition;
228
+ let isListening = false;
229
+
230
+ // Check if SpeechRecognition is available
231
// Resolve the recognition constructor, falling back to the webkit-prefixed name.
const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;

if (SpeechRecognitionImpl) {
  recognition = new SpeechRecognitionImpl();
  recognition.continuous = true;
  recognition.interimResults = true;
  recognition.lang = 'cs-CZ'; // Set default language to Czech

  // The engine reports that it started: switch the UI into the listening state.
  recognition.onstart = () => {
    isListening = true;
    startRecognition.classList.add('mic-active');
    startRecognition.classList.remove('mic-inactive');
    listeningIndicator.classList.remove('hidden');
    recognizedText.textContent = "Listening...";
  };

  // Collect the results delivered with this event, separating final from interim text.
  recognition.onresult = (event) => {
    let interimTranscript = '';
    let finalTranscript = '';

    for (let i = event.resultIndex; i < event.results.length; i++) {
      const transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        finalTranscript += `${transcript} `;
      } else {
        interimTranscript += transcript;
      }
    }

    // Trim so that a whitespace-only final result is treated as "nothing recognized".
    const finalText = finalTranscript.trim();
    recognizedText.textContent = finalText || interimTranscript || "Listening...";

    if (finalText) {
      addToHistory(finalText);
    }
  };

  recognition.onerror = (event) => {
    console.error('Speech recognition error', event.error);
    recognizedText.textContent = `Error: ${event.error}`;
    stopListening();
  };

  // Same UI reset as after an error, so reuse the shared helper.
  recognition.onend = () => {
    stopListening();
  };

  // The mic button toggles recognition on and off.
  startRecognition.addEventListener('click', () => {
    if (isListening) {
      recognition.stop();
      return;
    }

    try {
      recognition.start();
    } catch (e) {
      console.error('Recognition error:', e);
      recognizedText.textContent = `Error starting recognition: ${e.message}`;
    }
  });
} else {
  // Speech recognition not supported: disable the button and drop the classes
  // that make it look interactive (primary colour, hover colour, pulse animation).
  recognizedText.textContent = "Speech recognition is not supported in this browser.";
  startRecognition.disabled = true;
  startRecognition.classList.remove('bg-primary', 'hover:bg-secondary', 'pulse');
  startRecognition.classList.add('bg-gray-400', 'cursor-not-allowed');
}
299
+
300
/**
 * Puts the page back into the "not listening" state: clears the listening
 * flag, styles the mic button as inactive and hides the listening badge.
 */
function stopListening() {
  isListening = false;

  const micClasses = startRecognition.classList;
  micClasses.toggle('mic-active', false);
  micClasses.toggle('mic-inactive', true);

  listeningIndicator.classList.toggle('hidden', true);
}
306
+
307
/**
 * Prepends a recognized phrase, stamped with the current local time, to the
 * history list and keeps at most the five newest entries.
 *
 * The entry is assembled from DOM nodes and the phrase is assigned through
 * textContent, so markup inside the recognized text is shown literally
 * instead of being parsed as HTML.
 *
 * @param {string} text - Phrase to record.
 */
function addToHistory(text) {
  const MAX_HISTORY_ITEMS = 5;

  // Anything in the container that is not a history entry (the
  // "No history yet..." placeholder) is dropped once a real entry arrives.
  for (const child of [...historyContainer.children]) {
    if (!child.classList.contains('history-item')) {
      child.remove();
    }
  }

  const icon = document.createElement('i');
  icon.className = 'fas fa-comment text-primary mt-1 mr-2';

  const message = document.createElement('p');
  message.className = 'text-gray-800';
  message.textContent = text;

  const timestamp = document.createElement('p');
  timestamp.className = 'text-xs text-gray-500 mt-1';
  timestamp.textContent = new Date().toLocaleTimeString();

  const body = document.createElement('div');
  body.append(message, timestamp);

  const row = document.createElement('div');
  row.className = 'flex items-start';
  row.append(icon, body);

  const historyItem = document.createElement('div');
  historyItem.className = 'history-item p-3 mb-2 bg-white rounded-lg shadow-sm';
  historyItem.append(row);
  historyContainer.prepend(historyItem);

  // Trim by element, not by node: lastChild may be a whitespace text node,
  // which would leave the oldest entry in place.
  while (historyContainer.children.length > MAX_HISTORY_ITEMS) {
    historyContainer.lastElementChild.remove();
  }
}
326
+
327
+ // Text to Speech
328
+ const textInput = document.getElementById('textInput');
329
+ const voiceSelect = document.getElementById('voiceSelect');
330
+ const playText = document.getElementById('playText');
331
+
332
+ let voices = [];
333
+
334
/**
 * Refreshes the cached `voices` array from the speech synthesis engine and
 * rebuilds the voice <select> so that option N corresponds to voices[N].
 *
 * A Czech voice is preselected when one is available, matching the cs-CZ
 * language used for recognition on this page; otherwise the first voice is
 * selected.
 */
function populateVoiceList() {
  voices = window.speechSynthesis.getVoices();
  voiceSelect.innerHTML = '';

  voices.forEach((voice) => {
    const option = document.createElement('option');
    option.textContent = `${voice.name} (${voice.lang})`;
    option.setAttribute('data-lang', voice.lang);
    option.setAttribute('data-name', voice.name);
    voiceSelect.appendChild(option);
  });

  // Accept "cs", "cs-CZ" and "cs_CZ" style language tags.
  const czechIndex = voices.findIndex((voice) => /^cs(?:[-_]|$)/i.test(voice.lang));
  voiceSelect.selectedIndex = czechIndex === -1 ? 0 : czechIndex;
}
349
+
350
// Fill the voice list now and refresh it whenever the engine announces a change.
// NOTE(review): in some browsers the first getVoices() call may return an empty
// list until the voiceschanged event fires - confirm on the target devices.
populateVoiceList();
if (window.speechSynthesis.onvoiceschanged !== undefined) {
  window.speechSynthesis.onvoiceschanged = populateVoiceList;
}

// Speak the textarea content with the voice currently chosen in the <select>.
playText.addEventListener('click', () => {
  const text = textInput.value.trim();
  if (!text) {
    return; // nothing to say (empty or whitespace only)
  }

  const utterance = new SpeechSynthesisUtterance(text);

  // voices may be empty or the index out of range; only set a voice that exists.
  const selectedVoice = voices[voiceSelect.selectedIndex];
  if (selectedVoice) {
    utterance.voice = selectedVoice;
    utterance.lang = selectedVoice.lang;
  }

  // speak() appends to the utterance queue, so clear it first: pressing Play
  // again restarts playback instead of stacking repeated readings.
  window.speechSynthesis.cancel();
  window.speechSynthesis.speak(utterance);
});
363
+
364
+ // Initialize with sample history
365
// Seed the history list with two demo entries one second after the page is ready.
const SAMPLE_HISTORY_DELAY_MS = 1000;
const sampleHistoryEntries = [
  "This is a sample history item",
  "Another example of speech recognition",
];

setTimeout(() => {
  sampleHistoryEntries.forEach((entry) => {
    addToHistory(entry);
  });
}, SAMPLE_HISTORY_DELAY_MS);
369
+ });
370
+ </script>
371
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=torusvektor/offline-speech-recognition-synthesis" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
372
+ </html>