import gradio as gr
import torch
import librosa
import numpy as np
from transformers import AutoProcessor, AutoModelForCTC

# Load the fine-tuned MMS model and its processor
print("Loading model...")
processor = AutoProcessor.from_pretrained("HAMMALE/mms-darija-finetuned")
model = AutoModelForCTC.from_pretrained("HAMMALE/mms-darija-finetuned")
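# Optional sketch (assumption, not part of the original app): run inference on a GPU
# when one is available. Everything below stays on CPU, so enabling this would also
# require moving the processed inputs to the same device inside transcribe_audio,
# e.g. inputs = {k: v.to(device) for k, v in inputs.items()}.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)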
def transcribe_audio(audio_file):
    try:
        if audio_file is None:
            return "Please upload an audio file."

        # Load the audio and resample to the 16 kHz rate the model expects
        audio, sr = librosa.load(audio_file, sr=16000)

        # Reject very short clips (fewer than 1600 samples = 0.1 s at 16 kHz)
        if len(audio) < 1600:
            return "Audio too short. Please upload a longer audio file."

        # Convert the waveform into model inputs
        inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

        # CTC inference with greedy decoding over the logits
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]

        return transcription if transcription.strip() else "No transcription generated."
    except Exception as e:
        return f"Error processing audio: {str(e)}"
# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Darija Audio"),
    outputs=gr.Textbox(label="Transcription", placeholder="Transcription will appear here..."),
    title="🎤 Darija Speech Recognition",
    description="Upload an audio file in Moroccan Arabic (Darija) and get the transcription. This model was fine-tuned on the Darija Bible dataset.",
    article="Model: [HAMMALE/mms-darija-finetuned](https://huggingface.co/HAMMALE/mms-darija-finetuned)",
    examples=[
        # You can add example audio files here if you have them
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    demo.launch()