# Spaces: Sleeping / Sleeping — Hugging Face Spaces status header captured by
# the scrape; kept here as a comment so the file remains valid Python.
| # PaitentVoiceToText.py | |
| import torch | |
| from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
# -------------------
# 1️⃣ Detect device
# -------------------
# Pick half precision on CUDA (faster, less VRAM); full precision on CPU,
# where float16 matmuls are unsupported/slow.
use_cuda = torch.cuda.is_available()
if use_cuda:
    dtype = torch.float16
else:
    dtype = torch.float32
print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}, dtype={dtype}")
# -------------------
# 2️⃣ Load Whisper model
# -------------------
hub_id = "Muhammadidrees/WispherVOICE"
print("⏳ Loading model...")

# NOTE(review): trust_remote_code=True executes Python shipped with the Hub
# repo — only safe if the repo owner is trusted; confirm before deploying.
_remote_kwargs = {"trust_remote_code": True}

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    hub_id,
    torch_dtype=dtype,        # dtype chosen above from CUDA availability
    device_map="auto",        # accelerate manages device placement
    **_remote_kwargs,
)

# Processor bundles the tokenizer and the audio feature extractor.
processor = AutoProcessor.from_pretrained(hub_id, **_remote_kwargs)
# -------------------
# 3️⃣ Create pipeline (no device argument!)
# -------------------
# device= is deliberately omitted: device_map="auto" already placed the model,
# and passing a device here would conflict with accelerate's placement.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
print("🎧 Whisper pipeline ready.")
# -------------------
# 4️⃣ Function for external import
# -------------------
def record_and_transcribe(audio_file):
    """Transcribe an audio file (path) or recording.

    Parameters
    ----------
    audio_file : str | None
        Path to an audio file (or any input accepted by the ASR pipeline).

    Returns
    -------
    str
        The transcribed text, or the message "No audio provided." when no
        usable input was given.
    """
    # Guard all falsy inputs, not just None: an empty-string path would
    # otherwise reach pipe("") and fail with a confusing backend error.
    if not audio_file:
        return "No audio provided."
    result = pipe(audio_file)
    return result["text"]