# PaitentVoiceToText.py
"""Whisper speech-to-text helper.

Importing this module has side effects: it picks a device/dtype, loads the
speech model and processor identified by ``hub_id`` and builds a module-level
automatic-speech-recognition pipeline (``pipe``). Other modules are expected
to import :func:`record_and_transcribe` and call it with an audio input.
"""
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# -------------------
# 1️⃣ Detect device
# -------------------
use_cuda = torch.cuda.is_available()
# Half precision is only used when a CUDA device is available.
dtype = torch.float16 if use_cuda else torch.float32
print(f"🌟 Using {'GPU' if use_cuda else 'CPU'}, dtype={dtype}")

# -------------------
# 2️⃣ Load Whisper model
# -------------------
hub_id = "Muhammadidrees/WispherVOICE"

print("⏳ Loading model...")
# NOTE(review): trust_remote_code=True allows Python code shipped in the hub
# repository to run locally. Confirm the repository is trusted (or pin a
# revision); drop the flag if the checkpoint does not need custom code.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    hub_id,
    torch_dtype=dtype,
    device_map="auto",  # accelerate manages device placement
    trust_remote_code=True,
)

processor = AutoProcessor.from_pretrained(
    hub_id,
    trust_remote_code=True,
)

# -------------------
# 3️⃣ Create pipeline (no device argument!)
# -------------------
# No `device` is passed because placement is already handled by
# device_map="auto" above. The dtype IS passed so the extracted input
# features are cast to the same precision as the model weights; without it,
# a float16 model can be fed float32 features.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=dtype,
)

print("🎧 Whisper pipeline ready.")


# -------------------
# 4️⃣ Function for external import
# -------------------
def record_and_transcribe(audio_file) -> str:
    """Transcribe an audio input with the module-level pipeline.

    Args:
        audio_file: The audio to transcribe, forwarded unchanged to ``pipe``
            (for example a path to an audio file). ``None`` means no audio
            was supplied.

    Returns:
        The ``"text"`` entry of the pipeline result, or the literal message
        ``"No audio provided."`` when ``audio_file`` is ``None``.
    """
    # Guard clause: callers may pass None when nothing was recorded/uploaded.
    if audio_file is None:
        return "No audio provided."

    result = pipe(audio_file)
    return result["text"]