#!/usr/bin/env python3
"""
Test transcription on extracted audio.

Manual smoke test: loads the "tiny" faster_whisper model on CPU and
transcribes the audio file given on the command line, printing timings,
the detected language and a preview of the first segments.
"""

import sys
import time
import traceback
from pathlib import Path

# Add scripts directory to path
sys.path.insert(0, str(Path(__file__).parent / "scripts"))


def test_transcribe(audio_path: str) -> bool:
    """Test transcription using faster_whisper.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        True if the model loaded and the transcription ran to completion;
        False if the file is missing, faster_whisper cannot be imported,
        or model loading / transcription raised.
    """
    print(f"Testing transcription on: {audio_path}")

    # Validate the input up front so a bad path yields a clean False
    # instead of an uncaught FileNotFoundError from stat().
    source = Path(audio_path)
    if not source.is_file():
        print(f"Audio file not found: {audio_path}")
        return False
    print(f"Size: {source.stat().st_size / 1024 / 1024:.1f} MB")

    # Import lazily so a missing dependency is reported, not a crash.
    try:
        from faster_whisper import WhisperModel
    except ImportError as e:
        print(f"Failed to import faster_whisper: {e}")
        return False

    # Load model
    print("Loading Whisper model (tiny, cpu, int8)...")
    start = time.perf_counter()
    try:
        model = WhisperModel("tiny", device="cpu", compute_type="int8")
    except Exception as e:
        print(f"Model loading failed: {e}")
        return False
    load_time = time.perf_counter() - start
    print(f"Model loaded in {load_time:.1f}s")

    # Transcribe the whole file (no duration limit is passed to the model).
    print("Transcribing audio...")
    start = time.perf_counter()
    try:
        segments, info = model.transcribe(audio_path, beam_size=5)
        # Convert generator to list to force processing; otherwise the
        # elapsed time below would not include the actual decoding.
        segments = list(segments)
        transcribe_time = time.perf_counter() - start
        print(f"Transcription completed in {transcribe_time:.1f}s")
        print(
            f"Detected language: {info.language} "
            f"(prob {info.language_probability:.2f})"
        )
        print(f"Segments found: {len(segments)}")
        # Preview at most three segments, each cut to 80 characters.
        for i, seg in enumerate(segments[:3]):
            print(f"  [{i}] {seg.start:.1f}-{seg.end:.1f}: {seg.text[:80]}...")
        return True
    except Exception as e:
        print(f"Transcription failed: {e}")
        traceback.print_exc()
        return False


# The name starts with "test_" but this is a CLI helper that needs an
# explicit path argument; keep pytest from collecting it as a test case.
test_transcribe.__test__ = False


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <audio_path>")
        sys.exit(1)
    success = test_transcribe(sys.argv[1])
    sys.exit(0 if success else 1)