#!/usr/bin/env python3
"""
Debug ASR processing stages for large video.

Runs the pipeline one stage at a time (probe audio streams, extract audio,
import faster_whisper, transcribe a short clip) and prints timing for each,
so the slow or failing stage can be identified.
"""

import os
import sys
import time
import subprocess
import tempfile
import json
import traceback
from pathlib import Path


def _remove_quietly(path):
    """Delete *path*; a file that is already gone is not an error."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass


def run_ffmpeg_extract(video_path, audio_path):
    """Extract the audio track of a video to a 16 kHz mono PCM WAV file.

    Args:
        video_path: Path of the input video.
        audio_path: Path of the WAV file to write (overwritten if present).

    Returns:
        A ``(success, elapsed)`` tuple: ``success`` is True when ffmpeg
        exited with return code 0, ``elapsed`` is the wall-clock run time
        in seconds.

    Raises:
        OSError: If the ``ffmpeg`` executable cannot be started.
    """
    cmd = [
        "ffmpeg",
        "-i",
        str(video_path),
        "-vn",
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        "-y",
        str(audio_path),
    ]
    print(f"Running ffmpeg: {' '.join(cmd)}")
    start = time.perf_counter()
    proc = subprocess.run(cmd, capture_output=True, text=True)
    elapsed = time.perf_counter() - start
    print(f"ffmpeg completed in {elapsed:.1f}s, return code: {proc.returncode}")
    if proc.returncode != 0:
        # ffmpeg writes its version/configuration banner to stderr first, so
        # the actual error message is at the end of the output.
        print(f"stderr: {proc.stderr[-500:]}")
    return proc.returncode == 0, elapsed


def test_asr_stages(video_path):
    """Run the ASR debugging stages on *video_path*, printing progress.

    Stages: (1) list audio streams with ffprobe, (2) extract audio with
    ffmpeg into a temporary WAV, (3) time the faster_whisper import,
    (4) trim the first 30 seconds and transcribe them with the "tiny" model.

    The function stops at the first stage that fails, except stage 1, which
    is informational only. Temporary audio files are removed on every exit
    path.

    Args:
        video_path: Path (str or Path) of the video to inspect.

    Raises:
        OSError: If *video_path* cannot be stat'ed (e.g. it does not exist).
    """
    video_path = Path(video_path)
    print(f"Testing video: {video_path}")
    print(f"Size: {video_path.stat().st_size / 1024 / 1024:.1f} MB")

    # Stage 1: Check audio streams
    print("\n=== Stage 1: Check audio streams ===")
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "a",
        "-show_entries",
        "stream=codec_name,channels,sample_rate,duration",
        "-of",
        "csv=p=0",
        str(video_path),
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        # A missing ffprobe should not prevent the remaining stages from
        # being exercised.
        print(f"Could not run ffprobe: {e}")
    else:
        print(f"Audio streams: {proc.stdout.strip()}")
        if proc.returncode != 0:
            print(f"ffprobe failed (return code {proc.returncode})")
            print(f"stderr: {proc.stderr.strip()[-500:]}")

    # Stage 2: Extract audio
    print("\n=== Stage 2: Extract audio ===")
    # delete=False: only the unique name is needed here; ffmpeg writes the
    # file itself, and the cleanup below removes it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        audio_path = f.name
    trim_path = audio_path + ".trim.wav"

    try:
        try:
            success, _ = run_ffmpeg_extract(video_path, audio_path)
            if not success:
                print("Audio extraction failed")
                return
            print(f"Audio extracted to {audio_path}")
            print(
                f"Audio size: {Path(audio_path).stat().st_size / 1024 / 1024:.1f} MB"
            )
        except Exception as e:
            print(f"Error extracting audio: {e}")
            return

        # Stage 3: Load faster_whisper model (just import)
        print("\n=== Stage 3: Test faster_whisper import ===")
        try:
            start = time.perf_counter()
            # Imported here on purpose: the import time is what is measured.
            from faster_whisper import WhisperModel

            elapsed = time.perf_counter() - start
            print(f"Import faster_whisper: {elapsed:.1f}s")
        except Exception as e:
            print(f"Import failed: {e}")
            return

        # Stage 4: Transcribe a small segment (first 30 seconds)
        print("\n=== Stage 4: Transcribe first 30 seconds ===")
        try:
            # Trim audio to first 30 seconds
            cmd = [
                "ffmpeg",
                "-i",
                audio_path,
                "-t",
                "30",
                "-acodec",
                "pcm_s16le",
                "-ar",
                "16000",
                "-ac",
                "1",
                "-y",
                trim_path,
            ]
            trim = subprocess.run(cmd, capture_output=True)
            if trim.returncode != 0:
                # Fail here with ffmpeg's own message rather than later with
                # a less direct error from the transcriber.
                detail = trim.stderr.decode(errors="replace")[-500:]
                raise RuntimeError(
                    f"ffmpeg trim failed (return code {trim.returncode}): {detail}"
                )

            # Load model with small model
            start = time.perf_counter()
            model = WhisperModel("tiny", device="cpu", compute_type="int8")
            load_time = time.perf_counter() - start
            print(f"Model loaded in {load_time:.1f}s")

            # Transcribe
            start = time.perf_counter()
            segments, info = model.transcribe(trim_path, beam_size=5)
            segments = list(segments)  # Force processing
            transcribe_time = time.perf_counter() - start
            print(f"Transcription of 30s audio: {transcribe_time:.1f}s")
            print(
                f"Detected language: {info.language} with probability {info.language_probability}"
            )
            print(f"Segments found: {len(segments)}")
        except Exception as e:
            print(f"Transcription test failed: {e}")
            traceback.print_exc()
    finally:
        # Single cleanup point so that no exit path leaks temporary files.
        _remove_quietly(trim_path)
        _remove_quietly(audio_path)

    print("\n=== Debug complete ===")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <video_path>")
        sys.exit(1)
    test_asr_stages(sys.argv[1])