#!/usr/bin/env python3
"""Test ASR processor on a 10-minute clip from the large problematic video.

The script cuts the first 10 minutes out of ``LARGE_VIDEO`` with ffmpeg
(stream copy), runs ``scripts/asr_processor.py`` on the clip with
environment overrides that force chunked processing, prints a summary of
the JSON output and finally removes the temporary working directory.

Paths are relative, so the script is expected to be started from the
directory that contains ``scripts/``.  The exit status is 0 on success and
1 on any failure.
"""

import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
import traceback

# Paths
LARGE_VIDEO = "../test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
ASR_PYTHON = "/opt/homebrew/bin/python3.11"
ASR_SCRIPT = "scripts/asr_processor.py"

# Length of the extracted clip in seconds (10 minutes).
CLIP_DURATION_S = 600
# Upper bound for the ASR subprocess in seconds (15 minutes).
ASR_TIMEOUT_S = 900

# Environment overrides handed to the ASR processor.
ASR_ENV_OVERRIDES = {
    "MOMENTRY_ASR_MAX_DIRECT_DURATION": "300",  # 5 minutes, force chunked
    "MOMENTRY_ASR_CHUNK_DURATION": "120",  # 2-minute chunks for testing
    "MOMENTRY_ASR_MODEL_SIZE": "tiny",
    "MOMENTRY_ASR_COMPUTE_TYPE": "int8",
}


def segments_outside_range(
    segments: list[dict], clip_duration: float = CLIP_DURATION_S
) -> list[dict]:
    """Return the segments whose timestamps fall outside the clip.

    A segment is reported when its ``start`` is negative or its ``end`` is
    greater than ``clip_duration``.  Every segment is examined, so problems
    near the end of the clip are caught as well.

    Args:
        segments: Segment dicts, each with numeric ``start`` and ``end``.
        clip_duration: Clip length in seconds.

    Returns:
        The offending segments in their original order.

    Raises:
        KeyError: If a segment lacks ``start`` or ``end``.
    """
    return [
        seg
        for seg in segments
        if seg["start"] < 0 or seg["end"] > clip_duration
    ]


def _extract_clip(video_path: str, clip_path: str) -> bool:
    """Copy the first ``CLIP_DURATION_S`` seconds of the video to ``clip_path``.

    Returns True when ffmpeg succeeded and the clip file exists.
    """
    print("Extracting 10-minute clip...")
    ffmpeg_cmd = [
        "ffmpeg",
        "-i",
        video_path,
        "-ss",
        "0",
        "-t",
        str(CLIP_DURATION_S),  # 10 minutes
        "-c",
        "copy",
        "-y",
        clip_path,
    ]
    result = subprocess.run(ffmpeg_cmd, capture_output=True)
    if result.returncode != 0:
        # ffmpeg may echo metadata that is not valid UTF-8; never let the
        # error report itself fail while decoding.
        print(f"FFmpeg failed: {result.stderr.decode(errors='replace')}")
        return False
    if not os.path.exists(clip_path):
        print("Clip not created")
        return False
    print(
        f"Clip created: {clip_path} ({os.path.getsize(clip_path) / (1024**2):.1f} MB)"
    )
    return True


def _run_asr(clip_path: str, output_path: str) -> subprocess.CompletedProcess:
    """Run the ASR processor on the clip with chunked mode forced.

    Prints the command, elapsed time, return code and captured streams.

    Raises:
        subprocess.TimeoutExpired: If the run exceeds ``ASR_TIMEOUT_S``.
    """
    env = os.environ.copy()
    env.update(ASR_ENV_OVERRIDES)
    cmd = [
        ASR_PYTHON,
        ASR_SCRIPT,
        clip_path,
        output_path,
        "--uuid",
        "test_large",
    ]
    print("Running ASR processor with forced chunked mode...")
    print(f"Command: {' '.join(cmd)}")
    start = time.time()
    proc = subprocess.run(
        cmd, capture_output=True, text=True, env=env, timeout=ASR_TIMEOUT_S
    )
    elapsed = time.time() - start
    print(f"ASR completed in {elapsed:.1f}s")
    print(f"Return code: {proc.returncode}")
    if proc.stdout:
        print(f"STDOUT:\n{proc.stdout}")
    if proc.stderr:
        print(f"STDERR:\n{proc.stderr}")
    return proc


def _report_output(output_path: str) -> bool:
    """Print a summary of the ASR JSON output.

    Returns False when the output file does not exist, True otherwise.
    """
    if not os.path.exists(output_path):
        print("ERROR: Output file not created")
        return False
    # JSON is UTF-8; do not depend on the locale's default encoding.
    with open(output_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    segments = data.get("segments", [])
    print(f"Output contains {len(segments)} segments")
    print(
        f"Language: {data.get('language')} (prob {data.get('language_probability')})"
    )
    print(f"Processing mode: {data.get('processing_mode', 'unknown')}")
    if segments:
        print(f"First segment: {segments[0]}")
        # Verify timestamps are correct (should be within 0-600s)
        for seg in segments_outside_range(segments):
            print(f"WARNING: segment outside clip range: {seg}")
    return True


def _run_test(video_path: str, temp_dir: str) -> int:
    """Extract the clip, run ASR on it and inspect the result.

    Returns 0 on success and 1 on failure.
    """
    clip_path = os.path.join(temp_dir, "clip.mp4")
    output_path = os.path.join(temp_dir, "output.json")
    if not _extract_clip(video_path, clip_path):
        return 1
    proc = _run_asr(clip_path, output_path)
    # Inspect whatever output exists first so a failing run still yields
    # diagnostics, then let a non-zero exit code fail the test.
    if not _report_output(output_path):
        return 1
    if proc.returncode != 0:
        print(f"ERROR: ASR processor exited with code {proc.returncode}")
        return 1
    return 0


def main(video_path: str = LARGE_VIDEO) -> int:
    """Run the clip test against ``video_path``.

    Returns:
        0 when the clip was extracted, the ASR processor exited cleanly and
        produced an output file; 1 otherwise.
    """
    if not os.path.exists(video_path):
        print(f"Large video not found: {video_path}")
        return 1
    print(f"Large video size: {os.path.getsize(video_path) / (1024**3):.2f} GB")

    # Create temporary directory
    temp_dir = tempfile.mkdtemp(prefix="asr_test_")
    try:
        status = _run_test(video_path, temp_dir)
    except subprocess.TimeoutExpired:
        print(f"ERROR: ASR processing timed out after {ASR_TIMEOUT_S} seconds")
        status = 1
    except Exception as e:
        # Top-level boundary of a test script: report and fail.
        print(f"ERROR: {e}")
        traceback.print_exc()
        status = 1
    finally:
        # Clean up
        print(f"Cleaning up {temp_dir}")
        shutil.rmtree(temp_dir, ignore_errors=True)

    if status == 0:
        print("Done.")
    return status


if __name__ == "__main__":
    sys.exit(main())