#!/opt/homebrew/bin/python3.11
"""Performance benchmark for ASR processor fix.

Runs the ASR processor scripts as child processes against fixed test videos
and prints wall-clock timings:

1. a three-run baseline on a small video,
2. a single run on a large video, reporting per-chunk times parsed from the
   child's stderr and a real-time factor derived from the JSON output,
3. a production-vs-debug script comparison with a 5% overhead limit.

The script paths are relative, so they resolve against the current working
directory of whoever launches this benchmark.
"""

import contextlib
import json
import os
import re
import statistics
import subprocess
import sys
import tempfile
import time
import traceback
from typing import Optional

# Interpreter and scripts under test.
PYTHON_BIN = "/opt/homebrew/bin/python3.11"
PRODUCTION_SCRIPT = "scripts/asr_processor.py"
DEBUG_SCRIPT = "scripts/asr_processor_debug.py"

# Fixed benchmark inputs.
SMALL_VIDEO = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
LARGE_VIDEO = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"

# Maximum acceptable slowdown of the debug script relative to production.
OVERHEAD_LIMIT_PERCENT = 5

# Matches the timing tail of lines such as "Chunk 1/12: 159 segments in 27.5s".
_CHUNK_TIME_RE = re.compile(r"segments in\s+(\d+(?:\.\d+)?)\s*s\b")


def parse_chunk_times(stderr_text):
    """Extract per-chunk processing times (seconds) from processor stderr.

    Only lines that mention ``Chunk`` and end their timing with the
    ``segments in <number>s`` pattern contribute; every other line is
    ignored, so malformed lines never raise.

    Args:
        stderr_text: Full stderr text captured from the processor.

    Returns:
        A list of floats, in the order the chunk lines appeared.
    """
    chunk_times = []
    for line in stderr_text.splitlines():
        if "Chunk" not in line:
            continue
        match = _CHUNK_TIME_RE.search(line)
        if match is not None:
            chunk_times.append(float(match.group(1)))
    return chunk_times


def debug_overhead_percent(prod_time, debug_time) -> Optional[float]:
    """Return how much slower the debug run was than production, in percent.

    A positive value means the debug run took longer; a negative value means
    it was faster.

    Args:
        prod_time: Elapsed seconds of the production run (the baseline).
        debug_time: Elapsed seconds of the debug run.

    Returns:
        The relative overhead in percent, or ``None`` when ``prod_time`` is
        not positive and no meaningful ratio exists.
    """
    if prod_time <= 0:
        return None
    return (debug_time - prod_time) / prod_time * 100


@contextlib.contextmanager
def _temp_json_path():
    """Yield the path of a fresh ``.json`` temp file and delete it afterwards."""
    # delete=False: only the path is needed; the child process writes to it.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
        path = handle.name
    try:
        yield path
    finally:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(path)


def _run_asr_processor(script_path, test_video, output_path, extra_args=(), debug=False):
    """Run one processor script and time it.

    Args:
        script_path: Script to execute with ``PYTHON_BIN``.
        test_video: Input video path passed as the first positional argument.
        output_path: Output JSON path passed as the second positional argument.
        extra_args: Additional command-line arguments appended verbatim.
        debug: When true, ``ASR_DEBUG=1`` is added to the child environment.

    Returns:
        ``(result, elapsed)`` where ``result`` is the
        ``subprocess.CompletedProcess`` (stdout/stderr captured as text) and
        ``elapsed`` is the run time in seconds.
    """
    cmd = [PYTHON_BIN, script_path, test_video, output_path, *extra_args]
    env = os.environ.copy()
    # Presumably keeps the processor from contacting Redis during the benchmark.
    env["MOMENTRY_DISABLE_REDIS"] = "1"
    if debug:
        env["ASR_DEBUG"] = "1"

    # perf_counter is monotonic, so system clock adjustments cannot skew timings.
    start = time.perf_counter()
    result = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env,
    )
    return result, time.perf_counter() - start


def _report_output_stats(output_path, elapsed):
    """Print segment count and real-time factor from the processor's JSON output."""
    if not os.path.exists(output_path):
        return
    with open(output_path, "r", encoding="utf-8") as out_file:
        data = json.load(out_file)
    segments = data.get("segments", [])
    print(f" Total segments: {len(segments)}")

    # The largest segment end time stands in for the processed audio duration.
    total_audio_duration = max((seg.get("end", 0) for seg in segments), default=0)
    if total_audio_duration > 0:
        real_time_factor = elapsed / total_audio_duration
        print(f" Audio duration: {total_audio_duration:.1f}s")
        print(f" Real-time factor: {real_time_factor:.2f}x (lower is better)")


def test_small_video():
    """Test with small video to establish baseline.

    Runs the production script three times and prints each run's result.

    Returns:
        The mean elapsed seconds over the successful runs, or ``None`` when
        the video is missing or no run succeeded.
    """
    test_video = SMALL_VIDEO
    if not os.path.exists(test_video):
        print(f"Small test video not found: {test_video}")
        return None

    print(f"Testing small video: {os.path.basename(test_video)}")

    runs = 3
    times = []
    for run in range(runs):
        with _temp_json_path() as output_path:
            try:
                result, elapsed = _run_asr_processor(
                    PRODUCTION_SCRIPT,
                    test_video,
                    output_path,
                    extra_args=("--chunk-duration", "600"),
                )
                if result.returncode == 0:
                    times.append(elapsed)
                    print(
                        f" Run {run + 1}: {elapsed:.1f}s, return code: {result.returncode}"
                    )
                else:
                    print(
                        f" Run {run + 1}: FAILED in {elapsed:.1f}s, return code: {result.returncode}"
                    )
                    print(f" stderr: {result.stderr[-200:]}")
            except Exception as e:
                # Best-effort benchmark: report the failure and try the next run.
                print(f" Run {run + 1}: ERROR {e}")

    if not times:
        return None

    avg = statistics.mean(times)
    # stdev needs at least two samples.
    std = statistics.stdev(times) if len(times) > 1 else 0
    print(f" Average: {avg:.1f}s ± {std:.1f}s")
    return avg


def test_large_video_chunk():
    """Test a single chunk of large video to measure chunk processing time.

    Runs the production script once with ``ASR_DEBUG=1`` and prints the total
    time, the chunk times parsed from stderr, and output statistics.

    Returns:
        The elapsed seconds of the run on success, or ``None`` when the video
        is missing, the run fails, or reading the output raises.
    """
    test_video = LARGE_VIDEO
    if not os.path.exists(test_video):
        print(f"Large test video not found: {test_video}")
        return None

    print(f"\nTesting single chunk from large video: {os.path.basename(test_video)}")

    with _temp_json_path() as output_path:
        try:
            result, elapsed = _run_asr_processor(
                PRODUCTION_SCRIPT,
                test_video,
                output_path,
                extra_args=(
                    "--uuid",
                    "test_perf",
                    "--chunk-duration",
                    "600",  # 10 minutes
                    "--max-direct-duration",
                    "300",  # Force chunked mode even for short audio
                ),
                debug=True,
            )

            if result.returncode != 0:
                print(f" FAILED in {elapsed:.1f}s, return code: {result.returncode}")
                print(" Last 10 lines of stderr:")
                for line in result.stderr.split("\n")[-10:]:
                    if line.strip():
                        print(f" {line}")
                return None

            # Debug output carries lines like "Chunk 1/12: 159 segments in 27.5s".
            chunk_times = parse_chunk_times(result.stderr)

            print(f" Total time: {elapsed:.1f}s, return code: {result.returncode}")
            if chunk_times:
                print(f" Chunk times: {chunk_times}")
                print(f" Average chunk time: {statistics.mean(chunk_times):.1f}s")

            _report_output_stats(output_path, elapsed)
            return elapsed
        except Exception as e:
            print(f" ERROR: {e}")
            traceback.print_exc()

    return None


def test_debug_vs_production():
    """Compare debug vs production versions.

    Runs each script once on the small video, then prints the debug run's
    overhead relative to production and whether it stays within
    ``OVERHEAD_LIMIT_PERCENT``. Returns ``None``.
    """
    test_video = SMALL_VIDEO
    if not os.path.exists(test_video):
        print(f"Small test video not found: {test_video}")
        return

    print("\nComparing debug vs production versions:")

    versions = [
        ("production", PRODUCTION_SCRIPT),
        ("debug", DEBUG_SCRIPT),
    ]

    results = {}
    for version_name, script_path in versions:
        print(f"\n Testing {version_name} version...")
        is_debug = version_name == "debug"

        with _temp_json_path() as output_path:
            try:
                result, elapsed = _run_asr_processor(
                    script_path,
                    test_video,
                    output_path,
                    extra_args=("--chunk-duration", "600"),
                    debug=is_debug,
                )
                if result.returncode == 0:
                    results[version_name] = elapsed
                    print(f" Time: {elapsed:.1f}s")
                    if is_debug:
                        debug_line_count = sum(
                            "ASR_DEBUG" in line for line in result.stderr.split("\n")
                        )
                        print(f" Debug lines: {debug_line_count}")
                else:
                    print(f" FAILED: {elapsed:.1f}s, return code: {result.returncode}")
            except Exception as e:
                print(f" ERROR: {e}")

    if "production" not in results or "debug" not in results:
        return

    prod_time = results["production"]
    debug_time = results["debug"]
    # Overhead is measured against the production baseline, so a slower debug
    # build yields a positive percentage.
    overhead = debug_overhead_percent(prod_time, debug_time)

    print("\n Comparison:")
    print(f" Production: {prod_time:.1f}s")
    print(f" Debug: {debug_time:.1f}s")
    if overhead is None:
        return
    print(f" Overhead: {overhead:.1f}%")

    if overhead <= OVERHEAD_LIMIT_PERCENT:
        print(f" ✅ Within {OVERHEAD_LIMIT_PERCENT}% overhead limit")
    else:
        print(f" ⚠️ Exceeds {OVERHEAD_LIMIT_PERCENT}% overhead limit")


def main():
    """Run all three benchmarks and print a summary."""
    print("ASR Processor Performance Benchmark")
    print("=" * 60)

    # Test 1: Baseline with small video
    baseline = test_small_video()

    # Test 2: Large video chunk performance
    large_chunk_time = test_large_video_chunk()

    # Test 3: Debug vs production comparison
    test_debug_vs_production()

    print("\n" + "=" * 60)
    print("Summary:")
    if baseline is not None:
        print(f" Small video baseline: {baseline:.1f}s")
    if large_chunk_time is not None:
        print(f" Large video chunk: {large_chunk_time:.1f}s")

    print("\nNext steps:")
    print(" 1. Complete end-to-end test with full 2.2GB video")
    print(" 2. Verify chunk merging logic works correctly")
    print(" 3. Test with Redis enabled (if needed)")
    print(" 4. Integrate with Rust processor wrapper")


if __name__ == "__main__":
    main()