Files
momentry_core/test_performance.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

291 lines
9.0 KiB
Python

#!/opt/homebrew/bin/python3.11
"""Performance benchmark for ASR processor fix."""
import subprocess
import tempfile
import os
import time
import sys
import json
import statistics
def test_small_video():
    """Benchmark the ASR processor on the small sample video.

    Launches ``scripts/asr_processor.py`` three times as a subprocess with
    ``MOMENTRY_DISABLE_REDIS=1`` in its environment, times each launch, and
    prints the per-run result followed by the mean and standard deviation of
    the runs that exited with return code 0.

    Returns:
        Mean elapsed time in seconds over the successful runs, or ``None``
        when the sample video does not exist or no run succeeded.
    """
    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
    if not os.path.exists(test_video):
        print(f"Small test video not found: {test_video}")
        return None

    print(f"Testing small video: {os.path.basename(test_video)}")
    runs = 3
    times = []
    for run in range(runs):
        # Reserve a unique path to hand to the processor as its output
        # argument; delete=False keeps the file around after the handle
        # closes, so it is removed explicitly in the finally clause.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
            output_path = f.name
        try:
            cmd = [
                "/opt/homebrew/bin/python3.11",
                "scripts/asr_processor.py",
                test_video,
                output_path,
                "--chunk-duration",
                "600",
            ]
            env = os.environ.copy()
            # NOTE(review): presumably turns off the processor's Redis
            # integration - confirm against asr_processor.py.
            env["MOMENTRY_DISABLE_REDIS"] = "1"

            # perf_counter() is monotonic and high resolution, so the
            # measurement cannot be skewed by system clock adjustments the
            # way time.time() can.
            start = time.perf_counter()
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                env=env,
            )
            elapsed = time.perf_counter() - start
        except Exception as e:
            # Best effort: one broken run must not abort the remaining runs.
            print(f" Run {run + 1}: ERROR {e}")
        else:
            if result.returncode == 0:
                times.append(elapsed)
                print(
                    f" Run {run + 1}: {elapsed:.1f}s, return code: {result.returncode}"
                )
            else:
                print(
                    f" Run {run + 1}: FAILED in {elapsed:.1f}s, return code: {result.returncode}"
                )
                # Only the tail of stderr is shown to keep the report short.
                print(f" stderr: {result.stderr[-200:]}")
        finally:
            if os.path.exists(output_path):
                os.unlink(output_path)

    if not times:
        return None

    avg = statistics.mean(times)
    # stdev() needs at least two samples; report 0 for a single success.
    std = statistics.stdev(times) if len(times) > 1 else 0
    print(f" Average: {avg:.1f}s ± {std:.1f}s")
    return avg
def _parse_chunk_times(stderr_text):
    """Return the per-chunk durations (seconds) reported in debug stderr."""
    chunk_times = []
    for line in stderr_text.splitlines():
        if "Chunk" not in line:
            continue
        # Example: "Chunk 1/12: 159 segments in 27.5s"
        _, marker, tail = line.partition("segments in")
        fields = tail.split()
        if not marker or not fields:
            continue
        try:
            chunk_times.append(float(fields[0].removesuffix("s")))
        except ValueError:
            # Not a duration after all; ignore this line only.
            continue
    return chunk_times


def _report_output_segments(output_path, elapsed):
    """Print segment count, audio duration and real-time factor from the JSON output."""
    with open(output_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    segments = data.get("segments", [])
    print(f" Total segments: {len(segments)}")
    if not segments:
        return

    # The largest segment "end" value is used as the processed audio length.
    total_audio_duration = max([0] + [seg.get("end", 0) for seg in segments])
    if total_audio_duration > 0:
        real_time_factor = elapsed / total_audio_duration
        print(f" Audio duration: {total_audio_duration:.1f}s")
        print(
            f" Real-time factor: {real_time_factor:.2f}x (lower is better)"
        )


def test_large_video_chunk():
    """Benchmark chunked ASR processing on the large sample video.

    Launches ``scripts/asr_processor.py`` once with ``--chunk-duration 600``
    and ``--max-direct-duration 300``, with ``MOMENTRY_DISABLE_REDIS=1`` and
    ``ASR_DEBUG=1`` in its environment. On success it prints the total time,
    any per-chunk times found in stderr, and a summary of the JSON output
    (segment count, audio duration, real-time factor).

    NOTE(review): nothing in the command visibly restricts processing to a
    single chunk or to the first 10 minutes; the reported total presumably
    covers the whole file - confirm against asr_processor.py.

    Returns:
        Total elapsed time in seconds when the processor exits with return
        code 0, otherwise ``None`` (missing video, non-zero exit, or any
        exception while running or reading the output).
    """
    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
    if not os.path.exists(test_video):
        print(f"Large test video not found: {test_video}")
        return None

    print(f"\nTesting single chunk from large video: {os.path.basename(test_video)}")

    # Reserve a unique output path; removed again in the finally clause.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
        output_path = f.name

    try:
        cmd = [
            "/opt/homebrew/bin/python3.11",
            "scripts/asr_processor.py",
            test_video,
            output_path,
            "--uuid",
            "test_perf",
            "--chunk-duration",
            "600",  # 10 minutes
            "--max-direct-duration",
            "300",  # Force chunked mode even for short audio
        ]
        env = os.environ.copy()
        env["MOMENTRY_DISABLE_REDIS"] = "1"
        env["ASR_DEBUG"] = "1"

        # Monotonic clock: immune to wall-clock adjustments during the run.
        start = time.perf_counter()
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env=env,
        )
        elapsed = time.perf_counter() - start

        if result.returncode != 0:
            print(f" FAILED in {elapsed:.1f}s, return code: {result.returncode}")
            print(" Last 10 lines of stderr:")
            for line in result.stderr.split("\n")[-10:]:
                if line.strip():
                    print(f" {line}")
            return None

        # Parse debug output to get chunk processing times.
        chunk_times = _parse_chunk_times(result.stderr)
        print(f" Total time: {elapsed:.1f}s, return code: {result.returncode}")
        if chunk_times:
            print(f" Chunk times: {chunk_times}")
            print(f" Average chunk time: {statistics.mean(chunk_times):.1f}s")

        if os.path.exists(output_path):
            _report_output_segments(output_path, elapsed)
        return elapsed
    except Exception as e:
        # Benchmark boundary: report the failure with a traceback and fall
        # through to the None return instead of aborting the whole script.
        print(f" ERROR: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if os.path.exists(output_path):
            os.unlink(output_path)
    return None
def test_debug_vs_production():
    """Compare the run time of the debug and production ASR scripts.

    Runs ``scripts/asr_processor.py`` ("production") and
    ``scripts/asr_processor_debug.py`` ("debug", with ``ASR_DEBUG=1``) once
    each on the small sample video, prints each run time, and - when both
    succeed - prints the debug overhead as a percentage of the production
    time together with a pass/fail note against a 5% limit.

    Returns:
        ``None`` in every case; results are reported on stdout only.
    """
    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
    if not os.path.exists(test_video):
        print(f"Small test video not found: {test_video}")
        return

    print("\nComparing debug vs production versions:")
    versions = [
        ("production", "scripts/asr_processor.py"),
        ("debug", "scripts/asr_processor_debug.py"),
    ]
    results = {}
    for version_name, script_path in versions:
        print(f"\n Testing {version_name} version...")
        # Reserve a unique output path; removed again in the finally clause.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
            output_path = f.name
        try:
            cmd = [
                "/opt/homebrew/bin/python3.11",
                script_path,
                test_video,
                output_path,
                "--chunk-duration",
                "600",
            ]
            env = os.environ.copy()
            env["MOMENTRY_DISABLE_REDIS"] = "1"
            if version_name == "debug":
                env["ASR_DEBUG"] = "1"

            # Monotonic clock: immune to wall-clock adjustments during the run.
            start = time.perf_counter()
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                env=env,
            )
            elapsed = time.perf_counter() - start

            if result.returncode == 0:
                results[version_name] = elapsed
                print(f" Time: {elapsed:.1f}s")
                # Count debug lines
                if version_name == "debug":
                    debug_lines = [
                        line
                        for line in result.stderr.split("\n")
                        if "ASR_DEBUG" in line
                    ]
                    print(f" Debug lines: {len(debug_lines)}")
            else:
                print(f" FAILED: {elapsed:.1f}s, return code: {result.returncode}")
        except Exception as e:
            # Best effort: a failure of one version must not stop the other.
            print(f" ERROR: {e}")
        finally:
            if os.path.exists(output_path):
                os.unlink(output_path)

    # Compare results
    if "production" in results and "debug" in results:
        prod_time = results["production"]
        debug_time = results["debug"]
        # Overhead is the extra time the debug run costs relative to the
        # production baseline. The operands must not be swapped: with
        # (prod - debug) / debug a slower debug run would yield a negative
        # value and always pass the 5% check.
        overhead = ((debug_time - prod_time) / prod_time) * 100
        print("\n Comparison:")
        print(f" Production: {prod_time:.1f}s")
        print(f" Debug: {debug_time:.1f}s")
        print(f" Overhead: {overhead:.1f}%")
        if overhead <= 5:
            print(" ✅ Within 5% overhead limit")
        else:
            print(" ⚠️ Exceeds 5% overhead limit")
def main():
    """Run the three benchmarks in order and print a summary with next steps."""
    rule = "=" * 60
    print("ASR Processor Performance Benchmark")
    print(rule)

    # Benchmarks run strictly in this order: baseline, large video, comparison.
    small_result = test_small_video()
    large_result = test_large_video_chunk()
    test_debug_vs_production()

    print(f"\n{rule}")
    print("Summary:")
    summary_rows = (
        ("Small video baseline", small_result),
        ("Large video chunk", large_result),
    )
    for label, seconds in summary_rows:
        # A benchmark that returned nothing (or zero) is left out.
        if seconds:
            print(f" {label}: {seconds:.1f}s")

    print("\nNext steps:")
    next_steps = (
        " 1. Complete end-to-end test with full 2.2GB video",
        " 2. Verify chunk merging logic works correctly",
        " 3. Test with Redis enabled (if needed)",
        " 4. Integrate with Rust processor wrapper",
    )
    for step in next_steps:
        print(step)


if __name__ == "__main__":
    main()