- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
291 lines
9.0 KiB
Python
291 lines
9.0 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""Performance benchmark for ASR processor fix."""
|
|
|
|
import subprocess
|
|
import tempfile
|
|
import os
|
|
import time
|
|
import sys
|
|
import json
|
|
import statistics
|
|
|
|
|
|
def test_small_video():
    """Benchmark the ASR processor on a small video to establish a baseline.

    Invokes ``scripts/asr_processor.py`` three times on a fixed sample video
    with ``MOMENTRY_DISABLE_REDIS=1`` set in the child environment, printing
    the duration of each run followed by the mean and standard deviation of
    the successful ones.

    Returns:
        Mean duration in seconds of the successful runs, or ``None`` when the
        sample video does not exist or no run succeeded.
    """
    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
    if not os.path.exists(test_video):
        print(f"Small test video not found: {test_video}")
        return None

    print(f"Testing small video: {os.path.basename(test_video)}")

    runs = 3
    times = []

    for run in range(runs):
        # The temporary file is closed right away; only its path is handed to
        # the child process, and it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
            output_path = f.name

        try:
            cmd = [
                "/opt/homebrew/bin/python3.11",
                "scripts/asr_processor.py",
                test_video,
                output_path,
                "--chunk-duration",
                "600",
            ]

            env = os.environ.copy()
            env["MOMENTRY_DISABLE_REDIS"] = "1"

            # perf_counter() is monotonic, so the measurement cannot be
            # skewed by system clock adjustments the way time.time() can.
            start = time.perf_counter()
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env,
            )
            elapsed = time.perf_counter() - start

            if result.returncode == 0:
                times.append(elapsed)
                print(
                    f" Run {run + 1}: {elapsed:.1f}s, return code: {result.returncode}"
                )
            else:
                # Failed runs are reported but excluded from the statistics.
                print(
                    f" Run {run + 1}: FAILED in {elapsed:.1f}s, return code: {result.returncode}"
                )
                print(f" stderr: {result.stderr[-200:]}")

        except Exception as e:
            # Best-effort benchmark: report the problem and try the next run.
            print(f" Run {run + 1}: ERROR {e}")
        finally:
            if os.path.exists(output_path):
                os.unlink(output_path)

    if times:
        avg = statistics.mean(times)
        # stdev() needs at least two samples.
        std = statistics.stdev(times) if len(times) > 1 else 0
        print(f" Average: {avg:.1f}s ± {std:.1f}s")
        return avg

    return None
|
|
|
|
|
|
def _parse_chunk_times(stderr_text):
    """Extract per-chunk durations (seconds) from ASR processor stderr text.

    Recognises lines such as ``"Chunk 1/12: 159 segments in 27.5s"`` and
    returns the trailing durations as floats, in order of appearance. Lines
    whose duration token is not a number are skipped.
    """
    chunk_times = []
    for line in stderr_text.split("\n"):
        if "Chunk" not in line or "segments in" not in line:
            continue
        # Take the first whitespace-delimited token after "segments in" so
        # trailing text on the line does not break the parse.
        tokens = line.partition("segments in")[2].split()
        if not tokens:
            continue
        try:
            chunk_times.append(float(tokens[0].removesuffix("s")))
        except ValueError:
            # Not a duration after all; ignore this line.
            continue
    return chunk_times


def test_large_video_chunk():
    """Benchmark chunked ASR processing on a large video.

    Runs ``scripts/asr_processor.py`` once on a fixed large sample video with
    ``--chunk-duration 600`` and ``--max-direct-duration 300``, with
    ``MOMENTRY_DISABLE_REDIS=1`` and ``ASR_DEBUG=1`` set in the child
    environment. On success it prints the total time, the per-chunk times
    parsed from stderr, the number of segments in the output JSON and the
    real-time factor (elapsed time divided by the largest segment ``end``).

    NOTE(review): nothing in the command built here restricts processing to a
    single chunk or to the first 10 minutes; confirm against
    ``asr_processor.py`` whether the whole file is processed.

    Returns:
        Elapsed time in seconds when the processor exits with code 0,
        otherwise ``None`` (video missing, non-zero exit code, or an
        exception while running or reading the output).
    """
    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
    if not os.path.exists(test_video):
        print(f"Large test video not found: {test_video}")
        return None

    print(f"\nTesting single chunk from large video: {os.path.basename(test_video)}")

    # The temporary file is closed right away; only its path is handed to the
    # child process, and it is removed in the ``finally`` below.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
        output_path = f.name

    try:
        cmd = [
            "/opt/homebrew/bin/python3.11",
            "scripts/asr_processor.py",
            test_video,
            output_path,
            "--uuid",
            "test_perf",
            "--chunk-duration",
            "600",  # 10 minutes (presumably seconds; confirm in asr_processor.py)
            "--max-direct-duration",
            "300",  # Force chunked mode even for short audio
        ]

        env = os.environ.copy()
        env["MOMENTRY_DISABLE_REDIS"] = "1"
        env["ASR_DEBUG"] = "1"

        # perf_counter() is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=env,
        )
        elapsed = time.perf_counter() - start

        if result.returncode == 0:
            # Parse debug output to get chunk processing times.
            chunk_times = _parse_chunk_times(result.stderr)

            print(f" Total time: {elapsed:.1f}s, return code: {result.returncode}")
            if chunk_times:
                print(f" Chunk times: {chunk_times}")
                print(f" Average chunk time: {statistics.mean(chunk_times):.1f}s")

            # Check output
            if os.path.exists(output_path):
                with open(output_path, "r", encoding="utf-8") as out_file:
                    data = json.load(out_file)
                segments = data.get("segments", [])
                print(f" Total segments: {len(segments)}")

                # Calculate processing rate: the audio duration is taken to
                # be the largest segment end value (never below zero).
                if segments:
                    total_audio_duration = max(
                        0, max(seg.get("end", 0) for seg in segments)
                    )

                    if total_audio_duration > 0:
                        real_time_factor = elapsed / total_audio_duration
                        print(f" Audio duration: {total_audio_duration:.1f}s")
                        print(
                            f" Real-time factor: {real_time_factor:.2f}x (lower is better)"
                        )

            return elapsed

        else:
            print(f" FAILED in {elapsed:.1f}s, return code: {result.returncode}")
            print(" Last 10 lines of stderr:")
            for line in result.stderr.split("\n")[-10:]:
                if line.strip():
                    print(f" {line}")

    except Exception as e:
        # Best-effort benchmark: report the failure with a traceback instead
        # of aborting the remaining benchmarks.
        print(f" ERROR: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if os.path.exists(output_path):
            os.unlink(output_path)

    return None
|
|
|
|
|
|
def test_debug_vs_production():
    """Compare the run time of the production and debug ASR processors.

    Runs ``scripts/asr_processor.py`` ("production") and
    ``scripts/asr_processor_debug.py`` ("debug", with ``ASR_DEBUG=1``) once
    each on the small sample video, both with ``MOMENTRY_DISABLE_REDIS=1``,
    and prints each duration. When both succeed, it reports the debug
    version's overhead relative to production and whether it stays within
    5%.

    The overhead is ``(debug - production) / production``. The previous
    formula had the operands swapped, which produced a negative "overhead"
    whenever the debug build was slower and therefore always passed the 5%
    check in exactly the case it was meant to catch.

    Returns:
        None. Results are only printed.
    """
    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
    if not os.path.exists(test_video):
        # Say why the comparison is skipped, as the other benchmarks do.
        print(f"Small test video not found: {test_video}")
        return

    print("\nComparing debug vs production versions:")

    versions = [
        ("production", "scripts/asr_processor.py"),
        ("debug", "scripts/asr_processor_debug.py"),
    ]

    results = {}

    for version_name, script_path in versions:
        print(f"\n Testing {version_name} version...")

        # The temporary file is closed right away; only its path is handed
        # to the child process, and it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
            output_path = f.name

        try:
            cmd = [
                "/opt/homebrew/bin/python3.11",
                script_path,
                test_video,
                output_path,
                "--chunk-duration",
                "600",
            ]

            env = os.environ.copy()
            env["MOMENTRY_DISABLE_REDIS"] = "1"
            if version_name == "debug":
                env["ASR_DEBUG"] = "1"

            # perf_counter() is monotonic, so the measurement cannot be
            # skewed by system clock adjustments the way time.time() can.
            start = time.perf_counter()
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                env=env,
            )
            elapsed = time.perf_counter() - start

            if result.returncode == 0:
                results[version_name] = elapsed
                print(f" Time: {elapsed:.1f}s")

                # Count debug lines
                if version_name == "debug":
                    debug_lines = [
                        line
                        for line in result.stderr.split("\n")
                        if "ASR_DEBUG" in line
                    ]
                    print(f" Debug lines: {len(debug_lines)}")
            else:
                print(f" FAILED: {elapsed:.1f}s, return code: {result.returncode}")

        except Exception as e:
            # Best-effort benchmark: report and continue with the next version.
            print(f" ERROR: {e}")
        finally:
            if os.path.exists(output_path):
                os.unlink(output_path)

    # Compare results (only meaningful when both versions succeeded).
    if "production" in results and "debug" in results:
        prod_time = results["production"]
        debug_time = results["debug"]
        # Extra time of the debug build, as a percentage of production time.
        overhead = ((debug_time - prod_time) / prod_time) * 100
        print("\n Comparison:")
        print(f" Production: {prod_time:.1f}s")
        print(f" Debug: {debug_time:.1f}s")
        print(f" Overhead: {overhead:.1f}%")

        if overhead <= 5:
            print(" ✅ Within 5% overhead limit")
        else:
            print(" ⚠️ Exceeds 5% overhead limit")
|
|
|
|
|
|
def main():
    """Run every benchmark in order and print a summary with follow-ups."""
    rule = "=" * 60
    print("ASR Processor Performance Benchmark")
    print(rule)

    # The benchmarks run sequentially: small baseline, large chunked run,
    # then the debug/production comparison (which only prints).
    baseline = test_small_video()
    large_chunk_time = test_large_video_chunk()
    test_debug_vs_production()

    print(f"\n{rule}")
    print("Summary:")
    summary_rows = (
        ("Small video baseline", baseline),
        ("Large video chunk", large_chunk_time),
    )
    for label, seconds in summary_rows:
        # Benchmarks that were skipped or failed return None and are omitted.
        if seconds:
            print(f" {label}: {seconds:.1f}s")

    print("\nNext steps:")
    next_steps = (
        "Complete end-to-end test with full 2.2GB video",
        "Verify chunk merging logic works correctly",
        "Test with Redis enabled (if needed)",
        "Integrate with Rust processor wrapper",
    )
    for number, step in enumerate(next_steps, start=1):
        print(f" {number}. {step}")
|
|
|
|
|
|
# Run the benchmarks only when executed as a script, never on import.
if __name__ == "__main__":
    main()
|