Files
momentry_core/test_asr_v2_large.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

133 lines
3.6 KiB
Python

#!/usr/bin/env python3
"""
Test ASR v2 processor on large video.
"""
import json
import os
import signal
import subprocess
import sys
import tempfile
import threading
import time
from pathlib import Path
def timeout_handler(signum, frame):
    """Abort the current operation by raising ``TimeoutError``.

    The ``(signum, frame)`` signature matches what a signal handler receives;
    both arguments are ignored.

    NOTE(review): nothing in the visible part of this file registers this
    function with ``signal.signal`` -- confirm whether it is still needed.

    Raises:
        TimeoutError: always, with the message "Test timed out".
    """
    message = "Test timed out"
    raise TimeoutError(message)
def run_with_timeout(cmd, timeout_sec):
    """Run *cmd* as a subprocess and kill it if it exceeds *timeout_sec*.

    Args:
        cmd: Argument list of strings passed to ``subprocess.Popen`` (no
            shell is involved).
        timeout_sec: Maximum number of wall-clock seconds the process may run
            before it is killed.

    Returns:
        A ``(returncode, stdout, stderr, elapsed)`` tuple. ``stdout`` and
        ``stderr`` are the captured text streams and ``elapsed`` is the
        wall-clock duration in seconds. If the process had to be killed, the
        return code is whatever the OS reports for the killed process, and
        the output captured up to that point is still returned.
    """
    print(f"Running with timeout {timeout_sec}s: {' '.join(cmd)}")

    # A monotonic clock keeps the duration correct even if the system clock
    # is adjusted while the (potentially long) child process is running.
    start_time = time.monotonic()
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    try:
        stdout, stderr = proc.communicate(timeout=timeout_sec)
    except subprocess.TimeoutExpired:
        # Kill the child, then communicate() again to reap it and collect
        # whatever output it produced before the deadline.
        proc.kill()
        stdout, stderr = proc.communicate()
    elapsed = time.monotonic() - start_time

    return proc.returncode, stdout, stderr, elapsed
def _print_stderr_highlights(stderr):
    """Print at most ten stderr lines that mention errors, warnings or "ASR:"."""
    highlights = [
        line
        for line in stderr.split("\n")
        if "error" in line.lower() or "warning" in line.lower() or "ASR:" in line
    ]
    if highlights:
        print(" Stderr highlights:")
        for line in highlights[:10]:
            print(f" {line}")


def _print_transcription_summary(output_path):
    """Print a short summary of the JSON transcription stored at *output_path*.

    Returns:
        True when the file was read and parsed, False when it could not be
        (unreadable file, bad encoding, or malformed JSON).
    """
    try:
        with open(output_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # A processor that crashed or was killed mid-write can leave a partial
        # file behind; report it instead of aborting the run with a traceback.
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f"\nCould not read transcription output {output_path}: {e}")
        return False

    # Treat a missing or null "segments" entry as an empty list.
    segments = data.get("segments") or []
    print("\nTranscription results:")
    print(f" Language: {data.get('language')}")
    print(f" Segments: {len(segments)}")
    print(f" Chunks: {data.get('chunk_count', 1)}")
    if segments:
        print(f" First segment: {segments[0]['text'][:50]}...")
        print(f" Last segment: {segments[-1]['text'][:50]}...")
    return True


def test_large_video():
    """Run the ASR v2 processor script on a large test video and report on it.

    The 238MB sample video is tried first, falling back to the 2.2GB one.
    ``scripts/asr_processor_v2.py`` is then run through ``run_with_timeout``
    with 300-second chunks, the ``tiny`` model and ``int8`` compute type,
    under a 300-second timeout. Afterwards the exit code, elapsed time,
    notable stderr lines and a summary of the JSON output are printed.

    All paths are relative to the current working directory.

    Returns:
        True when the processor exited with code 0 and any output file it
        left behind could be parsed. False when no video or the script is
        missing, the run raised an exception, the exit code was non-zero, or
        the output file was unreadable. The boolean is consumed by the
        ``__main__`` runner.
    """
    # Start with 238MB video first
    video_path = "../test_video/big_buck_bunny_480p_h264.mov"  # 238MB
    if not Path(video_path).exists():
        print(f"Video not found: {video_path}")
        # Try the 2.2GB video
        video_path = "../test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
        if not Path(video_path).exists():
            print("Large video not found")
            return False

    output_dir = Path("test_output_v2")
    output_dir.mkdir(exist_ok=True, parents=True)
    output_path = output_dir / "asr_v2_large_output.json"

    script_path = Path("scripts/asr_processor_v2.py")
    if not script_path.exists():
        print(f"Script not found: {script_path}")
        return False

    cmd = [
        sys.executable,
        str(script_path),
        video_path,
        str(output_path),
        "--chunk-duration",
        "300",  # 5 minutes
        "--model-size",
        "tiny",
        "--compute-type",
        "int8",
    ]

    print(f"Testing large video: {video_path}")
    print(f"Size: {Path(video_path).stat().st_size / (1024 * 1024 * 1024):.2f} GB")

    # Run with 5-minute timeout (should be enough for chunked processing)
    timeout = 300  # 5 minutes
    try:
        returncode, stdout, stderr, elapsed = run_with_timeout(cmd, timeout)
    except Exception as e:
        # Top-level boundary of the check: report any launch failure as a
        # failed run rather than letting it escape as a traceback.
        print(f"Error running test: {e}")
        return False

    print("\nResults:")
    print(f" Exit code: {returncode}")
    print(f" Elapsed time: {elapsed:.1f}s")
    print(f" Timeout: {timeout}s")

    if stdout:
        print(f" Stdout length: {len(stdout)} chars")
    if stderr:
        # Show warnings/errors
        _print_stderr_highlights(stderr)

    # A missing output file does not by itself fail the run; an output file
    # that exists but cannot be parsed does.
    output_ok = True
    if output_path.exists():
        output_ok = _print_transcription_summary(output_path)

    return returncode == 0 and output_ok
if __name__ == "__main__":
    # Script entry point: run the large-video check and map its boolean
    # result onto the process exit status (0 = passed, 1 = failed/timed out).
    # `time` is imported with the other modules at the top of the file, so
    # the functions that use it also work when this module is imported.
    print("Testing ASR v2 on large video...")
    success = test_large_video()
    if success:
        print("\n✅ Test passed!")
    else:
        print("\n❌ Test failed or timed out")
    sys.exit(0 if success else 1)