Files
momentry_core/test_asr_large_clip.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

117 lines
3.4 KiB
Python

#!/usr/bin/env python3
"""Test ASR processor on a 10-minute clip from the large problematic video."""
import sys
import os
import subprocess
import json
import tempfile
import time
import shutil
# Paths and limits
LARGE_VIDEO = "../test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
CLIP_SECONDS = 600  # length of the extracted clip (10 minutes)
ASR_TIMEOUT_SECONDS = 900  # 15 min timeout for the ASR subprocess


def segments_outside_clip(segments, clip_seconds=CLIP_SECONDS):
    """Return the segments whose timestamps fall outside ``[0, clip_seconds]``.

    Every segment is inspected, not just the first few: the first handful of
    segments sit at the very start of the clip, so a timestamp error in a
    later part of the output would go unnoticed if only those were checked.

    Args:
        segments: iterable of mappings with numeric ``"start"`` and ``"end"``
            keys.
        clip_seconds: upper bound (inclusive) for a valid ``"end"`` value.

    Returns:
        A list of the offending segments, in their original order.

    Raises:
        KeyError: if a segment lacks ``"start"`` or ``"end"``.
    """
    return [
        seg
        for seg in segments
        if seg["start"] < 0 or seg["end"] > clip_seconds
    ]


def _extract_clip(clip_path):
    """Stream-copy the first CLIP_SECONDS of LARGE_VIDEO into ``clip_path``.

    Exits the process with status 1 if ffmpeg fails or produces no file.
    """
    print("Extracting 10-minute clip...")
    ffmpeg_cmd = [
        "ffmpeg",
        "-i",
        LARGE_VIDEO,
        "-ss",
        "0",
        "-t",
        str(CLIP_SECONDS),  # 10 minutes
        "-c",
        "copy",
        "-y",
        clip_path,
    ]
    result = subprocess.run(ffmpeg_cmd, capture_output=True)
    if result.returncode != 0:
        # ffmpeg output is not guaranteed to be valid UTF-8; never let the
        # decode step hide the real failure.
        stderr_text = result.stderr.decode(errors="replace")
        print(f"FFmpeg failed: {stderr_text}")
        sys.exit(1)
    if not os.path.exists(clip_path):
        print("Clip not created")
        sys.exit(1)
    print(
        f"Clip created: {clip_path} ({os.path.getsize(clip_path) / (1024**2):.1f} MB)"
    )


def _run_asr(clip_path, output_path):
    """Run scripts/asr_processor.py on the clip and return its exit code.

    The MOMENTRY_ASR_* variables are passed through the child environment;
    presumably the processor reads them to pick chunked mode and the model
    (confirm against scripts/asr_processor.py).

    Raises:
        subprocess.TimeoutExpired: if the processor runs longer than
            ASR_TIMEOUT_SECONDS.
    """
    env = os.environ.copy()
    env["MOMENTRY_ASR_MAX_DIRECT_DURATION"] = "300"  # 5 minutes, force chunked
    env["MOMENTRY_ASR_CHUNK_DURATION"] = "120"  # 2-minute chunks for testing
    env["MOMENTRY_ASR_MODEL_SIZE"] = "tiny"
    env["MOMENTRY_ASR_COMPUTE_TYPE"] = "int8"
    cmd = [
        "/opt/homebrew/bin/python3.11",
        "scripts/asr_processor.py",
        clip_path,
        output_path,
        "--uuid",
        "test_large",
    ]
    print("Running ASR processor with forced chunked mode...")
    print(f"Command: {' '.join(cmd)}")
    start = time.time()
    proc = subprocess.run(
        cmd, capture_output=True, text=True, env=env, timeout=ASR_TIMEOUT_SECONDS
    )
    elapsed = time.time() - start
    print(f"ASR completed in {elapsed:.1f}s")
    print(f"Return code: {proc.returncode}")
    if proc.stdout:
        print(f"STDOUT:\n{proc.stdout}")
    if proc.stderr:
        print(f"STDERR:\n{proc.stderr}")
    return proc.returncode


def _report_output(output_path):
    """Print a summary of the ASR JSON output and warn about bad timestamps.

    Exits the process with status 1 if the output file does not exist.
    """
    if not os.path.exists(output_path):
        print("ERROR: Output file not created")
        sys.exit(1)
    # Explicit UTF-8: transcripts may contain non-ASCII text and the locale
    # default encoding is not guaranteed to handle it.
    with open(output_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    segments = data.get("segments", [])
    print(f"Output contains {len(segments)} segments")
    print(
        f"Language: {data.get('language')} (prob {data.get('language_probability')})"
    )
    print(f"Processing mode: {data.get('processing_mode', 'unknown')}")
    if segments:
        print(f"First segment: {segments[0]}")
        # Verify timestamps are correct (should be within 0-600s)
        for seg in segments_outside_clip(segments):
            print(f"WARNING: segment outside clip range: {seg}")


def main():
    """Extract a clip from LARGE_VIDEO, run the ASR processor on it, report.

    Exits with status 1 when the source video is missing, ffmpeg fails, the
    ASR processor times out, produces no output or returns a non-zero exit
    code, or any unexpected exception occurs. The temporary working
    directory is always removed.
    """
    if not os.path.exists(LARGE_VIDEO):
        print(f"Large video not found: {LARGE_VIDEO}")
        sys.exit(1)
    print(f"Large video size: {os.path.getsize(LARGE_VIDEO) / (1024**3):.2f} GB")

    # Create temporary directory
    temp_dir = tempfile.mkdtemp(prefix="asr_test_")
    clip_path = os.path.join(temp_dir, "clip.mp4")
    output_path = os.path.join(temp_dir, "output.json")
    try:
        _extract_clip(clip_path)
        returncode = _run_asr(clip_path, output_path)
        # Summarise whatever output exists first; it is useful for diagnosis
        # even when the processor reported failure.
        _report_output(output_path)
        if returncode != 0:
            # A failing processor must fail the test run, even if it left an
            # output file behind.
            print(f"ERROR: ASR processor exited with code {returncode}")
            sys.exit(1)
    except subprocess.TimeoutExpired:
        print(f"ERROR: ASR processing timed out after {ASR_TIMEOUT_SECONDS} seconds")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a test script: report everything and fail.
        print(f"ERROR: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
    finally:
        # Clean up (also runs when sys.exit() is called above)
        print(f"Cleaning up {temp_dir}")
        shutil.rmtree(temp_dir, ignore_errors=True)
    print("Done.")


# Guard the entry point: the file name matches pytest's test_*.py pattern, so
# a bare import must not run the test or call sys.exit().
if __name__ == "__main__":
    main()