Files
momentry_core/test_complete.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

154 lines
5.1 KiB
Python

#!/opt/homebrew/bin/python3.11
"""Test complete ASR processing of large file."""
import subprocess
import tempfile
import os
import time
import sys
import json
def _decode_captured(captured):
    """Return captured subprocess output as ``str`` (``""`` if none was captured).

    ``subprocess.TimeoutExpired`` may carry ``bytes`` even when the child was
    started with ``text=True``, so both representations are accepted.
    """
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        return captured.decode("utf-8", errors="replace")
    return captured


def _print_tail(text, count):
    """Print the non-blank lines among the last *count* lines of *text*."""
    for line in text.splitlines()[-count:]:
        if line.strip():
            print(f" {line}")


def _count_gaps_and_overlaps(segments, tolerance=0.1):
    """Count adjacent segment pairs whose boundary is off by more than *tolerance*.

    Returns a ``(gaps, overlaps)`` tuple: a gap is a segment starting more than
    *tolerance* seconds after the previous one ended, an overlap is a segment
    starting more than *tolerance* seconds before the previous one ended.
    """
    gaps = 0
    overlaps = 0
    for previous, current in zip(segments, segments[1:]):
        prev_end = previous.get("end", 0)
        seg_start = current.get("start", 0)
        if seg_start > prev_end + tolerance:
            gaps += 1
        elif seg_start < prev_end - tolerance:
            overlaps += 1
    return gaps, overlaps


def _report_transcript(output_path):
    """Print a summary of the transcript JSON stored at *output_path*."""
    # The temp file is created up front, so its mere existence proves nothing:
    # only a non-empty file means the processor actually wrote a result.
    try:
        has_output = os.path.getsize(output_path) > 0
    except OSError:
        has_output = False
    if not has_output:
        print(f" Output file missing or empty at {output_path}")
        return

    try:
        with open(output_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as exc:
        print(f" Output file is not valid JSON: {exc}")
        return

    segments = data.get("segments", [])
    print(f" Total segments: {len(segments)}")
    print(f" Language: {data.get('language')}")
    print(f" Language probability: {data.get('language_probability')}")

    if segments:
        first_start = segments[0].get("start", 0)
        last_end = segments[-1].get("end", 0)
        print(f" First segment start: {first_start:.1f}s")
        print(f" Last segment end: {last_end:.1f}s")
        print(f" Total transcription duration: {last_end:.1f}s")
        gaps, overlaps = _count_gaps_and_overlaps(segments)
        print(f" Gaps >100ms: {gaps}, Overlaps >100ms: {overlaps}")


def test_complete():
    """Run the ASR processor on a large local video and print a diagnostic report.

    The check is skipped (with a message) when the hard-coded test video is not
    present. Otherwise ``scripts/asr_processor.py`` is launched as a subprocess
    (the path is relative, so the current working directory matters) with Redis
    disabled and ASR debugging enabled, and its stderr and JSON output are
    summarised on stdout.

    Returns:
        None. Failures are reported by printing, not by raising; any unexpected
        exception is caught, printed with its traceback, and swallowed. The
        temporary output file is always removed.
    """
    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
    if not os.path.exists(test_video):
        print(f"Test video not found: {test_video}")
        return

    # delete=False: only the path is needed; the subprocess writes the content.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
        output_path = f.name

    try:
        cmd = [
            "/opt/homebrew/bin/python3.11",
            "scripts/asr_processor.py",
            test_video,
            output_path,
            "--uuid",
            "test_complete",
            "--chunk-duration",
            "600",
        ]
        env_overrides = {
            "MOMENTRY_DISABLE_REDIS": "1",
            "ASR_DEBUG": "1",
            "MOMENTRY_ASR_CHUNK_TIMEOUT": "120",  # 2 minutes per chunk (generous)
        }
        env = os.environ.copy()
        env.update(env_overrides)

        print("Running ASR processor to completion...")
        print(f"Command: {' '.join(cmd)}")
        # Build the banner from the values actually applied so it cannot drift.
        print("Env: " + ", ".join(f"{k}={v}" for k, v in env_overrides.items()))
        print("-" * 60)

        # Overall cap for the whole run (10 minutes), independent of the
        # per-chunk timeout passed through the environment.
        timeout = 600
        partial_stderr = ""
        started_at = time.time()
        try:
            proc = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=timeout,
                env=env,
            )
        except subprocess.TimeoutExpired as exc:
            print(f"\n⏱️ TOTAL TIMEOUT after {timeout}s")
            proc = None
            # Keep whatever the child wrote before it was killed for debugging.
            partial_stderr = _decode_captured(exc.stderr)
        elapsed = time.time() - started_at

        if proc is not None and proc.returncode == 0:
            print(f"\n✓ Process completed successfully in {elapsed:.1f}s")
            print(f"Return code: {proc.returncode}")

            stderr_lines = proc.stderr.splitlines()
            print(f"Stderr lines: {len(stderr_lines)}")

            chunk_successes = sum(
                "transcribe_chunk succeeded" in line for line in stderr_lines
            )
            print(f"Successful chunks: {chunk_successes}")

            # Case-insensitive on both sides so DEBUG/Debug lines are excluded
            # just like lowercase "debug" ones.
            errors = [
                line
                for line in stderr_lines
                if "error" in line.lower() and "debug" not in line.lower()
            ]
            if errors:
                print(f"Errors found: {len(errors)}")
                for err in errors[:5]:
                    print(f" {err}")

            _report_transcript(output_path)

            print("\nLast 10 lines of stderr:")
            _print_tail(proc.stderr, 10)
        else:
            print("\n✗ Process failed or killed")
            print(f"Elapsed: {elapsed:.1f}s")
            if proc is not None:
                print(f"Return code: {proc.returncode}")
                print("Last 20 lines of stderr:")
                _print_tail(proc.stderr, 20)
            else:
                print("Process was killed due to timeout")
                if partial_stderr.strip():
                    print("Last 20 lines of stderr:")
                    _print_tail(partial_stderr, 20)
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on to cleanup.
        print(f"✗ Error: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if os.path.exists(output_path):
            os.unlink(output_path)
            print("✓ Cleaned up output file")
if __name__ == "__main__":
    # Executed directly as a script: run the end-to-end ASR check once.
    test_complete()