- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
154 lines
5.1 KiB
Python
154 lines
5.1 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""Test complete ASR processing of large file."""
|
|
|
|
import subprocess
|
|
import tempfile
|
|
import os
|
|
import time
|
|
import sys
|
|
import json
|
|
|
|
|
|
def _decode_output(raw):
    """Return captured process output as ``str``.

    ``subprocess.TimeoutExpired`` can carry ``bytes`` even when ``text=True``
    was requested, and ``None`` when nothing was captured, so normalise all
    three shapes here.
    """
    if raw is None:
        return ""
    if isinstance(raw, bytes):
        return raw.decode("utf-8", errors="replace")
    return raw


def _print_tail(text, count):
    """Print the non-blank lines among the last *count* lines of *text*."""
    for line in text.split("\n")[-count:]:
        if line.strip():
            print(f" {line}")


def _count_gaps_and_overlaps(segments, tolerance=0.1):
    """Count adjacent segment pairs that are separated or overlapping.

    A pair counts as a gap when the later segment starts more than
    *tolerance* seconds after the earlier one ends, and as an overlap when it
    starts more than *tolerance* seconds before that end.

    Returns:
        A ``(gaps, overlaps)`` tuple of ints.
    """
    gaps = 0
    overlaps = 0
    for earlier, later in zip(segments, segments[1:]):
        earlier_end = earlier.get("end", 0)
        later_start = later.get("start", 0)
        if later_start > earlier_end + tolerance:
            gaps += 1
        elif later_start < earlier_end - tolerance:
            overlaps += 1
    return gaps, overlaps


def _report_output(output_path):
    """Print a summary of the JSON transcript written to *output_path*.

    The temporary output file is created up front by the caller, so a run in
    which the processor wrote nothing leaves an empty file rather than a
    missing one. Missing, empty and unparsable files are each reported with a
    message instead of raising, so the caller can still print the stderr tail.
    """
    if not os.path.exists(output_path):
        print(f" Output file not found at {output_path}")
        return
    if os.path.getsize(output_path) == 0:
        print(f" Output file is empty at {output_path}")
        return

    try:
        with open(output_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f" Output file is not valid JSON: {exc}")
        return

    segments = data.get("segments", [])
    print(f" Total segments: {len(segments)}")
    print(f" Language: {data.get('language')}")
    print(f" Language probability: {data.get('language_probability')}")

    if not segments:
        return

    # Check segment ordering
    first_start = segments[0].get("start", 0)
    last_end = segments[-1].get("end", 0)
    print(f" First segment start: {first_start:.1f}s")
    print(f" Last segment end: {last_end:.1f}s")
    print(f" Total transcription duration: {last_end:.1f}s")

    # Check for gaps or overlaps
    gaps, overlaps = _count_gaps_and_overlaps(segments)
    print(f" Gaps >100ms: {gaps}, Overlaps >100ms: {overlaps}")


def _report_success(proc, elapsed, output_path):
    """Print the diagnostics for a run that exited with return code 0."""
    print(f"\n✓ Process completed successfully in {elapsed:.1f}s")
    print(f"Return code: {proc.returncode}")

    # Count lines in stderr
    stderr_lines = proc.stderr.split("\n")
    print(f"Stderr lines: {len(stderr_lines)}")

    # Check for success patterns
    chunk_successes = [
        line for line in stderr_lines if "transcribe_chunk succeeded" in line
    ]
    print(f"Successful chunks: {len(chunk_successes)}")

    # Look for any errors. "error" is matched case-insensitively, while the
    # "debug" exclusion is case-sensitive (kept exactly as before).
    errors = [
        line
        for line in stderr_lines
        if "error" in line.lower() and "debug" not in line
    ]
    if errors:
        print(f"Errors found: {len(errors)}")
        for err in errors[:5]:
            print(f" {err}")

    _report_output(output_path)

    # Print last 10 lines of stderr for debugging
    print("\nLast 10 lines of stderr:")
    _print_tail(proc.stderr, 10)


def test_complete():
    """Run ``scripts/asr_processor.py`` on a fixed test video and print a report.

    The processor is launched as a subprocess with Redis disabled, ASR debug
    output enabled and a 120 s per-chunk timeout, writing its JSON result to a
    temporary file. The whole run is limited to 600 s. On success the report
    covers stderr statistics, the transcript summary and segment gap/overlap
    counts; on failure or timeout it prints the tail of stderr. The temporary
    output file is always removed afterwards.

    Returns:
        None. All results are printed; the function returns early if the test
        video does not exist.
    """
    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
    if not os.path.exists(test_video):
        print(f"Test video not found: {test_video}")
        return

    # Only the path is needed: the file is closed right away and the
    # subprocess writes to it. delete=False means it already exists on disk.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
        output_path = f.name

    try:
        cmd = [
            "/opt/homebrew/bin/python3.11",
            "scripts/asr_processor.py",
            test_video,
            output_path,
            "--uuid",
            "test_complete",
            "--chunk-duration",
            "600",
        ]

        env = os.environ.copy()
        env["MOMENTRY_DISABLE_REDIS"] = "1"
        env["ASR_DEBUG"] = "1"
        env["MOMENTRY_ASR_CHUNK_TIMEOUT"] = "120"  # 2 minutes per chunk (generous)

        print("Running ASR processor to completion...")
        print(f"Command: {' '.join(cmd)}")
        print(
            "Env: MOMENTRY_DISABLE_REDIS=1, ASR_DEBUG=1, MOMENTRY_ASR_CHUNK_TIMEOUT=120"
        )
        print("-" * 60)

        started_at = time.time()
        # Run with generous timeout (10 minutes total for 12 chunks)
        timeout = 600  # 10 minutes

        proc = None
        partial_stderr = ""
        try:
            proc = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=timeout,
                env=env,
            )
        except subprocess.TimeoutExpired as exc:
            print(f"\n⏱️ TOTAL TIMEOUT after {timeout}s")
            # Keep whatever stderr was captured before the kill so the
            # timeout can still be diagnosed.
            partial_stderr = _decode_output(exc.stderr)

        elapsed = time.time() - started_at

        if proc is not None and proc.returncode == 0:
            _report_success(proc, elapsed, output_path)
        else:
            print("\n✗ Process failed or killed")
            print(f"Elapsed: {elapsed:.1f}s")
            if proc is not None:
                print(f"Return code: {proc.returncode}")
                print("Last 20 lines of stderr:")
                _print_tail(proc.stderr, 20)
            else:
                print("Process was killed due to timeout")
                if partial_stderr.strip():
                    print("Last 20 lines of stderr before timeout:")
                    _print_tail(partial_stderr, 20)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and continue to
        # cleanup rather than propagate.
        print(f"✗ Error: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if os.path.exists(output_path):
            os.unlink(output_path)
            print("✓ Cleaned up output file")
|
|
|
|
|
|
# Script entry point: run the end-to-end ASR check only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    test_complete()
|