feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
This commit is contained in:
153
test_complete.py
Normal file
153
test_complete.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""Test complete ASR processing of large file."""
|
||||
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
import json
|
||||
|
||||
|
||||
def _tail_non_blank(text, count):
    """Return the non-blank lines found among the last *count* lines of *text*."""
    return [line for line in text.split("\n")[-count:] if line.strip()]


def _count_gaps_and_overlaps(segments, tolerance=0.1):
    """Count gaps and overlaps between consecutive segments.

    Args:
        segments: Sequence of dicts read with ``.get("start", 0)`` and
            ``.get("end", 0)``.
        tolerance: Slack, in the same unit as the timestamps, below which a
            gap or overlap is ignored (the caller reports it as 100ms).

    Returns:
        A ``(gaps, overlaps)`` tuple of counts.
    """
    gaps = 0
    overlaps = 0
    prev_end = 0
    for index, seg in enumerate(segments):
        seg_start = seg.get("start", 0)
        seg_end = seg.get("end", 0)
        # The first segment has no predecessor to compare against.
        if index > 0:
            if seg_start > prev_end + tolerance:
                gaps += 1
            elif seg_start < prev_end - tolerance:
                overlaps += 1
        prev_end = seg_end
    return gaps, overlaps


def test_complete():
    """Run the ASR processor end-to-end on a large video and print a report.

    Launches ``scripts/asr_processor.py`` (path relative to the current
    working directory) as a subprocess against a hard-coded local video,
    writing its result to a temporary JSON file. On success, prints chunk and
    error counts taken from stderr plus segment statistics read from the JSON
    output. On failure or timeout, prints the tail of the captured stderr.

    The function only prints; it returns ``None`` in every case and does not
    raise on subprocess failure. If the test video does not exist it prints a
    notice and returns immediately. The temporary output file is always
    removed.
    """
    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
    if not os.path.exists(test_video):
        print(f"Test video not found: {test_video}")
        return

    # Only the path is needed: the subprocess writes the file, and the
    # ``finally`` clause below removes it (hence delete=False).
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
        output_path = f.name

    try:
        cmd = [
            "/opt/homebrew/bin/python3.11",
            "scripts/asr_processor.py",
            test_video,
            output_path,
            "--uuid",
            "test_complete",
            "--chunk-duration",
            "600",
        ]

        env = os.environ.copy()
        env["MOMENTRY_DISABLE_REDIS"] = "1"
        env["ASR_DEBUG"] = "1"
        env["MOMENTRY_ASR_CHUNK_TIMEOUT"] = "120"  # 2 minutes per chunk (generous)

        print("Running ASR processor to completion...")
        print(f"Command: {' '.join(cmd)}")
        print(
            "Env: MOMENTRY_DISABLE_REDIS=1, ASR_DEBUG=1, MOMENTRY_ASR_CHUNK_TIMEOUT=120"
        )
        print("-" * 60)

        started_at = time.time()
        # Run with generous timeout (10 minutes total for 12 chunks)
        timeout = 600  # 10 minutes

        proc = None
        timeout_stderr = ""
        try:
            proc = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=timeout,
                env=env,
            )
        except subprocess.TimeoutExpired as exc:
            print(f"\n⏱️ TOTAL TIMEOUT after {timeout}s")
            # Keep whatever the child wrote before it was killed; this is the
            # only diagnostic available on the timeout path. The attribute
            # may be bytes even with text=True, or None if nothing was read.
            partial = exc.stderr
            if isinstance(partial, bytes):
                partial = partial.decode("utf-8", errors="replace")
            timeout_stderr = partial or ""

        elapsed = time.time() - started_at

        if proc is not None and proc.returncode == 0:
            print(f"\n✓ Process completed successfully in {elapsed:.1f}s")
            print(f"Return code: {proc.returncode}")

            # Count lines in stderr
            stderr_lines = proc.stderr.split("\n")
            print(f"Stderr lines: {len(stderr_lines)}")

            # Check for success patterns
            chunk_successes = [
                line for line in stderr_lines if "transcribe_chunk succeeded" in line
            ]
            print(f"Successful chunks: {len(chunk_successes)}")

            # Look for any errors
            errors = [
                line
                for line in stderr_lines
                if "error" in line.lower() and "debug" not in line
            ]
            if errors:
                print(f"Errors found: {len(errors)}")
                for err in errors[:5]:
                    print(f" {err}")

            if os.path.exists(output_path):
                with open(output_path, "r", encoding="utf-8") as result_file:
                    data = json.load(result_file)
                segments = data.get("segments", [])
                print(f" Total segments: {len(segments)}")
                print(f" Language: {data.get('language')}")
                print(f" Language probability: {data.get('language_probability')}")

                # Check segment ordering
                if segments:
                    first_start = segments[0].get("start", 0)
                    last_end = segments[-1].get("end", 0)
                    print(f" First segment start: {first_start:.1f}s")
                    print(f" Last segment end: {last_end:.1f}s")
                    print(f" Total transcription duration: {last_end:.1f}s")

                    # Check for gaps or overlaps
                    gaps, overlaps = _count_gaps_and_overlaps(segments)
                    print(f" Gaps >100ms: {gaps}, Overlaps >100ms: {overlaps}")
            else:
                print(f" Output file not found at {output_path}")

            # Print last 10 lines of stderr for debugging
            print("\nLast 10 lines of stderr:")
            for line in _tail_non_blank(proc.stderr, 10):
                print(f" {line}")
        else:
            print("\n✗ Process failed or killed")
            print(f"Elapsed: {elapsed:.1f}s")
            if proc is not None:
                print(f"Return code: {proc.returncode}")
                print("Last 20 lines of stderr:")
                for line in _tail_non_blank(proc.stderr, 20):
                    print(f" {line}")
            else:
                print("Process was killed due to timeout")
                if timeout_stderr:
                    print("Last 20 lines of stderr:")
                    for line in _tail_non_blank(timeout_stderr, 20):
                        print(f" {line}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fall through
        # to cleanup instead of propagating.
        print(f"✗ Error: {e}")
        import traceback

        traceback.print_exc()
    finally:
        if os.path.exists(output_path):
            os.unlink(output_path)
            print("✓ Cleaned up output file")
|
||||
|
||||
|
||||
# Script entry point: run the end-to-end ASR check when executed directly.
if __name__ == "__main__":
    test_complete()
|
||||
Reference in New Issue
Block a user