feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
This commit is contained in:
142
test_chunking_threshold.py
Normal file
142
test_chunking_threshold.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test chunking threshold (30 minutes/1800 seconds).
|
||||
Create silent audio files of 20 to 40 minutes and test ASR chunking on each.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def create_test_audio(duration_seconds, output_path):
    """Create a silent audio file of the given duration using ffmpeg.

    The clip is generated from ffmpeg's ``anullsrc`` lavfi source and encoded
    as 16 kHz, mono, 16-bit PCM (``pcm_s16le``).

    Args:
        duration_seconds: Clip length in seconds; passed to ffmpeg's ``-t``.
        output_path: Destination path for the generated audio file.

    Returns:
        True if ffmpeg exited with status 0 and ``output_path`` exists;
        False otherwise, including when ffmpeg could not be started at all.
    """
    cmd = [
        "ffmpeg",
        "-f", "lavfi",
        "-i", "anullsrc=r=16000:cl=mono",
        "-t", str(duration_seconds),
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        "-y",  # overwrite an existing output file without prompting
        output_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True)
    except OSError:
        # ffmpeg is missing or not executable. Callers expect a boolean, so
        # report this like any other failed run instead of raising.
        return False
    return result.returncode == 0 and os.path.exists(output_path)
|
||||
|
||||
|
||||
# Audio up to and including this many seconds (30 minutes) is expected to be
# transcribed directly; anything longer is expected to be chunked.
_CHUNK_THRESHOLD_SECONDS = 1800


def _run_chunking_case(run_asr, duration, description):
    """Run one synthetic clip through ASR and check the processing mode used.

    Args:
        run_asr: The ``run_asr`` callable imported from ``asr_processor``.
        duration: Length of the silent test clip in seconds.
        description: Human-readable label printed with the results.

    Returns:
        True if the clip was created, ASR reported success, the output JSON
        exists and its ``processing_mode`` matches the expectation for
        ``duration``; False otherwise.
    """
    import json
    import traceback

    print(f"\n{'=' * 60}")
    print(f"Test: {description}")
    print(f"Duration: {duration} seconds ({duration / 60:.1f} minutes)")

    with tempfile.TemporaryDirectory() as temp_dir:
        audio_path = os.path.join(temp_dir, "test_audio.wav")
        output_path = os.path.join(temp_dir, "output.json")

        print("Creating test audio...")
        if not create_test_audio(duration, audio_path):
            print("Failed to create test audio")
            return False

        print("Running ASR...")
        start_time = time.time()

        try:
            success = run_asr(
                video_path=None,  # Use audio directly
                audio_path=audio_path,
                output_path=output_path,
                model_size="tiny",
                progress=False,  # Don't use Redis publisher
            )
            elapsed = time.time() - start_time

            if not (success and os.path.exists(output_path)):
                print("Result: FAILED")
                print(f"Success flag: {success}")
                print(f"Output exists: {os.path.exists(output_path)}")
                return False

            with open(output_path, "r") as f:
                data = json.load(f)
        except Exception as e:
            # Harness boundary: report the failure and let the remaining
            # cases run instead of aborting the whole script.
            print(f"Exception during ASR: {e}")
            traceback.print_exc()
            return False

    # NOTE(review): assumes the ASR output JSON carries "processing_mode"
    # and "chunk_count" keys; confirm against asr_processor.
    processing_mode = data.get("processing_mode", "unknown")
    chunk_count = data.get("chunk_count", 1)

    print("Result: SUCCESS")
    print(f"Processing mode: {processing_mode}")
    print(f"Chunk count: {chunk_count}")
    print(f"Elapsed time: {elapsed:.2f}s")

    # The boundary value itself (exactly 1800 s) is expected to stay direct.
    expected_mode = (
        "direct" if duration <= _CHUNK_THRESHOLD_SECONDS else "chunked"
    )
    if processing_mode != expected_mode:
        print(
            f"WARNING: Expected {expected_mode} transcription "
            f"but got {processing_mode}"
        )
        return False
    return True


def test_chunking():
    """Test ASR chunking with audio durations around the 30-minute threshold.

    Imports ``run_asr`` from ``asr_processor`` (looked up in the ``scripts``
    directory next to this file), then runs it on silent clips of 20 min,
    30 min, 30m10s and 40 min. Every case is run even if an earlier one fails.

    Returns:
        True only if every case succeeded and used the expected processing
        mode; False if ``asr_processor`` cannot be imported or any case
        failed.
    """
    # Resolve from an absolute path so the lookup does not depend on the
    # current working directory.
    scripts_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "scripts"
    )
    if scripts_dir not in sys.path:
        sys.path.insert(0, scripts_dir)

    # Import after path is set
    try:
        from asr_processor import run_asr
    except ImportError as e:
        print(f"Failed to import asr_processor: {e}")
        return False

    test_cases = [
        (1200, "20 minutes - should use direct transcription"),
        (1800, "30 minutes - boundary, should use direct"),
        (1810, "30m10s - should use chunked transcription"),
        (2400, "40 minutes - should use chunked transcription"),
    ]

    all_passed = True
    for duration, description in test_cases:
        if not _run_chunking_case(run_asr, duration, description):
            all_passed = False
    return all_passed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Only this entry point needs shutil, so it is imported locally.
    import shutil

    print("Testing ASR chunking threshold (30 minutes/1800 seconds)")
    print("This test creates synthetic audio files of various durations")
    print("and verifies the correct transcription mode is used.\n")

    # Check if ffmpeg is available. shutil.which searches PATH in-process,
    # so the check does not depend on an external `which` binary.
    if shutil.which("ffmpeg") is None:
        print("ERROR: ffmpeg not found in PATH")
        sys.exit(1)

    success = test_chunking()

    # Exit status mirrors the test outcome: 0 on success, 1 on failure.
    if success:
        print("\n✅ Chunking threshold test completed")
        sys.exit(0)
    else:
        print("\n❌ Chunking threshold test failed")
        sys.exit(1)
|
||||
Reference in New Issue
Block a user