- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
143 lines
4.5 KiB
Python
143 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test chunking threshold (30 minutes/1800 seconds).
|
|
Create synthetic audio files of several durations (20 to 40 minutes) and test ASR chunking.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
|
|
def create_test_audio(duration_seconds, output_path):
    """Create a silent audio file of the specified duration using ffmpeg.

    The audio is generated from ffmpeg's ``anullsrc`` lavfi source and written
    as 16 kHz, mono, 16-bit PCM. An existing file at ``output_path`` is
    overwritten (``-y``).

    Args:
        duration_seconds: Length of the audio to generate, in seconds.
        output_path: Destination path of the generated audio file.

    Returns:
        True if ffmpeg exited with status 0 and the output file exists;
        False otherwise, including when ffmpeg could not be launched at all.
    """
    cmd = [
        "ffmpeg",
        "-f", "lavfi",
        "-i", "anullsrc=r=16000:cl=mono",
        "-t", str(duration_seconds),
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        "-y",
        output_path,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True)
    except OSError as exc:
        # Raised when the ffmpeg executable is missing or not executable.
        # Callers expect a boolean, not an exception.
        print(f"Could not run ffmpeg: {exc}")
        return False

    if result.returncode != 0:
        # Output is captured, so surface the end of stderr; otherwise the
        # reason for the failure is lost.
        stderr_tail = result.stderr.decode("utf-8", errors="replace").strip()
        if stderr_tail:
            print(f"ffmpeg failed: {stderr_tail.splitlines()[-1]}")
        return False

    return os.path.exists(output_path)
|
|
|
|
|
|
def test_chunking():
    """Test ASR chunking with different audio durations.

    For each test duration a silent audio file is generated, ``run_asr`` is
    invoked on it, and the ``processing_mode`` recorded in the JSON output is
    compared against the expected mode: ``"direct"`` at or below the
    1800-second threshold, ``"chunked"`` above it.

    Returns:
        True only if ``asr_processor`` could be imported and every test case
        produced output with the expected processing mode. False if the
        import failed, audio could not be created, ASR failed or raised, or
        any case reported an unexpected mode.
    """
    import json
    import traceback

    # Durations up to and including this value are expected to be
    # transcribed directly; longer ones are expected to be chunked.
    chunk_threshold_seconds = 1800

    # Add scripts directory to path
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "scripts"))

    # Import after path is set
    try:
        from asr_processor import run_asr
    except ImportError as e:
        print(f"Failed to import asr_processor: {e}")
        return False

    test_cases = [
        (1200, "20 minutes - should use direct transcription"),
        (1800, "30 minutes - boundary, should use direct"),
        (1810, "30m10s - should use chunked transcription"),
        (2400, "40 minutes - should use chunked transcription"),
    ]

    # Stays True only while every case behaves as expected, so the caller's
    # exit code reflects real failures instead of always reporting success.
    all_passed = True

    for duration, description in test_cases:
        print(f"\n{'=' * 60}")
        print(f"Test: {description}")
        print(f"Duration: {duration} seconds ({duration / 60:.1f} minutes)")

        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = os.path.join(temp_dir, "test_audio.wav")
            output_path = os.path.join(temp_dir, "output.json")

            print("Creating test audio...")
            if not create_test_audio(duration, audio_path):
                print("Failed to create test audio")
                all_passed = False
                continue

            print("Running ASR...")
            start_time = time.time()

            try:
                # Run ASR
                success = run_asr(
                    video_path=None,  # Use audio directly
                    audio_path=audio_path,
                    output_path=output_path,
                    model_size="tiny",
                    progress=False,  # Don't use Redis publisher
                )
                elapsed = time.time() - start_time

                if success and os.path.exists(output_path):
                    # Load and check result
                    with open(output_path, "r", encoding="utf-8") as f:
                        data = json.load(f)

                    processing_mode = data.get("processing_mode", "unknown")
                    chunk_count = data.get("chunk_count", 1)

                    print("Result: SUCCESS")
                    print(f"Processing mode: {processing_mode}")
                    print(f"Chunk count: {chunk_count}")
                    print(f"Elapsed time: {elapsed:.2f}s")

                    # Verify expected behavior
                    if (
                        duration <= chunk_threshold_seconds
                        and processing_mode != "direct"
                    ):
                        print(
                            f"WARNING: Expected direct transcription but got {processing_mode}"
                        )
                        all_passed = False
                    elif (
                        duration > chunk_threshold_seconds
                        and processing_mode != "chunked"
                    ):
                        print(
                            f"WARNING: Expected chunked transcription but got {processing_mode}"
                        )
                        all_passed = False
                else:
                    print("Result: FAILED")
                    print(f"Success flag: {success}")
                    print(f"Output exists: {os.path.exists(output_path)}")
                    all_passed = False

            except Exception as e:
                # Keep going so the remaining durations are still exercised,
                # but remember that this case did not pass.
                print(f"Exception during ASR: {e}")
                traceback.print_exc()
                all_passed = False

    return all_passed
|
|
|
|
|
|
if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import shutil

    print("Testing ASR chunking threshold (30 minutes/1800 seconds)")
    print("This test creates synthetic audio files of various durations")
    print("and verifies the correct transcription mode is used.\n")

    # Check if ffmpeg is available. shutil.which searches PATH in-process,
    # so it does not depend on an external `which` binary being installed.
    if shutil.which("ffmpeg") is None:
        print("ERROR: ffmpeg not found in PATH")
        sys.exit(1)

    success = test_chunking()

    # Exit status mirrors the test outcome: 0 on success, 1 on failure.
    if success:
        print("\n✅ Chunking threshold test completed")
        sys.exit(0)
    else:
        print("\n❌ Chunking threshold test failed")
        sys.exit(1)
|