- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
133 lines
4.1 KiB
Python
133 lines
4.1 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""Test faster_whisper transcription in isolation."""
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import tempfile
|
|
import subprocess
|
|
|
|
# Put the directory containing this file at the front of sys.path
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
|
def _run_ffmpeg(cmd):
    """Run an ffmpeg command line and return ``(ok, stderr_text)``.

    ``ok`` is True only when the process could be launched and exited with
    status 0. A binary that cannot be started at all (for example ffmpeg is
    not on PATH) is reported as a failure instead of raising, so the caller
    can print it like any other failed step.
    """
    try:
        result = subprocess.run(cmd, capture_output=True)
    except OSError as e:
        # FileNotFoundError / PermissionError: the executable could not run.
        return False, str(e)
    # ffmpeg's stderr is not guaranteed to be valid UTF-8; never let the
    # error report itself blow up while decoding it.
    return result.returncode == 0, result.stderr.decode(errors="replace")


def _create_test_audio(wav_path):
    """Write a short 16 kHz mono PCM WAV to *wav_path*; return True on success.

    First tries to synthesize one second of silence with ffmpeg's lavfi
    ``anullsrc`` source. If that fails, falls back to extracting the first
    five seconds of audio from a fixed local test video, when that file
    exists. Progress and failures are printed; nothing is raised for an
    ffmpeg failure.
    """
    # ``-y`` goes first because it is a global option, and it is required:
    # the caller has already created *wav_path* as an empty temp file.
    silence_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "lavfi",
        "-i",
        "anullsrc=r=16000:cl=mono",
        "-t",
        "1",
        "-acodec",
        "pcm_s16le",
        wav_path,
    ]
    ok, err = _run_ffmpeg(silence_cmd)
    if ok:
        return True

    print(f"✗ Failed to create test audio: {err}")
    # Try alternative: extract a small chunk from a known video
    print("Trying to extract 5-second chunk from test video...")
    # NOTE: machine-specific path; on any other machine this fallback is
    # simply skipped.
    test_video = "/Users/accusys/test_video/20250209_212949.mp4"
    if not os.path.exists(test_video):
        print("Test video not found, skipping transcription test")
        return False

    extract_cmd = [
        "ffmpeg",
        "-y",
        "-i",
        test_video,
        "-t",
        "5",
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        wav_path,
    ]
    ok, err = _run_ffmpeg(extract_cmd)
    if not ok:
        print(f"✗ Failed to extract audio: {err}")
    return ok


def test_faster_whisper():
    """Smoke-test faster_whisper: import it, load a model, transcribe a clip.

    Steps, each reported on stdout with a ✓ or ✗ line:

    1. Import ``faster_whisper.WhisperModel``.
    2. Load the ``tiny`` model on CPU with ``int8`` compute type.
    3. Create a temporary WAV file with ffmpeg.
    4. Transcribe it with ``beam_size=5`` and print up to three segments.

    The function stops at the first failing step and always returns None;
    failures are printed, not raised. The temporary WAV file is removed
    before returning.
    """
    import traceback

    print("Testing faster_whisper...")

    # Try to import
    try:
        from faster_whisper import WhisperModel
    except ImportError as e:
        print(f"✗ Failed to import faster_whisper: {e}")
        return
    print("✓ faster_whisper imported successfully")

    # Load model
    print("Loading Whisper model (tiny, int8)...")
    start = time.perf_counter()
    try:
        model = WhisperModel("tiny", device="cpu", compute_type="int8")
    except Exception as e:
        print(f"✗ Model loading failed: {e}")
        traceback.print_exc()
        return
    elapsed = time.perf_counter() - start
    print(f"✓ Model loaded successfully in {elapsed:.2f}s")

    # Reserve a path for the test audio. delete=False keeps the file after
    # the handle is closed so ffmpeg can overwrite it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        temp_wav = f.name

    try:
        if not _create_test_audio(temp_wav):
            return

        print(
            f"✓ Created test audio file: {temp_wav} ({os.path.getsize(temp_wav)} bytes)"
        )

        # Try transcription
        print("Testing transcription...")
        start_trans = time.perf_counter()
        try:
            # Use beam_size=5 like in the ASR processor
            segments, info = model.transcribe(temp_wav, beam_size=5)
            elapsed_trans = time.perf_counter() - start_trans
            print(f"✓ Transcription initiated in {elapsed_trans:.2f}s")

            # Convert generator to list to actually run the transcription
            print("Converting segments to list...")
            segments_list = list(segments)
            elapsed_total = time.perf_counter() - start_trans
            print(f"✓ Transcription completed in {elapsed_total:.2f}s")
            print(f"  Segments: {len(segments_list)}")
            print(
                f"  Language: {info.language}, Probability: {info.language_probability}"
            )

            for i, segment in enumerate(segments_list[:3]):  # Show first 3 segments
                print(
                    f"  Segment {i}: {segment.start:.2f}s - {segment.end:.2f}s: {segment.text}"
                )
        except Exception as e:
            print(f"✗ Transcription failed: {e}")
            traceback.print_exc()
    finally:
        # Single cleanup point for every exit path above.
        try:
            os.unlink(temp_wav)
        except FileNotFoundError:
            pass
        else:
            print("✓ Cleaned up temp file")
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly as a script: run the isolated faster_whisper check.
    test_faster_whisper()
|