feat: add migrations, test scripts, and utility tools

- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00
parent 4d75b2e251
commit b54c2def30
192 changed files with 46721 additions and 0 deletions
--- a/test_performance.py
+++ b/test_performance.py
@@ -0,0 +1,290 @@
+#!/opt/homebrew/bin/python3.11
+"""Performance benchmark for ASR processor fix."""
+
+import subprocess
+import tempfile
+import os
+import time
+import sys
+import json
+import statistics
+
+
+def test_small_video():
+    """Benchmark the ASR processor on the small sample video.
+
+    Runs ``scripts/asr_processor.py`` three times against the small sample
+    clip with ``MOMENTRY_DISABLE_REDIS=1`` and ``--chunk-duration 600``,
+    timing each subprocess invocation and printing a per-run line plus the
+    mean and standard deviation of the successful runs.
+
+    Returns:
+        The mean elapsed time in seconds over the runs that exited with
+        return code 0, or ``None`` when the sample video is missing or no
+        run succeeded.
+    """
+    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
+    if not os.path.exists(test_video):
+        print(f"Small test video not found: {test_video}")
+        return None
+
+    print(f"Testing small video: {os.path.basename(test_video)}")
+
+    runs = 3
+    times = []
+
+    # The child environment is identical for every run, so build it once.
+    env = os.environ.copy()
+    env["MOMENTRY_DISABLE_REDIS"] = "1"
+
+    for run in range(runs):
+        # delete=False: only the path is needed; the child process writes the
+        # file and the ``finally`` below removes it.
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
+            output_path = f.name
+
+        try:
+            cmd = [
+                "/opt/homebrew/bin/python3.11",
+                "scripts/asr_processor.py",
+                test_video,
+                output_path,
+                "--chunk-duration",
+                "600",
+            ]
+
+            # perf_counter() is monotonic, so the measurement cannot be
+            # skewed by system clock adjustments during a run.
+            start = time.perf_counter()
+            result = subprocess.run(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                env=env,
+            )
+            elapsed = time.perf_counter() - start
+
+            if result.returncode == 0:
+                times.append(elapsed)
+                print(
+                    f"  Run {run + 1}: {elapsed:.1f}s, return code: {result.returncode}"
+                )
+            else:
+                print(
+                    f"  Run {run + 1}: FAILED in {elapsed:.1f}s, return code: {result.returncode}"
+                )
+                print(f"    stderr: {result.stderr[-200:]}")
+
+        except Exception as e:
+            # Best-effort benchmark: report the failure and continue with
+            # the remaining runs instead of aborting the whole script.
+            print(f"  Run {run + 1}: ERROR {e}")
+        finally:
+            if os.path.exists(output_path):
+                os.unlink(output_path)
+
+    if not times:
+        return None
+
+    avg = statistics.mean(times)
+    # stdev() needs at least two samples; report 0 for a single success.
+    std = statistics.stdev(times) if len(times) > 1 else 0
+    print(f"  Average: {avg:.1f}s ± {std:.1f}s")
+    return avg
+
+
+def _parse_chunk_times(stderr_text):
+    """Extract per-chunk processing times in seconds from debug stderr text."""
+    chunk_times = []
+    for line in stderr_text.splitlines():
+        if "Chunk" not in line or "segments in" not in line:
+            continue
+        # Example: "Chunk 1/12: 159 segments in 27.5s"
+        # Take whatever follows the last "segments in" so that an earlier
+        # " in " elsewhere on the line does not hide the timing.
+        _, _, tail = line.rpartition("segments in")
+        try:
+            chunk_times.append(float(tail.strip().removesuffix("s")))
+        except ValueError:
+            # Not a parseable duration; ignore this line.
+            continue
+    return chunk_times
+
+
+def test_large_video_chunk():
+    """Benchmark chunked ASR processing on the large sample video.
+
+    Runs ``scripts/asr_processor.py`` once on the large sample file with
+    ``--chunk-duration 600``, ``--max-direct-duration 300``, Redis disabled
+    and ``ASR_DEBUG=1``. On success it prints the total time, the per-chunk
+    times parsed from stderr, the number of segments in the output JSON and
+    a real-time factor (elapsed time divided by the largest segment ``end``).
+
+    NOTE(review): nothing in the command built here limits processing to a
+    single chunk or to the first 10 minutes; the whole file appears to be
+    processed in 600-second chunks. Confirm against asr_processor.py.
+
+    Returns:
+        The elapsed time in seconds of the subprocess when it exits with
+        return code 0, otherwise ``None`` (video missing, non-zero exit, or
+        an exception while running or reading the output).
+    """
+    test_video = "/Users/accusys/test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov"
+    if not os.path.exists(test_video):
+        print(f"Large test video not found: {test_video}")
+        return None
+
+    print(f"\nTesting single chunk from large video: {os.path.basename(test_video)}")
+
+    # delete=False: only the path is needed; the child process writes the
+    # file and the ``finally`` below removes it.
+    with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
+        output_path = f.name
+
+    try:
+        cmd = [
+            "/opt/homebrew/bin/python3.11",
+            "scripts/asr_processor.py",
+            test_video,
+            output_path,
+            "--uuid",
+            "test_perf",
+            "--chunk-duration",
+            "600",  # 10 minutes
+            "--max-direct-duration",
+            "300",  # Force chunked mode even for short audio
+        ]
+
+        env = os.environ.copy()
+        env["MOMENTRY_DISABLE_REDIS"] = "1"
+        env["ASR_DEBUG"] = "1"
+
+        # perf_counter() is monotonic, so the measurement cannot be skewed
+        # by system clock adjustments during the run.
+        start = time.perf_counter()
+        result = subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            env=env,
+        )
+        elapsed = time.perf_counter() - start
+
+        if result.returncode != 0:
+            print(f"  FAILED in {elapsed:.1f}s, return code: {result.returncode}")
+            print("  Last 10 lines of stderr:")
+            for line in result.stderr.split("\n")[-10:]:
+                if line.strip():
+                    print(f"    {line}")
+            return None
+
+        # Parse debug output to get chunk processing times
+        chunk_times = _parse_chunk_times(result.stderr)
+
+        print(f"  Total time: {elapsed:.1f}s, return code: {result.returncode}")
+        if chunk_times:
+            print(f"  Chunk times: {chunk_times}")
+            print(f"  Average chunk time: {statistics.mean(chunk_times):.1f}s")
+
+        # Check output
+        if os.path.exists(output_path):
+            with open(output_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            segments = data.get("segments", [])
+            print(f"  Total segments: {len(segments)}")
+
+            # Calculate processing rate: the largest segment end time is
+            # used as the audio duration.
+            total_audio_duration = max(
+                (seg.get("end", 0) for seg in segments), default=0
+            )
+            if total_audio_duration > 0:
+                real_time_factor = elapsed / total_audio_duration
+                print(f"  Audio duration: {total_audio_duration:.1f}s")
+                print(
+                    f"  Real-time factor: {real_time_factor:.2f}x (lower is better)"
+                )
+
+        return elapsed
+
+    except Exception as e:
+        # Best-effort benchmark: report the problem with a traceback and
+        # fall through to return None instead of aborting the script.
+        print(f"  ERROR: {e}")
+        import traceback
+
+        traceback.print_exc()
+    finally:
+        if os.path.exists(output_path):
+            os.unlink(output_path)
+
+    return None
+
+
+def test_debug_vs_production():
+    """Compare run time of the production and debug ASR processor scripts.
+
+    Runs ``scripts/asr_processor.py`` and ``scripts/asr_processor_debug.py``
+    once each on the small sample video (Redis disabled; ``ASR_DEBUG=1`` is
+    set for the debug run only) and prints each run's elapsed time. When
+    both succeed, prints the overhead of the debug run as a percentage of
+    the production time and whether it stays within the 5% limit.
+
+    Returns:
+        ``None``. Returns immediately, printing nothing, when the sample
+        video does not exist.
+    """
+    test_video = "/Users/accusys/test_video/BigBuckBunny_320x180.mp4"
+    if not os.path.exists(test_video):
+        return
+
+    print("\nComparing debug vs production versions:")
+
+    versions = [
+        ("production", "scripts/asr_processor.py"),
+        ("debug", "scripts/asr_processor_debug.py"),
+    ]
+
+    results = {}
+
+    for version_name, script_path in versions:
+        print(f"\n  Testing {version_name} version...")
+
+        # delete=False: only the path is needed; the child process writes
+        # the file and the ``finally`` below removes it.
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
+            output_path = f.name
+
+        try:
+            cmd = [
+                "/opt/homebrew/bin/python3.11",
+                script_path,
+                test_video,
+                output_path,
+                "--chunk-duration",
+                "600",
+            ]
+
+            env = os.environ.copy()
+            env["MOMENTRY_DISABLE_REDIS"] = "1"
+            if version_name == "debug":
+                env["ASR_DEBUG"] = "1"
+
+            # perf_counter() is monotonic, so the measurement cannot be
+            # skewed by system clock adjustments during the run.
+            start = time.perf_counter()
+            result = subprocess.run(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                env=env,
+            )
+            elapsed = time.perf_counter() - start
+
+            if result.returncode == 0:
+                results[version_name] = elapsed
+                print(f"    Time: {elapsed:.1f}s")
+
+                # Count debug lines
+                if version_name == "debug":
+                    debug_lines = [
+                        line
+                        for line in result.stderr.split("\n")
+                        if "ASR_DEBUG" in line
+                    ]
+                    print(f"    Debug lines: {len(debug_lines)}")
+            else:
+                print(f"    FAILED: {elapsed:.1f}s, return code: {result.returncode}")
+
+        except Exception as e:
+            # Best-effort benchmark: report and move on to the next version.
+            print(f"    ERROR: {e}")
+        finally:
+            if os.path.exists(output_path):
+                os.unlink(output_path)
+
+    # Compare results
+    if "production" in results and "debug" in results:
+        prod_time = results["production"]
+        debug_time = results["debug"]
+        # Overhead is the extra time the debug build costs relative to
+        # production. The previous formula, (prod - debug) / debug, went
+        # negative whenever debug was slower and so always passed the check.
+        overhead = ((debug_time - prod_time) / prod_time) * 100
+        print("\n  Comparison:")
+        print(f"    Production: {prod_time:.1f}s")
+        print(f"    Debug: {debug_time:.1f}s")
+        print(f"    Overhead: {overhead:.1f}%")
+
+        if overhead <= 5:
+            print("    ✅ Within 5% overhead limit")
+        else:
+            print("    ⚠️  Exceeds 5% overhead limit")
+
+def main():
+    """Run the three benchmarks in order, then print a summary and next steps."""
+    rule = "=" * 60
+    print("ASR Processor Performance Benchmark")
+    print(rule)
+
+    # The benchmarks run sequentially: small baseline, large chunked run,
+    # then the debug-vs-production comparison (which only prints).
+    baseline = test_small_video()
+    large_chunk_time = test_large_video_chunk()
+    test_debug_vs_production()
+
+    print("\n" + rule)
+    print("Summary:")
+    # A benchmark that was skipped or failed returns None and is left out.
+    if baseline:
+        print(f"  Small video baseline: {baseline:.1f}s")
+    if large_chunk_time:
+        print(f"  Large video chunk: {large_chunk_time:.1f}s")
+
+    print("\nNext steps:")
+    next_steps = (
+        "  1. Complete end-to-end test with full 2.2GB video",
+        "  2. Verify chunk merging logic works correctly",
+        "  3. Test with Redis enabled (if needed)",
+        "  4. Integrate with Rust processor wrapper",
+    )
+    for step in next_steps:
+        print(step)
+
+
+# Run the benchmark suite only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()