# momentry_core/scripts/asr_benchmark_runner.py

#!/opt/homebrew/bin/python3.11
"""
ASR Benchmark Runner - Automated Testing Script for ASR Processor Comparison

Version: 1.0.0
Purpose: Compare faster-whisper vs OpenAI whisper on CPU/MPS devices

Features:
1. Real-time timestamp recording (ISO 8601, microsecond precision)
2. Video-time frame calculation (start_frame, end_frame)
3. Independent file output for each test scheme
4. Memory monitoring with psutil
5. Log recording for each test
"""

import argparse
import json
import os
import platform
import signal
import subprocess
import sys
import tempfile
import time
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple

import psutil

# Directory containing this script; the per-scheme processor scripts named in
# SCHEMES are resolved relative to it.
SCRIPTS_DIR = Path(__file__).parent
# Default root for benchmark artefacts: <parent of SCRIPTS_DIR>/output/benchmark.
OUTPUT_DIR = SCRIPTS_DIR.parent / "output" / "benchmark"

# NOTE(review): CONTRACT_VERSION is not referenced elsewhere in the visible part
# of this file -- confirm whether it is still needed.
CONTRACT_VERSION = "1.0"
# Written as 'runner_version' into the aggregated results JSON.
RUNNER_VERSION = "1.0.0"

# Benchmark schemes keyed by a single-letter id.  For each scheme the runner
# launches SCRIPTS_DIR/<script> with <args> followed by the video path and the
# output path, using the current environment updated with <env>.  'engine',
# 'model' and 'device' are used to build the output file name and the report.
SCHEMES = {
    # Scheme A passes no extra CLI arguments or environment variables.
    # NOTE(review): presumably asr_processor.py defaults to the small model on
    # CPU -- confirm against that script.
    'A': {
        'name': 'faster-whisper small CPU',
        'script': 'asr_processor.py',
        'engine': 'faster-whisper',
        'model': 'small',
        'device': 'cpu',
        'args': [],
        'env': {}
    },
    'B': {
        'name': 'OpenAI whisper small CPU',
        'script': 'asr_processor_contract_v2.py',
        'engine': 'whisper',
        'model': 'small',
        'device': 'cpu',
        'args': ['--model-size', 'small', '--device', 'cpu'],
        'env': {}
    },
    # The MPS schemes (C and E) additionally export MOMENTRY_ASR_DEVICE=mps.
    'C': {
        'name': 'OpenAI whisper small MPS',
        'script': 'asr_processor_contract_v2.py',
        'engine': 'whisper',
        'model': 'small',
        'device': 'mps',
        'args': ['--model-size', 'small', '--device', 'mps'],
        'env': {'MOMENTRY_ASR_DEVICE': 'mps'}
    },
    'D': {
        'name': 'OpenAI whisper medium CPU',
        'script': 'asr_processor_contract_v2.py',
        'engine': 'whisper',
        'model': 'medium',
        'device': 'cpu',
        'args': ['--model-size', 'medium', '--device', 'cpu'],
        'env': {}
    },
    'E': {
        'name': 'OpenAI whisper medium MPS',
        'script': 'asr_processor_contract_v2.py',
        'engine': 'whisper',
        'model': 'medium',
        'device': 'mps',
        'args': ['--model-size', 'medium', '--device', 'mps'],
        'env': {'MOMENTRY_ASR_DEVICE': 'mps'}
    }
}

# Benchmark input videos keyed by a short id.  'path' is an absolute,
# machine-specific location handed to ffprobe and to the processor scripts;
# 'output_dir' is the sub-directory of the benchmark output directory that
# receives this video's results; 'features' are descriptive tags that are
# copied into video_metadata.json.
VIDEOS = {
    'charade': {
        'name': 'Charade 1963',
        'path': '/Users/accusys/momentry/var/sftpgo/data/demo/Old_Time_Movie_Show_-_Charade_1963.HD.mov',
        'output_dir': 'charade_1963',
        'features': ['multilingual', 'movie_dialogue', '114_minutes']
    },
    'exasan': {
        'name': 'ExaSAN PCIe',
        'path': '/Users/accusys/momentry/var/sftpgo/data/demo/ExaSAN PCIe series - Director Ou Yu-Zhi Shares His Experience.mp4',
        'output_dir': 'exasan_pcie',
        'features': ['technical_terms', 'professional_accent', '2_minutes']
    }
}


class SignalHandler:
    """Remember that SIGTERM/SIGINT arrived so running loops can stop cooperatively."""

    def __init__(self):
        # Raised by handle_signal; polled by the benchmark runner.
        self.shutdown_requested = False

    def setup(self):
        """Install handle_signal as the handler for SIGTERM and SIGINT."""
        for signum in (signal.SIGTERM, signal.SIGINT):
            signal.signal(signum, self.handle_signal)

    def handle_signal(self, signum, frame):
        """Announce the received signal and raise the shutdown flag."""
        if signum == signal.SIGTERM:
            signal_name = "SIGTERM"
        else:
            signal_name = "SIGINT"
        print(f"[RUNNER] Received {signal_name}, stopping...")
        self.shutdown_requested = True


def get_iso_timestamp() -> str:
    """Return the current time, in the local timezone, as an ISO 8601 string with UTC offset."""
    utc_now = datetime.now(timezone.utc)
    local_now = utc_now.astimezone()
    return local_now.isoformat()


def get_video_metadata(video_path: str) -> Dict[str, Any]:
    """Probe a media file with ffprobe and summarise its first video stream.

    Args:
        video_path: Path of the media file handed to ffprobe.

    Returns:
        A dict with the keys ``path``, ``duration_seconds``, ``fps``,
        ``total_frames``, ``codec_type``, ``codec_name``, ``r_frame_rate``,
        ``avg_frame_rate`` and ``nb_frames``.  ``fps``, ``duration_seconds``
        and the frame counts fall back to 0 when ffprobe reports a missing or
        non-numeric value (for example ``"N/A"`` or a ``"0/0"`` rate).

    Raises:
        RuntimeError: If ffprobe cannot be run, exits with an error, produces
            unparsable output, or the file has no video stream.
    """

    def _to_float(value: Any) -> float:
        # ffprobe may report placeholders such as "N/A"; treat them as unknown (0).
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _to_int(value: Any) -> int:
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def _parse_rate(rate: Any) -> float:
        # Rates arrive as "num/den" (e.g. "30000/1001") or as a plain number.
        numerator, separator, denominator = str(rate).partition('/')
        if not separator:
            return _to_float(numerator)
        den = _to_float(denominator)
        # A zero denominator ("0/0") means "unknown", not a division error.
        return _to_float(numerator) / den if den else 0.0

    cmd = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration,format_name',
        '-show_entries', 'stream=codec_type,codec_name,r_frame_rate,avg_frame_rate,nb_frames',
        '-of', 'json',
        video_path
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)

        # Use the first stream that ffprobe classifies as video.
        video_stream = next(
            (stream for stream in data.get('streams', []) if stream.get('codec_type') == 'video'),
            None
        )

        if not video_stream:
            raise ValueError("No video stream found")

        fps_str = video_stream.get('r_frame_rate', video_stream.get('avg_frame_rate', '0/1'))
        fps = _parse_rate(fps_str)
        if fps <= 0:
            # The preferred rate was unusable; try the average rate before giving up.
            fps = _parse_rate(video_stream.get('avg_frame_rate', '0/1'))

        nb_frames = _to_int(video_stream.get('nb_frames', 0))
        duration = _to_float(data.get('format', {}).get('duration', 0))

        # Estimate the frame count when the container does not store one.
        if nb_frames == 0 and fps > 0 and duration > 0:
            nb_frames = int(duration * fps)

        return {
            'path': video_path,
            'duration_seconds': duration,
            'fps': fps,
            'total_frames': nb_frames,
            'codec_type': video_stream.get('codec_type'),
            'codec_name': video_stream.get('codec_name'),
            'r_frame_rate': fps_str,
            'avg_frame_rate': video_stream.get('avg_frame_rate'),
            'nb_frames': nb_frames
        }
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"ffprobe failed: {e.stderr}") from e
    except Exception as e:
        # Callers rely on every failure surfacing as RuntimeError.
        raise RuntimeError(f"Failed to get video metadata: {e}") from e


def time_to_frame(seconds: float, fps: float) -> int:
    """Convert a time offset in seconds to a frame index at ``fps``.

    The product is rounded with Python's built-in ``round`` before conversion
    to ``int``.
    """
    return int(round(seconds * fps))


def process_asr_output(asr_data: Dict[str, Any], video_fps: float) -> Dict[str, Any]:
    """Annotate ASR segments with frame information and aggregate totals.

    Each segment dict is updated in place with ``start_frame``, ``end_frame``,
    ``duration_seconds``, ``duration_frames`` and a positional ``id``.
    ``asr_data`` itself is updated in place and returned.

    Args:
        asr_data: ASR result; its ``segments`` entry (a list of dicts with
            optional ``start``/``end`` seconds) is read, defaulting to ``[]``.
        video_fps: Frame rate used to convert seconds to frames.

    Returns:
        ``asr_data`` with ``segments``, ``total_transcribed_frames`` and
        ``avg_segment_frames`` set.
    """
    segments = asr_data.get('segments', [])

    total_frames = 0
    # enumerate() yields the true position; looking the segment up with
    # list.index() is quadratic and returns the wrong id whenever a segment
    # compares equal to an earlier one.
    for position, segment in enumerate(segments):
        start = segment.get('start', 0.0)
        end = segment.get('end', 0.0)

        segment['start_frame'] = time_to_frame(start, video_fps)
        segment['end_frame'] = time_to_frame(end, video_fps)
        segment['duration_seconds'] = end - start
        segment['duration_frames'] = segment['end_frame'] - segment['start_frame']
        segment['id'] = position

        total_frames += segment['duration_frames']

    asr_data['segments'] = segments
    asr_data['total_transcribed_frames'] = total_frames
    asr_data['avg_segment_frames'] = total_frames / len(segments) if segments else 0

    return asr_data


class ASRBenchmarkRunner:
    def __init__(self, output_dir: Path = OUTPUT_DIR, verbose: bool = False):
        """Store the configuration, install signal handlers and reset run state.

        Args:
            output_dir: Root directory for all benchmark artefacts.
            verbose: When true, ``log`` prints its messages.
        """
        self.output_dir = output_dir
        self.verbose = verbose

        # SIGTERM/SIGINT only raise a flag that the test loops poll.
        handler = SignalHandler()
        self.signal_handler = handler
        handler.setup()

        # Filled by run_all_tests.
        self.results = []
        self.test_start_time = None
        self.test_end_time = None

    def log(self, message: str):
        """Print ``message`` prefixed with the current timestamp; silent unless verbose."""
        if not self.verbose:
            return
        print(f"[{get_iso_timestamp()}] {message}")

    def run_single_test(self, scheme_id: str, video_key: str) -> Dict[str, Any]:
        """Run one ASR scheme against one video and persist the combined result.

        The scheme's processor script is launched as a child process and its
        resident memory and CPU usage are sampled roughly once per second
        (only the direct child process is sampled).  When the child succeeds,
        its JSON output is enriched with frame information.  The final result
        overwrites the processor's output file, and a text log containing the
        child's stdout/stderr is written to ``<video output dir>/logs``.

        Args:
            scheme_id: Key into ``SCHEMES``.
            video_key: Key into ``VIDEOS``.

        Returns:
            The result dict that was written to the scheme's output file.

        Raises:
            ValueError: If ``scheme_id`` or ``video_key`` is unknown.
            RuntimeError: If shutdown was already requested, the video cannot
                be probed, or launching/monitoring the child process fails
                (including a shutdown request while it runs).
        """
        scheme = SCHEMES.get(scheme_id)
        video_info = VIDEOS.get(video_key)

        if not scheme or not video_info:
            raise ValueError(f"Invalid scheme_id or video_key: {scheme_id}, {video_key}")

        if self.signal_handler.shutdown_requested:
            raise RuntimeError("Shutdown requested")

        video_dir = self.output_dir / video_info['output_dir']
        log_dir = video_dir / "logs"
        # Creating the nested logs directory also creates video_dir; without it
        # writing the log file below fails with FileNotFoundError.
        log_dir.mkdir(parents=True, exist_ok=True)

        video_metadata = get_video_metadata(video_info['path'])
        video_fps = video_metadata['fps']

        output_filename = f"scheme_{scheme_id}_{scheme['engine']}_{scheme['model']}_{scheme['device']}.json"
        output_path = video_dir / output_filename
        log_path = log_dir / f"scheme_{scheme_id}.log"

        test_id = f"{scheme_id}_{video_key}_{int(time.time())}"

        self.log(f"Starting test: {test_id}")
        self.log(f"Scheme: {scheme['name']}")
        self.log(f"Video: {video_info['name']}")
        self.log(f"FPS: {video_fps}, Total frames: {video_metadata['total_frames']}")

        test_start = get_iso_timestamp()
        start_time = time.time()

        # Prefer the pinned interpreter; fall back to the one running this
        # script when that path does not exist on this machine.
        interpreter = '/opt/homebrew/bin/python3.11'
        if not os.path.exists(interpreter):
            interpreter = sys.executable

        script_path = SCRIPTS_DIR / scheme['script']
        cmd = [interpreter, str(script_path)]
        cmd.extend(scheme['args'])
        cmd.extend([video_info['path'], str(output_path)])

        env = os.environ.copy()
        env.update(scheme['env'])

        process = None
        stdout_data = ""
        stderr_data = ""
        peak_memory_mb = 0
        avg_memory_mb = 0
        memory_samples = []
        cpu_samples = []

        try:
            self.log(f"Running command: {' '.join(cmd)}")

            # Spool the child's output to temporary files.  Pipes that nobody
            # reads while we poll would block the child as soon as it fills
            # the OS pipe buffer.
            with tempfile.TemporaryFile(mode='w+', encoding='utf-8', errors='replace') as out_spool, \
                    tempfile.TemporaryFile(mode='w+', encoding='utf-8', errors='replace') as err_spool:
                process = subprocess.Popen(
                    cmd,
                    env=env,
                    stdout=out_spool,
                    stderr=err_spool
                )

                try:
                    psutil_process = psutil.Process(process.pid)
                    # Prime the CPU counter; later non-blocking calls report
                    # usage since the previous call.
                    psutil_process.cpu_percent(interval=None)
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    psutil_process = None

                while True:
                    if self.signal_handler.shutdown_requested:
                        raise RuntimeError("Shutdown requested")

                    if psutil_process is not None:
                        try:
                            memory_mb = psutil_process.memory_info().rss / 1024 / 1024
                            memory_samples.append(memory_mb)
                            peak_memory_mb = max(peak_memory_mb, memory_mb)
                        except (psutil.NoSuchProcess, psutil.AccessDenied):
                            pass

                    # Waiting with a timeout doubles as the sampling interval
                    # and returns as soon as the child exits, so the measured
                    # duration is not inflated by a fixed sleep.
                    try:
                        process.wait(timeout=1.0)
                        break
                    except subprocess.TimeoutExpired:
                        pass

                    if psutil_process is not None:
                        try:
                            cpu_samples.append(psutil_process.cpu_percent(interval=None))
                        except (psutil.NoSuchProcess, psutil.AccessDenied):
                            pass

                end_time = time.time()
                test_end = get_iso_timestamp()

                out_spool.seek(0)
                stdout_data = out_spool.read()
                err_spool.seek(0)
                stderr_data = err_spool.read()

        except Exception as e:
            if process and process.poll() is None:
                process.terminate()
                # Reap the child so it does not linger as a zombie; escalate
                # to kill if it ignores the termination request.
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
            raise RuntimeError(f"Process execution failed: {e}") from e

        wall_clock_duration = end_time - start_time

        if memory_samples:
            avg_memory_mb = sum(memory_samples) / len(memory_samples)

        avg_cpu_percent = sum(cpu_samples) / len(cpu_samples) if cpu_samples else 0
        peak_cpu_percent = max(cpu_samples) if cpu_samples else 0

        with open(log_path, 'w', encoding='utf-8') as f:
            f.write(f"Test ID: {test_id}\n")
            f.write(f"Scheme: {scheme['name']}\n")
            f.write(f"Video: {video_info['name']}\n")
            f.write(f"Start: {test_start}\n")
            f.write(f"End: {test_end}\n")
            f.write(f"Duration: {wall_clock_duration:.3f}s\n")
            f.write(f"\n=== STDOUT ===\n{stdout_data}\n")
            f.write(f"\n=== STDERR ===\n{stderr_data}\n")

        success = process.returncode == 0

        metrics = {}
        asr_data_for_output = None

        if success and output_path.exists():
            try:
                with open(output_path, 'r', encoding='utf-8') as f:
                    asr_output = json.load(f)

                asr_output = process_asr_output(asr_output, video_fps)

                segments = asr_output.get('segments', [])
                total_duration = sum(s.get('duration_seconds', 0) for s in segments)

                metrics = {
                    'processing_time_seconds': wall_clock_duration,
                    'processing_speed_ratio': video_metadata['duration_seconds'] / wall_clock_duration if wall_clock_duration > 0 else 0,
                    'peak_memory_mb': peak_memory_mb,
                    'avg_memory_mb': avg_memory_mb,
                    'segments_count': len(segments),
                    'avg_segment_length_seconds': total_duration / len(segments) if segments else 0,
                    'avg_segment_frames': asr_output.get('avg_segment_frames', 0),
                    'total_transcribed_duration_seconds': total_duration,
                    'total_transcribed_frames': asr_output.get('total_transcribed_frames', 0),
                    'language_detected': asr_output.get('language', 'unknown'),
                    'language_probability': asr_output.get('language_probability', 0),
                    'cpu_avg_percent': avg_cpu_percent,
                    'cpu_peak_percent': peak_cpu_percent
                }

                asr_data_for_output = {
                    'language': asr_output.get('language'),
                    'language_probability': asr_output.get('language_probability'),
                    'segments': asr_output.get('segments', []),
                    'total_transcribed_frames': asr_output.get('total_transcribed_frames'),
                    'avg_segment_frames': asr_output.get('avg_segment_frames')
                }

            except Exception as e:
                self.log(f"Failed to parse ASR output: {e}")
                metrics = {
                    'processing_time_seconds': wall_clock_duration,
                    'processing_speed_ratio': 0,
                    'peak_memory_mb': peak_memory_mb,
                    'avg_memory_mb': avg_memory_mb,
                    'error': str(e)
                }
                asr_data_for_output = None

        result = {
            # Top-level ids let the aggregate reports group results without
            # digging into 'file_info'.
            'scheme_id': scheme_id,
            'video_key': video_key,
            'file_info': {
                'filename': output_filename,
                'created_at': test_end,
                'test_id': test_id,
                'scheme_id': scheme_id,
                'scheme_name': scheme['name'],
                'video_name': video_info['name']
            },
            'video_metadata': video_metadata,
            'real_time': {
                'test_start': test_start,
                'test_end': test_end,
                'wall_clock_duration_seconds': wall_clock_duration
            },
            'metrics': metrics,
            'asr_output': asr_data_for_output,
            'resource_usage': {
                'cpu_avg_percent': avg_cpu_percent,
                'cpu_peak_percent': peak_cpu_percent,
                'peak_memory_mb': peak_memory_mb,
                'avg_memory_mb': avg_memory_mb
            },
            # Size of the processor's own output, measured before it is
            # overwritten with the combined result below.
            'output_file_size_bytes': output_path.stat().st_size if output_path.exists() else 0,
            'success': success,
            'error_message': stderr_data if not success else None
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        self.log(f"Test completed: {test_id}")
        self.log(f"Duration: {wall_clock_duration:.3f}s, Speed: {metrics.get('processing_speed_ratio', 0):.2f}x")
        self.log(f"Segments: {metrics.get('segments_count', 0)}, Memory peak: {peak_memory_mb:.1f}MB")
        self.log(f"Output: {output_path}")

        return result

    def save_video_metadata_files(self):
        """Write ``video_metadata.json`` into the output directory of every video in ``VIDEOS``.

        Probing is best-effort: a video that cannot be probed is logged and
        skipped so that one missing file does not prevent the metadata of the
        remaining videos from being written.
        """
        for video_key, video_info in VIDEOS.items():
            video_dir = self.output_dir / video_info['output_dir']
            video_dir.mkdir(parents=True, exist_ok=True)

            metadata_path = video_dir / "video_metadata.json"

            try:
                video_metadata = get_video_metadata(video_info['path'])
            except RuntimeError as e:
                # get_video_metadata reports every failure as RuntimeError.
                self.log(f"Skipping video metadata for {video_key}: {e}")
                continue

            metadata = {
                'video_key': video_key,
                'name': video_info['name'],
                'path': video_info['path'],
                'features': video_info['features'],
                'metadata': video_metadata,
                'created_at': get_iso_timestamp()
            }

            # ensure_ascii=False may emit non-ASCII text, so pin the encoding.
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)

            self.log(f"Saved video metadata: {metadata_path}")

    def run_all_tests(self, schemes: Optional[List[str]] = None, videos: Optional[List[str]] = None, skip_existing: bool = False) -> List[Dict[str, Any]]:
        """Run every requested scheme against every requested video.

        Args:
            schemes: Scheme ids to run; defaults to all keys of ``SCHEMES``.
            videos: Video keys to run; defaults to all keys of ``VIDEOS``.
            skip_existing: When true, a scheme whose output file already exists
                is not re-run; the stored result is loaded instead.

        Returns:
            ``self.results``: one entry per executed or loaded test.  A test
            that raised (including one with an unknown scheme or video key) is
            recorded as a dict with ``success`` False, ``error`` and
            ``traceback``.
        """
        if schemes is None:
            schemes = list(SCHEMES.keys())
        if videos is None:
            videos = list(VIDEOS.keys())

        self.test_start_time = get_iso_timestamp()
        self.log(f"Benchmark started: {self.test_start_time}")

        try:
            self.save_video_metadata_files()
        except RuntimeError as e:
            # Metadata files are auxiliary; a probe failure (e.g. for a video
            # that is not part of this run) must not abort the benchmark.
            self.log(f"Failed to save video metadata: {e}")

        self.results = []

        for video_key in videos:
            for scheme_id in schemes:
                if self.signal_handler.shutdown_requested:
                    break

                video_info = VIDEOS.get(video_key)
                scheme = SCHEMES.get(scheme_id)

                # Unknown ids are left for run_single_test to reject, so they
                # end up recorded as a failed test instead of crashing here.
                if skip_existing and video_info and scheme:
                    video_dir = self.output_dir / video_info['output_dir']
                    output_filename = f"scheme_{scheme_id}_{scheme['engine']}_{scheme['model']}_{scheme['device']}.json"
                    output_path = video_dir / output_filename

                    if output_path.exists():
                        self.log(f"Skipping existing: {output_path}")
                        try:
                            with open(output_path, 'r', encoding='utf-8') as f:
                                result = json.load(f)
                            self.results.append(result)
                        except Exception as e:
                            self.log(f"Failed to load existing result: {e}")
                        continue

                try:
                    result = self.run_single_test(scheme_id, video_key)
                    self.results.append(result)
                except Exception as e:
                    self.log(f"Test failed: {scheme_id}/{video_key} - {e}")
                    self.results.append({
                        'scheme_id': scheme_id,
                        'video_key': video_key,
                        'success': False,
                        'error': str(e),
                        'traceback': traceback.format_exc()
                    })

            # Leave the outer loop as well once a shutdown has been requested.
            if self.signal_handler.shutdown_requested:
                self.log("Shutdown requested, stopping tests")
                break

        self.test_end_time = get_iso_timestamp()
        self.log(f"Benchmark completed: {self.test_end_time}")

        return self.results

    def generate_results_json(self) -> Path:
        """Write ``asr_benchmark_results.json`` with all results and summary statistics.

        The file contains benchmark metadata (including system information),
        the raw entries of ``self.results`` and per-scheme / per-video averages
        computed over the successful tests.

        Returns:
            Path of the written JSON file.
        """
        results_path = self.output_dir / "asr_benchmark_results.json"

        def scheme_of(result: Dict[str, Any]) -> Optional[str]:
            # Results produced by run_single_test carry the scheme id under
            # 'file_info'; failure records carry it at the top level.
            return result.get('scheme_id') or result.get('file_info', {}).get('scheme_id')

        def average(metrics_list: List[Dict[str, Any]], key: str) -> float:
            return sum(m.get(key, 0) for m in metrics_list) / len(metrics_list)

        successful_tests = [r for r in self.results if r.get('success', False)]
        failed_tests = [r for r in self.results if not r.get('success', False)]

        system_info = {
            'os': platform.system(),
            'os_version': platform.version(),
            'python_version': platform.python_version(),
            'cpu': platform.processor(),
            'machine': platform.machine(),
            'memory_total_gb': psutil.virtual_memory().total / (1024**3)
        }

        benchmark_metadata = {
            'benchmark_id': f"asr_comparison_{int(time.time())}",
            'benchmark_start': self.test_start_time,
            'benchmark_end': self.test_end_time,
            'total_tests': len(self.results),
            'successful_tests': len(successful_tests),
            'failed_tests': len(failed_tests),
            'runner_version': RUNNER_VERSION,
            'system_info': system_info
        }

        summary_by_scheme = {}
        for scheme_id in SCHEMES:
            metrics_list = [r.get('metrics', {}) for r in successful_tests if scheme_of(r) == scheme_id]
            if metrics_list:
                summary_by_scheme[scheme_id] = {
                    'avg_processing_time_seconds': average(metrics_list, 'processing_time_seconds'),
                    'avg_speed_ratio': average(metrics_list, 'processing_speed_ratio'),
                    # Averaged over each test's peak memory.
                    'avg_memory_mb': average(metrics_list, 'peak_memory_mb'),
                    'avg_segments_count': average(metrics_list, 'segments_count')
                }

        summary_by_video = {}
        for video_key, video_info in VIDEOS.items():
            # Match on the explicit key when present, otherwise on the video name.
            metrics_list = [
                r.get('metrics', {}) for r in successful_tests
                if r.get('video_key') == video_key
                or r.get('file_info', {}).get('video_name') == video_info['name']
            ]
            if metrics_list:
                summary_by_video[video_key] = {
                    'avg_processing_time_seconds': average(metrics_list, 'processing_time_seconds'),
                    'avg_speed_ratio': average(metrics_list, 'processing_speed_ratio'),
                    'avg_memory_mb': average(metrics_list, 'peak_memory_mb')
                }

        results_data = {
            'benchmark_metadata': benchmark_metadata,
            'test_results': self.results,
            'summary_statistics': {
                'by_scheme': summary_by_scheme,
                'by_video': summary_by_video
            },
            'created_at': get_iso_timestamp()
        }

        # ensure_ascii=False may emit non-ASCII text, so pin the encoding.
        with open(results_path, 'w', encoding='utf-8') as f:
            json.dump(results_data, f, indent=2, ensure_ascii=False)

        self.log(f"Saved results JSON: {results_path}")
        return results_path

    def generate_markdown_report(self) -> Path:
        """Write ``asr_benchmark_report.md`` summarising ``self.results``.

        The report contains a per-scheme table of averages over the successful
        tests, a detailed section per test and the list of expected output
        files.

        Returns:
            Path of the written Markdown file.
        """
        report_path = self.output_dir / "asr_benchmark_report.md"

        def scheme_of(result: Dict[str, Any]) -> str:
            # Results produced by run_single_test carry the scheme id under
            # 'file_info'; failure records carry it at the top level.
            return result.get('scheme_id') or result.get('file_info', {}).get('scheme_id') or 'unknown'

        successful_tests = [r for r in self.results if r.get('success', False)]

        lines = []
        lines.append("# ASR Benchmark Automated Report")
        lines.append("")
        lines.append(f"**Generated**: {get_iso_timestamp()}")
        lines.append(f"**Total Tests**: {len(self.results)}")
        lines.append(f"**Successful**: {len(successful_tests)}")
        lines.append(f"**Failed**: {len(self.results) - len(successful_tests)}")
        lines.append("")
        lines.append("---")
        lines.append("")
        lines.append("## Test Results Summary")
        lines.append("")

        lines.append("### By Scheme")
        lines.append("")
        lines.append("| Scheme | Engine | Model | Device | Avg Time (s) | Avg Speed | Avg Memory (MB) | Avg Segments |")
        lines.append("|--------|--------|-------|--------|--------------|-----------|-----------------|---------------|")

        # Collect the per-test metrics of each scheme before averaging.
        summary = {}
        for r in successful_tests:
            metrics = r.get('metrics', {})
            bucket = summary.setdefault(
                scheme_of(r),
                {'times': [], 'speeds': [], 'memories': [], 'segments': []}
            )
            bucket['times'].append(metrics.get('processing_time_seconds', 0))
            bucket['speeds'].append(metrics.get('processing_speed_ratio', 0))
            bucket['memories'].append(metrics.get('peak_memory_mb', 0))
            bucket['segments'].append(metrics.get('segments_count', 0))

        for scheme_id in sorted(summary):
            s = summary[scheme_id]
            scheme = SCHEMES.get(scheme_id, {})
            avg_time = sum(s['times']) / len(s['times'])
            avg_speed = sum(s['speeds']) / len(s['speeds'])
            avg_mem = sum(s['memories']) / len(s['memories'])
            avg_seg = sum(s['segments']) / len(s['segments'])

            lines.append(f"| {scheme_id} | {scheme.get('engine', 'N/A')} | {scheme.get('model', 'N/A')} | {scheme.get('device', 'N/A')} | {avg_time:.1f} | {avg_speed:.2f}x | {avg_mem:.1f} | {avg_seg:.0f} |")

        lines.append("")
        lines.append("### Detailed Results")
        lines.append("")

        for result in self.results:
            scheme_id = scheme_of(result)
            video_name = result.get('file_info', {}).get('video_name', result.get('video_key', 'unknown'))
            success = result.get('success', False)

            lines.append(f"#### {scheme_id} - {video_name}")
            lines.append("")

            if success:
                metrics = result.get('metrics', {})
                real_time = result.get('real_time', {})
                # The probability may be stored as null when the engine did not report one.
                language_probability = metrics.get('language_probability') or 0

                lines.append("- **Status**: Success")
                lines.append(f"- **Start**: {real_time.get('test_start', 'N/A')}")
                lines.append(f"- **End**: {real_time.get('test_end', 'N/A')}")
                lines.append(f"- **Duration**: {metrics.get('processing_time_seconds', 0):.3f}s")
                lines.append(f"- **Speed**: {metrics.get('processing_speed_ratio', 0):.2f}x")
                lines.append(f"- **Segments**: {metrics.get('segments_count', 0)}")
                lines.append(f"- **Memory Peak**: {metrics.get('peak_memory_mb', 0):.1f}MB")
                lines.append(f"- **Language**: {metrics.get('language_detected', 'N/A')} ({language_probability:.2f})")
            else:
                # Exceptions are recorded under 'error'; a processor that exited
                # with a non-zero status leaves its stderr under 'error_message'.
                error_text = result.get('error') or result.get('error_message') or 'Unknown error'
                lines.append("- **Status**: Failed")
                lines.append(f"- **Error**: {str(error_text).strip()}")

            lines.append("")

        lines.append("---")
        lines.append("")
        lines.append("## Output Files")
        lines.append("")
        lines.append("All test outputs are saved in:")
        lines.append(f"- `{self.output_dir}/`")
        lines.append("")

        for video_info in VIDEOS.values():
            video_dir = self.output_dir / video_info['output_dir']
            lines.append(f"### {video_info['name']}")
            lines.append(f"- `{video_dir}/`")
            for scheme_id, scheme in SCHEMES.items():
                filename = f"scheme_{scheme_id}_{scheme['engine']}_{scheme['model']}_{scheme['device']}.json"
                lines.append(f"  - `{filename}`")
            lines.append("")

        # The report can contain non-ASCII text (video names, error output).
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

        self.log(f"Saved markdown report: {report_path}")
        return report_path


def main():
    """Command-line entry point for the benchmark runner.

    With ``--single scheme_id,video_key`` one test is run and its result is
    printed as JSON.  Otherwise all requested schemes and videos are run and
    the aggregated JSON results and Markdown report are written.

    Exit status: 0 on success, 1 on error or malformed ``--single`` value,
    130 when the run was interrupted by SIGINT/SIGTERM.
    """
    parser = argparse.ArgumentParser(description='ASR Benchmark Runner')
    parser.add_argument('--output-dir', type=str, default=str(OUTPUT_DIR), help='Output directory')
    parser.add_argument('--schemes', type=str, default='A,B,C,D,E', help='Schemes to test (comma-separated)')
    parser.add_argument('--videos', type=str, default='charade,exasan', help='Videos to test (comma-separated)')
    parser.add_argument('--skip-existing', action='store_true', help='Skip existing output files')
    parser.add_argument('--verbose', action='store_true', help='Verbose output')
    parser.add_argument('--single', type=str, help='Run single test: scheme_id,video_key (e.g., A,charade)')

    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    runner = ASRBenchmarkRunner(output_dir=output_dir, verbose=args.verbose)

    try:
        if args.single:
            # Strip whitespace so "A, charade" is accepted like in --schemes/--videos.
            parts = [part.strip() for part in args.single.split(',')]
            if len(parts) != 2 or not all(parts):
                print("Error: --single format should be scheme_id,video_key")
                sys.exit(1)

            scheme_id, video_key = parts
            result = runner.run_single_test(scheme_id, video_key)
            print(json.dumps(result, indent=2, ensure_ascii=False))
        else:
            schemes = [s.strip() for s in args.schemes.split(',') if s.strip()]
            videos = [v.strip() for v in args.videos.split(',') if v.strip()]

            runner.run_all_tests(schemes=schemes, videos=videos, skip_existing=args.skip_existing)

            # Reports are written even after an interruption so that partial
            # results are not lost.
            runner.generate_results_json()
            runner.generate_markdown_report()

            # SIGINT/SIGTERM are turned into a flag by the runner's signal
            # handler, so KeyboardInterrupt is not raised; check the flag.
            interrupted = runner.signal_handler.shutdown_requested
            print("\nInterrupted by user" if interrupted else "\nBenchmark completed!")
            print(f"Results: {output_dir / 'asr_benchmark_results.json'}")
            print(f"Report: {output_dir / 'asr_benchmark_report.md'}")

            if interrupted:
                sys.exit(130)

    except KeyboardInterrupt:
        print("\nInterrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()