#!/usr/bin/env python3
"""Debug the hang issue in ASR processor.

Runs the debug version of the ASR processor script against one large test
video, mirrors the child's stdout/stderr to this terminal while it runs,
enforces a wall-clock timeout, and saves everything that was captured to a
log file inside a temp directory that is left on disk for inspection.
"""

import json
import os
import signal
import subprocess
import sys
import tempfile
import threading
import time

# Candidate inputs in order of preference; the first one that exists is used.
TEST_VIDEOS = (
    "../test_video/1636719d-c31f-78ac-f1dd-8ab0b0b36c66.mov",
    "../test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov",
)
PYTHON_BIN = "/opt/homebrew/bin/python3.11"
PROCESSOR_SCRIPT = "scripts/asr_processor_debug.py"
TIMEOUT_SECONDS = 600  # 10 minutes for debugging
# How long to wait for the pipe readers after the child has exited. A
# grandchild that inherited the pipes could keep them open indefinitely, and
# a hang-debugging script must not hang itself.
READER_JOIN_SECONDS = 5


def pick_test_video(candidates=TEST_VIDEOS):
    """Return the first path in *candidates* that exists, or None if none do."""
    return next((path for path in candidates if os.path.exists(path)), None)


def read_output(stream, sink, echo, tag):
    """Copy *stream* line by line into *sink* and mirror it to *echo*.

    Blocks until *stream* reaches EOF, so output the child wrote just before
    exiting is still collected (polling the process state and stopping as
    soon as it has exited would drop whatever is left in the pipe).

    Args:
        stream: Text stream to read from (e.g. ``proc.stdout``).
        sink: List that receives every line exactly as read.
        echo: Writable text stream each line is mirrored to, prefixed by *tag*.
        tag: Prefix written before every mirrored line, e.g. ``"[STDOUT] "``.
    """
    try:
        for line in stream:
            sink.append(line)
            echo.write(f"{tag}{line}")
            echo.flush()
    except (OSError, ValueError) as exc:
        # Record the failure in the captured log instead of silently
        # swallowing it; the lines read so far are kept.
        sink.append(f"[reader stopped: {exc!r}]\n")


def run_with_live_output(cmd, timeout):
    """Run *cmd*, streaming its output live, and kill it after *timeout* seconds.

    Args:
        cmd: Argument list passed to ``subprocess.Popen``.
        timeout: Maximum number of seconds to wait for the process.

    Returns:
        A dict with keys ``returncode``, ``elapsed`` (seconds), ``timeout_hit``
        (bool), ``stdout_lines`` and ``stderr_lines`` (lists of str).
    """
    stdout_lines = []
    stderr_lines = []
    start = time.monotonic()

    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",  # undecodable bytes must not kill a reader thread
        bufsize=1,  # Line buffered
    )

    # One blocking reader per pipe: neither pipe can fill up and stall the
    # child, and no non-blocking I/O or sleep-polling is needed.
    readers = [
        threading.Thread(
            target=read_output,
            args=(proc.stdout, stdout_lines, sys.stdout, "[STDOUT] "),
            daemon=True,
        ),
        threading.Thread(
            target=read_output,
            args=(proc.stderr, stderr_lines, sys.stderr, "[STDERR] "),
            daemon=True,
        ),
    ]
    for reader in readers:
        reader.start()

    try:
        proc.wait(timeout=timeout)
        elapsed = time.monotonic() - start
        timeout_hit = False
    except subprocess.TimeoutExpired:
        elapsed = time.monotonic() - start
        timeout_hit = True
        print(f"\nProcess timed out after {timeout}s, killing...")
        proc.kill()
        proc.wait()

    # Let the readers drain what is left in the pipes before the captured
    # lines are used; bounded so a lingering pipe holder cannot block us.
    for reader in readers:
        reader.join(timeout=READER_JOIN_SECONDS)

    return {
        "returncode": proc.returncode,
        "elapsed": elapsed,
        "timeout_hit": timeout_hit,
        "stdout_lines": stdout_lines,
        "stderr_lines": stderr_lines,
    }


def report_output_file(output_path):
    """Print whether *output_path* exists and, if so, how many segments it has."""
    if not os.path.exists(output_path):
        print("Output file does not exist")
        return

    print(f"Output file exists: {output_path}")
    try:
        with open(output_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        print(f"Segments: {len(data.get('segments', []))}")
    except Exception as e:  # best-effort diagnostics: report and carry on
        print(f"Error reading output: {e}")


def main():
    """Run the debug ASR processor on a test video and report the outcome."""
    # Test one of the large files
    test_video = pick_test_video()
    if test_video is None:
        sys.exit("No test video found; looked for: " + ", ".join(TEST_VIDEOS))

    print(f"Testing: {test_video}")
    print(f"Size: {os.path.getsize(test_video) / (1024**3):.2f} GB")

    # Create temp output
    temp_dir = tempfile.mkdtemp(prefix="asr_debug_")
    output_path = os.path.join(temp_dir, "output.json")

    # Use debug version
    cmd = [
        PYTHON_BIN,
        PROCESSOR_SCRIPT,
        test_video,
        output_path,
        "--uuid",
        "debug_test",
    ]

    print(f"Command: {' '.join(cmd)}")
    print(f"Temp dir: {temp_dir}")

    result = run_with_live_output(cmd, TIMEOUT_SECONDS)
    success = not result["timeout_hit"] and result["returncode"] == 0

    print(f"\nElapsed: {result['elapsed']:.1f}s")
    print(f"Success: {success}")
    print(f"Timeout: {result['timeout_hit']}")
    print(f"Return code: {result['returncode']}")

    # Check output
    report_output_file(output_path)

    # Save logs
    log_file = os.path.join(temp_dir, "debug_log.txt")
    with open(log_file, "w", encoding="utf-8") as f:
        f.write("=== STDOUT ===\n")
        f.write("".join(result["stdout_lines"]))
        f.write("\n=== STDERR ===\n")
        f.write("".join(result["stderr_lines"]))

    print(f"\nLogs saved to: {log_file}")
    print(f"Temp directory preserved: {temp_dir}")


if __name__ == "__main__":
    main()