#!/opt/homebrew/bin/python3.11
"""
OCR Processor - Text Recognition
Uses EasyOCR (local model)
"""

import sys
import json
import argparse
import os
import signal

# Make the sibling redis_publisher module importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher


def signal_handler(signum, frame):
    """Announce the received signal on stdout and exit with status 1."""
    print(f"OCR: Received signal {signum}, exiting...")
    sys.exit(1)


def _report_error(publisher, message: str) -> None:
    """Send an error to the publisher, or to stderr when there is none."""
    if publisher:
        publisher.error("ocr", message)
    else:
        print(f"OCR: {message}", file=sys.stderr)


def _write_result(output_path: str, result: dict) -> None:
    """Write the result dict to ``output_path`` as indented JSON."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)


def _detection_to_text(detection):
    """Convert one OCR detection into a JSON-serializable dict.

    The detection is indexed as ``(bbox, text, confidence)`` where ``bbox``
    is an iterable of ``(x, y)`` points. Returns ``None`` when the text is
    empty or whitespace-only.
    """
    det = tuple(detection)
    bbox = list(det[0])
    text = str(det[1])
    # Plain float so the value is JSON-serializable whatever type came back.
    confidence = float(det[2])

    if not text.strip():
        return None

    xs = [float(p[0]) for p in bbox]
    ys = [float(p[1]) for p in bbox]
    # Axis-aligned bounding rectangle of the detected polygon.
    x = int(min(xs))
    y = int(min(ys))
    return {
        "text": text,
        "x": x,
        "y": y,
        "width": int(max(xs) - x),
        "height": int(max(ys) - y),
        "confidence": confidence,
    }


def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
):
    """Process video for OCR using EasyOCR.

    Every ``sample_interval``-th frame is run through EasyOCR; frames in
    which text was found are collected and the result is written to
    ``output_path`` as JSON.

    Args:
        video_path: Path of the video file to read.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is reported through
            ``RedisPublisher(uuid)``; otherwise no publisher is used and
            errors are printed to stderr.
        sample_interval: Run OCR on every N-th frame (default 30).

    Returns:
        The result dict ``{"frame_count", "fps", "frames"}`` that was also
        written to ``output_path``. If EasyOCR is not installed, or the video
        yields no frames, ``frames`` is empty.

    Raises:
        ValueError: If ``sample_interval`` is less than 1.
    """
    if sample_interval < 1:
        raise ValueError("sample_interval must be >= 1")

    # Set up signal handlers
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("ocr", "OCR_START")

    try:
        import easyocr
    except ImportError:
        _report_error(publisher, "easyocr not installed")
        result = {"frame_count": 0, "fps": 0.0, "frames": []}
        # Write the output before announcing completion (same order as the
        # normal path) so the file exists when "complete" is published.
        _write_result(output_path, result)
        if publisher:
            publisher.complete("ocr", "0 frames")
        return result

    if publisher:
        publisher.info("ocr", "OCR_LOADING_MODEL")

    # Load EasyOCR reader
    # languages: add more like 'fr', 'de', 'ja', 'ko', etc.
    # gpu: set to True if GPU available
    reader = easyocr.Reader(["en"], gpu=False, verbose=False)

    if publisher:
        publisher.info("ocr", "OCR_MODEL_LOADED")

    import cv2

    frames = []
    frame_count = 0
    processed = 0

    # Open the video once for both the metadata and the frame loop, and
    # release the capture even if processing raises.
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report the failure but keep going: the read loop below ends
            # immediately and an empty result is still written.
            _report_error(publisher, f"Could not open video: {video_path}")

        # Get video info
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if publisher:
            publisher.info("ocr", f"fps={fps}, frames={total_frames}")
            publisher.progress("ocr", 0, total_frames, "Starting")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Sample frames: only every sample_interval-th frame is OCR'd
            if frame_count % sample_interval != 0:
                continue

            processed += 1
            # frame_count is 1-based; the timestamp uses the 0-based index.
            timestamp = (frame_count - 1) / fps if fps > 0 else 0

            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run OCR; a failure on one frame must not abort the whole video
            try:
                detections = reader.readtext(
                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
                )
            except Exception as e:
                _report_error(publisher, f"Frame {frame_count}: {e}")
                detections = []

            texts = []
            for detection in detections:
                entry = _detection_to_text(detection)
                if entry is not None:
                    texts.append(entry)

            # Only add frames with text
            if texts:
                frames.append(
                    {
                        "frame": frame_count - 1,
                        "timestamp": round(timestamp, 3),
                        "texts": texts,
                    }
                )

            if publisher:
                publisher.progress(
                    "ocr",
                    processed,
                    total_frames // sample_interval,
                    f"Frame {frame_count}",
                )
    finally:
        cap.release()

    result = {"frame_count": total_frames, "fps": fps, "frames": frames}
    _write_result(output_path, result)

    if publisher:
        publisher.complete("ocr", f"{len(frames)} frames with text")

    return result


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="OCR Text Recognition")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
    args = parser.parse_args()
    process_ocr(args.video_path, args.output_path, args.uuid)