- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
246 lines
7.3 KiB
Python
Executable File
246 lines
7.3 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
"""
OCR Processor - Text Recognition with Resume Support
Uses EasyOCR (local model)

Resume Feature:
- Auto-detect existing results and resume from the last processed frame
- Auto-save at configurable intervals (default: every 30 seconds or 300 frames)
- Ctrl+C gracefully saves and exits
"""
|
|
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import os
|
|
import signal
|
|
import time
|
|
from datetime import datetime
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from redis_publisher import RedisPublisher
|
|
from resume_framework import ResumeFramework, format_time, print_progress
|
|
|
|
|
|
def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    auto_save_interval: int = 30,
    auto_save_frames: int = 300,
    force_restart: bool = False,
    sample_interval: int = 30,
):
    """Run EasyOCR over sampled frames of a video, with resume support.

    Frames are numbered from 1. Only frames whose number is a multiple of
    ``sample_interval`` are passed to the OCR reader; frames that yield at
    least one non-blank text are stored under ``ocr_data["frames"]`` keyed by
    the frame number as a string.

    Args:
        video_path: Path of the video file to read with OpenCV.
        output_path: JSON result path handed to ``ResumeFramework``; also
            written directly when EasyOCR is not installed.
        uuid: Identifier forwarded to ``ResumeFramework``.
        auto_save_interval: Auto-save interval in seconds (forwarded to the
            framework).
        auto_save_frames: Auto-save interval in frames (forwarded to the
            framework).
        force_restart: When true, existing results are ignored and processing
            starts from the first frame.
        sample_interval: Run OCR on every N-th frame. Must be non-zero.

    Returns:
        A dict with ``"metadata"`` and ``"frames"`` keys. On failure (EasyOCR
        missing or video cannot be opened) ``"frames"`` is empty and
        ``metadata["status"]`` is ``"error"``.

    Raises:
        ValueError: If ``sample_interval`` is 0 (it is used as a modulus).
    """
    # Fail fast: a zero interval would otherwise raise ZeroDivisionError on
    # the first decoded frame, after the (slow) model load.
    if sample_interval == 0:
        raise ValueError("sample_interval must be non-zero")

    framework = ResumeFramework(
        output_path=output_path,
        processor_name="ocr",
        uuid=uuid,
        auto_save_interval=auto_save_interval,
        auto_save_frames=auto_save_frames,
        force_restart=force_restart,
    )

    framework.publish_info("OCR_START")

    try:
        import easyocr
    except ImportError:
        framework.publish_error("easyocr not installed")
        result = {
            "metadata": {"status": "error", "error": "easyocr not installed"},
            "frames": {},
        }
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
        framework.publish_progress(0, 0, "0 frames")
        return result

    framework.publish_info("OCR_LOADING_MODEL")

    # English-only reader running on CPU.
    reader = easyocr.Reader(["en"], gpu=False, verbose=False)

    framework.publish_info("OCR_MODEL_LOADED")

    import cv2

    # First open: only used to read the stream properties.
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        # Release the handle and report through the framework like the other
        # error paths do, instead of only printing.
        cap.release()
        framework.publish_error(f"Cannot open video: {video_path}")
        print(f"Error: Cannot open video: {video_path}")
        return {"metadata": {"status": "error"}, "frames": {}}

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0
    cap.release()

    framework.publish_info(f"fps={fps}, frames={total_frames}")

    existing_data, last_checkpoint = framework.load_existing_data()
    resume_mode = existing_data is not None and last_checkpoint > 0 and not force_restart

    if resume_mode:
        print(f"\nFound existing data: {output_path}")
        print(f"Last processed frame: {last_checkpoint}")
        print(f"Will resume from frame {last_checkpoint + 1}")

    if resume_mode and existing_data:
        ocr_data = existing_data
        frame_count = last_checkpoint
        processed_frames = {int(k) for k in existing_data.get("frames", {})}
        cap = cv2.VideoCapture(video_path)
        # Frame numbers are 1-based while OpenCV positions are 0-based, so
        # seeking to `last_checkpoint` makes the next read return frame
        # number `last_checkpoint + 1`.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    else:
        ocr_data = {
            "metadata": framework.init_metadata(
                video_path=video_path,
                fps=fps,
                width=width,
                height=height,
                total_frames=total_frames,
                total_duration=total_duration,
                extra={"sample_interval": sample_interval},
            ),
            "frames": {},
        }
        frame_count = 0
        processed_frames = set()
        cap = cv2.VideoCapture(video_path)

    framework.set_data(ocr_data)

    start_time = time.time()
    framework.last_save_time = start_time

    print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
    print(f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames")
    print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
    print()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            current_time = (frame_count - 1) / fps if fps > 0 else 0

            if frame_count in processed_frames:
                continue

            # Only every `sample_interval`-th frame is sent to OCR.
            if frame_count % sample_interval != 0:
                continue

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            try:
                detections = reader.readtext(
                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
                )
            except Exception as e:
                # Best effort: a failing frame is reported and treated as
                # having no text rather than aborting the whole run.
                framework.publish_error(f"Frame {frame_count}: {e}")
                detections = []

            texts = []
            for detection in detections:
                # Each detection is indexed as (bbox points, text, confidence).
                det: tuple = tuple(detection)
                bbox = list(det[0])
                text: str = str(det[1])
                confidence: float = float(det[2])

                # Collapse the polygon into an axis-aligned x/y/width/height box.
                x = int(min(float(p[0]) for p in bbox))
                y = int(min(float(p[1]) for p in bbox))
                w = int(max(float(p[0]) for p in bbox) - x)
                h = int(max(float(p[1]) for p in bbox) - y)

                if text.strip():
                    texts.append(
                        {
                            "text": text,
                            "x": x,
                            "y": y,
                            "width": w,
                            "height": h,
                            "confidence": confidence,
                        }
                    )

            if texts:
                ocr_data["frames"][str(frame_count)] = {
                    "frame_number": frame_count,
                    "time_seconds": round(current_time, 3),
                    "time_formatted": format_time(current_time),
                    "texts": texts,
                }
            # NOTE(review): every sampled frame is counted as processed, with
            # or without text (the summary below prints both counts) — confirm
            # this nesting against the original file.
            processed_frames.add(frame_count)

            if frame_count % 500 == 0:
                elapsed = time.time() - start_time
                print_progress(frame_count, total_frames, elapsed, f"{len(texts)} texts")
                framework.publish_progress(frame_count, total_frames, f"frame {frame_count}")

            if framework.should_auto_save(frame_count):
                framework.save_progress(frame_count, silent=True)
    finally:
        # Always free the capture handle, even if the loop is interrupted.
        cap.release()

    total_processed = len(processed_frames)

    framework.finalize(
        total_processed=total_processed,
        extra_metadata={"sample_interval": sample_interval},
    )

    print(f"\nOCR completed: {total_processed} frames processed")
    print(f"Frames with text: {len(ocr_data['frames'])}")

    return ocr_data
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the OCR processor.
    parser = argparse.ArgumentParser(description="OCR Text Recognition with Resume Support")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
    parser.add_argument(
        "--auto-save-interval",
        "-a",
        help="Auto-save interval in seconds",
        type=int,
        default=30,
    )
    parser.add_argument(
        "--auto-save-frames",
        "-f",
        help="Auto-save interval in frames",
        type=int,
        default=300,
    )
    parser.add_argument(
        "--force-restart",
        "-r",
        help="Force restart (ignore existing data)",
        action="store_true",
    )
    parser.add_argument(
        "--sample-interval",
        "-s",
        help="Frame sample interval",
        type=int,
        default=30,
    )
    args = parser.parse_args()

    # Keyword arguments keep the call correct even if the parameter order of
    # process_ocr changes.
    process_ocr(
        video_path=args.video_path,
        output_path=args.output_path,
        uuid=args.uuid,
        auto_save_interval=args.auto_save_interval,
        auto_save_frames=args.auto_save_frames,
        force_restart=args.force_restart,
        sample_interval=args.sample_interval,
    )