Files
momentry_core/scripts/face_processor.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

300 lines
9.6 KiB
Python
Executable File

#!/opt/homebrew/bin/python3.11
"""
Face Processor - Face Detection & Demographics with Resume Support
Uses InsightFace for detection, age, gender, and embedding extraction.
IMPORTANT: InsightFace is REQUIRED. No Haar fallback.
- InsightFace provides 512-dim ArcFace embedding for identity matching
- Haar Cascade cannot generate embedding, only detection
- If InsightFace fails, processor will ERROR and exit
Resume Feature:
- Auto-detect existing results and resume from last frame
- Auto-save at configurable intervals (default: 30 seconds)
- Ctrl+C gracefully saves and exits
"""
import sys
import json
import argparse
import os
import time
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
from resume_framework import ResumeFramework, format_time, print_progress
from utils.pose_analyzer import calculate_pose_angle_v2
def _write_error_result(framework, output_path: str, error_msg: str) -> dict:
    """Publish *error_msg*, write an error-only result to *output_path*, return it."""
    framework.publish_error(error_msg)
    result = {
        "metadata": {"status": "error", "error": error_msg},
        "frames": {},
    }
    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)
    return result


def _face_to_record(face, frame_number: int) -> dict:
    """Convert one InsightFace detection into a JSON-serializable face entry.

    InsightFace's ``Face`` object answers ``None`` for attributes it does not
    carry instead of raising ``AttributeError``, so ``hasattr`` is not a
    reliable presence check. Every optional field is therefore read with
    ``getattr(..., None)`` and tested against ``None`` explicitly.
    """
    bbox = face.bbox.astype(int)
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]

    age_val = getattr(face, "age", None)
    age = int(age_val) if age_val is not None else None

    gender_val = getattr(face, "gender", None)
    if gender_val == 0:
        gender = "female"
    elif gender_val == 1:
        gender = "male"
    else:
        gender = None

    embedding_val = getattr(face, "embedding", None)
    embedding = embedding_val.tolist() if embedding_val is not None else None

    # Prefer the 5-point keypoints; fall back to the 68-point 3D landmarks.
    landmarks = None
    kps = getattr(face, "kps", None)
    if kps is not None:
        landmarks = kps.tolist()
    else:
        landmark_68 = getattr(face, "landmark_3d_68", None)
        if landmark_68 is not None:
            landmarks = landmark_68.tolist()

    pose_angle = None
    if landmarks and len(landmarks) >= 5:
        try:
            pose_result = calculate_pose_angle_v2(landmarks)
            pose_angle = {
                "angle": pose_result.get("angle", "unknown"),
                "confidence": pose_result.get("confidence", 0.0),
                "pitch": pose_result.get("pitch", "neutral"),
                "features": pose_result.get("features", {}),
            }
        except Exception as e:
            # Pose is best-effort: keep the face, but do not hide the failure.
            print(f"[WARN] Pose estimation failed at frame {frame_number}: {e}")

    det_score = getattr(face, "det_score", None)

    return {
        "x": int(left),
        "y": int(top),
        "width": int(right - left),
        "height": int(bottom - top),
        # 0.9 is the fallback used when the detector reports no score.
        "confidence": float(det_score) if det_score is not None else 0.9,
        "embedding": embedding,
        "landmarks": landmarks,
        "pose_angle": pose_angle,
        "attributes": {"age": age, "gender": gender},
    }


def process_face(
    video_path: str,
    output_path: str,
    uuid: str = "",
    auto_save_interval: int = 30,
    auto_save_frames: int = 300,
    force_restart: bool = False,
    sample_interval: int = 30,
):
    """Process video for face detection and demographics analysis with resume support.

    Args:
        video_path: Path of the video file to analyse.
        output_path: Path of the JSON result file (also handed to the resume
            framework as its output path).
        uuid: Identifier forwarded to ``ResumeFramework``.
        auto_save_interval: Auto-save interval in seconds, forwarded to
            ``ResumeFramework``.
        auto_save_frames: Auto-save interval in frames, forwarded to
            ``ResumeFramework``.
        force_restart: When true, existing results are ignored and processing
            starts from the first frame.
        sample_interval: Only frames whose 1-based number is a multiple of this
            value are analysed. ``0`` is treated as ``1`` and a negative value
            as its absolute value, so the modulo below can never divide by zero.

    Returns:
        The result dict with ``"metadata"`` and ``"frames"`` keys. On failure
        (missing dependency, InsightFace load error, unreadable video) a dict
        whose metadata has ``"status": "error"`` and an ``"error"`` message.
    """
    framework = ResumeFramework(
        output_path=output_path,
        processor_name="face",
        uuid=uuid,
        auto_save_interval=auto_save_interval,
        auto_save_frames=auto_save_frames,
        force_restart=force_restart,
    )
    framework.publish_info("FACE_START")

    # Heavy dependencies are imported lazily so a missing package is reported
    # through the framework instead of crashing at module import time.
    try:
        import cv2
        import numpy as np  # noqa: F401  (imported only to verify it is installed)
        import insightface
    except ImportError as e:
        return _write_error_result(
            framework, output_path, f"Missing dependency: {e.name}"
        )

    try:
        framework.publish_info("LOADING_INSIGHTFACE")
        app = insightface.app.FaceAnalysis(
            name="buffalo_l", providers=["CPUExecutionProvider"]
        )
        app.prepare(ctx_id=0, det_size=(320, 320))
        framework.publish_info("INSIGHTFACE_LOADED")
    except Exception as e:
        return _write_error_result(
            framework, output_path, f"InsightFace failed to load (REQUIRED): {e}"
        )

    framework.publish_info("PROCESSING_VIDEO")

    # Probe the video once for its properties.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        error_msg = f"Cannot open video: {video_path}"
        print(f"Error: {error_msg}")
        framework.publish_error(error_msg)
        # Deliberately not written to output_path: an existing result file may
        # hold resumable progress that must not be overwritten.
        return {"metadata": {"status": "error", "error": error_msg}, "frames": {}}

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_duration = total_frames / fps if fps > 0 else 0
    cap.release()
    framework.publish_info(f"fps={fps}, frames={total_frames}")

    # Guard against sample_interval == 0 (ZeroDivisionError in the modulo).
    sample_step = max(1, abs(sample_interval))

    existing_data, last_checkpoint = framework.load_existing_data()
    # Resume only when there is actual data to continue from, so the messages
    # printed below always agree with what the loop really does.
    resume_mode = bool(existing_data) and last_checkpoint > 0 and not force_restart

    if resume_mode:
        print(f"\nFound existing data: {output_path}")
        print(f"Last processed frame: {last_checkpoint}")
        print(f"Will resume from frame {last_checkpoint + 1}")
        face_data = existing_data
        frame_count = last_checkpoint
        processed_frames = {int(k) for k in existing_data.get("frames", {})}
    else:
        face_data = {
            "metadata": framework.init_metadata(
                video_path=video_path,
                fps=fps,
                width=width,
                height=height,
                total_frames=total_frames,
                total_duration=total_duration,
                extra={
                    "sample_interval": sample_step,
                    "detection_method": "insightface",
                },
            ),
            "frames": {},
        }
        frame_count = 0
        processed_frames = set()

    framework.set_data(face_data)
    start_time = time.time()
    framework.last_save_time = start_time

    print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
    print(f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames")
    print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
    print("Detection method: InsightFace (REQUIRED)")
    print()

    cap = cv2.VideoCapture(video_path)
    try:
        if resume_mode:
            # frame_count is 1-based, so the 0-based index of the next
            # unprocessed frame equals the last processed frame number.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            if frame_count in processed_frames:
                continue
            if frame_count % sample_step != 0:
                continue

            current_time = (frame_count - 1) / fps if fps > 0 else 0

            face_list = []
            try:
                for face in app.get(frame):
                    face_list.append(_face_to_record(face, frame_count))
            except Exception as e:
                # A failing frame is reported and skipped; faces collected
                # before the failure are kept.
                print(f"[ERROR] Frame processing error: {e}")

            # Only frames that contain at least one face are stored.
            if face_list:
                face_data["frames"][str(frame_count)] = {
                    "frame_number": frame_count,
                    "time_seconds": round(current_time, 3),
                    "time_formatted": format_time(current_time),
                    "faces": face_list,
                }
            processed_frames.add(frame_count)

            if frame_count % 500 == 0:
                elapsed = time.time() - start_time
                print_progress(
                    frame_count, total_frames, elapsed, f"{len(face_list)} faces"
                )
                framework.publish_progress(
                    frame_count, total_frames, f"frame {frame_count}"
                )

            if framework.should_auto_save(frame_count):
                framework.save_progress(frame_count, silent=True)
    finally:
        # Release the capture even when the loop is interrupted or raises.
        cap.release()

    total_processed = len(processed_frames)
    framework.finalize(
        total_processed=total_processed,
        extra_metadata={
            "sample_interval": sample_step,
            "detection_method": "insightface",
        },
    )
    print(f"\nFace detection completed: {total_processed} frames processed")
    print(f"Frames with faces: {len(face_data['frames'])}")
    return face_data
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the face processor.
    cli = argparse.ArgumentParser(
        description="Face Detection & Demographics with Resume Support"
    )

    # Positional arguments.
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")

    # Optional arguments (declared in the same order as before so the
    # generated --help output is unchanged).
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    cli.add_argument(
        "--auto-save-interval",
        "-a",
        type=int,
        default=30,
        help="Auto-save interval in seconds",
    )
    cli.add_argument(
        "--auto-save-frames",
        "-f",
        type=int,
        default=300,
        help="Auto-save interval in frames",
    )
    cli.add_argument(
        "--force-restart",
        "-r",
        action="store_true",
        help="Force restart (ignore existing data)",
    )
    cli.add_argument(
        "--sample-interval",
        "-s",
        type=int,
        default=30,
        help="Frame sample interval",
    )

    options = cli.parse_args()
    process_face(
        video_path=options.video_path,
        output_path=options.output_path,
        uuid=options.uuid,
        auto_save_interval=options.auto_save_interval,
        auto_save_frames=options.auto_save_frames,
        force_restart=options.force_restart,
        sample_interval=options.sample_interval,
    )