# momentry_core/scripts/face_processor.py

#!/opt/homebrew/bin/python3.11
"""
Face Processor - Face Detection & Demographics
Uses InsightFace for detection, age, and gender analysis.
Falls back to OpenCV Haar Cascade if InsightFace fails.
"""

import sys
import json
import argparse
import os

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher


def _write_result(output_path: str, result: dict) -> dict:
    """Write *result* to *output_path* as indented JSON and return it."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)
    return result


def _fail(output_path: str, publisher, message: str) -> dict:
    """Publish *message* as an error (if a publisher exists) and write an empty result."""
    if publisher:
        publisher.error("face", message)
    return _write_result(output_path, {"frame_count": 0, "fps": 0.0, "frames": []})


def _iter_insightface_faces(app, frame):
    """Yield one face record (box, confidence, age/gender) per InsightFace detection."""
    for face in app.get(frame):
        bbox = face.bbox.astype(int)

        # Use getattr + an explicit None check rather than hasattr: an
        # attribute may exist but be None (e.g. if the age/gender model did
        # not run), and int(None) would raise and drop the remaining faces.
        age_val = getattr(face, "age", None)
        gender_val = getattr(face, "gender", None)
        score = getattr(face, "det_score", None)

        if gender_val == 0:
            gender = "female"
        elif gender_val == 1:
            gender = "male"
        else:
            gender = None

        yield {
            "x": int(bbox[0]),
            "y": int(bbox[1]),
            "width": int(bbox[2] - bbox[0]),
            "height": int(bbox[3] - bbox[1]),
            "confidence": float(score) if score is not None else 0.9,
            "attributes": {
                "age": int(age_val) if age_val is not None else None,
                "gender": gender,
            },
        }


def _iter_haar_faces(face_cascade, gray):
    """Yield one face record per Haar cascade detection (no age/gender available)."""
    detections = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )
    for x, y, w, h in detections:
        yield {
            "x": int(x),
            "y": int(y),
            "width": int(w),
            "height": int(h),
            "confidence": 0.8,
            "attributes": {"age": None, "gender": None},
        }


def process_face(video_path: str, output_path: str, uuid: str = "") -> dict:
    """Detect faces (and, with InsightFace, age/gender) in sampled video frames.

    Every 30th frame of the video is analysed. InsightFace is used when it can
    be imported and initialised; otherwise the OpenCV Haar cascade is used and
    age/gender are reported as None. The result is written to *output_path* as
    JSON and also returned.

    Args:
        video_path: Path of the video file to analyse.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, a RedisPublisher is created with it and used to
            publish start/progress/error/complete events.

    Returns:
        A dict with keys "frame_count", "fps" and "frames". Only sampled frames
        containing at least one face appear in "frames". On a fatal setup error
        (OpenCV missing, cascade not loadable, video not openable) the dict is
        {"frame_count": 0, "fps": 0.0, "frames": []}.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("face", "FACE_START")

    # OpenCV is mandatory: it decodes the video and provides the fallback detector.
    try:
        import cv2
    except ImportError as e:
        return _fail(output_path, publisher, f"Missing dependency: {e.name}")

    # 1. Initialize InsightFace. The import lives inside this try so that a
    #    missing or broken InsightFace install falls through to the Haar
    #    cascade instead of aborting the whole run.
    use_insightface = False
    app = None
    try:
        if publisher:
            publisher.info("face", "LOADING_INSIGHTFACE")
        import insightface

        # 'buffalo_l' is a robust model. det_size can be adjusted.
        app = insightface.app.FaceAnalysis(
            name="buffalo_l", providers=["CPUExecutionProvider"]
        )
        app.prepare(ctx_id=0, det_size=(320, 320))
        use_insightface = True
        if publisher:
            publisher.info("face", "INSIGHTFACE_LOADED")
    except Exception as e:
        print(f"[WARNING] InsightFace failed to load: {e}")
        use_insightface = False
        app = None

    # 2. Fallback to Haar Cascade
    face_cascade = None
    if not use_insightface:
        if publisher:
            publisher.info("face", "LOADING_HAAR_CASCADE")
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        if face_cascade.empty():
            return _fail(output_path, publisher, "Could not load Haar Cascade")
        if publisher:
            publisher.info("face", "HAAR_CASCADE_LOADED")

    if publisher:
        publisher.info("face", "PROCESSING_VIDEO")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return _fail(output_path, publisher, "Could not open video")

    # Optimization: process every N frames to speed up analysis. Attributes
    # for a person's identity do not require every single frame.
    sample_interval = 30
    frame_count = 0
    processed_count = 0
    frames_data = []

    # try/finally guarantees the capture handle is released even if reading
    # or publishing raises unexpectedly.
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        estimated_samples = total_frames // sample_interval if total_frames > 0 else 0

        if publisher:
            publisher.progress("face", 0, estimated_samples, "Starting")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Sampling: only every sample_interval-th frame is analysed.
            if frame_count % sample_interval != 0:
                continue

            processed_count += 1
            timestamp = (frame_count - 1) / fps if fps > 0 else 0

            face_list = []
            try:
                if use_insightface and app is not None:
                    detections = _iter_insightface_faces(app, frame)
                else:
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    detections = _iter_haar_faces(face_cascade, gray)
                # Append one by one so faces found before a mid-frame error
                # are still kept (best-effort per frame).
                for detection in detections:
                    face_list.append(detection)
            except Exception as e:
                print(f"[ERROR] Frame processing error: {e}")

            if face_list:
                frames_data.append(
                    {
                        "frame": frame_count - 1,
                        "timestamp": round(timestamp, 3),
                        "faces": face_list,
                    }
                )

            # Report progress for every sampled frame, not only those with
            # faces, so the counter keeps advancing through face-free footage.
            if publisher:
                publisher.progress(
                    "face",
                    processed_count,
                    estimated_samples,
                    f"Frame {frame_count}",
                )
    finally:
        cap.release()

    result = {"frame_count": total_frames, "fps": fps, "frames": frames_data}

    # Write the file before announcing completion so a consumer reacting to
    # the "complete" event can always find the output.
    _write_result(output_path, result)

    if publisher:
        publisher.complete("face", f"{len(frames_data)} frames processed")

    return result


if __name__ == "__main__":
    # Command-line entry point: two required positional paths plus an
    # optional UUID, all forwarded unchanged to process_face.
    cli = argparse.ArgumentParser(description="Face Detection & Demographics")
    for arg_name, arg_help in (
        ("video_path", "Path to video file"),
        ("output_path", "Output JSON path"),
    ):
        cli.add_argument(arg_name, help=arg_help)
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    options = cli.parse_args()

    process_face(
        video_path=options.video_path,
        output_path=options.output_path,
        uuid=options.uuid,
    )