# momentry_core/scripts/utils/test_mediapipe.py

#!/opt/homebrew/bin/python3.11
"""
MediaPipe Test Script - Test all MediaPipe modules

Test modules:
1. Face Mesh (468 keypoints, 478 with refined iris landmarks)
2. Pose (33 keypoints)
3. Hands (21 keypoints per hand)
4. Holistic (Face + Pose + Hands)
"""

import cv2
import numpy as np
import mediapipe as mp
from pathlib import Path


def test_face_mesh(image_path=None):
    """
    Test MediaPipe Face Mesh (468 keypoints, 478 with refined iris landmarks)

    Creates a static-image Face Mesh model, runs it on a sample image and
    prints key landmarks together with the eye and mouth aspect ratios.

    Args:
        image_path: Optional path of the image to analyse. When omitted, the
            project's sample preview frame is used.
    """
    print("=" * 60)
    print("Testing MediaPipe Face Mesh")
    print("=" * 60)

    if image_path is None:
        image_path = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg"

    mp_face_mesh = mp.solutions.face_mesh

    # Create Face Mesh model. The context manager closes it even when
    # processing raises, so the native resources are never leaked.
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,  # Enable iris detection
        min_detection_confidence=0.5,
    ) as face_mesh:
        print("✅ Face Mesh model created")

        # Test on sample image
        image = None
        if not Path(image_path).exists():
            print(f"⚠️ Test image not found, skipping inference: {image_path}")
        else:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly.
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"❌ Could not read image: {image_path}")

        if image is not None:
            image_height, image_width = image.shape[:2]
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            results = face_mesh.process(image_rgb)

            if results.multi_face_landmarks:
                _print_face_mesh_report(
                    results.multi_face_landmarks[0], image_width, image_height
                )
            else:
                print("❌ No face detected")

    print("\n✅ Face Mesh test completed")


def _print_face_mesh_report(face_landmarks, image_width, image_height):
    """Print key landmarks plus eye and mouth aspect ratios for one face."""
    landmarks = face_landmarks.landmark
    num_landmarks = len(landmarks)

    print(f"✅ Face detected: {num_landmarks} landmarks")

    # Key landmark indices
    key_indices = {
        "nose_tip": 1,
        "left_eye_center": 33,
        "right_eye_center": 263,
        "left_iris_center": 468,
        "right_iris_center": 473,
        "mouth_top": 13,
        "mouth_bottom": 14,
        "mouth_left": 61,
        "mouth_right": 291,
    }

    print("\nKey landmarks:")
    for name, idx in key_indices.items():
        # Iris indices (468 and above) only exist when refined landmarks
        # are enabled, hence the bounds check.
        if idx < num_landmarks:
            landmark = landmarks[idx]
            print(f"  {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}")

    # Calculate Eye Aspect Ratio (EAR) for the eye on the image-left side.
    # 159/145 are the upper/lower eyelid midpoints (vertical opening) and
    # 33/133 are the outer/inner eye corners (horizontal extent).
    ear_left = _face_aspect_ratio(
        top=landmarks[159],
        bottom=landmarks[145],
        left=landmarks[33],
        right=landmarks[133],
        image_width=image_width,
        image_height=image_height,
    )
    if ear_left > 0.35:
        eye_state = "wide_open"
    elif ear_left > 0.2:
        eye_state = "normal"
    else:
        eye_state = "closed"

    print("\nEye Aspect Ratio (EAR):")
    print(f"  Left eye EAR: {ear_left:.3f}")
    print(f"  Interpretation: {eye_state}")

    # Calculate Mouth Aspect Ratio (MAR)
    mar = _face_aspect_ratio(
        top=landmarks[13],
        bottom=landmarks[14],
        left=landmarks[61],
        right=landmarks[291],
        image_width=image_width,
        image_height=image_height,
    )
    if mar > 0.5:
        mouth_state = "open"
    elif mar < 0.2:
        mouth_state = "closed"
    else:
        mouth_state = "slightly_open"

    print("\nMouth Aspect Ratio (MAR):")
    print(f"  MAR: {mar:.3f}")
    print(f"  Interpretation: {mouth_state}")


def _face_aspect_ratio(top, bottom, left, right, image_width, image_height):
    """Return the height/width ratio of a facial feature, or 0 if it has no width."""
    # Landmark x is normalized by image width and y by image height, so both
    # distances are converted to pixels first; otherwise the ratio would be
    # skewed by the aspect ratio of the image.
    feature_height = abs(bottom.y - top.y) * image_height
    feature_width = abs(right.x - left.x) * image_width
    return feature_height / feature_width if feature_width > 0 else 0


def test_pose(image_path=None):
    """
    Test MediaPipe Pose (33 keypoints)

    Creates a static-image Pose model, runs it on a sample image and prints
    the key keypoints plus the right elbow and left knee angles.

    Args:
        image_path: Optional path of the image to analyse. When omitted, the
            project's sample preview frame is used.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Pose")
    print("=" * 60)

    if image_path is None:
        image_path = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg"

    mp_pose = mp.solutions.pose

    # The context manager closes the model even when processing raises.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,  # Heaviest, most accurate landmark model
        enable_segmentation=False,
        min_detection_confidence=0.5,
    ) as pose:
        print("✅ Pose model created")

        image = None
        if not Path(image_path).exists():
            print(f"⚠️ Test image not found, skipping inference: {image_path}")
        else:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly.
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"❌ Could not read image: {image_path}")

        if image is not None:
            image_height, image_width = image.shape[:2]
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            results = pose.process(image_rgb)

            if results.pose_landmarks:
                _print_pose_report(
                    results.pose_landmarks.landmark, image_width, image_height
                )
            else:
                print("❌ No pose detected")

    print("\n✅ Pose test completed")


def _print_pose_report(landmarks, image_width, image_height):
    """Print key pose keypoints and the right elbow / left knee angles."""
    num_landmarks = len(landmarks)

    print(f"✅ Pose detected: {num_landmarks} keypoints")

    # Key keypoints
    key_indices = {
        "nose": 0,
        "left_shoulder": 11,
        "right_shoulder": 12,
        "left_elbow": 13,
        "right_elbow": 14,
        "left_wrist": 15,
        "right_wrist": 16,
        "left_hip": 23,
        "right_hip": 24,
        "left_knee": 25,
        "right_knee": 26,
        "left_ankle": 27,
        "right_ankle": 28,
    }

    print("\nKey keypoints:")
    for name, idx in key_indices.items():
        landmark = landmarks[idx]
        print(f"  {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}, visibility={landmark.visibility:.2f}")

    # Right arm angle
    right_shoulder = landmarks[12]
    right_elbow = landmarks[14]
    right_wrist = landmarks[16]

    right_elbow_angle = _pose_joint_angle(
        right_shoulder, right_elbow, right_wrist, image_width, image_height
    )

    if right_elbow_angle is None:
        print("\nRight elbow angle: undefined (overlapping landmarks)")
    else:
        if right_elbow_angle > 150:
            elbow_state = "extended"
        elif right_elbow_angle < 90:
            elbow_state = "folded"
        else:
            elbow_state = "neutral"
        print(f"\nRight elbow angle: {right_elbow_angle:.1f}°")
        print(f"  Interpretation: {elbow_state}")

    # Check if arm is raised. Image y grows downwards, so a smaller y means
    # the landmark is higher up in the frame.
    if right_wrist.y < right_elbow.y < right_shoulder.y:
        print("  Action: raise_right (arm raised)")

    # Knee angles
    left_hip = landmarks[23]
    left_knee = landmarks[25]
    left_ankle = landmarks[27]

    left_knee_angle = _pose_joint_angle(
        left_hip, left_knee, left_ankle, image_width, image_height
    )

    if left_knee_angle is None:
        print("\nLeft knee angle: undefined (overlapping landmarks)")
    else:
        if left_knee_angle > 160:
            knee_state = "standing"
        elif left_knee_angle < 120:
            knee_state = "knee_bend"
        else:
            knee_state = "neutral"
        print(f"\nLeft knee angle: {left_knee_angle:.1f}°")
        print(f"  Interpretation: {knee_state}")


def _pose_joint_angle(p1, p2, p3, image_width, image_height):
    """Return the 2D angle in degrees at p2 between p1 and p3, or None if undefined."""
    # Landmark x is normalized by image width and y by image height; scale to
    # pixels so the angle is not distorted by the image aspect ratio.
    scale = np.array([image_width, image_height], dtype=float)
    vertex = np.array([p2.x, p2.y]) * scale
    v1 = np.array([p1.x, p1.y]) * scale - vertex
    v2 = np.array([p3.x, p3.y]) * scale - vertex

    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # A landmark coincides with the joint: the angle does not exist.
        return None

    # Rounding can push the cosine marginally outside [-1, 1], which would
    # make arccos return NaN for (nearly) straight or fully folded limbs.
    cosine = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return float(np.degrees(np.arccos(cosine)))


def test_hands(image_path=None):
    """
    Test MediaPipe Hands (21 keypoints per hand)

    Creates a static-image Hands model, runs it on a sample image and prints
    the key landmarks, per-finger extension flags and a coarse gesture label
    for every detected hand.

    Args:
        image_path: Optional path of the image to analyse. When omitted, the
            project's sample preview frame is used.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Hands")
    print("=" * 60)

    if image_path is None:
        image_path = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg"

    mp_hands = mp.solutions.hands

    # The context manager closes the model even when processing raises.
    with mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.5,
    ) as hands:
        print("✅ Hands model created")

        image = None
        if not Path(image_path).exists():
            print(f"⚠️ Test image not found, skipping inference: {image_path}")
        else:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly.
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"❌ Could not read image: {image_path}")

        if image is not None:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            results = hands.process(image_rgb)

            if results.multi_hand_landmarks:
                detections = zip(results.multi_hand_landmarks, results.multi_handedness)
                for number, (hand_landmarks, handedness) in enumerate(detections, start=1):
                    # NOTE: MediaPipe documents handedness as assuming a
                    # mirrored (selfie) image; for a regular photo the label
                    # may be swapped.
                    hand_label = handedness.classification[0].label
                    _print_hand_report(number, hand_label, hand_landmarks.landmark)
            else:
                print("❌ No hands detected")

    print("\n✅ Hands test completed")


def _print_hand_report(number, hand_label, landmarks):
    """Print key landmarks, finger extension flags and the gesture of one hand."""
    print(f"\n✅ Hand {number} detected ({hand_label}): {len(landmarks)} keypoints")

    # Key landmarks
    key_indices = {
        "wrist": 0,
        "thumb_tip": 4,
        "index_tip": 8,
        "middle_tip": 12,
        "ring_tip": 16,
        "pinky_tip": 20,
    }

    print("  Key landmarks:")
    for name, i in key_indices.items():
        lm = landmarks[i]
        print(f"    {name} ({i}): x={lm.x:.3f}, y={lm.y:.3f}")

    # (tip, base) landmark indices for thumb, index, middle, ring, pinky.
    finger_joints = ((4, 2), (8, 5), (12, 9), (16, 13), (20, 17))

    # A finger counts as extended when its tip is above its base in the image
    # (smaller y), i.e. this heuristic assumes an upright hand.
    extensions = [landmarks[tip].y < landmarks[base].y for tip, base in finger_joints]

    print(f"\n  Finger extensions: {['thumb', 'index', 'middle', 'ring', 'pinky']}")
    print(f"    {extensions}")

    print(f"  Detected gesture: {_classify_hand_gesture(extensions)}")


def _classify_hand_gesture(extensions):
    """Map [thumb, index, middle, ring, pinky] extension flags to a gesture name."""
    thumb, index, middle, ring, pinky = extensions

    if all(extensions):
        return "open_hand"
    if not any(extensions):
        return "fist"
    if thumb and not (index or middle or ring or pinky):
        return "thumbs_up"
    # Only ring and pinky must be folded here; testing the middle finger as
    # well would contradict the requirement that it is extended and make this
    # branch unreachable.
    if index and middle and not (ring or pinky):
        return "peace_sign"
    if index and not (middle or ring or pinky) and not thumb:
        return "pointing"
    return "unknown"


def test_holistic(image_path=None):
    """
    Test MediaPipe Holistic (Face + Pose + Hands combined)

    Creates a static-image Holistic model, runs it on a sample image and
    prints how many landmarks each detected component produced.

    Args:
        image_path: Optional path of the image to analyse. When omitted, the
            project's sample preview frame is used.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Holistic")
    print("=" * 60)

    if image_path is None:
        image_path = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg"

    mp_holistic = mp.solutions.holistic

    # The context manager closes the model even when processing raises.
    with mp_holistic.Holistic(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
        refine_face_landmarks=True,
    ) as holistic:
        print("✅ Holistic model created")

        image = None
        if not Path(image_path).exists():
            print(f"⚠️ Test image not found, skipping inference: {image_path}")
        else:
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly.
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"❌ Could not read image: {image_path}")

        if image is not None:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            results = holistic.process(image_rgb)

            # (label, landmark list or None, unit used in the printed line)
            components = (
                ("Face", results.face_landmarks, "landmarks"),
                ("Pose", results.pose_landmarks, "keypoints"),
                ("Left hand", results.left_hand_landmarks, "keypoints"),
                ("Right hand", results.right_hand_landmarks, "keypoints"),
            )

            detected_count = 0
            for label, landmark_list, unit in components:
                if landmark_list:
                    print(f"✅ {label}: {len(landmark_list.landmark)} {unit}")
                    detected_count += 1

            if detected_count == 0:
                print("❌ No landmarks detected")
            else:
                print(f"\nTotal detections: {detected_count} components")

    print("\n✅ Holistic test completed")


def main():
    """
    Run every MediaPipe module test in sequence and print a short summary

    Prints the installed MediaPipe version, runs the Face Mesh, Pose, Hands
    and Holistic tests in that order, then lists suggested next steps.
    """
    banner = "=" * 70

    for header_line in (banner, "MediaPipe Installation Test", banner):
        print(header_line)

    print(f"\nMediaPipe version: {mp.__version__}")
    print()

    # Test all modules
    for run_module_test in (test_face_mesh, test_pose, test_hands, test_holistic):
        run_module_test()

    print("\n" + banner)
    print("✅ All MediaPipe tests completed!")
    print(banner)

    next_steps = (
        "  1. Face Mesh: Use for eye/mouth action detection",
        "  2. Pose: Use for arm/leg/feet action detection",
        "  3. Hands: Use for hand gesture detection",
        "  4. Holistic: Use for full-body action detection",
    )
    print("\nNext steps:")
    for step in next_steps:
        print(step)


# Script entry point: run the checks only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()