# momentry_core/scripts/find_kids_pose.py

#!/opt/homebrew/bin/python3.11
"""
Find "Kids" in pose data based on Head-to-Body Ratio.
Heuristic: Kids have a larger head relative to their body height (approx 1:5 or 1:6) compared to adults (approx 1:7.5).
"""

import json
import math
import sys

# Configuration
# Path of the pose JSON file that find_kids() loads.
POSE_JSON_PATH = "output/384b0ff44aaaa1f1/384b0ff44aaaa1f1.pose.json"
# Heuristic threshold on (estimated total height / estimated head height).
# Kids are expected to land roughly below 6.0 and adults above 6.5, so a pose
# whose ratio is strictly below this value is reported as a potential kid
# (a smaller ratio is "more kid-like" relative to head size).
BODY_TO_HEAD_RATIO_THRESHOLD = 5.8

def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: Mapping with numeric 'x' and 'y' entries.
        p2: Mapping with numeric 'x' and 'y' entries.

    Returns:
        The straight-line distance between p1 and p2 as a float.

    Raises:
        KeyError: If either point lacks an 'x' or 'y' entry.
    """
    # math.hypot scales its arguments internally, so very large coordinate
    # differences do not overflow and very small ones do not underflow to
    # zero, unlike squaring the differences by hand.
    return math.hypot(p1['x'] - p2['x'], p1['y'] - p2['y'])

def get_midpoint(p1, p2):
    """Return the point halfway between p1 and p2.

    Both arguments are mappings with 'x' and 'y' entries; the result is a
    new dict with the averaged 'x' and 'y' values.
    """
    mid_x = (p1['x'] + p2['x']) / 2
    mid_y = (p1['y'] + p2['y']) / 2
    return dict(x=mid_x, y=mid_y)

def find_kids(pose_json_path=None, ratio_threshold=None):
    """Print poses whose body-to-head ratio suggests the person is a child.

    The pose file is read as a JSON object of the form::

        {"frames": {"<frame index>": {"timestamp": <number>,
                                      "poses": [{"keypoints": [...],
                                                 "person_id": ...}]}}}

    where each keypoint is a dict with 'name', 'x' and 'y' entries. For every
    pose with a nose, at least one shoulder and both ankles, the function
    estimates head height and total height, and records the pose when
    total height / head height is strictly below the threshold.

    Args:
        pose_json_path: Path of the pose JSON file to analyze. When None,
            the module-level POSE_JSON_PATH is used.
        ratio_threshold: Ratio below which a pose is reported. When None,
            the module-level BODY_TO_HEAD_RATIO_THRESHOLD is used.

    Returns:
        None. The summary and the de-duplicated detections are printed to
        stdout. If the file cannot be opened or parsed, an error message is
        printed and the function returns early.
    """
    if pose_json_path is None:
        pose_json_path = POSE_JSON_PATH
    if ratio_threshold is None:
        ratio_threshold = BODY_TO_HEAD_RATIO_THRESHOLD

    # OSError covers a missing/unreadable file; ValueError covers both
    # json.JSONDecodeError and UnicodeDecodeError. Anything else is a
    # programming error and should surface instead of being swallowed.
    try:
        with open(pose_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Error loading JSON: {e}")
        return

    frames = data.get("frames", {})
    potential_kids = []

    # Counters for debugging
    total_poses = 0      # every detection seen
    analyzed_poses = 0   # detections that have a nose and at least one shoulder

    for frame_idx_str, frame_data in frames.items():
        timestamp = frame_data.get("timestamp", 0)

        for person in frame_data.get("poses", []):
            total_poses += 1

            # Index keypoints by name; entries without a name cannot be
            # looked up, so they are skipped rather than raising KeyError.
            kp_dict = {
                kp['name']: kp
                for kp in person.get("keypoints", [])
                if 'name' in kp
            }

            nose = kp_dict.get('nose')
            l_shoulder = kp_dict.get('left_shoulder')
            r_shoulder = kp_dict.get('right_shoulder')
            l_ankle = kp_dict.get('left_ankle')
            r_ankle = kp_dict.get('right_ankle')

            # The head estimate needs the nose and at least one shoulder.
            if not nose or not (l_shoulder or r_shoulder):
                continue

            analyzed_poses += 1

            # Prefer the midpoint of both shoulders; otherwise fall back to
            # whichever single shoulder is present.
            if l_shoulder and r_shoulder:
                mid_shoulder = get_midpoint(l_shoulder, r_shoulder)
            else:
                mid_shoulder = l_shoulder or r_shoulder

            # Head height is approximated as 1.8 x the nose-to-shoulder
            # distance (empirical factor).
            head_height_est = distance(nose, mid_shoulder) * 1.8

            # Too small/noisy to give a meaningful ratio.
            # NOTE(review): the cutoff of 10 presumably is in pixels -- confirm
            # against the coordinate units of the pose file.
            if head_height_est < 10:
                continue

            # Only full-body poses are judged: without both ankles (sitting,
            # crouching, cropped) the total height cannot be estimated.
            if not (l_ankle and r_ankle):
                continue

            # The nose sits inside the head, so add half the head height to
            # the nose-to-ankle distance to approximate the total height.
            mid_ankle = get_midpoint(l_ankle, r_ankle)
            total_height = distance(nose, mid_ankle) + (head_height_est / 2)

            # Reject degenerate geometry where the body is not taller than
            # the head (weird angles, collapsed keypoints).
            if total_height <= head_height_est:
                continue

            ratio = total_height / head_height_est
            if ratio < ratio_threshold:
                potential_kids.append({
                    "frame": frame_idx_str,
                    "timestamp": timestamp,
                    "ratio": round(ratio, 2),
                    "person_id": person.get("person_id", "?"),
                })

    print(f"Analyzed {analyzed_poses} poses out of {total_poses} total detections.")
    print(f"Found {len(potential_kids)} potential 'kids' (Ratio < {ratio_threshold}).")

    # Keep only the first detection per 0.1 s bucket so the same person in
    # consecutive frames is not listed repeatedly. Note that this also keeps
    # a single entry when several people match at the same timestamp.
    unique_kids = {}
    for k in potential_kids:
        unique_kids.setdefault(round(k['timestamp'], 1), k)

    # Sort by timestamp
    sorted_kids = sorted(unique_kids.values(), key=lambda x: x['timestamp'])

    print("\nUnique potential kid detections (timestamps):")
    for k in sorted_kids:
        print(f"  -> Timestamp: {k['timestamp']:.2f}s | Ratio: {k['ratio']}")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    find_kids()