- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
170 lines
7.1 KiB
Python
170 lines
7.1 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
Find "Kids" in pose data based on Head-to-Body Ratio.

Heuristic: Kids have a larger head relative to their body height (approx 1:5 or 1:6) compared to adults (approx 1:7.5).
"""
|
|
|
|
import json
|
|
import math
|
|
import sys
|
|
|
|
# --- Configuration -----------------------------------------------------------

# Pose JSON file that find_kids() reads.
POSE_JSON_PATH = "output/384b0ff44aaaa1f1/384b0ff44aaaa1f1.pose.json"

# Cut-off for the (estimated total height / estimated head height) ratio:
# a detection whose ratio is strictly below this value is reported as a
# potential kid. Rule of thumb used by this script: kids are roughly
# 4.5-6.0 head heights tall, adults roughly 7.0-8.0.
BODY_TO_HEAD_RATIO_THRESHOLD = 5.8
|
|
|
|
def distance(p1, p2):
    """Return the Euclidean distance between two points.

    Both points are mappings that provide numeric 'x' and 'y' entries.
    """
    dx = p1['x'] - p2['x']
    dy = p1['y'] - p2['y']
    return math.sqrt(dx ** 2 + dy ** 2)
|
|
|
|
def get_midpoint(p1, p2):
    """Return a new {'x', 'y'} dict located halfway between p1 and p2."""
    return {axis: (p1[axis] + p2[axis]) / 2 for axis in ('x', 'y')}
|
|
|
|
# Empirical scale factor turning the nose-to-shoulder distance into an
# estimate of the full head height.
_HEAD_HEIGHT_FACTOR = 1.8

# Head-height estimates below this value are treated as too small/noisy to
# give a meaningful ratio. Expressed in the keypoints' own coordinate units
# (presumably pixels -- TODO confirm against the pose processor).
_MIN_HEAD_HEIGHT = 10


def _index_keypoints(person):
    """Map keypoint name -> keypoint dict for entries that carry name, x and y."""
    indexed = {}
    for kp in person.get("keypoints") or []:
        # Skip malformed entries instead of raising KeyError later on.
        if isinstance(kp, dict) and {"name", "x", "y"} <= kp.keys():
            indexed[kp["name"]] = kp
    return indexed


def _shoulder_reference(kp_by_name):
    """Return the mid-shoulder point, the single known shoulder, or None."""
    left = kp_by_name.get("left_shoulder")
    right = kp_by_name.get("right_shoulder")
    if left and right:
        return get_midpoint(left, right)
    return left or right


def _body_to_head_ratio(nose, shoulder_ref, left_ankle, right_ankle):
    """Estimate total height / head height, or None when it cannot be judged."""
    head_height = distance(nose, shoulder_ref) * _HEAD_HEIGHT_FACTOR
    if head_height < _MIN_HEAD_HEIGHT:
        return None

    # Only full-body detections are rated: without both ankles (sitting,
    # crouching, cropped) the height estimate is not trustworthy.
    if not (left_ankle and right_ankle):
        return None

    # The nose sits inside the head, so add half a head height on top of the
    # nose-to-ankle distance to approximate the total height.
    mid_ankle = get_midpoint(left_ankle, right_ankle)
    total_height = distance(nose, mid_ankle) + head_height / 2

    # Reject degenerate geometry (e.g. ankles detected at head level).
    if total_height > head_height:
        return total_height / head_height
    return None


def find_kids(pose_json_path=None, ratio_threshold=None):
    """Scan a pose JSON file and report people with child-like proportions.

    The file is expected to hold a top-level "frames" object mapping a frame
    index to {"timestamp": ..., "poses": [...]}, where every pose has a
    "keypoints" list of {"name", "x", "y", ...} entries. For each pose with a
    nose, at least one shoulder and both ankles, the ratio of estimated total
    height to estimated head height is computed; ratios strictly below the
    threshold are flagged. A summary is printed to stdout.

    NOTE(review): keypoints are only checked for presence. If the processor
    also emits a per-keypoint confidence/visibility score, it is not consulted.

    Args:
        pose_json_path: Path of the pose JSON file. Defaults to
            POSE_JSON_PATH when omitted.
        ratio_threshold: Ratios below this value are flagged. Defaults to
            BODY_TO_HEAD_RATIO_THRESHOLD when omitted.

    Returns:
        A list of detection dicts with keys "frame", "timestamp", "ratio"
        and "person_id", holding at most one entry per 0.1 s timestamp
        bucket and sorted by timestamp. The list is empty when the file
        cannot be loaded.
    """
    if pose_json_path is None:
        pose_json_path = POSE_JSON_PATH
    if ratio_threshold is None:
        ratio_threshold = BODY_TO_HEAD_RATIO_THRESHOLD

    try:
        with open(pose_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        print(f"Error loading JSON: {e}")
        return []

    if not isinstance(data, dict):
        print(f"Error loading JSON: expected a top-level object, got {type(data).__name__}")
        return []

    frames = data.get("frames", {})
    potential_kids = []

    # Counters reported in the summary line.
    total_poses = 0
    analyzed_poses = 0

    for frame_idx_str, frame_data in frames.items():
        timestamp = frame_data.get("timestamp", 0)

        for person in frame_data.get("poses") or []:
            total_poses += 1
            kp_by_name = _index_keypoints(person)

            nose = kp_by_name.get("nose")
            shoulder_ref = _shoulder_reference(kp_by_name)
            if not nose or not shoulder_ref:
                continue

            analyzed_poses += 1

            ratio = _body_to_head_ratio(
                nose,
                shoulder_ref,
                kp_by_name.get("left_ankle"),
                kp_by_name.get("right_ankle"),
            )
            if ratio is not None and ratio < ratio_threshold:
                potential_kids.append({
                    "frame": frame_idx_str,
                    "timestamp": timestamp,
                    "ratio": round(ratio, 2),
                    "person_id": person.get("person_id", "?"),
                })

    print(f"Analyzed {analyzed_poses} poses out of {total_poses} total detections.")
    print(f"Found {len(potential_kids)} potential 'kids' (Ratio < {ratio_threshold}).")

    # Keep the first detection per 0.1 s timestamp bucket.
    # NOTE(review): the key is the timestamp only, so several people flagged
    # in the same bucket collapse into a single entry.
    unique_kids = {}
    for kid in potential_kids:
        unique_kids.setdefault(round(kid["timestamp"], 1), kid)

    sorted_kids = sorted(unique_kids.values(), key=lambda kid: kid["timestamp"])

    print("\nUnique potential kid detections (timestamps):")
    for kid in sorted_kids:
        print(f"  -> Timestamp: {kid['timestamp']:.2f}s | Ratio: {kid['ratio']}")

    return sorted_kids
|
|
|
|
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    find_kids()
|