Files
momentry_core/scripts/utils/test_mediapipe.py
Warren e75c4d6f07 cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
2026-05-04 01:31:21 +08:00

376 lines
13 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
MediaPipe Test Script - Test all MediaPipe modules
Test modules:
1. Face Mesh (468 keypoints; 478 when iris refinement is enabled)
2. Pose (33 keypoints)
3. Hands (21 keypoints per hand)
4. Holistic (Face + Pose + Hands)
"""
import cv2
import numpy as np
import mediapipe as mp
from pathlib import Path
def _report_face_metrics(face_landmarks, image_width: int, image_height: int) -> None:
    """
    Print key landmark positions and eye/mouth aspect ratios for one face.

    Args:
        face_landmarks: One entry of ``results.multi_face_landmarks``.
        image_width: Width in pixels of the processed image.
        image_height: Height in pixels of the processed image.
    """
    landmarks = face_landmarks.landmark
    num_landmarks = len(landmarks)
    print(f"✅ Face detected: {num_landmarks} landmarks")

    # Key landmark indices
    key_indices = {
        "nose_tip": 1,
        "left_eye_center": 33,
        "right_eye_center": 263,
        "left_iris_center": 468,
        "right_iris_center": 473,
        "mouth_top": 13,
        "mouth_bottom": 14,
        "mouth_left": 61,
        "mouth_right": 291,
    }
    print("\nKey landmarks:")
    for name, idx in key_indices.items():
        # Iris indices (468+) are only present when refine_landmarks is on.
        if idx < num_landmarks:
            landmark = landmarks[idx]
            print(f" {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}")

    # Landmark x/y are normalised by image width/height respectively, so
    # distances are scaled back to pixels before forming a ratio; otherwise
    # the ratio is skewed by the image aspect ratio.

    # Calculate Eye Aspect Ratio (EAR)
    # In the face mesh topology 33/133 are the two corners of the eye and
    # 159/145 are the upper/lower eyelid.
    eye_corner_outer = landmarks[33]
    eye_corner_inner = landmarks[133]
    eyelid_upper = landmarks[159]
    eyelid_lower = landmarks[145]
    eye_height = abs(eyelid_lower.y - eyelid_upper.y) * image_height
    eye_width = abs(eye_corner_inner.x - eye_corner_outer.x) * image_width
    ear_left = eye_height / eye_width if eye_width > 0 else 0.0
    print("\nEye Aspect Ratio (EAR):")
    print(f" Left eye EAR: {ear_left:.3f}")
    print(f" Interpretation: {'wide_open' if ear_left > 0.35 else 'normal' if ear_left > 0.2 else 'closed'}")

    # Calculate Mouth Aspect Ratio (MAR)
    mouth_top = landmarks[13]
    mouth_bottom = landmarks[14]
    mouth_left = landmarks[61]
    mouth_right = landmarks[291]
    mouth_height = abs(mouth_bottom.y - mouth_top.y) * image_height
    mouth_width = abs(mouth_right.x - mouth_left.x) * image_width
    mar = mouth_height / mouth_width if mouth_width > 0 else 0.0
    print("\nMouth Aspect Ratio (MAR):")
    print(f" MAR: {mar:.3f}")
    print(f" Interpretation: {'open' if mar > 0.5 else 'closed' if mar < 0.2 else 'slightly_open'}")


def test_face_mesh(
    image_path: str = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
) -> None:
    """
    Test MediaPipe Face Mesh (468 keypoints, 478 with iris refinement)

    Creates a static-image FaceMesh model, runs it on ``image_path`` and
    prints key landmark positions plus the eye and mouth aspect ratios.
    The model is always created, so the call verifies the installation
    even when no test image is available.

    Args:
        image_path: Image to analyse. Defaults to the preview frame this
            script was originally written against.
    """
    print("=" * 60)
    print("Testing MediaPipe Face Mesh")
    print("=" * 60)

    mp_face_mesh = mp.solutions.face_mesh

    # Create Face Mesh model; the context manager guarantees close() runs
    # even if reading or processing the image raises.
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,  # Enable iris detection
        min_detection_confidence=0.5,
    ) as face_mesh:
        print("✅ Face Mesh model created")

        # Test on sample image. cv2.imread returns None (no exception) when
        # the file cannot be decoded, so both failure modes land here.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            image_height, image_width = image.shape[:2]
            results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.multi_face_landmarks:
                _report_face_metrics(
                    results.multi_face_landmarks[0], image_width, image_height
                )
            else:
                print("❌ No face detected")

    print("\n✅ Face Mesh test completed")
def _joint_angle(p1, p2, p3, image_width: int = 1, image_height: int = 1) -> float:
    """
    Return the angle at ``p2`` in degrees between segments p2->p1 and p2->p3.

    Landmark x/y are normalised per axis, so they are scaled by the image
    size first; otherwise angles are distorted on non-square images.
    Returns NaN when either segment has zero length.
    """
    scale = np.array([image_width, image_height], dtype=float)
    vertex = np.array([p2.x, p2.y])
    v1 = (np.array([p1.x, p1.y]) - vertex) * scale
    v2 = (np.array([p3.x, p3.y]) - vertex) * scale
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return float("nan")
    # Rounding can push the cosine just outside [-1, 1] for (anti)parallel
    # segments, which would make arccos return NaN.
    cosine = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return float(np.degrees(np.arccos(cosine)))


def _label_angle(angle: float, upper: float, upper_label: str,
                 lower: float, lower_label: str) -> str:
    """Map an angle to ``upper_label`` / ``lower_label`` / 'neutral' (or 'unknown' for NaN)."""
    if np.isnan(angle):
        return "unknown"
    if angle > upper:
        return upper_label
    if angle < lower:
        return lower_label
    return "neutral"


def _report_pose_metrics(landmarks, image_width: int, image_height: int) -> None:
    """Print key pose keypoints plus right-elbow and left-knee angles."""
    num_landmarks = len(landmarks)
    print(f"✅ Pose detected: {num_landmarks} keypoints")

    # Key keypoints
    key_indices = {
        "nose": 0,
        "left_shoulder": 11,
        "right_shoulder": 12,
        "left_elbow": 13,
        "right_elbow": 14,
        "left_wrist": 15,
        "right_wrist": 16,
        "left_hip": 23,
        "right_hip": 24,
        "left_knee": 25,
        "right_knee": 26,
        "left_ankle": 27,
        "right_ankle": 28,
    }
    print("\nKey keypoints:")
    for name, idx in key_indices.items():
        landmark = landmarks[idx]
        print(f" {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}, visibility={landmark.visibility:.2f}")

    # Right arm angle
    right_shoulder = landmarks[12]
    right_elbow = landmarks[14]
    right_wrist = landmarks[16]
    right_elbow_angle = _joint_angle(
        right_shoulder, right_elbow, right_wrist, image_width, image_height
    )
    print(f"\nRight elbow angle: {right_elbow_angle:.1f}°")
    print(f" Interpretation: {_label_angle(right_elbow_angle, 150, 'extended', 90, 'folded')}")

    # Check if arm is raised (image y grows downward, so smaller y is higher)
    if right_wrist.y < right_elbow.y < right_shoulder.y:
        print(" Action: raise_right (arm raised)")

    # Knee angles
    left_knee_angle = _joint_angle(
        landmarks[23], landmarks[25], landmarks[27], image_width, image_height
    )
    print(f"\nLeft knee angle: {left_knee_angle:.1f}°")
    print(f" Interpretation: {_label_angle(left_knee_angle, 160, 'standing', 120, 'knee_bend')}")


def test_pose(
    image_path: str = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
) -> None:
    """
    Test MediaPipe Pose (33 keypoints)

    Creates a static-image Pose model, runs it on ``image_path`` and prints
    key keypoints together with the right-elbow and left-knee angles.
    The model is always created, so the call verifies the installation
    even when no test image is available.

    Args:
        image_path: Image to analyse. Defaults to the preview frame this
            script was originally written against.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Pose")
    print("=" * 60)

    mp_pose = mp.solutions.pose

    # The context manager guarantees close() runs even if processing raises.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,  # Full model
        enable_segmentation=False,
        min_detection_confidence=0.5,
    ) as pose:
        print("✅ Pose model created")

        # cv2.imread returns None (no exception) for undecodable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            image_height, image_width = image.shape[:2]
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.pose_landmarks:
                _report_pose_metrics(
                    results.pose_landmarks.landmark, image_width, image_height
                )
            else:
                print("❌ No pose detected")

    print("\n✅ Pose test completed")
def _classify_gesture(extensions) -> str:
    """
    Map five finger-extension flags to a gesture name.

    Args:
        extensions: ``[thumb, index, middle, ring, pinky]`` booleans.

    Returns:
        One of 'open_hand', 'fist', 'thumbs_up', 'peace_sign', 'pointing'
        or 'unknown'.
    """
    thumb, index, middle, ring, pinky = extensions
    if all(extensions):
        return "open_hand"
    if not any(extensions):
        return "fist"
    if thumb and not any(extensions[1:]):
        return "thumbs_up"
    # Only ring and pinky must be folded here; testing extensions[2:] would
    # include the middle finger and make this branch unreachable.
    if index and middle and not (ring or pinky):
        return "peace_sign"
    if index and not (thumb or middle or ring or pinky):
        return "pointing"
    return "unknown"


def _report_hand(number: int, hand_label: str, landmarks) -> None:
    """Print key landmarks, finger extension flags and the gesture for one hand."""
    print(f"\n✅ Hand {number} detected ({hand_label}): {len(landmarks)} keypoints")

    # Key landmarks
    key_indices = {
        "wrist": 0,
        "thumb_tip": 4,
        "index_tip": 8,
        "middle_tip": 12,
        "ring_tip": 16,
        "pinky_tip": 20,
    }
    print(" Key landmarks:")
    for name, i in key_indices.items():
        lm = landmarks[i]
        print(f" {name} ({i}): x={lm.x:.3f}, y={lm.y:.3f}")

    # Calculate finger extensions: a finger counts as extended when its tip
    # is above its base joint in the image (smaller y), i.e. this heuristic
    # assumes an upright hand.
    finger_names = ['thumb', 'index', 'middle', 'ring', 'pinky']
    tip_base_pairs = [(4, 2), (8, 5), (12, 9), (16, 13), (20, 17)]
    extensions = [landmarks[tip].y < landmarks[base].y for tip, base in tip_base_pairs]
    print(f"\n Finger extensions: {finger_names}")
    print(f" {extensions}")

    # Detect gesture
    print(f" Detected gesture: {_classify_gesture(extensions)}")


def test_hands(
    image_path: str = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
) -> None:
    """
    Test MediaPipe Hands (21 keypoints per hand)

    Creates a static-image Hands model, runs it on ``image_path`` and, for
    each detected hand, prints key landmarks, per-finger extension flags and
    a coarse gesture label. The model is always created, so the call
    verifies the installation even when no test image is available.

    Args:
        image_path: Image to analyse. Defaults to the preview frame this
            script was originally written against.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Hands")
    print("=" * 60)

    mp_hands = mp.solutions.hands

    # The context manager guarantees close() runs even if processing raises.
    with mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.5,
    ) as hands:
        print("✅ Hands model created")

        # cv2.imread returns None (no exception) for undecodable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.multi_hand_landmarks:
                # multi_handedness is index-aligned with multi_hand_landmarks.
                detections = zip(results.multi_hand_landmarks, results.multi_handedness)
                for number, (hand_landmarks, handedness) in enumerate(detections, start=1):
                    hand_label = handedness.classification[0].label
                    _report_hand(number, hand_label, hand_landmarks.landmark)
            else:
                print("❌ No hands detected")

    print("\n✅ Hands test completed")
def test_holistic(
    image_path: str = "/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
) -> None:
    """
    Test MediaPipe Holistic (Face + Pose + Hands combined)

    Creates a static-image Holistic model, runs it on ``image_path`` and
    prints how many landmarks each detected component (face, pose, left
    hand, right hand) produced. The model is always created, so the call
    verifies the installation even when no test image is available.

    Args:
        image_path: Image to analyse. Defaults to the preview frame this
            script was originally written against.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Holistic")
    print("=" * 60)

    mp_holistic = mp.solutions.holistic

    # The context manager guarantees close() runs even if processing raises.
    with mp_holistic.Holistic(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
        refine_face_landmarks=True,
    ) as holistic:
        print("✅ Holistic model created")

        # cv2.imread returns None (no exception) for undecodable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # (printed label, result attribute, unit) for each component.
            components = (
                ("Face", "face_landmarks", "landmarks"),
                ("Pose", "pose_landmarks", "keypoints"),
                ("Left hand", "left_hand_landmarks", "keypoints"),
                ("Right hand", "right_hand_landmarks", "keypoints"),
            )
            detected_count = 0
            for label, attribute, unit in components:
                component = getattr(results, attribute)
                if component:
                    print(f"✅ {label}: {len(component.landmark)} {unit}")
                    detected_count += 1

            if detected_count == 0:
                print("❌ No landmarks detected")
            else:
                print(f"\nTotal detections: {detected_count} components")

    print("\n✅ Holistic test completed")
def main():
    """
    Run every MediaPipe module test in sequence.

    Prints a banner with the installed MediaPipe version, runs the Face
    Mesh, Pose, Hands and Holistic tests in that order, then prints a short
    list of suggested next steps.
    """
    banner = "=" * 70

    print(banner)
    print("MediaPipe Installation Test")
    print(banner)
    print(f"\nMediaPipe version: {mp.__version__}")
    print()

    # Test all modules
    for run_module_test in (test_face_mesh, test_pose, test_hands, test_holistic):
        run_module_test()

    print("\n" + banner)
    print("✅ All MediaPipe tests completed!")
    print(banner)

    next_steps = (
        " 1. Face Mesh: Use for eye/mouth action detection",
        " 2. Pose: Use for arm/leg/feet action detection",
        " 3. Hands: Use for hand gesture detection",
        " 4. Holistic: Use for full-body action detection",
    )
    print("\nNext steps:")
    for step in next_steps:
        print(step)
# Run the full set of MediaPipe tests only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()