#!/opt/homebrew/bin/python3.11
"""
Pose Processor - Pose Estimation
Uses YOLOv8 Pose via ultralytics (local model)
"""

import sys
import json
import argparse
import os
import signal

# Make the sibling redis_publisher module importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher

# COCO keypoint names, in the order the pose model emits them.
KEYPOINT_NAMES = (
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
)

# Keypoints must exceed this confidence to contribute to the bounding box.
BBOX_MIN_CONFIDENCE = 0.3

# Frames without any detected person are still recorded every N frames.
EMPTY_FRAME_SAMPLE_INTERVAL = 30


def signal_handler(signum, frame):
    """Log the received signal and terminate the process with exit code 1."""
    print(f"POSE: Received signal {signum}, exiting...")
    sys.exit(1)


def _write_result(output_path, result):
    """Serialize *result* as indented JSON to *output_path*."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)


def _probe_video(video_path):
    """Return ``(fps, total_frames)`` as reported by OpenCV for *video_path*."""
    import cv2

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture handle even if a property query raises.
        cap.release()
    return fps, total_frames


def _keypoint_rows(keypoints):
    """Convert a keypoints container into nested lists: persons -> points -> values.

    Recent ultralytics releases wrap keypoints in a ``Keypoints`` object whose
    underlying tensor is exposed as ``.data``; iterating the wrapper itself
    yields further wrappers rather than ``(x, y, conf)`` rows. Going through
    ``.data`` / ``.tolist()`` handles that wrapper as well as raw tensors or
    arrays, and plain nested sequences pass through unchanged.
    """
    raw = getattr(keypoints, "data", keypoints)
    to_list = getattr(raw, "tolist", None)
    return to_list() if callable(to_list) else raw


def _bbox_from_keypoints(keypoints):
    """Return the box enclosing all sufficiently confident keypoints.

    Falls back to an all-zero box when no keypoint exceeds
    ``BBOX_MIN_CONFIDENCE``.
    """
    # NOTE(review): some ultralytics versions zero the x/y of low-confidence
    # keypoints, which would pull this box toward (0, 0) -- confirm.
    confident = [kp for kp in keypoints if kp["confidence"] > BBOX_MIN_CONFIDENCE]
    if not confident:
        return {"x": 0, "y": 0, "width": 0, "height": 0}

    xs = [kp["x"] for kp in confident]
    ys = [kp["y"] for kp in confident]
    return {
        "x": int(min(xs)),
        "y": int(min(ys)),
        "width": int(max(xs) - min(xs)),
        "height": int(max(ys) - min(ys)),
    }


def _extract_persons(keypoints):
    """Build the per-person ``{"keypoints": [...], "bbox": {...}}`` records.

    Points that do not carry at least ``(x, y, confidence)`` are skipped.
    Returns an empty list when *keypoints* is ``None``.
    """
    if keypoints is None:
        return []

    persons = []
    for person_points in _keypoint_rows(keypoints):
        named_points = [
            {
                "name": KEYPOINT_NAMES[i] if i < len(KEYPOINT_NAMES) else f"kp_{i}",
                "x": float(point[0]),
                "y": float(point[1]),
                "confidence": float(point[2]),
            }
            for i, point in enumerate(person_points)
            if len(point) >= 3
        ]
        persons.append(
            {"keypoints": named_points, "bbox": _bbox_from_keypoints(named_points)}
        )
    return persons


def process_pose(video_path: str, output_path: str, uuid: str = ""):
    """Process video for pose estimation using YOLOv8 Pose.

    Installs SIGTERM/SIGINT handlers, runs the ``yolov8n-pose.pt`` model over
    the video, and writes a JSON document to *output_path*.

    Args:
        video_path: Path to the input video file.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, progress is reported through ``RedisPublisher``.

    Returns:
        The dict that was written: ``frame_count``, ``fps`` and ``frames``.
        Each entry of ``frames`` holds ``frame``, ``timestamp`` and
        ``persons``. If ultralytics is not installed, an empty result
        (0 frames) is written and returned.
    """
    # Set up signal handlers
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    publisher = RedisPublisher(uuid) if uuid else None

    if publisher:
        publisher.info("pose", "POSE_START")

    try:
        from ultralytics import YOLO  # pyright: ignore
    except ImportError:
        if publisher:
            publisher.error("pose", "ultralytics not installed")
        empty = {"frame_count": 0, "fps": 0.0, "frames": []}
        if publisher:
            publisher.complete("pose", "0 frames")
        _write_result(output_path, empty)
        return empty

    if publisher:
        publisher.info("pose", "POSE_LOADING_MODEL")

    # Load YOLOv8 Pose model
    # yolov8n-pose.pt = nano (fastest)
    # yolov8s-pose.pt = small
    # yolov8m-pose.pt = medium
    model = YOLO("yolov8n-pose.pt")

    # Get video info
    fps, total_frames = _probe_video(video_path)

    if publisher:
        publisher.info("pose", f"fps={fps}, frames={total_frames}")
        publisher.progress("pose", 0, total_frames, "Starting")

    # Process video with YOLO Pose. No `pose=` argument is passed: the task is
    # determined by the pose checkpoint, and `pose` in the ultralytics config
    # is a training loss gain rather than an inference switch.
    detections = model(
        video_path,
        conf=0.5,  # confidence threshold
        save=False,
        stream=True,
        verbose=False,
    )

    frames = []
    frame_count = 0

    for detection in detections:
        frame_count += 1

        # Prefer the frame index supplied by the result, if any; otherwise
        # fall back to the running zero-based counter.
        frame_idx = getattr(detection, "orig_frame_idx", frame_count - 1)
        timestamp = frame_idx / fps if fps > 0 else 0

        persons = _extract_persons(detection.keypoints)

        # Only add frames with poses or sample periodically
        if persons or frame_count % EMPTY_FRAME_SAMPLE_INTERVAL == 0:
            frames.append(
                {
                    "frame": frame_idx,
                    "timestamp": round(timestamp, 3),
                    "persons": persons,
                }
            )

        if publisher:
            publisher.progress("pose", frame_count, total_frames, f"Frame {frame_idx}")

    output = {
        # Some containers report no frame count; fall back to what was processed.
        "frame_count": total_frames if total_frames > 0 else frame_count,
        "fps": fps,
        "frames": frames,
    }

    if publisher:
        publisher.complete("pose", f"{len(frames)} frames with poses")

    _write_result(output_path, output)

    return output


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Pose Estimation")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
    args = parser.parse_args()

    process_pose(args.video_path, args.output_path, args.uuid)