179 lines
5.1 KiB
Python
Executable File
179 lines
5.1 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Pose Processor - Pose Estimation
|
|
Uses YOLOv8 Pose via ultralytics (local model)
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import os
|
|
import signal
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from redis_publisher import RedisPublisher
|
|
|
|
|
|
def signal_handler(signum, frame):
    """Report the received signal on stdout and exit with status 1.

    Takes the ``(signum, frame)`` pair that ``signal.signal`` passes to a
    handler; ``frame`` is not used.
    """
    notice = f"POSE: Received signal {signum}, exiting..."
    print(notice)
    raise SystemExit(1)
|
|
|
|
|
|
# COCO keypoint names, indexed by keypoint position within a person.
_COCO_KEYPOINT_NAMES = (
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
)

# Keypoints at or below this confidence do not contribute to the bounding box.
_BBOX_MIN_CONFIDENCE = 0.3

# Frames without any person are still recorded on every Nth processed frame.
_EMPTY_FRAME_SAMPLE_INTERVAL = 30


def _write_result(result, output_path):
    """Write *result* to *output_path* as indented JSON."""
    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)


def _bbox_from_keypoints(keypoints):
    """Return the integer box enclosing the sufficiently confident keypoints.

    Falls back to an all-zero box when no keypoint exceeds the confidence
    threshold.
    """
    # NOTE(review): ultralytics may zero the x/y of keypoints it considers not
    # visible; if so, points with confidence between 0.3 and 0.5 could pull
    # the box towards the origin - confirm against the installed version.
    confident = [kp for kp in keypoints if kp["confidence"] > _BBOX_MIN_CONFIDENCE]
    if not confident:
        return {"x": 0, "y": 0, "width": 0, "height": 0}

    xs = [kp["x"] for kp in confident]
    ys = [kp["y"] for kp in confident]
    return {
        "x": int(min(xs)),
        "y": int(min(ys)),
        "width": int(max(xs) - min(xs)),
        "height": int(max(ys) - min(ys)),
    }


def _extract_persons(keypoints_obj):
    """Convert one frame's keypoint output into a list of person dicts.

    Each person is ``{"keypoints": [...], "bbox": {...}}`` where every
    keypoint carries ``name``, ``x``, ``y`` and ``confidence``.
    """
    if keypoints_obj is None:
        return []

    # Iterating the ultralytics ``Keypoints`` wrapper yields further wrapper
    # objects rather than [x, y, conf] rows, so the raw per-person rows are
    # taken from ``.data`` (expected shape: persons x keypoints x 3). A plain
    # tensor is accepted as-is.
    raw = getattr(keypoints_obj, "data", keypoints_obj)
    # One bulk conversion to nested Python lists instead of per-element
    # tensor indexing.
    rows = raw.tolist() if hasattr(raw, "tolist") else raw

    persons = []
    for person_rows in rows:
        keypoints = [
            {
                "name": (
                    _COCO_KEYPOINT_NAMES[i]
                    if i < len(_COCO_KEYPOINT_NAMES)
                    else f"kp_{i}"
                ),
                "x": float(kp[0]),
                "y": float(kp[1]),
                "confidence": float(kp[2]),
            }
            for i, kp in enumerate(person_rows)
            # Rows without a confidence column are skipped.
            if len(kp) >= 3
        ]
        persons.append(
            {"keypoints": keypoints, "bbox": _bbox_from_keypoints(keypoints)}
        )
    return persons


def _read_video_info(video_path):
    """Return ``(fps, total_frames)`` as reported by OpenCV for *video_path*."""
    import cv2

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if a property read fails.
        cap.release()
    return fps, total_frames


def process_pose(video_path: str, output_path: str, uuid: str = ""):
    """Process video for pose estimation using YOLOv8 Pose.

    Args:
        video_path: Path of the video file to analyse.
        output_path: Path the JSON result is written to.
        uuid: When non-empty, progress is reported through a
            ``RedisPublisher`` created with this value; otherwise no
            progress is published.

    Returns:
        The dict that was written to *output_path*:
        ``{"frame_count": ..., "fps": ..., "frames": [...]}``. ``frame_count``
        and ``fps`` are the values OpenCV reports for the video; ``frames``
        holds every frame with at least one person plus a periodic sample of
        empty frames. If ``ultralytics`` cannot be imported, an empty result
        (``frame_count`` 0, no frames) is written and returned instead.

    Side effects:
        Installs ``signal_handler`` for SIGTERM and SIGINT.
    """
    # Set up signal handlers
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("pose", "POSE_START")

    try:
        from ultralytics import YOLO  # pyright: ignore
    except ImportError:
        # Best effort: still produce a well-formed (empty) output file.
        empty_result = {"frame_count": 0, "fps": 0.0, "frames": []}
        if publisher:
            publisher.error("pose", "ultralytics not installed")
            publisher.complete("pose", "0 frames")
        _write_result(empty_result, output_path)
        return empty_result

    if publisher:
        publisher.info("pose", "POSE_LOADING_MODEL")

    # Load YOLOv8 Pose model
    # yolov8n-pose.pt = nano (fastest)
    # yolov8s-pose.pt = small
    # yolov8m-pose.pt = medium
    model = YOLO("yolov8n-pose.pt")

    fps, total_frames = _read_video_info(video_path)

    if publisher:
        publisher.info("pose", f"fps={fps}, frames={total_frames}")
        publisher.progress("pose", 0, total_frames, "Starting")

    # Process video with YOLO Pose; stream=True yields results frame by frame.
    predictions = model(
        video_path,
        conf=0.5,  # confidence threshold
        save=False,
        stream=True,
        verbose=False,
        # NOTE(review): kept from the original call. In the ultralytics
        # configuration "pose" looks like a training loss gain rather than an
        # inference switch - confirm and drop if it has no effect.
        pose=True,
    )

    frames = []
    frame_count = 0

    for frame_result in predictions:
        frame_count += 1

        # Use the index supplied by the result when there is one, otherwise
        # the zero-based position in the stream.
        frame_idx = getattr(frame_result, "orig_frame_idx", frame_count - 1)
        timestamp = frame_idx / fps if fps > 0 else 0

        persons = _extract_persons(frame_result.keypoints)

        # Only add frames with poses or sample periodically
        if persons or frame_count % _EMPTY_FRAME_SAMPLE_INTERVAL == 0:
            frames.append(
                {
                    "frame": frame_idx,
                    "timestamp": round(timestamp, 3),
                    "persons": persons,
                }
            )

        if publisher:
            publisher.progress(
                "pose", frame_count, total_frames, f"Frame {frame_idx}"
            )

    output = {"frame_count": total_frames, "fps": fps, "frames": frames}

    if publisher:
        publisher.complete("pose", f"{len(frames)} frames with poses")

    _write_result(output, output_path)
    return output
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(description="Pose Estimation")
|
|
parser.add_argument("video_path", help="Path to video file")
|
|
parser.add_argument("output_path", help="Output JSON path")
|
|
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
|
args = parser.parse_args()
|
|
|
|
process_pose(args.video_path, args.output_path, args.uuid)
|