Files
momentry_core/scripts/store_traced_faces.py
T
Accusys 3eabd45882 fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing
- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silent discard
- Add publish_pipeline_progress calls at each pipeline stage
  (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
2026-07-02 10:43:46 +08:00

225 lines
8.1 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Store Traced Faces - Pipeline integration for face trace + position data
Flow:
1. Reads face.json output from face_processor.py
2. Runs face_tracker.py to assign trace_id per face (IoU + embedding)
3. Inserts traced faces into face_detections table with trace_id and position (x,y,w,h)
Usage:
python store_traced_faces.py --file-uuid <uuid> [--face-json <path>]
TKG Export:
trace_id + position (x,y,w,h) per frame enables spatial-temporal graph construction.
Each trace is a temporal entity; position tracks movement across frames.
"""
import sys
import os
import json
import argparse
from collections import defaultdict
import numpy as np
from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
from qdrant_faces import update_trace_ids
# Config
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
SCHEMA = os.environ.get("DATABASE_SCHEMA", "public")
def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
"""Merge traces within the same cut - DISABLED (no embeddings)."""
# TODO: Reimplement with Qdrant _faces collection
return face_data
def run_face_tracker(
face_json_path: str, traced_json_path: str, filter_eyes: bool = False
) -> str:
"""Run face_tracker.py on face.json, returns path to face_traced.json"""
from face_tracker import track_faces
with open(face_json_path) as f:
face_data = json.load(f)
# V2.0 uses list format (FaceResult), convert to dict for face_tracker
if isinstance(face_data.get("frames"), list):
frames_dict = {}
for frame in face_data["frames"]:
fnum = str(frame["frame"])
faces = []
for f in frame.get("faces", []):
bbox = f.get("bbox", f)
face = {
"x": bbox.get("x", f.get("x", 0)),
"y": bbox.get("y", f.get("y", 0)),
"width": bbox.get("width", f.get("width", 0)),
"height": bbox.get("height", f.get("height", 0)),
"confidence": f.get("confidence", 0.0),
}
if "landmarks" in f:
face["landmarks"] = f["landmarks"]
if "embedding" in f:
face["embedding"] = f["embedding"]
faces.append(face)
frames_dict[fnum] = {
"frame_number": frame["frame"],
"time_seconds": frame.get("timestamp", 0),
"faces": faces,
}
face_data["frames"] = frames_dict
# Preserve metadata (fps needed by face_tracker)
if "metadata" not in face_data:
face_data["metadata"] = {
"fps": face_data.get("fps", 30.0),
"total_frames": face_data.get("frame_count", 0),
}
# Eye filter: remove faces without at least one eye landmark
if filter_eyes:
removed = 0
for fnum_str, frm_data in face_data.get("frames", {}).items():
faces = frm_data.get("faces", [])
kept = []
for face in faces:
lm = face.get("landmarks", {})
if len(lm.get("left_eye", [])) > 0 or len(lm.get("right_eye", [])) > 0:
kept.append(face)
else:
removed += 1
frm_data["faces"] = kept
print(f"[TRACE] Eye filter: {removed} faces without eyes removed")
print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")
# Embeddings no longer loaded from DB - use IoU-only tracking
file_uuid = (
face_json_path.split("/")[-1]
.replace(".face.json", "")
.replace("_traced.json", "")
)
# Load cut boundaries from cut.json (same directory as face.json)
cut_boundaries = None
cut_scenes = None
cuts_path = face_json_path.replace("_traced.json", ".cut.json").replace(
".face.json", ".cut.json"
)
if os.path.exists(cuts_path):
with open(cuts_path) as f:
cuts = json.load(f)
cut_scenes = cuts.get("scenes", [])
cut_boundaries = {s["start_frame"] for s in cut_scenes if s["start_frame"] > 0}
print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries")
face_data = track_faces(
face_data, use_embedding=False, cut_boundaries=cut_boundaries
)
# Merge traces within same cut (same person re-appearing after occlusion/pose change)
if cut_scenes and len(cut_scenes) > 0:
face_data = merge_traces_within_cuts(face_data, cut_scenes)
metadata = face_data.get("metadata", {})
metadata["tracking_method"] = "iou_only"
metadata["tracked_at"] = datetime.now().isoformat()
face_data["metadata"] = metadata
with open(traced_json_path, "w") as f:
json.dump(face_data, f, indent=2, ensure_ascii=False)
trace_count = len(face_data.get("traces", {}))
print(f"[TRACE] Completed: {trace_count} traces -> {traced_json_path}")
return traced_json_path
def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHEMA):
"""Update Qdrant _faces collection with trace_id after face tracking.
face_detections table is deprecated — trace_id is stored only in Qdrant _faces payload.
"""
with open(traced_json_path) as f:
data = json.load(f)
frames = data.get("frames", {})
# Build trace_mapping for Qdrant update: {frame: {bbox_key: trace_id}}
trace_mapping = {}
for frame_num_str, frame_data in sorted(frames.items(), key=lambda x: int(x[0])):
frame_num = int(frame_num_str)
trace_mapping[frame_num] = {}
for face in frame_data.get("faces", []):
trace_id = face.get("trace_id")
if trace_id is None:
continue
bbox_key = f"{face['x']}_{face['y']}_{face['width']}_{face['height']}"
trace_mapping[frame_num][bbox_key] = trace_id
# Update Qdrant _faces collection with trace_id
try:
qdrant_updated = update_trace_ids(file_uuid, trace_mapping)
except Exception as e:
print(f"[TRACE] Warning: Qdrant trace_id update failed: {e}")
qdrant_updated = 0
# Count unique traces from Qdrant
try:
from qdrant_faces import get_file_faces
points = get_file_faces(file_uuid)
trace_ids = set()
for p in points:
tid = p.get("payload", {}).get("trace_id")
if tid is not None and tid > 0:
trace_ids.add(tid)
qdrant_trace_count = len(trace_ids)
except Exception as e:
print(f"[TRACE] Warning: Qdrant trace count failed: {e}")
qdrant_trace_count = 0
total_faces = sum(
1 for fd in frames.values() for f in fd.get("faces", []) if f.get("trace_id") is not None
)
print(f"[TRACE] Updated {qdrant_updated} Qdrant points with trace_id, {qdrant_trace_count} unique traces")
return total_faces, qdrant_trace_count
def main():
parser = argparse.ArgumentParser(description="Store traced faces in DB")
parser.add_argument("--file-uuid", required=True, help="Video file UUID")
parser.add_argument("--face-json", help="Path to face.json (default: auto-detect)")
parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
parser.add_argument(
"--filter-eyes",
action="store_true",
help="Remove faces without eye landmarks before tracking",
)
args = parser.parse_args()
face_json = args.face_json or os.path.join(
OUTPUT_DIR, f"{args.file_uuid}.face.json"
)
traced_json = os.path.join(OUTPUT_DIR, f"{args.file_uuid}.face_traced.json")
if not os.path.exists(face_json):
print(f"[TRACE] face.json not found: {face_json}", file=sys.stderr)
sys.exit(1)
# Step 1: Run face tracker
run_face_tracker(face_json, traced_json, filter_eyes=args.filter_eyes)
# Step 2: Store in Qdrant with trace_id
total, traces = store_traced_faces(args.file_uuid, traced_json)
print(f"[TRACE] Done: {total} detections, {traces} traces")
if __name__ == "__main__":
main()