refactor: remove face embedding architecture - single Qdrant _faces collection

- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need to be reimplemented with _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
This commit is contained in:
Accusys
2026-06-24 22:27:09 +08:00
parent 360cb991e1
commit 074cdcdbed
60 changed files with 657 additions and 9454 deletions
+6 -186
View File
@@ -39,140 +39,8 @@ def get_conn():
def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
    """Merge traces within the same cut whose embedding centroids are similar.

    Intended for the case where the same person re-appears inside one cut and
    was assigned a new trace id. Each trace is assigned to the scene that
    contains most of its frames; within a scene, a trace is folded into an
    earlier trace when the cosine similarity of their mean embeddings is at
    least 0.75.

    The function relies on ``face["embedding"]`` being present. When fewer
    than two traces carry embeddings it returns ``face_data`` untouched, so
    with embedding-free input it is effectively disabled.

    Args:
        face_data: Dict with a ``"frames"`` mapping of frame-number strings to
            ``{"faces": [...]}``; faces are dicts that may carry ``trace_id``,
            ``embedding``, ``x``, ``y``, ``width``, ``height``, ``confidence``.
            Optional ``"metadata"`` may provide ``fps``.
        cut_scenes: Iterable of dicts with ``scene_number``, ``start_frame``
            and ``end_frame`` (inclusive). ``None`` is treated as empty.

    Returns:
        The same ``face_data`` object. If at least one merge happened, the
        faces' ``trace_id`` values are rewritten in place and
        ``face_data["traces"]`` and ``face_data["metadata"]["trace_stats"]``
        are rebuilt.
    """
    # TODO: Reimplement with Qdrant _faces collection
    frames = face_data.get("frames", {})
    if not frames:
        return face_data

    # Map each frame to its scene/cut number (later scenes win on overlap).
    frame_to_scene = {}
    for scene in cut_scenes or ():
        for frame_no in range(scene["start_frame"], scene["end_frame"] + 1):
            frame_to_scene[frame_no] = scene["scene_number"]

    # Collect, per trace, the frames it appears in and its embeddings.
    trace_frames = defaultdict(list)
    trace_embeddings = defaultdict(list)
    for frame_key, frame_data in frames.items():
        frame_no = int(frame_key)
        for face in frame_data.get("faces", []):
            tid = face.get("trace_id")
            if tid is None:
                continue
            trace_frames[tid].append(frame_no)
            emb = face.get("embedding")
            if emb is not None:
                trace_embeddings[tid].append(emb)

    # Nothing to compare unless at least two traces have embeddings.
    if len(trace_embeddings) < 2:
        return face_data

    # Unit-length centroid per trace, so a dot product is a cosine similarity.
    trace_centroids = {}
    for tid, embs in trace_embeddings.items():
        centroid = np.mean(embs, axis=0)
        norm = np.linalg.norm(centroid)
        trace_centroids[tid] = centroid / norm if norm > 0 else centroid

    # Group traces by the scene holding the majority of their frames.
    # Frames outside every scene vote for -1; such traces are never merged.
    scene_traces = defaultdict(list)
    for tid, frame_numbers in trace_frames.items():
        if tid not in trace_centroids:
            continue
        votes = defaultdict(int)
        for frame_no in frame_numbers:
            votes[frame_to_scene.get(frame_no, -1)] += 1
        scene = max(votes, key=votes.get)
        if scene >= 0:
            scene_traces[scene].append(tid)

    SIMILARITY_THRESHOLD = 0.75

    # Decide all merges first (absorbed id -> surviving id). A trace belongs
    # to exactly one scene and an absorbed trace is never used as a survivor,
    # so the mapping has no chains and can be applied in a single pass.
    remap = {}
    for tids in scene_traces.values():
        if len(tids) < 2:
            continue
        for i, keep_tid in enumerate(tids):
            if keep_tid in remap:
                continue
            for other_tid in tids[i + 1:]:
                if other_tid in remap:
                    continue
                sim = float(
                    np.dot(trace_centroids[keep_tid], trace_centroids[other_tid])
                )
                if sim >= SIMILARITY_THRESHOLD:
                    remap[other_tid] = keep_tid

    merged = len(remap)
    if merged == 0:
        return face_data

    # Fall back to the default when fps is missing, None or 0 so that the
    # duration computation cannot divide by zero.
    fps = face_data.get("metadata", {}).get("fps", 25.0) or 25.0

    # Relabel absorbed faces and rebuild per-trace paths in one pass.
    new_trace_frames = defaultdict(list)
    for frame_key, frame_data in frames.items():
        frame_no = int(frame_key)
        for face in frame_data.get("faces", []):
            tid = face.get("trace_id")
            if tid is None:
                continue
            if tid in remap:
                tid = remap[tid]
                face["trace_id"] = tid
            new_trace_frames[tid].append(
                {
                    "frame": frame_no,
                    # NOTE(review): face_index is always written as 0 here;
                    # confirm no consumer relies on the real per-frame index.
                    "face_index": 0,
                    "bbox": {
                        "x": face.get("x", 0),
                        "y": face.get("y", 0),
                        "width": face.get("width", 0),
                        "height": face.get("height", 0),
                    },
                    "confidence": face.get("confidence", 0.0),
                }
            )

    new_traces = {}
    for tid, path in new_trace_frames.items():
        frames_sorted = sorted({p["frame"] for p in path})
        first, last = frames_sorted[0], frames_sorted[-1]
        new_traces[str(tid)] = {
            "trace_id": tid,
            "start_frame": first,
            "end_frame": last,
            "duration_frames": last - first + 1,
            "duration_seconds": (last - first) / fps,
            "total_appearances": len(path),
            "path": path,
        }

    face_data["traces"] = new_traces
    # setdefault: "metadata" is optional everywhere else in this function.
    face_data.setdefault("metadata", {})["trace_stats"] = {
        "total_traces": len(new_traces),
        "active_traces": len(new_traces),
        "long_traces": sum(
            1 for t in new_traces.values() if t["duration_frames"] >= 2
        ),
    }
    print(
        f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces"
    )
    return face_data
@@ -235,57 +103,12 @@ def run_face_tracker(
print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")
# Load embeddings from DB for the face tracker
# Embeddings no longer loaded from DB - use IoU-only tracking
file_uuid = (
face_json_path.split("/")[-1]
.replace(".face.json", "")
.replace("_traced.json", "")
)
try:
conn = get_conn()
cur = conn.cursor()
cur.execute(
f"""
SELECT frame_number, x, y, width, height, embedding
FROM {SCHEMA}.face_detections
WHERE file_uuid = %s AND embedding IS NOT NULL
""",
(file_uuid,),
)
emb_rows = cur.fetchall()
conn.close()
# Build lookup: frame_number → list of (bbox, embedding)
emb_map = {}
for fn, x, y, w, h, emb in emb_rows:
emb_map.setdefault(fn, []).append(((x, y, w, h), emb))
print(f"[TRACE] Loaded {len(emb_rows)} embeddings from DB")
# Attach embeddings to face data
attached = 0
for fnum_str, frm_data in face_data.get("frames", {}).items():
fnum = int(fnum_str)
for face in frm_data.get("faces", []):
x, y, w, h = (
face.get("x", 0),
face.get("y", 0),
face.get("width", 0),
face.get("height", 0),
)
candidates = emb_map.get(fnum, [])
# Find matching embedding by bbox proximity
for (ex, ey, ew, eh), emb in candidates:
if (
abs(x - ex) < 10
and abs(y - ey) < 10
and abs(w - ew) < 10
and abs(h - eh) < 10
):
face["embedding"] = emb
attached += 1
break
print(f"[TRACE] Attached {attached} embeddings to faces")
except Exception as e:
print(f"[TRACE] WARNING: Could not load embeddings: {e}")
# Load cut boundaries from cut.json (same directory as face.json)
cut_boundaries = None
@@ -301,7 +124,7 @@ def run_face_tracker(
print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries")
face_data = track_faces(
face_data, use_embedding=True, cut_boundaries=cut_boundaries
face_data, use_embedding=False, cut_boundaries=cut_boundaries
)
# Merge traces within same cut (same person re-appearing after occlusion/pose change)
@@ -309,7 +132,7 @@ def run_face_tracker(
face_data = merge_traces_within_cuts(face_data, cut_scenes)
metadata = face_data.get("metadata", {})
metadata["tracking_method"] = "iou_embedding"
metadata["tracking_method"] = "iou_only"
metadata["tracked_at"] = datetime.now().isoformat()
face_data["metadata"] = metadata
@@ -350,22 +173,19 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
if face_id is None:
face_id = f"face_{trace_id}"
attributes = face.get("attributes")
embedding = face.get("embedding")
bbox = json.dumps({"x": x, "y": y, "width": w, "height": h})
embed_vec = embedding if embedding and len(embedding) > 0 else None
try:
cur.execute(
f"""
UPDATE {schema}.face_detections
SET trace_id = %s, embedding = %s, face_id = %s
SET trace_id = %s, face_id = %s
WHERE file_uuid = %s AND frame_number = %s
AND x = %s AND y = %s AND width = %s AND height = %s
""",
(
trace_id,
embed_vec,
face_id,
file_uuid,
frame_num,