fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing

- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 and trace_ingest now query 'asrx' instead of 'asr'
- Trace chunks removed (now derived dynamically from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log errors instead of silently discarding them
- Add publish_pipeline_progress calls at each pipeline stage
  (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
Accusys
2026-07-02 10:43:46 +08:00
parent d791d138f2
commit 3eabd45882
65 changed files with 9477 additions and 3852 deletions
+27 -82
View File
@@ -21,8 +21,6 @@ import json
import argparse
from collections import defaultdict
import numpy as np
import psycopg2
import psycopg2.extras
from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -30,13 +28,8 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "uti
from qdrant_faces import update_trace_ids
# Config
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.environ.get("MOMENTRY_DB_SCHEMA", "dev")
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
def get_conn():
return psycopg2.connect(DB_URL)
SCHEMA = os.environ.get("DATABASE_SCHEMA", "public")
def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
@@ -146,67 +139,17 @@ def run_face_tracker(
def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHEMA):
"""Insert traced face detections into face_detections table with trace_id"""
conn = get_conn()
cur = conn.cursor()
"""Update Qdrant _faces collection with trace_id after face tracking.
face_detections table is deprecated — trace_id is stored only in Qdrant _faces payload.
"""
with open(traced_json_path) as f:
data = json.load(f)
frames = data.get("frames", {})
total_stored = 0
for frame_num_str, frame_data in sorted(frames.items(), key=lambda x: int(x[0])):
frame_num = int(frame_num_str)
faces = frame_data.get("faces", [])
for face in faces:
trace_id = face.get("trace_id")
if trace_id is None:
continue
x = face.get("x", 0)
y = face.get("y", 0)
w = face.get("width", 0)
h = face.get("height", 0)
confidence = face.get("confidence", 0.0)
face_id = face.get("face_id")
if face_id is None:
face_id = f"face_{trace_id}"
attributes = face.get("attributes")
bbox = json.dumps({"x": x, "y": y, "width": w, "height": h})
try:
cur.execute(
f"""
UPDATE {schema}.face_detections
SET trace_id = %s, face_id = %s
WHERE file_uuid = %s AND frame_number = %s
AND x = %s AND y = %s AND width = %s AND height = %s
""",
(
trace_id,
face_id,
file_uuid,
frame_num,
x,
y,
w,
h,
),
)
if cur.rowcount > 0:
total_stored += 1
except Exception as e:
print(f"[TRACE] Error storing face at frame {frame_num}: {e}")
conn.rollback()
continue
conn.commit()
# Build trace_mapping for Qdrant update
trace_mapping = {} # {frame: {bbox_key: trace_id}}
# Build trace_mapping for Qdrant update: {frame: {bbox_key: trace_id}}
trace_mapping = {}
for frame_num_str, frame_data in sorted(frames.items(), key=lambda x: int(x[0])):
frame_num = int(frame_num_str)
trace_mapping[frame_num] = {}
@@ -224,22 +167,26 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
print(f"[TRACE] Warning: Qdrant trace_id update failed: {e}")
qdrant_updated = 0
# Log trace summary
cur.execute(
f"SELECT COUNT(DISTINCT trace_id) FROM {schema}.face_detections WHERE file_uuid = %s AND trace_id IS NOT NULL",
(file_uuid,),
)
db_trace_count = cur.fetchone()[0]
# Count unique traces from Qdrant
try:
from qdrant_faces import get_file_faces
points = get_file_faces(file_uuid)
trace_ids = set()
for p in points:
tid = p.get("payload", {}).get("trace_id")
if tid is not None and tid > 0:
trace_ids.add(tid)
qdrant_trace_count = len(trace_ids)
except Exception as e:
print(f"[TRACE] Warning: Qdrant trace count failed: {e}")
qdrant_trace_count = 0
cur.close()
conn.close()
print(
f"[TRACE] Stored {total_stored} face detections, {db_trace_count} unique traces in DB"
total_faces = sum(
1 for fd in frames.values() for f in fd.get("faces", []) if f.get("trace_id") is not None
)
if qdrant_updated > 0:
print(f"[TRACE] Updated {qdrant_updated} Qdrant points with trace_id")
return total_stored, db_trace_count
print(f"[TRACE] Updated {qdrant_updated} Qdrant points with trace_id, {qdrant_trace_count} unique traces")
return total_faces, qdrant_trace_count
def main():
@@ -248,8 +195,6 @@ def main():
parser.add_argument("--face-json", help="Path to face.json (default: auto-detect)")
parser.add_argument("--schema", default=SCHEMA, help="DB schema name")
parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
parser.add_argument(
"--filter-eyes",
@@ -270,8 +215,8 @@ def main():
# Step 1: Run face tracker
run_face_tracker(face_json, traced_json, filter_eyes=args.filter_eyes)
# Step 2: Store in DB with trace_id
total, traces = store_traced_faces(args.file_uuid, traced_json, args.schema)
# Step 2: Store in Qdrant with trace_id
total, traces = store_traced_faces(args.file_uuid, traced_json)
print(f"[TRACE] Done: {total} detections, {traces} traces")