Files
momentry_core/scripts/auto_identify_persons.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

201 lines
7.4 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Auto-Identify Persons: Bridge face_clustered.json + ASRX speaker data
Creates/updates person_identities with auto-generated names and speaker links.
"""
import json
import os
import sys
import psycopg2
from collections import defaultdict
# Video UUID to process: taken from the first command-line argument when one
# is given, otherwise the built-in sample UUID is used.
if len(sys.argv) > 1:
    UUID = sys.argv[1]
else:
    UUID = "384b0ff44aaaa1f1"

# Directory holding this video's input and output JSON files.
BASE_DIR = f"output/{UUID}"

# Keyword arguments handed to psycopg2.connect().
DB_CONFIG = dict(
    host="localhost",
    user="accusys",
    dbname="momentry",
)
def load_json(filepath):
    """Read and parse a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which is not UTF-8 on every platform.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
# Parameterized upsert for one person row. Values are bound by the driver,
# never interpolated into the SQL text.
_UPSERT_PERSON_SQL = """
INSERT INTO dev.person_identities (person_id, video_uuid, name, speaker_id,
    first_appearance_time, last_appearance_time, appearance_count, metadata)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (person_id) DO UPDATE SET
    name = EXCLUDED.name,
    speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id),
    first_appearance_time = EXCLUDED.first_appearance_time,
    last_appearance_time = EXCLUDED.last_appearance_time,
    appearance_count = EXCLUDED.appearance_count,
    updated_at = NOW()
"""


def _collect_person_stats(frames):
    """Aggregate per-person appearance statistics from clustered frames.

    Args:
        frames: Iterable of frame dicts; each must have ``timestamp`` and,
            when it contains a labelled face, ``frame``. ``faces`` is optional.

    Returns:
        A ``defaultdict`` mapping person_id to a stats dict with keys
        ``frame_count``, ``timestamps``, ``first_frame``, ``last_frame``,
        ``first_time`` and ``last_time``. ``frame_count`` is incremented once
        per face entry carrying that person_id.
    """
    person_stats = defaultdict(
        lambda: {
            "frame_count": 0,
            "timestamps": [],
            "first_frame": None,
            "last_frame": None,
            "first_time": None,
            "last_time": None,
        }
    )
    for frame in frames:
        ts = frame["timestamp"]
        for face in frame.get("faces", []):
            pid = face.get("person_id")
            if not pid:
                # Faces without a (truthy) cluster label are ignored.
                continue
            stats = person_stats[pid]
            stats["frame_count"] += 1
            stats["timestamps"].append(ts)
            if stats["first_time"] is None or ts < stats["first_time"]:
                stats["first_time"] = ts
                stats["first_frame"] = frame["frame"]
            if stats["last_time"] is None or ts > stats["last_time"]:
                stats["last_time"] = ts
                stats["last_frame"] = frame["frame"]
    return person_stats


def _tally_speaker_votes(person_stats, segments):
    """Count, per person and speaker, face timestamps inside speech segments.

    A face timestamp ``ts`` votes for a segment's speaker when
    ``segment["start"] <= ts <= segment["end"]``.

    Args:
        person_stats: Mapping person_id -> stats dict with a ``timestamps`` list.
        segments: Iterable of segment dicts with ``start``, ``end`` and an
            optional ``speaker_id``; segments without a speaker are skipped.

    Returns:
        Nested mapping ``votes[person_id][speaker_id] -> float`` containing
        only pairs that received at least one vote.
    """
    from bisect import bisect_left, bisect_right

    # Sort each person's timestamps once so that every segment lookup is two
    # binary searches instead of a scan over all timestamps.
    sorted_times = {
        pid: sorted(stats["timestamps"]) for pid, stats in person_stats.items()
    }
    votes = defaultdict(lambda: defaultdict(float))
    for segment in segments:
        speaker_id = segment.get("speaker_id")
        if not speaker_id:
            continue
        seg_start = segment["start"]
        seg_end = segment["end"]
        for pid, times in sorted_times.items():
            hits = bisect_right(times, seg_end) - bisect_left(times, seg_start)
            # Only touch the mapping on a real hit: a zero-vote entry would
            # later be picked as a "dominant" speaker and divide by zero.
            if hits > 0:
                votes[pid][speaker_id] += float(hits)
    return votes


def _dominant_speakers(votes_by_person):
    """Pick the speaker with the most votes for each person.

    Returns:
        Mapping person_id -> dict with ``speaker_id``, ``votes``,
        ``total_votes`` and ``confidence`` (winning votes / total votes).
    """
    dominant = {}
    for pid, votes in votes_by_person.items():
        if not votes:
            continue
        best = max(votes, key=votes.get)
        total = sum(votes.values())
        dominant[pid] = {
            "speaker_id": best,
            "votes": votes[best],
            "total_votes": total,
            "confidence": votes[best] / total,
        }
    return dominant


def _upsert_persons(persons):
    """Upsert person rows into ``dev.person_identities``.

    Each row runs under its own savepoint: a failing row is reported and
    rolled back without aborting the transaction, so the remaining rows are
    still written and committed.

    Args:
        persons: List of person dicts as stored in the output JSON.

    Returns:
        Number of rows whose statement executed successfully.
    """
    conn = psycopg2.connect(**DB_CONFIG)
    executed = 0
    try:
        with conn.cursor() as cur:
            for p in persons:
                # Identifiers are sent as strings, mirroring the quoted
                # literals the statement has always used for them.
                person_id = str(p["person_id"])
                speaker_id = str(p["speaker_id"]) if p["speaker_id"] else None
                metadata = json.dumps(
                    {
                        "auto_identified": True,
                        "speaker_confidence": p["speaker_confidence"],
                    }
                )
                params = (
                    person_id,
                    UUID,
                    person_id,
                    speaker_id,
                    p["first_time"],
                    p["last_time"],
                    p["frame_count"],
                    metadata,
                )
                cur.execute("SAVEPOINT person_upsert")
                try:
                    cur.execute(_UPSERT_PERSON_SQL, params)
                except psycopg2.Error as e:
                    # Undo only this row; without this PostgreSQL rejects
                    # every later statement in the aborted transaction.
                    cur.execute("ROLLBACK TO SAVEPOINT person_upsert")
                    print(f"Error: {e}")
                else:
                    cur.execute("RELEASE SAVEPOINT person_upsert")
                    executed += 1
        conn.commit()
    finally:
        # Always release the connection, even if an unexpected error escapes.
        conn.close()
    return executed


def _print_sql_preview(persons):
    """Print display-only INSERT statements for the given persons.

    The printed text is built by plain string formatting and is not escaped;
    it is a human-readable preview, not what is executed against the database.
    """
    for p in persons:
        speaker_val = f"'{p['speaker_id']}'" if p["speaker_id"] else "NULL"
        print(
            f"INSERT INTO person_identities (person_id, video_uuid, name, speaker_id, "
            f"first_appearance_time, last_appearance_time, appearance_count, metadata) "
            f"VALUES ('{p['person_id']}', '{UUID}', '{p['person_id']}', {speaker_val}, "
            f"{p['first_time']}, {p['last_time']}, {p['frame_count']}, "
            f'\'{{"auto_identified": true, "speaker_confidence": {p["speaker_confidence"]}}}\') '
            f"ON CONFLICT (person_id) DO UPDATE SET "
            f"name = EXCLUDED.name, "
            f"speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id), "
            f"first_appearance_time = EXCLUDED.first_appearance_time, "
            f"last_appearance_time = EXCLUDED.last_appearance_time, "
            f"appearance_count = EXCLUDED.appearance_count, "
            f"updated_at = NOW();"
        )


def main():
    """Link clustered faces to ASRX speakers and persist the result.

    Steps:
      1. Load ``<UUID>.face_clustered.json`` from ``BASE_DIR`` (returns early
         with a message if the file does not exist).
      2. Build per-person appearance statistics.
      3. Load ``<UUID>.asrx.json`` if present.
      4. Vote for speakers whose segments contain a person's face timestamps.
      5. Choose the dominant speaker per person.
      6. Print a report for the 20 most frequent persons.
      7. Write ``<UUID>.person_identification.json``.
      8. Upsert every person into ``dev.person_identities``.
      9. Print preview INSERT statements for the first 10 persons.
    """
    print(f"🔍 Auto-Identify Persons for {UUID}")
    print("=" * 60)

    # 1. Load face_clustered.json
    clustered_path = os.path.join(BASE_DIR, f"{UUID}.face_clustered.json")
    if not os.path.exists(clustered_path):
        print(f"❌ Not found: {clustered_path}")
        return
    clustered = load_json(clustered_path)
    print(f"📸 Loaded {len(clustered['frames'])} frames with face data")

    # 2. Build person stats from face_clustered.json
    person_stats = _collect_person_stats(clustered["frames"])
    print(f"👤 Found {len(person_stats)} unique persons from face clustering")

    # 3. Load ASRX data (optional)
    asrx_path = os.path.join(BASE_DIR, f"{UUID}.asrx.json")
    asrx_data = None
    if os.path.exists(asrx_path):
        asrx_data = load_json(asrx_path)
        print(f"🎤 Loaded ASRX: {len(asrx_data.get('segments', []))} segments")

    # 4. Match speakers to persons by time overlap
    segments = asrx_data.get("segments", []) if asrx_data else []
    person_speaker_votes = _tally_speaker_votes(person_stats, segments)

    # 5. Determine dominant speaker per person
    person_dominant_speaker = _dominant_speakers(person_speaker_votes)

    # 6. Generate report
    print(f"\n{'=' * 60}")
    print("📊 Person Identification Results")
    print(f"{'=' * 60}")
    # Most frequently seen persons first.
    sorted_persons = sorted(
        person_stats.items(), key=lambda x: x[1]["frame_count"], reverse=True
    )
    for pid, stats in sorted_persons[:20]:
        speaker_info = person_dominant_speaker.get(pid, {})
        speaker_id = speaker_info.get("speaker_id", "N/A")
        confidence = speaker_info.get("confidence", 0.0)
        print(
            f" {pid:12s} | frames:{stats['frame_count']:5d} | "
            f"time:{stats['first_time']:.0f}s-{stats['last_time']:.0f}s | "
            f"speaker:{speaker_id} ({confidence:.0%})"
        )

    # 7. Output JSON for API consumption
    output = {"uuid": UUID, "persons": []}
    for pid, stats in sorted_persons:
        speaker_info = person_dominant_speaker.get(pid, {})
        output["persons"].append(
            {
                "person_id": pid,
                "frame_count": stats["frame_count"],
                "first_time": stats["first_time"],
                "last_time": stats["last_time"],
                "speaker_id": speaker_info.get("speaker_id"),
                "speaker_confidence": speaker_info.get("confidence", 0.0),
                "suggested_name": pid,  # Use cluster label as initial name
            }
        )
    output_path = os.path.join(BASE_DIR, f"{UUID}.person_identification.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)
    print(f"\n💾 Saved: {output_path}")
    print(f"📝 Total persons identified: {len(output['persons'])}")

    # 8. Execute SQL upserts
    print("\n--- Executing SQL ---")
    executed = _upsert_persons(output["persons"])
    print(f"✅ Executed {executed} SQL statements")

    # 9. Print preview SQL INSERT statements for person_identities
    print("\n--- SQL INSERT statements for person_identities ---")
    _print_sql_preview(output["persons"][:10])
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()