Files
momentry_core/scripts/backfill_demographics.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

105 lines
3.0 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Backfill missing Age & Gender for persons.
"""
import os
import sys
import cv2
import psycopg2
import insightface
import numpy as np
# Keyword arguments forwarded verbatim to psycopg2.connect().
DB_CONFIG = dict(
    host="localhost",
    user="accusys",
    dbname="momentry",
)

# Root directory holding one sub-directory per video, looked up as
# <BASE_VIDEO_DIR>/<video_uuid>/<video_uuid>.mp4.
BASE_VIDEO_DIR = "output"
def _read_frame_at(video_path, start_time):
    """Return the frame decoded at ``start_time`` seconds, or None on failure.

    A diagnostic line is printed for each failure mode. The capture handle is
    released on every path, including when the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(" -> Could not open video.")
            return None
        # Coerce to float: the driver may return Decimal if the column is
        # NUMERIC (schema not visible here), which OpenCV may not accept.
        cap.set(cv2.CAP_PROP_POS_MSEC, float(start_time) * 1000.0)
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret or frame is None:
        print(" -> Failed to read frame.")
        return None
    return frame


def _estimate_demographics(face):
    """Return ``(age, gender)`` for a detected face; either may be None.

    ``gender`` is "female" when the model reports 0, "male" when it reports 1,
    and None for anything else.
    """
    # Check the value rather than hasattr(): an attribute that exists but is
    # None would otherwise crash in int().
    raw_age = getattr(face, "age", None)
    raw_gender = getattr(face, "gender", None)

    age = int(raw_age) if raw_age is not None else None
    if raw_gender == 0:
        gender = "female"
    elif raw_gender == 1:
        gender = "male"
    else:
        gender = None
    return age, gender


def main():
    """Fill in missing age/gender on ``person_identities`` rows.

    For every person whose age or gender is NULL, the earliest appearance is
    looked up, the frame at that timestamp is read from
    ``<BASE_VIDEO_DIR>/<video_uuid>/<video_uuid>.mp4`` and run through
    InsightFace. The row is updated (and committed) only when both age and
    gender were obtained. Persons that cannot be processed are skipped with a
    printed message.

    Returns None. If the model fails to load, the error is printed and the
    function returns early. The database connection is closed on every exit
    path.
    """
    print("=== Starting Missing Demographics Backfill ===")
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()

        # Load Model
        print("Loading InsightFace model...")
        try:
            app = insightface.app.FaceAnalysis(
                name="buffalo_l", providers=["CPUExecutionProvider"]
            )
            app.prepare(ctx_id=0, det_size=(320, 320))
            print("Model loaded.")
        except Exception as e:
            # Top-level boundary: report and stop, nothing can be processed.
            print(f"Error loading model: {e}")
            return

        # One row per person missing data, joined to appearances and ordered
        # so that DISTINCT ON keeps the earliest start_time.
        cur.execute("""
            SELECT DISTINCT ON (pi.person_id) pi.person_id, pa.video_uuid, pa.start_time
            FROM person_identities pi
            JOIN person_appearances pa ON pi.person_id = pa.person_id
            WHERE pi.age IS NULL OR pi.gender IS NULL
            ORDER BY pi.person_id, pa.start_time
        """)
        rows = cur.fetchall()
        total = len(rows)
        print(f"Found {total} entries to process.")

        for index, (person_id, video_uuid, start_time) in enumerate(rows, start=1):
            # Without a timestamp there is no frame to seek to.
            if start_time is None:
                continue

            print(f"[{index}/{total}] Processing: {person_id} @ {start_time:.1f}s")

            video_path = f"{BASE_VIDEO_DIR}/{video_uuid}/{video_uuid}.mp4"
            if not os.path.exists(video_path):
                print(f" -> Video not found at {video_path}")
                continue

            frame = _read_frame_at(video_path, start_time)
            if frame is None:
                continue

            faces = app.get(frame)
            if not faces:
                print(" -> No face found in frame.")
                continue

            # NOTE(review): the first detected face is taken to be the person;
            # with several faces in frame this may be someone else - confirm.
            age, gender = _estimate_demographics(faces[0])
            if age is None or gender is None:
                print(f" -> Detection incomplete (Age:{age}, Gender:{gender})")
                continue

            cur.execute(
                """
                UPDATE person_identities
                SET age = %s, gender = %s
                WHERE person_id = %s
                """,
                (age, gender, person_id),
            )
            # Commit per person so progress survives a later failure.
            conn.commit()
            print(f" -> Updated: Age {age}, Gender {gender}")

        print("=== Done ===")
    finally:
        # Closing the connection also discards its cursor.
        conn.close()
# Run the backfill only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()