Files
momentry_core/scripts/add_yolo_to_chunks.py
accusys 383201cacd feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
2026-03-25 14:53:41 +08:00

138 lines
3.5 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Add YOLO metadata to chunks
"""
import json
import os

import psycopg2
# Per-run settings. Each value may be overridden with a MOMENTRY_* environment
# variable; when the variable is unset (or empty) the original hard-coded value
# is used, so existing invocations behave exactly as before.

# Path of the YOLO detections JSON file that this script reads.
YOLO_FILE = (
    os.environ.get("MOMENTRY_YOLO_FILE")
    or "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.yolo.json"
)

# Value matched against the `uuid` column of the `chunks` table.
VIDEO_UUID = os.environ.get("MOMENTRY_VIDEO_UUID") or "39567a0eb16f39fd"

# Frames per second used to convert chunk times into frame numbers.
FPS = float(os.environ.get("MOMENTRY_FPS") or 24.0)

# Keyword arguments passed verbatim to psycopg2.connect().
# NOTE(review): the fallback password is still committed in source; supply
# MOMENTRY_PG_PASSWORD and drop the literal once no caller relies on it.
POSTGRES_CONFIG = {
    "host": os.environ.get("MOMENTRY_PG_HOST") or "localhost",
    "port": int(os.environ.get("MOMENTRY_PG_PORT") or 5432),
    "user": os.environ.get("MOMENTRY_PG_USER") or "accusys",
    "password": os.environ.get("MOMENTRY_PG_PASSWORD") or "Test3200",
    "database": os.environ.get("MOMENTRY_PG_DATABASE") or "momentry",
}
def load_yolo_data(path=None):
    """Load the YOLO detections JSON file and return the parsed document.

    Args:
        path: File to read. When omitted, the module-level ``YOLO_FILE``
            is used, which matches the original zero-argument behaviour.

    Returns:
        The decoded JSON document. It must have a top-level ``"frames"``
        entry, whose length is printed as a progress message.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"frames"`` entry.
    """
    if path is None:
        path = YOLO_FILE
    print(f"Loading YOLO data from {path}...")
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    print(f"Loaded {len(data['frames'])} frames")
    return data
def get_chunk_yolo_metadata(
    yolo_data, start_time, end_time, *, fps=None, min_confidence=0.3
):
    """Summarise the YOLO detections that fall inside a time range.

    Times are converted to frame numbers by multiplying with ``fps`` and
    truncating; every frame from the start frame to the end frame
    (inclusive) is looked up in ``yolo_data["frames"]`` by its decimal
    string key.

    Args:
        yolo_data: Parsed YOLO document with a ``"frames"`` mapping whose
            keys are frame numbers as strings and whose values may carry a
            ``"detections"`` list of dicts with ``"class_name"`` and
            ``"confidence"``.
        start_time: Start of the range, in the same time unit ``fps`` is
            expressed per. Any value accepted by ``float()``.
        end_time: End of the range (inclusive after frame conversion).
        fps: Frames per time unit. Defaults to the module-level ``FPS``.
        min_confidence: Detections with a confidence below this value are
            ignored. Defaults to 0.3, the original threshold.

    Returns:
        A dict with ``"objects"`` (sorted list of distinct class names) and
        ``"detection_count"`` (number of detections that passed the
        threshold).
    """
    if fps is None:
        fps = FPS

    # float() so that non-float numerics (e.g. decimal.Decimal) multiply
    # cleanly with a float fps instead of raising TypeError.
    first_frame = int(float(start_time) * fps)
    last_frame = int(float(end_time) * fps)

    frames = yolo_data["frames"]
    objects = set()
    detection_count = 0

    for frame_num in range(first_frame, last_frame + 1):
        frame_data = frames.get(str(frame_num))
        if frame_data is None:
            # No detections were recorded for this frame.
            continue
        for det in frame_data.get("detections", []):
            if det["confidence"] >= min_confidence:
                objects.add(det["class_name"])
                detection_count += 1

    return {
        # Sorted so the stored metadata is deterministic across runs.
        "objects": sorted(objects),
        "detection_count": detection_count,
    }
def add_yolo_metadata_to_chunks():
    """Merge YOLO object metadata into every sentence chunk of the video.

    Loads the YOLO document, selects the ``sentence`` chunks whose ``uuid``
    equals ``VIDEO_UUID`` (ordered by ``chunk_index``), and for each chunk
    that has at least one detected object merges ``{"yolo": ...}`` into the
    chunk's ``metadata`` JSONB column. Chunks without objects are left
    untouched. Work is committed every 100 chunks and once more at the end.

    The connection and cursor are always closed, even when a statement
    fails; closing without a commit discards the uncommitted updates.
    """
    yolo_data = load_yolo_data()

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            # Get all sentence chunks for this video
            cur.execute(
                """
                SELECT chunk_id, start_time, end_time
                FROM chunks
                WHERE uuid = %s AND chunk_type = 'sentence'
                ORDER BY chunk_index
                """,
                (VIDEO_UUID,),
            )
            chunks = cur.fetchall()
            total = len(chunks)
            print(f"Processing {total} chunks...")

            for done, (chunk_id, start_time, end_time) in enumerate(chunks, start=1):
                yolo_meta = get_chunk_yolo_metadata(yolo_data, start_time, end_time)

                if yolo_meta["objects"]:
                    # `||` merges at the top level, so an existing "yolo" key
                    # is replaced while other metadata keys are preserved.
                    cur.execute(
                        """
                        UPDATE chunks
                        SET metadata = COALESCE(metadata, '{}'::jsonb) || %s
                        WHERE chunk_id = %s
                        """,
                        (json.dumps({"yolo": yolo_meta}), chunk_id),
                    )

                if done % 100 == 0:
                    print(f"Processed {done}/{total} chunks...")
                    # Periodic commit keeps each transaction small.
                    conn.commit()

            # Commit whatever is left after the last full batch of 100.
            conn.commit()
    finally:
        conn.close()

    print("Done!")
def test_object_search():
    """Print, for a few sample object names, how many chunks contain them.

    For each name the ``sentence`` chunks of ``VIDEO_UUID`` are counted
    whose ``metadata->'yolo'->'objects'`` JSONB array contains that name
    (the JSONB ``?`` existence operator). This only reads from the database.

    The YOLO JSON file is not loaded here: the original loaded it into an
    unused variable, which cost a full parse of the file for no effect on
    the query results.

    The connection and cursor are always closed, even if a query fails.
    """
    test_objects = ["person", "car", "clock", "tie", "chair", "bottle"]

    # Count chunks with a given object. `?` is not a placeholder for
    # psycopg2 (only %s is), so it reaches PostgreSQL as the operator.
    query = """
        SELECT COUNT(*)
        FROM chunks
        WHERE uuid = %s
        AND chunk_type = 'sentence'
        AND metadata IS NOT NULL
        AND metadata->'yolo'->'objects' ? %s
    """

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            for obj in test_objects:
                cur.execute(query, (VIDEO_UUID, obj))
                count = cur.fetchone()[0]
                print(f"Object '{obj}': {count} chunks")
    finally:
        conn.close()
def _main():
    """Annotate the chunks with YOLO metadata, then print the search check."""
    add_yolo_metadata_to_chunks()
    print("\nTesting object search:")
    test_object_search()


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()