feat: tmdb_agent now inserts identities and file_identities into the DB

- tmdb_agent.py: INSERT identities with status='pending'
- tmdb_agent.py: INSERT file_identities (file_uuid → identity_id)
- identity.json: file_bindings includes file_uuid, movie_id, character, cast_order
- backfill_file_identities.py: migrate existing TMDb identities
- Tested: 27 Charade cast identities linked to file
This commit is contained in:
Accusys
2026-06-26 13:39:08 +08:00
parent 6cbc11efda
commit 67caf09732
2 changed files with 234 additions and 2 deletions
+87 -2
View File
@@ -207,11 +207,11 @@ def main():
"name": m["name"],
"identity_type": "people",
"source": "tmdb",
"status": "confirmed",
"status": "pending",
"tmdb_id": person_id,
"tmdb_profile": profile_url,
"metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"},
"file_bindings": [],
"file_bindings": [{"file_uuid": args.file_uuid, "movie_id": movie["id"], "character": m.get("character", ""), "cast_order": i}],
"created_at": now,
"updated_at": now,
}
@@ -240,6 +240,7 @@ def main():
"tmdb_id": person_id,
"character": m.get("character", ""),
"order": i,
"profile_path": m.get("profile_path"),
})
if (i + 1) % 5 == 0:
@@ -256,6 +257,90 @@ def main():
with open(index_path, "w", encoding="utf-8") as f:
json.dump(index, f, indent=2, ensure_ascii=False)
# 6. Insert identities into database and create file_identities links
print(f"[TKG-AGENT] Syncing {len(created_identities)} identities to database...")
identities_table = f"{schema}.identities" if schema else "identities"
file_identities_table = f"{schema}.file_identities" if schema else "file_identities"
conn = psycopg2.connect(args.db)
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
synced_count = 0
for ci in created_identities:
try:
# Insert into identities table (ON CONFLICT DO UPDATE)
cur.execute(f"""
INSERT INTO {identities_table} (
uuid, name, identity_type, source, status,
tmdb_id, tmdb_profile, metadata, created_at, updated_at
) VALUES (
%s, %s, %s, %s, %s,
%s, %s, %s, %s, %s
)
ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET
tmdb_profile = EXCLUDED.tmdb_profile,
metadata = EXCLUDED.metadata,
updated_at = EXCLUDED.updated_at
RETURNING id
""", (
ci["identity_uuid"],
ci["name"],
"people",
"tmdb",
"pending",
ci["tmdb_id"],
f"https://image.tmdb.org/t/p/w185{ci['profile_path']}" if ci.get("profile_path") else None,
json.dumps({
"tmdb_character": ci.get("character", ""),
"tmdb_cast_order": ci.get("order", 0),
"tmdb_movie_id": movie["id"],
"tmdb_movie_title": movie["title"],
}),
now,
now,
))
identity_row = cur.fetchone()
if identity_row:
identity_id = identity_row["id"]
# Insert into file_identities table (link file_uuid to identity_id)
cur.execute(f"""
INSERT INTO {file_identities_table} (
file_uuid, identity_id, confidence, metadata, created_at
) VALUES (
%s, %s, %s, %s, %s
)
ON CONFLICT (file_uuid, identity_id) DO UPDATE SET
confidence = EXCLUDED.confidence,
metadata = EXCLUDED.metadata,
created_at = EXCLUDED.created_at
""", (
args.file_uuid,
identity_id,
1.0,
json.dumps({
"source": "tmdb_cast",
"tmdb_movie_id": movie["id"],
"tmdb_movie_title": movie["title"],
"character": ci.get("character", ""),
"cast_order": ci.get("order", 0),
}),
now,
))
synced_count += 1
except Exception as e:
print(f" [WARN] Failed to sync {ci['name']}: {e}", file=sys.stderr)
conn.commit()
cur.close()
conn.close()
print(f"[TKG-AGENT] Synced {synced_count}/{len(created_identities)} identities to database")
# Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data
cache = {
"file_uuid": args.file_uuid,