fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing

- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silent discard
- Add publish_pipeline_progress calls at each pipeline stage
  (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
Accusys
2026-07-02 10:43:46 +08:00
parent d791d138f2
commit 3eabd45882
65 changed files with 9477 additions and 3852 deletions
+464 -65
View File
@@ -22,6 +22,12 @@ struct RegisterFileRequest {
user_id: Option<i64>,
content_hash: Option<String>,
pattern: Option<String>,
#[serde(default = "default_force")]
force: bool,
}
/// Serde default for `RegisterFileRequest::force`: a request body that omits
/// `force` deserializes with `force = true`.
// NOTE(review): with this default, callers that never send `force` take the
// force path in `register_file`, which unregisters an existing entry for the
// same path before registering again — confirm this destructive default is intended.
fn default_force() -> bool {
true
}
#[derive(Debug, Deserialize, Serialize)]
@@ -188,6 +194,7 @@ async fn register_single_file(
file_path: &str,
_user_id: Option<i64>,
provided_hash: Option<String>,
force: bool,
) -> RegisterFileResponse {
tracing::info!("[REGISTER] Starting registration for: {}", file_path);
@@ -325,41 +332,54 @@ async fn register_single_file(
"[REGISTER] Content hash collision → already registered: {}",
existing_uuid
);
let existing_info: Option<(String, String, f64, i32, i32, f64, i64, Option<String>)> = sqlx::query_as(
&format!("SELECT file_name, file_path, duration, width, height, fps, total_frames, registration_time::text FROM {} WHERE file_uuid = $1", videos_table)
).bind(&existing_uuid).fetch_optional(db.pool()).await.unwrap_or(None);
if let Some((ename, epath, dur, w, h, f, tf, rt)) = existing_info {
// If force=true, unregister asynchronously then continue
if force {
tracing::info!(
"[REGISTER] Force mode: async unregistering existing file {}",
existing_uuid
);
if let Err(e) = unregister_internal(&state, &existing_uuid).await {
tracing::error!("[REGISTER] Force unregister failed for {}: {:?}", existing_uuid, e);
} else {
tracing::info!("[REGISTER] Force unregister completed for {}", existing_uuid);
}
} else {
let existing_info: Option<(String, String, f64, i32, i32, f64, i64, Option<String>)> = sqlx::query_as(
&format!("SELECT file_name, file_path, duration, width, height, fps, total_frames, registration_time::text FROM {} WHERE file_uuid = $1", videos_table)
).bind(&existing_uuid).fetch_optional(db.pool()).await.unwrap_or(None);
if let Some((ename, epath, dur, w, h, f, tf, rt)) = existing_info {
return RegisterFileResponse {
success: true,
file_uuid: existing_uuid,
file_name: ename,
file_path: epath.clone(),
file_type: None,
duration: dur,
width: w as u32,
height: h as u32,
fps: f,
total_frames: tf as u64,
registration_time: rt,
already_exists: true,
message: format!("Content already registered: {}", epath),
};
}
return RegisterFileResponse {
success: true,
file_uuid: existing_uuid,
file_name: ename,
file_path: epath.clone(),
file_name: file_name.clone(),
file_path: canonical_path.clone(),
file_type: None,
duration: dur,
width: w as u32,
height: h as u32,
fps: f,
total_frames: tf as u64,
registration_time: rt,
duration: 0.0,
width: 0,
height: 0,
fps: 0.0,
total_frames: 0,
registration_time: None,
already_exists: true,
message: format!("Content already registered: {}", epath),
message: "Content already registered (identical file)".to_string(),
};
}
return RegisterFileResponse {
success: true,
file_uuid: existing_uuid,
file_name: file_name.clone(),
file_path: canonical_path.clone(),
file_type: None,
duration: 0.0,
width: 0,
height: 0,
fps: 0.0,
total_frames: 0,
registration_time: None,
already_exists: true,
message: "Content already registered (identical file)".to_string(),
};
}
}
@@ -418,12 +438,19 @@ async fn register_single_file(
let duration = temp_probe_json
.get("format")
.and_then(|f| {
let src = if has_video { f.get("duration") } else { None };
src.and_then(|v| v.as_str())
.and_then(|f| f.get("duration"))
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or_else(|| {
temp_probe_json
.get("streams")
.and_then(|s| s.as_array())
.and_then(|streams| streams.iter().next())
.and_then(|st| st.get("duration"))
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<f64>().ok())
})
.unwrap_or(0.0);
.unwrap_or(0.0)
});
let mut width = 0u32;
let mut height = 0u32;
let mut fps = 0.0;
@@ -454,7 +481,7 @@ async fn register_single_file(
let status = "registered";
let _ = sqlx::query(&format!(
"INSERT INTO {} (file_uuid, file_path, file_name, file_type, duration, width, height, fps, probe_json, status, content_hash, registration_time) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW()) ON CONFLICT (file_uuid) DO UPDATE SET file_path = EXCLUDED.file_path, file_name = EXCLUDED.file_name, status = EXCLUDED.status, content_hash = EXCLUDED.content_hash",
"INSERT INTO {} (file_uuid, file_path, file_name, file_type, duration, width, height, fps, probe_json, status, content_hash, registration_time) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW()) ON CONFLICT (file_uuid) DO UPDATE SET file_path = EXCLUDED.file_path, file_name = EXCLUDED.file_name, status = EXCLUDED.status, content_hash = EXCLUDED.content_hash, duration = EXCLUDED.duration, width = EXCLUDED.width, height = EXCLUDED.height, fps = EXCLUDED.fps, probe_json = EXCLUDED.probe_json",
videos_table
))
.bind(&file_uuid).bind(&canonical_path).bind(&final_name).bind(&final_file_type)
@@ -509,7 +536,6 @@ async fn register_single_file(
}
}
}
}
let audio_tracks: Vec<serde_json::Value> = temp_probe_json
@@ -647,6 +673,7 @@ async fn register_file(
&entry_path.to_string_lossy().to_string(),
req.user_id,
None,
req.force,
)
.await;
if result.success {
@@ -682,7 +709,49 @@ async fn register_file(
}));
}
let resp = register_single_file(&state, &file_path, req.user_id, req.content_hash).await;
// If force=true and file already exists, unregister first
if req.force {
let videos_table = schema::table_name("videos");
// Check by file_path first
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
"SELECT file_uuid FROM {} WHERE file_path = $1 LIMIT 1",
videos_table
))
.bind(&file_path)
.fetch_optional(state.db.pool())
.await
{
tracing::info!(
"[REGISTER] Force mode: unregistering existing file {}",
existing_uuid
);
if let Err(e) = unregister_internal(&state, &existing_uuid).await {
tracing::error!("[REGISTER] Force unregister failed for {}: {:?}", existing_uuid, e);
}
}
// Also check by content_hash if provided
if let Some(ref content_hash) = req.content_hash {
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
"SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
videos_table
))
.bind(content_hash)
.fetch_optional(state.db.pool())
.await
{
tracing::info!(
"[REGISTER] Force mode: unregistering by content_hash {}",
existing_uuid
);
if let Err(e) = unregister_internal(&state, &existing_uuid).await {
tracing::error!("[REGISTER] Force unregister failed for {}: {:?}", existing_uuid, e);
}
}
}
}
let resp =
register_single_file(&state, &file_path, req.user_id, req.content_hash, req.force).await;
if resp.success
&& !resp.already_exists
@@ -706,7 +775,8 @@ async fn register_file(
if let Some(ref vp) = video_path {
if let Ok(job) = auto_state.db.create_monitor_job(&auto_uuid, Some(vp)).await {
tracing::info!("[AUTO-PIPELINE] Job {} created for {}", job.id, auto_uuid);
let all_procs: Vec<&str> = vec!["cut", "asr", "asrx", "yolo", "ocr", "face", "pose", "appearance"];
let all_procs: Vec<&str> =
vec!["cut", "asr", "asrx", "ocr", "face", "pose", "appearance"];
let total = sqlx::query_scalar::<_, i64>(&format!(
"SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1",
schema::table_name("videos")
@@ -927,6 +997,7 @@ struct UnregisterResponse {
deleted_characters: u64,
deleted_chunks_rule1: u64,
deleted_processor_alerts: u64,
deleted_processor_versions: u64,
}
#[derive(Debug, Deserialize)]
@@ -948,7 +1019,11 @@ fn delete_output_files(uuid: &str) -> u64 {
for entry in entries.flatten() {
let path = entry.path();
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.starts_with(uuid) && name.ends_with(".json") {
let is_uuid_file = name.starts_with(uuid) && !path.is_dir();
let is_pipeline_log = name.starts_with("pipeline_")
&& name.contains(uuid)
&& name.ends_with(".log");
if is_uuid_file || is_pipeline_log {
if std::fs::remove_file(&path).is_ok() {
deleted_count += 1;
tracing::info!("[UNREGISTER] Deleted output file: {}", name);
@@ -957,6 +1032,17 @@ fn delete_output_files(uuid: &str) -> u64 {
}
}
}
let uuid_dir = std::path::Path::new(output_dir).join(uuid);
if uuid_dir.is_dir() {
if std::fs::remove_dir_all(&uuid_dir).is_ok() {
deleted_count += 1;
tracing::info!(
"[UNREGISTER] Deleted output directory: {}",
uuid_dir.display()
);
}
}
}
let workspace_sqlite = format!("{}.workspace.sqlite", uuid);
@@ -982,7 +1068,6 @@ async fn unregister(
tracing::info!("[UNREGISTER] Unregistering file: {}", uuid);
let videos_table = schema::table_name("videos");
let face_table = schema::table_name("face_detections");
let processor_table = schema::table_name("processor_results");
let chunks_table = schema::table_name("chunk");
let parent_chunks_table = schema::table_name("parent_chunks");
@@ -1020,7 +1105,7 @@ async fn unregister(
}};
}
let deleted_faces = delete_safe!(face_table, "file_uuid = $1", &uuid, "faces");
let deleted_faces = 0i64; // Deprecated: face_detections table removed
let deleted_processors = delete_safe!(processor_table, "file_uuid = $1", &uuid, "processors");
let deleted_parent_chunks =
delete_safe!(parent_chunks_table, "uuid = $1", &uuid, "parent chunks");
@@ -1045,20 +1130,44 @@ async fn unregister(
})?
.rows_affected() as i64;
let deleted_file_identities =
delete_safe!(file_identities_table, "file_uuid = $1", &uuid, "file identities");
let deleted_speaker_detections =
delete_safe!(speaker_detections_table, "file_uuid = $1", &uuid, "speaker detections");
let deleted_face_clusters =
delete_safe!(face_clusters_table, "file_uuid = $1", &uuid, "face clusters");
let deleted_face_recognition =
delete_safe!(face_recognition_results_table, "file_uuid = $1", &uuid, "face recognition results");
let deleted_characters =
delete_safe!(characters_table, "file_uuid = $1", &uuid, "characters");
let deleted_chunks_rule1 =
delete_safe!(chunks_rule1_table, "uuid = $1", &uuid, "chunks rule1");
let deleted_processor_alerts =
delete_safe!(processor_alerts_table, "file_uuid = $1", &uuid, "processor alerts");
let deleted_file_identities = delete_safe!(
file_identities_table,
"file_uuid = $1",
&uuid,
"file identities"
);
let deleted_speaker_detections = delete_safe!(
speaker_detections_table,
"file_uuid = $1",
&uuid,
"speaker detections"
);
let deleted_face_clusters = delete_safe!(
face_clusters_table,
"file_uuid = $1",
&uuid,
"face clusters"
);
let deleted_face_recognition = delete_safe!(
face_recognition_results_table,
"file_uuid = $1",
&uuid,
"face recognition results"
);
let deleted_characters = delete_safe!(characters_table, "file_uuid = $1", &uuid, "characters");
let deleted_chunks_rule1 = delete_safe!(chunks_rule1_table, "uuid = $1", &uuid, "chunks rule1");
let deleted_processor_alerts = delete_safe!(
processor_alerts_table,
"file_uuid = $1",
&uuid,
"processor alerts"
);
let deleted_processor_versions = delete_safe!(
"processor_versions",
"file_uuid = $1",
&uuid,
"processor versions"
);
sqlx::query(&format!(
"DELETE FROM {} WHERE file_uuid = $1",
@@ -1078,29 +1187,54 @@ async fn unregister(
})?;
tracing::info!(
"[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames, {} file_identities, {} speaker_detections, {} face_clusters, {} face_recognition_results, {} characters, {} chunks_rule1, {} processor_alerts",
"[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames, {} file_identities, {} speaker_detections, {} face_clusters, {} face_recognition_results, {} characters, {} chunks_rule1, {} processor_alerts, {} processor_versions",
deleted_faces, deleted_processors, deleted_parent_chunks, deleted_chunks,
deleted_pre_chunks, deleted_tkg_nodes, deleted_cuts, deleted_strangers,
deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames,
deleted_file_identities, deleted_speaker_detections, deleted_face_clusters,
deleted_face_recognition, deleted_characters, deleted_chunks_rule1,
deleted_processor_alerts
deleted_processor_alerts, deleted_processor_versions
);
let deleted_output_files = delete_output_files(&uuid);
let deleted_qdrant_vectors = {
let qdrant = QdrantDb::new();
match qdrant.delete_by_uuid(&uuid).await {
Ok(_) => {
tracing::info!("[UNREGISTER] Deleted Qdrant vectors for {}", uuid);
Some(1)
}
Err(e) => {
tracing::warn!("[UNREGISTER] Failed to delete Qdrant vectors: {}", e);
None
let mut total = 0u64;
if qdrant.delete_by_uuid(&uuid).await.is_ok() {
tracing::info!("[UNREGISTER] Deleted Qdrant vectors from main collection");
total += 1;
} else {
tracing::warn!("[UNREGISTER] Failed to delete Qdrant vectors from main collection");
}
let additional_collections = [
"_faces", // Python store_traced_faces.py
&format!("{}_voice", uuid), // Per-file voice embeddings
];
for coll in &additional_collections {
if QdrantDb::delete_by_uuid_from_collection(
&qdrant.client,
&qdrant.base_url,
&qdrant.api_key,
coll,
&uuid,
)
.await
.is_ok()
{
tracing::info!(
"[UNREGISTER] Deleted Qdrant vectors from collection: {}",
coll
);
total += 1;
} else {
tracing::debug!("[UNREGISTER] No vectors or collection not found: {}", coll);
}
}
Some(total)
};
let deleted_redis_keys = {
@@ -1130,7 +1264,10 @@ async fn unregister(
Some(1)
}
Err(e) => {
tracing::warn!("[UNREGISTER] Failed to delete Qdrant workspace vectors: {}", e);
tracing::warn!(
"[UNREGISTER] Failed to delete Qdrant workspace vectors: {}",
e
);
None
}
}
@@ -1155,13 +1292,275 @@ async fn unregister(
deleted_characters: deleted_characters as u64,
deleted_chunks_rule1: deleted_chunks_rule1 as u64,
deleted_processor_alerts: deleted_processor_alerts as u64,
deleted_processor_versions: deleted_processor_versions as u64,
}))
}
/// Internal unregister function - can be called from both API and register
async fn unregister_internal(state: &AppState, uuid: &str) -> Result<(), StatusCode> {
let videos_table = schema::table_name("videos");
let processor_table = schema::table_name("processor_results");
let chunks_table = schema::table_name("chunk");
let parent_chunks_table = schema::table_name("parent_chunks");
let pre_chunks_table = schema::table_name("pre_chunks");
let tkg_nodes_table = schema::table_name("tkg_nodes");
let cuts_table = schema::table_name("cuts");
let strangers_table = schema::table_name("strangers");
let chunk_vectors_table = schema::table_name("chunk_vectors");
let monitor_jobs_table = schema::table_name("monitor_jobs");
let frames_table = schema::table_name("frames");
let file_identities_table = schema::table_name("file_identities");
let speaker_detections_table = schema::table_name("speaker_detections");
let face_clusters_table = schema::table_name("face_clusters");
let face_recognition_results_table = schema::table_name("face_recognition_results");
let characters_table = schema::table_name("characters");
let chunks_rule1_table = schema::table_name("chunks_rule1");
let processor_alerts_table = schema::table_name("processor_alerts");
let mut tx = state.db.pool().begin().await.map_err(|e| {
tracing::error!("[unregister] Failed to start transaction: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
macro_rules! delete_safe {
($table:expr, $where:expr, $bind:expr, $label:expr) => {{
sqlx::query(&format!("DELETE FROM {} WHERE {}", $table, $where))
.bind($bind)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("[unregister] Failed to delete {}: {}", $label, e);
StatusCode::INTERNAL_SERVER_ERROR
})?
.rows_affected() as i64
}};
}
let _deleted_faces: i64 = 0; // Deprecated: face_detections table removed
let _deleted_processors = delete_safe!(processor_table, "file_uuid = $1", uuid, "processors");
let _deleted_parent_chunks =
delete_safe!(parent_chunks_table, "uuid = $1", uuid, "parent chunks");
let _deleted_chunks = delete_safe!(chunks_table, "file_uuid = $1", uuid, "chunks");
let _deleted_pre_chunks = delete_safe!(pre_chunks_table, "file_uuid = $1", uuid, "pre_chunks");
let _deleted_tkg_nodes = delete_safe!(tkg_nodes_table, "file_uuid = $1", uuid, "TKG nodes");
let _deleted_cuts = delete_safe!(cuts_table, "file_uuid = $1", uuid, "cuts");
let _deleted_strangers = delete_safe!(strangers_table, "file_uuid = $1", uuid, "strangers");
let _deleted_chunk_vectors =
delete_safe!(chunk_vectors_table, "uuid = $1", uuid, "chunk vectors");
let _deleted_monitor_jobs = delete_safe!(monitor_jobs_table, "uuid = $1", uuid, "monitor jobs");
let _deleted_frames: i64 = sqlx::query(&format!(
"DELETE FROM {} WHERE file_id = (SELECT id FROM {} WHERE file_uuid = $1)",
frames_table, videos_table
))
.bind(uuid)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("[unregister] Failed to delete frames: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?
.rows_affected() as i64;
let _deleted_file_identities = delete_safe!(
file_identities_table,
"file_uuid = $1",
uuid,
"file identities"
);
let _deleted_speaker_detections = delete_safe!(
speaker_detections_table,
"file_uuid = $1",
uuid,
"speaker detections"
);
let _deleted_face_clusters =
delete_safe!(face_clusters_table, "file_uuid = $1", uuid, "face clusters");
let _deleted_face_recognition = delete_safe!(
face_recognition_results_table,
"file_uuid = $1",
uuid,
"face recognition results"
);
let _deleted_characters = delete_safe!(characters_table, "file_uuid = $1", uuid, "characters");
let _deleted_chunks_rule1 = delete_safe!(chunks_rule1_table, "uuid = $1", uuid, "chunks rule1");
let _deleted_processor_alerts = delete_safe!(
processor_alerts_table,
"file_uuid = $1",
uuid,
"processor alerts"
);
let _deleted_processor_versions = delete_safe!(
"processor_versions",
"file_uuid = $1",
uuid,
"processor versions"
);
sqlx::query(&format!(
"DELETE FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(uuid)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("[unregister] Failed: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
tx.commit().await.map_err(|e| {
tracing::error!("[unregister] Failed to commit transaction: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
tracing::info!("[UNREGISTER] Deleted all data for {}", uuid);
// Delete output files
delete_output_files(uuid);
// Delete Qdrant vectors
let qdrant = QdrantDb::new();
let _ = qdrant.delete_by_uuid(uuid).await;
let _ = QdrantDb::delete_by_uuid_from_collection(
&qdrant.client,
&qdrant.base_url,
&qdrant.api_key,
"_faces",
uuid,
)
.await;
let _ = QdrantDb::delete_by_uuid_from_collection(
&qdrant.client,
&qdrant.base_url,
&qdrant.api_key,
&format!("{}_voice", uuid),
uuid,
)
.await;
// Delete Qdrant workspace
let workspace = QdrantWorkspace::new();
let _ = workspace.delete_by_file_uuid(uuid).await;
// Delete Redis keys
if let Ok(redis) = RedisClient::new() {
let _ = redis.delete_worker_job(uuid).await;
}
Ok(())
}
/// JSON body accepted by `update_file_metadata`.
///
/// Every field is optional; only the fields that are present are written to
/// the matching column of the `videos` table.
#[derive(Debug, Deserialize)]
struct UpdateMetadataRequest {
/// New value for the `duration` column, if supplied.
duration: Option<f64>,
/// New value for the `status` column, if supplied.
status: Option<String>,
/// New value for the `width` column, if supplied.
width: Option<i32>,
/// New value for the `height` column, if supplied.
height: Option<i32>,
/// New value for the `fps` column, if supplied.
fps: Option<f64>,
}
/// JSON response returned by `update_file_metadata`.
#[derive(Serialize)]
struct UpdateMetadataResponse {
/// `true` only when the UPDATE affected at least one row.
success: bool,
/// The `file_uuid` taken from the request path, echoed back.
file_uuid: String,
/// Human-readable outcome: success, "No fields to update", "File not found",
/// or the update failure text.
message: String,
}
/// Applies a partial metadata update to the `videos` row identified by `file_uuid`.
///
/// Only the fields present in the request body are written; `updated_at` is
/// set to `NOW()` whenever at least one field is supplied.
///
/// This handler always answers with `Ok(Json(..))`. The `success` flag is
/// `false` when the body carried no fields, when no row matched `file_uuid`,
/// or when the query failed (the failure is also logged).
async fn update_file_metadata(
    Path(file_uuid): Path<String>,
    State(state): State<AppState>,
    Json(req): Json<UpdateMetadataRequest>,
) -> Result<Json<UpdateMetadataResponse>, StatusCode> {
    /// Wraps the three response fields into the handler's return value.
    fn respond(
        success: bool,
        file_uuid: String,
        message: String,
    ) -> Result<Json<UpdateMetadataResponse>, StatusCode> {
        Ok(Json(UpdateMetadataResponse {
            success,
            file_uuid,
            message,
        }))
    }

    let videos_table = schema::table_name("videos");

    // Column name paired with "did the request supply a value for it".
    // The order must match the order of the `bind` calls further down, because
    // placeholders are numbered positionally starting at $2 ($1 is `file_uuid`).
    let requested = [
        ("duration", req.duration.is_some()),
        ("status", req.status.is_some()),
        ("width", req.width.is_some()),
        ("height", req.height.is_some()),
        ("fps", req.fps.is_some()),
    ];
    let mut assignments: Vec<String> = requested
        .iter()
        .filter(|(_, present)| *present)
        .enumerate()
        .map(|(position, (column, _))| format!("{} = ${}", column, position + 2))
        .collect();

    if assignments.is_empty() {
        return respond(false, file_uuid, "No fields to update".to_string());
    }
    assignments.push("updated_at = NOW()".to_string());

    let sql = format!(
        "UPDATE {} SET {} WHERE file_uuid = $1",
        videos_table,
        assignments.join(", ")
    );

    // Bind `file_uuid` as $1, then each supplied value in the same order as above.
    let mut query = sqlx::query(&sql).bind(&file_uuid);
    if let Some(duration) = req.duration {
        query = query.bind(duration);
    }
    if let Some(status) = req.status {
        query = query.bind(status);
    }
    if let Some(width) = req.width {
        query = query.bind(width);
    }
    if let Some(height) = req.height {
        query = query.bind(height);
    }
    if let Some(fps) = req.fps {
        query = query.bind(fps);
    }

    let outcome = query.execute(state.db.pool()).await;
    match outcome {
        Ok(done) if done.rows_affected() > 0 => respond(
            true,
            file_uuid,
            "Metadata updated successfully".to_string(),
        ),
        // The statement ran but matched no row for this uuid.
        Ok(_) => respond(false, file_uuid, "File not found".to_string()),
        Err(e) => {
            tracing::error!("[METADATA] Update failed: {}", e);
            respond(false, file_uuid, format!("Update failed: {}", e))
        }
    }
}
/// Builds the router for the file endpoints: register, lookup by name,
/// unregister, probe by uuid, and metadata update.
pub fn file_routes() -> Router<AppState> {
    // Method routers are named first so the path table below reads as a list.
    let register = post(register_file);
    let lookup = get(lookup_file_by_name);
    let remove = post(unregister);
    let probe = get(probe_by_uuid);
    let metadata = post(update_file_metadata);

    Router::new()
        .route("/api/v1/files/register", register)
        .route("/api/v1/files/lookup", lookup)
        .route("/api/v1/unregister", remove)
        .route("/api/v1/file/:file_uuid/probe", probe)
        .route("/api/v1/file/:file_uuid/metadata", metadata)
}
+147 -257
View File
@@ -180,7 +180,7 @@ async fn list_identities(
)
})?;
let sql = format!(
let sql = format!(
r#"SELECT i.id::int, i.uuid, i.name, i.metadata, i.status, i.starred,
COALESCE(
jsonb_agg(jsonb_build_object(
@@ -195,10 +195,19 @@ let sql = format!(
WHERE i.status IS NULL OR i.status != 'merged'
GROUP BY i.id, i.uuid, i.name, i.metadata, i.status, i.starred
ORDER BY i.id DESC LIMIT $1 OFFSET $2"#,
id_table, crate::core::db::schema::table_name("file_identities")
id_table,
crate::core::db::schema::table_name("file_identities")
);
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>, Option<String>, Option<bool>, serde_json::Value)> = match sqlx::query_as(&sql)
let rows: Vec<(
i32,
uuid::Uuid,
String,
Option<serde_json::Value>,
Option<String>,
Option<bool>,
serde_json::Value,
)> = match sqlx::query_as(&sql)
.bind(page_size as i64)
.bind(offset)
.fetch_all(db.pool())
@@ -216,10 +225,18 @@ let sql = format!(
let identities: Vec<IdentityResponse> = rows
.into_iter()
.map(|r| {
let file_bindings: Vec<FileBinding> = r.6.as_array()
.map(|arr| arr.iter().filter_map(|v| serde_json::from_value(v.clone()).ok()).collect())
.unwrap_or_default();
let file_uuids: Vec<String> = file_bindings.iter().map(|fb| fb.file_uuid.clone()).collect();
let file_bindings: Vec<FileBinding> =
r.6.as_array()
.map(|arr| {
arr.iter()
.filter_map(|v| serde_json::from_value(v.clone()).ok())
.collect()
})
.unwrap_or_default();
let file_uuids: Vec<String> = file_bindings
.iter()
.map(|fb| fb.file_uuid.clone())
.collect();
IdentityResponse {
id: r.0,
identity_uuid: r.1.to_string().replace('-', ""),
@@ -332,149 +349,57 @@ pub struct IdentityListResponse {
async fn list_face_candidates(
Query(query): Query<FaceCandidatesQuery>,
) -> Result<Json<FaceCandidatesResponse>, (StatusCode, String)> {
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to connect to database: {}", e),
))
}
};
let page = query.page.unwrap_or(1);
let page_size = std::cmp::min(query.page_size.unwrap_or(15), 100);
let offset = (page - 1) * page_size;
let min_confidence = query.min_confidence.unwrap_or(0.5);
let table = crate::core::db::schema::table_name("face_detections");
// Query Qdrant _faces for unbound faces (identity_id IS NULL)
let qdrant = crate::core::db::qdrant_db::QdrantDb::new();
let mut filter_must = vec![
serde_json::json!({"is_null": {"key": "identity_id"}}),
serde_json::json!({"key": "confidence", "range": {"gte": min_confidence}}),
];
if let Some(ref file_uuid) = query.file_uuid {
filter_must.push(serde_json::json!({"key": "file_uuid", "match": {"value": file_uuid}}));
}
let scroll_filter = serde_json::json!({"must": filter_must});
let total: i64 = if let Some(file_uuid) = &query.file_uuid {
let count_sql = format!(
"SELECT COUNT(*) FROM {} WHERE identity_id IS NULL AND confidence >= $1 AND file_uuid = $2",
table
);
match sqlx::query_scalar(&count_sql)
.bind(min_confidence)
.bind(file_uuid)
.fetch_one(db.pool())
.await
{
Ok(count) => count,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Count error: {}", e),
))
}
}
} else {
let count_sql = format!(
"SELECT COUNT(*) FROM {} WHERE identity_id IS NULL AND confidence >= $1",
table
);
match sqlx::query_scalar(&count_sql)
.bind(min_confidence)
.fetch_one(db.pool())
.await
{
Ok(count) => count,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Count error: {}", e),
))
}
}
};
let rows = if let Some(file_uuid) = &query.file_uuid {
let sql = format!(
"SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4,
jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
NULL::jsonb as attributes
FROM {}
WHERE identity_id IS NULL AND confidence >= $1 AND file_uuid = $2
ORDER BY confidence DESC
LIMIT $3 OFFSET $4",
table
);
match sqlx::query_as::<
_,
(
i32,
Option<String>,
String,
i64,
f32,
Option<serde_json::Value>,
Option<serde_json::Value>,
),
>(&sql)
.bind(min_confidence)
.bind(file_uuid)
.bind(page_size as i64)
.bind(offset as i64)
.fetch_all(db.pool())
let all_points = qdrant
.scroll_all_points("_faces", scroll_filter, 1000)
.await
{
Ok(rows) => rows,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Query error: {}", e),
))
}
}
} else {
let sql = format!(
"SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4,
jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
NULL::jsonb as attributes
FROM {}
WHERE identity_id IS NULL AND confidence >= $1
ORDER BY confidence DESC
LIMIT $2 OFFSET $3",
table
);
match sqlx::query_as::<
_,
.map_err(|e| {
(
i32,
Option<String>,
String,
i64,
f32,
Option<serde_json::Value>,
Option<serde_json::Value>,
),
>(&sql)
.bind(min_confidence)
.bind(page_size as i64)
.bind(offset as i64)
.fetch_all(db.pool())
.await
{
Ok(rows) => rows,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Query error: {}", e),
))
}
}
};
StatusCode::INTERNAL_SERVER_ERROR,
format!("Qdrant scroll failed: {}", e),
)
})?;
let candidates: Vec<FaceCandidate> = rows
let total = all_points.len() as i64;
// Sort by confidence DESC then paginate
let mut sorted: Vec<&serde_json::Value> = all_points.iter().collect();
sorted.sort_by(|a, b| {
let ca = a["payload"]["confidence"].as_f64().unwrap_or(0.0);
let cb = b["payload"]["confidence"].as_f64().unwrap_or(0.0);
cb.partial_cmp(&ca).unwrap_or(std::cmp::Ordering::Equal)
});
let paginated: Vec<&&serde_json::Value> = sorted.iter().skip(offset).take(page_size).collect();
let candidates: Vec<FaceCandidate> = paginated
.into_iter()
.map(|r| FaceCandidate {
id: r.0,
face_id: r.1,
file_uuid: r.2,
frame_number: r.3,
confidence: r.4,
bbox: r.5,
attributes: r.6,
.map(|p| {
let payload = &p["payload"];
let point_id = p["id"].as_u64().unwrap_or(0);
FaceCandidate {
id: point_id as i32,
face_id: Some(format!("{:x}", point_id)),
file_uuid: payload["file_uuid"].as_str().unwrap_or("").to_string(),
frame_number: payload["frame"].as_i64().unwrap_or(0),
confidence: payload["confidence"].as_f64().unwrap_or(0.0) as f32,
bbox: payload.get("bbox").cloned(),
attributes: None,
}
})
.collect();
@@ -518,133 +443,98 @@ pub struct UnassignedTracesResponse {
async fn list_unassigned_traces(
Query(query): Query<UnassignedTracesQuery>,
) -> Result<Json<UnassignedTracesResponse>, (StatusCode, String)> {
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to connect to database: {}", e),
))
}
};
let page = query.page.unwrap_or(1);
let page_size = std::cmp::min(query.page_size.unwrap_or(20), 100);
let offset = (page - 1) * page_size;
let table = crate::core::db::schema::table_name("face_detections");
// Query Qdrant _faces for unbound traces (identity_id IS NULL, trace_id > 0)
let qdrant = crate::core::db::qdrant_db::QdrantDb::new();
let mut filter_must: Vec<serde_json::Value> = vec![
serde_json::json!({"is_null": {"key": "identity_id"}}),
serde_json::json!({"key": "trace_id", "range": {"gt": 0}}),
];
if let Some(ref file_uuid) = query.file_uuid {
filter_must.push(serde_json::json!({"key": "file_uuid", "match": {"value": file_uuid}}));
}
let scroll_filter = serde_json::json!({"must": filter_must});
let total: i64 = if let Some(file_uuid) = &query.file_uuid {
let count_sql = format!(
"SELECT COUNT(DISTINCT trace_id) FROM {} WHERE identity_id IS NULL AND trace_id IS NOT NULL AND file_uuid = $1",
table
);
sqlx::query_scalar(&count_sql)
.bind(file_uuid)
.fetch_one(db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?
} else {
let count_sql = format!(
"SELECT COUNT(DISTINCT trace_id) FROM {} WHERE identity_id IS NULL AND trace_id IS NOT NULL",
table
);
sqlx::query_scalar(&count_sql)
.fetch_one(db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?
};
let sql = if let Some(file_uuid) = &query.file_uuid {
format!(
"WITH trace_agg AS (
SELECT trace_id, file_uuid,
COUNT(*) as frame_count,
MIN(frame_number) as start_frame,
MAX(frame_number) as end_frame
FROM {}
WHERE identity_id IS NULL AND trace_id IS NOT NULL AND file_uuid = $1
GROUP BY trace_id, file_uuid
),
best_face AS (
SELECT DISTINCT ON (fd.trace_id, fd.file_uuid)
fd.trace_id, fd.file_uuid, fd.id as best_face_id,
fd.frame_number as best_face_frame,
fd.confidence as best_face_confidence,
jsonb_build_object('x', fd.x, 'y', fd.y, 'width', fd.width, 'height', fd.height) as best_face_bbox
FROM {} fd
WHERE fd.identity_id IS NULL AND fd.trace_id IS NOT NULL AND fd.file_uuid = $1
ORDER BY fd.trace_id, fd.file_uuid, fd.confidence DESC
let all_points = qdrant
.scroll_all_points("_faces", scroll_filter, 1000)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Qdrant scroll failed: {}", e),
)
SELECT ta.trace_id, ta.file_uuid, ta.frame_count, ta.start_frame, ta.end_frame,
bf.best_face_id, bf.best_face_frame, bf.best_face_confidence, bf.best_face_bbox
FROM trace_agg ta
JOIN best_face bf ON ta.trace_id = bf.trace_id AND ta.file_uuid = bf.file_uuid
ORDER BY ta.frame_count DESC
LIMIT $2 OFFSET $3",
table, table
)
} else {
format!(
"WITH trace_agg AS (
SELECT trace_id, file_uuid,
COUNT(*) as frame_count,
MIN(frame_number) as start_frame,
MAX(frame_number) as end_frame
FROM {}
WHERE identity_id IS NULL AND trace_id IS NOT NULL
GROUP BY trace_id, file_uuid
),
best_face AS (
SELECT DISTINCT ON (fd.trace_id, fd.file_uuid)
fd.trace_id, fd.file_uuid, fd.id as best_face_id,
fd.frame_number as best_face_frame,
fd.confidence as best_face_confidence,
jsonb_build_object('x', fd.x, 'y', fd.y, 'width', fd.width, 'height', fd.height) as best_face_bbox
FROM {} fd
WHERE fd.identity_id IS NULL AND fd.trace_id IS NOT NULL
ORDER BY fd.trace_id, fd.file_uuid, fd.confidence DESC
)
SELECT ta.trace_id, ta.file_uuid, ta.frame_count, ta.start_frame, ta.end_frame,
bf.best_face_id, bf.best_face_frame, bf.best_face_confidence, bf.best_face_bbox
FROM trace_agg ta
JOIN best_face bf ON ta.trace_id = bf.trace_id AND ta.file_uuid = bf.file_uuid
ORDER BY ta.frame_count DESC
LIMIT $1 OFFSET $2",
table, table
)
};
})?;
let rows: Vec<(i32, String, i64, i64, i64, i32, i64, f64, Option<serde_json::Value>)> =
if let Some(file_uuid) = &query.file_uuid {
sqlx::query_as(&sql)
.bind(file_uuid)
.bind(page_size as i64)
.bind(offset as i64)
.fetch_all(db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Query error: {}", e)))?
} else {
sqlx::query_as(&sql)
.bind(page_size as i64)
.bind(offset as i64)
.fetch_all(db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Query error: {}", e)))?
// Group by (file_uuid, trace_id) and aggregate
use std::collections::BTreeMap;
/// Running aggregate for one `(file_uuid, trace_id)` group of `_faces` points.
///
/// Built with `Default` (all numeric fields start at 0 / 0.0, `best_bbox` at
/// `None`) and updated once per scrolled point by the aggregation loop that
/// follows this struct.
#[derive(Default)]
struct TraceAgg {
/// Number of points seen for this trace (incremented once per point).
frame_count: i64,
/// Smallest `frame` payload value seen so far.
/// NOTE(review): the aggregation loop treats `0` as "not set yet"
/// (`entry.start_frame == 0`), so once a point with frame 0 has been recorded,
/// the next point overwrites this with its own, larger frame number. Confirm
/// frames are 1-based, or track "unset" explicitly (e.g. `Option<i64>`).
start_frame: i64,
/// Largest `frame` payload value seen so far.
end_frame: i64,
/// Highest `confidence` payload value seen so far. The loop compares with a
/// strict `>` against the 0.0 default, so points with confidence <= 0.0 never
/// become the "best" face.
best_confidence: f64,
/// `id` of the point that supplied `best_confidence`; 0 when that id could not
/// be read as an integer (`as_i64().unwrap_or(0)`).
best_point_id: i64,
/// `frame` of the point that supplied `best_confidence`.
best_frame: i64,
/// Clone of the `bbox` payload entry of the best point, if it had one.
best_bbox: Option<serde_json::Value>,
}
let mut trace_map: BTreeMap<(String, i32), TraceAgg> = BTreeMap::new();
for point in &all_points {
let payload = &point["payload"];
let file_uuid = match payload["file_uuid"].as_str() {
Some(f) => f.to_string(),
None => continue,
};
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
if trace_id <= 0 {
continue;
}
let frame = payload["frame"].as_i64().unwrap_or(0);
let confidence = payload["confidence"].as_f64().unwrap_or(0.0);
let point_id = point["id"].as_i64().unwrap_or(0);
let traces: Vec<UnassignedTrace> = rows
let entry = trace_map.entry((file_uuid, trace_id)).or_default();
entry.frame_count += 1;
if frame < entry.start_frame || entry.start_frame == 0 {
entry.start_frame = frame;
}
if frame > entry.end_frame {
entry.end_frame = frame;
}
if confidence > entry.best_confidence {
entry.best_confidence = confidence;
entry.best_point_id = point_id;
entry.best_frame = frame;
entry.best_bbox = payload.get("bbox").cloned();
}
}
let total = trace_map.len() as i64;
// Sort by frame_count DESC, paginate
let mut sorted_traces: Vec<((String, i32), TraceAgg)> = trace_map.into_iter().collect();
sorted_traces.sort_by(|a, b| b.1.frame_count.cmp(&a.1.frame_count));
let paginated: Vec<_> = sorted_traces
.into_iter()
.map(|r| UnassignedTrace {
trace_id: r.0,
file_uuid: r.1,
frame_count: r.2,
start_frame: r.3,
end_frame: r.4,
best_face_id: r.5,
best_face_frame: r.6,
best_face_confidence: r.7,
best_face_bbox: r.8,
.skip(offset)
.take(page_size)
.collect();
let traces: Vec<UnassignedTrace> = paginated
.into_iter()
.map(|((file_uuid, trace_id), agg)| UnassignedTrace {
trace_id,
file_uuid,
frame_count: agg.frame_count,
start_frame: agg.start_frame,
end_frame: agg.end_frame,
best_face_id: agg.best_point_id as i32,
best_face_frame: agg.best_frame,
best_face_confidence: agg.best_confidence,
best_face_bbox: agg.best_bbox,
})
.collect();
+351 -222
View File
@@ -8,10 +8,14 @@ use axum::{
use serde::{Deserialize, Serialize};
use sqlx::Row;
use std::path::PathBuf;
use std::sync::Arc;
use crate::api::types::AppState;
use crate::core::db::schema;
use crate::core::db::PostgresDb;
use crate::core::db::QdrantDb;
use crate::core::progress::{AgentPhase, AgentProgress, AgentStats, publish_agent_progress};
use crate::core::db::redis_client::RedisClient;
pub fn identity_agent_routes() -> Router<AppState> {
Router::new()
@@ -27,10 +31,7 @@ pub fn identity_agent_routes() -> Router<AppState> {
"/api/v1/agents/identity/generate-seeds",
post(generate_seeds_handler),
)
.route(
"/api/v1/agents/identity/run",
post(run_identity_handler),
)
.route("/api/v1/agents/identity/run", post(run_identity_handler))
.route(
"/api/v1/agents/identity/confirm",
post(confirm_identity_handler),
@@ -209,39 +210,42 @@ async fn match_from_photo(
}
};
// 4. Find best matching trace (highest similarity, no threshold)
let fd_table = schema::table_name("face_detections");
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
WHERE file_uuid = $2 AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
))
.bind(&embedding_f32)
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
// 4. Find best matching trace via Qdrant _faces search
let qdrant = QdrantDb::new();
// 5. Update best match face_detection
let best_match: Option<(i32, f64)> = match qdrant.search_face_collection(
"_faces",
&embedding_f32,
1,
"file_uuid",
"",
Some(&file_uuid),
).await {
Ok(hits) if !hits.is_empty() => {
let (score, payload) = &hits[0];
let trace_id = payload.get("trace_id").and_then(|v| v.as_i64()).unwrap_or(0) as i32;
Some((trace_id, *score))
}
_ => None,
};
// 5. Update best match in Qdrant _faces (trace-scoped)
let mut traces_matched: Vec<i32> = Vec::new();
if let Some((fb_id, fb_trace, fb_sim)) = best_match {
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(fb_id)
.execute(state.db.pool())
.await;
if let Some((fb_trace, fb_sim)) = best_match {
let qdrant = QdrantDb::new();
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": fb_trace}}
]
});
let payload = serde_json::json!({"identity_id": identity_id});
if let Err(e) = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await
{
tracing::warn!("[match_from_photo] Qdrant update failed: {}", e);
}
traces_matched.push(fb_trace);
// 6. Save identity file
@@ -283,25 +287,26 @@ async fn match_from_trace(
) -> Result<Json<MatchFromPhotoResponse>, (StatusCode, Json<serde_json::Value>)> {
let uuid_clean = req.identity_uuid.replace('-', "");
// 1. Get 3 best face embeddings from this trace at different angles
// Divide trace frame range into 3 segments, pick best face from each
let fd_table = schema::table_name("face_detections");
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
"SELECT embedding, frame_number FROM {} \
WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
// 1. Get face embeddings from Qdrant _faces for this trace
let qdrant = QdrantDb::new();
let trace_filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": req.file_uuid}},
{"key": "trace_id", "match": {"value": req.trace_id}}
]
});
let points = qdrant.scroll_all_points("_faces", trace_filter, 500).await.unwrap_or_default();
let all_faces: Vec<(Vec<f32>, i64)> = points.iter().filter_map(|p| {
let vector = p.get("vector").and_then(|v| v.as_array())?;
let embedding: Vec<f32> = vector.iter().filter_map(|v| v.as_f64().map(|f| f as f32)).collect();
let frame = p["payload"]["frame"].as_i64()?;
if embedding.len() == 512 {
Some((embedding, frame))
} else {
None
}
}).collect();
if all_faces.is_empty() {
return Err((
@@ -322,18 +327,14 @@ async fn match_from_trace(
let mut query_embeddings: Vec<Vec<f32>> = Vec::new();
// Get width*height info if available (not all pipelines store it)
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
.await
.unwrap_or_default();
// Get bbox size info from Qdrant payload
let face_sizes: Vec<(i64, i32)> = points.iter().filter_map(|p| {
let frame = p["payload"]["frame"].as_i64()?;
let bbox = &p["payload"]["bbox"];
let w = bbox["width"].as_f64().unwrap_or(0.0) as i32;
let h = bbox["height"].as_f64().unwrap_or(0.0) as i32;
Some((frame, w * h))
}).collect();
let face_sizes_map: std::collections::HashMap<i64, i32> = face_sizes.into_iter().collect();
@@ -358,37 +359,39 @@ async fn match_from_trace(
query_embeddings.push(all_faces[total / 2].0.clone());
}
// 2. Three angles each find their best match; union all results
// 2. Three angles each find their best match via Qdrant; union all results
let mut validated: Vec<(i32, i32, f64)> = Vec::new();
let mut seen_trace_ids = std::collections::HashSet::new();
for qemb in &query_embeddings {
let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
WHERE file_uuid = $2
AND trace_id != $3
AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
))
.bind(qemb)
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": req.file_uuid}}
],
"must_not": [
{"key": "trace_id", "match": {"value": req.trace_id}}
]
});
if let Some((cface_id, c_trace_id, c_sim)) = top {
if seen_trace_ids.insert(c_trace_id) {
validated.push((cface_id, c_trace_id, c_sim));
let hits = match qdrant.search_face_collection(
"_faces",
qemb,
1,
"trace_id",
&req.trace_id.to_string(),
Some(&req.file_uuid),
).await {
Ok(h) => h,
Err(e) => {
tracing::warn!("[match_from_trace] Qdrant search failed: {}", e);
continue;
}
};
if let Some((score, payload)) = hits.first() {
let trace_id = payload.get("trace_id").and_then(|v| v.as_i64()).unwrap_or(0) as i32;
if seen_trace_ids.insert(trace_id) {
validated.push((0, trace_id, *score));
}
}
}
@@ -421,41 +424,49 @@ async fn match_from_trace(
}
};
// 4. Update matched face_detections
// 4. Update matched traces in Qdrant _faces
let qdrant = QdrantDb::new();
let mut traces_matched: Vec<i32> = Vec::new();
for (id, trace_id, _similarity) in &validated {
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(id)
.execute(state.db.pool())
.await
for (_id, trace_id, _similarity) in &validated {
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": req.file_uuid}},
{"key": "trace_id", "match": {"value": trace_id}}
]
});
let payload = serde_json::json!({"identity_id": identity_id});
if let Err(e) = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await
{
tracing::warn!(
"[match-from-trace] Failed to update face_detection {}: {}",
id,
"[match-from-trace] Qdrant update failed for trace {}: {}",
trace_id,
e
);
} else {
if !traces_matched.contains(trace_id) {
traces_matched.push(*trace_id);
}
} else if !traces_matched.contains(trace_id) {
traces_matched.push(*trace_id);
}
}
// 5. Also bind the source trace itself
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
fd_table
))
.bind(identity_id)
.bind(&req.file_uuid)
.bind(req.trace_id)
.execute(state.db.pool())
.await;
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": req.file_uuid}},
{"key": "trace_id", "match": {"value": req.trace_id}}
]
});
let payload = serde_json::json!({"identity_id": identity_id});
if let Err(e) = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await
{
tracing::warn!(
"[match-from-trace] Qdrant update failed for source trace {}: {}",
req.trace_id,
e
);
}
if !traces_matched.contains(&req.trace_id) {
traces_matched.push(req.trace_id);
}
@@ -667,33 +678,34 @@ fn average_embeddings<'a>(embeddings: impl Iterator<Item = &'a Vec<f32>>) -> Vec
async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
use crate::core::processor::executor::PythonExecutor;
use std::time::Duration;
let executor = PythonExecutor::new()?;
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
let output_path = std::path::PathBuf::from(&output_dir)
.join(file_uuid)
.join(format!("{}.identity_match_round1.json", file_uuid));
std::fs::create_dir_all(output_path.parent().unwrap()).ok();
let scripts_dir = executor.script_dir();
let python_path = executor.python_path();
let script_path = scripts_dir.join("identity_matcher.py");
let qdrant_url = std::env::var("QDRANT_URL")
.unwrap_or_else(|_| "http://localhost:6333".to_string());
let qdrant_api_key = std::env::var("QDRANT_API_KEY")
.unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
let qdrant_url =
std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6333".to_string());
let qdrant_api_key =
std::env::var("QDRANT_API_KEY").unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
let db_url = std::env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgresql://accusys@localhost:5432/momentry".to_string());
let db_schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string());
let mut cmd = tokio::process::Command::new(python_path);
cmd.env("MOMENTRY_OUTPUT_DIR", &output_dir);
cmd.env("DATABASE_SCHEMA", "public");
cmd.env("MOMENTRY_DB_SCHEMA", "public");
cmd.env("DATABASE_SCHEMA", &db_schema);
cmd.env("MOMENTRY_DB_SCHEMA", &db_schema);
cmd.env("DATABASE_URL", &db_url);
cmd.env("QDRANT_URL", &qdrant_url);
cmd.env("QDRANT_API_KEY", &qdrant_api_key);
@@ -702,42 +714,50 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
cmd.arg("--round").arg("1");
cmd.arg("--mark-tkg");
cmd.arg("--output").arg(&output_path);
cmd.stdout(std::process::Stdio::piped());
cmd.stderr(std::process::Stdio::piped());
tracing::info!("[FaceMatch] Starting identity_matcher for {}", file_uuid);
let output = cmd.output().await?;
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
if !output.status.success() {
tracing::error!("[FaceMatch] identity_matcher failed with exit code: {:?}", output.status.code());
tracing::error!(
"[FaceMatch] identity_matcher failed with exit code: {:?}",
output.status.code()
);
tracing::error!("[FaceMatch] stderr: {}", stderr);
tracing::error!("[FaceMatch] stdout: {}", stdout);
return Ok(0);
}
tracing::info!("[FaceMatch] stdout: {}", stdout);
if !output_path.exists() {
tracing::info!("[FaceMatch] No matches found for {}", file_uuid);
return Ok(0);
}
let content = std::fs::read_to_string(&output_path)?;
let result: serde_json::Value = serde_json::from_str(&content)?;
let matched = result.get("matched").and_then(|v| v.as_i64()).unwrap_or(0) as usize;
let tkg_updated = result.get("tkg_nodes_updated").and_then(|v| v.as_i64()).unwrap_or(0) as usize;
let tkg_updated = result
.get("tkg_nodes_updated")
.and_then(|v| v.as_i64())
.unwrap_or(0) as usize;
tracing::info!(
"[FaceMatch] Round 1 for {}: {} matches, {} TKG nodes updated",
file_uuid, matched, tkg_updated
file_uuid,
matched,
tkg_updated
);
Ok(matched)
}
@@ -755,17 +775,33 @@ async fn match_faces_iterative_pg(pool: &sqlx::PgPool, file_uuid: &str) -> anyho
/// segments (speaker_id, start_time, end_time), computes overlap,
/// and stores bindings in identity_bindings table.
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
// Load face traces with identity_id and frame numbers
let fd_table = schema::table_name("face_detections");
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
GROUP BY trace_id",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
// Load face traces with identity_id from Qdrant _faces
let qdrant = QdrantDb::new();
let trace_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "identity_id", "exists": true},
{"key": "trace_id", "match": {"value": 1}}
]
});
let points = qdrant.scroll_all_points("_faces", trace_filter, 500).await.unwrap_or_default();
// Group by trace_id, collect frames
let mut traces: HashMap<i32, Vec<i64>> = HashMap::new();
for point in &points {
let payload = &point["payload"];
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
let frame = payload["frame"].as_i64().unwrap_or(0);
traces.entry(trace_id).or_default().push(frame);
}
// Sort frames per trace
for frames in traces.values_mut() {
frames.sort();
}
if traces.is_empty() {
tracing::info!("[SpeakerBind] No face traces with identities");
@@ -818,8 +854,23 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
return Ok(0);
}
// Get fps for frame-to-time conversion
let fps: f64 = 25.0; // default, could also read from DB
// Compute fps from video table
let fps: f64 = sqlx::query_scalar::<_, f64>(
"SELECT COALESCE(fps, 25.0) FROM videos WHERE file_uuid=$1"
)
.bind(file_uuid)
.fetch_optional(pool)
.await
.ok()
.flatten()
.unwrap_or(25.0);
tracing::info!(
"[SpeakerBind] Using fps={:.3} for {} ({} traces)",
fps,
file_uuid,
traces.len()
);
// For each trace, compute overlap with each speaker
let mut bindings = 0usize;
@@ -828,13 +879,15 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
continue;
}
// Get identity_id for this trace
let fd_table = schema::table_name("face_detections");
let identity_id: Option<i32> = sqlx::query_scalar(
&format!("SELECT identity_id FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id IS NOT NULL LIMIT 1", fd_table)
)
.bind(file_uuid).bind(trace_id)
.fetch_optional(pool).await?.flatten();
// Get identity_id for this trace from Qdrant payload
let identity_id: Option<i32> = points.iter()
.find(|p| {
p["payload"]["trace_id"].as_i64() == Some(*trace_id as i64)
&& p["payload"]["identity_id"].as_i64().is_some()
&& p["payload"]["identity_id"].as_i64().unwrap() > 0
})
.and_then(|p| p["payload"]["identity_id"].as_i64())
.map(|id| id as i32);
if identity_id.is_none() {
continue;
@@ -873,18 +926,20 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
});
let ib_table = schema::table_name("identity_bindings");
let _ = sqlx::query(
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, file_uuid, confidence, metadata) \
VALUES ($1, 'speaker', $2, $3, $4, $5::jsonb) \
ON CONFLICT (identity_id, identity_type, identity_value, file_uuid) \
if let Err(e) = sqlx::query(
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, confidence, metadata) \
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
ON CONFLICT (identity_id, identity_type, identity_value) \
DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
)
.bind(identity_id)
.bind(&best_speaker)
.bind(file_uuid)
.bind(overlap_ratio)
.bind(&metadata)
.execute(pool).await;
.execute(pool).await
{
tracing::error!("[SpeakerBind] INSERT failed for trace_id={}, identity_id={}, speaker={}: {}", trace_id, identity_id, best_speaker, e);
}
// Also update speaker_detections with the identity_id
let sd_table = schema::table_name("speaker_detections");
@@ -915,16 +970,40 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
/// Pipeline-triggered entry point: runs the full identity agent for a file.
/// Reads face_clustered.json + asrx.json, extracts persons/speakers, creates identities,
/// runs iterative face matching, and binds speakers.
pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<()> {
pub async fn run_identity_agent(
db: &PostgresDb,
file_uuid: &str,
redis: Option<std::sync::Arc<RedisClient>>,
) -> anyhow::Result<()> {
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
let pool = db.pool();
// Step 1: 先跑 face matching(不需 face_clustered.json
let mut progress = AgentProgress::new(file_uuid);
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
// Step 1: Face matching (iterative TMDb matching)
progress.update_phase(AgentPhase::TmdbMatching, 0.3, "Running face matching...");
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
let matched = match_faces_iterative(pool, file_uuid).await.unwrap_or(0);
progress.stats.tmdb_matches = matched as i64;
progress.update_phase(AgentPhase::TmdbMatching, 1.0, &format!("Face matching: {} matches", matched));
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
// Step 2: Load face_clustered.json and create identities
progress.update_phase(AgentPhase::FaceClustering, 0.5, "Loading face clusters...");
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
// Step 2: 試著載入 face_clustered.json 建立新 identities
let video_dir = PathBuf::from(&output_dir).join(file_uuid);
let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", file_uuid));
let face_clustered_path = if face_clustered_path.exists() {
@@ -947,6 +1026,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let speakers = extract_speakers_from_asrx_data(&asrx_data);
let identities = analyze_person_speaker_overlap(&persons, &speakers);
progress.stats.clusters = identities.len() as i64;
let _ = identities.len();
if !identities.is_empty() {
let metadata = serde_json::json!({
@@ -969,6 +1050,13 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
.execute(pool)
.await;
}
progress.stats.identities_created = identities.len() as i64;
progress.update_phase(AgentPhase::IdentityCreation, 1.0, &format!(
"Created {} identities from clusters", identities.len()
));
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
tracing::info!(
"[IdentityAgent] Analyzed {} face clusters from face_clustered for {}",
identities.len(),
@@ -979,9 +1067,29 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
"[IdentityAgent] face_clustered.json not found for {}, skipping identity creation",
file_uuid
);
progress.update_phase(AgentPhase::IdentityCreation, 0.0, "No face_clustered.json");
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
}
// Step 3: Speaker binding
progress.update_phase(AgentPhase::SpeakerBinding, 0.5, "Binding speakers...");
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
let bound = bind_speakers(pool, file_uuid).await.unwrap_or(0);
progress.stats.speaker_bindings = bound as i64;
progress.update_phase(AgentPhase::SpeakerBinding, 1.0, &format!("Speaker binding: {} bound", bound));
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
progress.mark_completed();
if let Some(r) = redis.as_ref() {
publish_agent_progress(&r, file_uuid, &progress).await;
}
tracing::info!(
"[IdentityAgent] Done for {}: {} face matches, {} speaker bindings",
@@ -999,14 +1107,12 @@ async fn generate_seeds_handler(
let db = &state.db;
let pool = db.pool();
let count = generate_seed_embeddings(db)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": format!("{}", e)})),
)
})?;
let count = generate_seed_embeddings(db).await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": format!("{}", e)})),
)
})?;
// Auto-trigger identity agent for all ready files
if count > 0 {
@@ -1019,13 +1125,13 @@ async fn generate_seeds_handler(
);
for file_uuid in &ready_files {
let db = state.db.clone();
let redis = crate::core::db::RedisClient::new().ok().map(Arc::new);
let fid = file_uuid.clone();
tokio::spawn(async move {
match run_identity_agent(&db, &fid).await {
Ok(_) => tracing::info!(
"[GenerateSeeds] Identity agent completed for {}",
fid
),
match run_identity_agent(&db, &fid, redis).await {
Ok(_) => {
tracing::info!("[GenerateSeeds] Identity agent completed for {}", fid)
}
Err(e) => tracing::warn!(
"[GenerateSeeds] Identity agent failed for {}: {}",
fid,
@@ -1044,16 +1150,28 @@ async fn generate_seeds_handler(
})))
}
/// Find videos that are ready for identity processing (have face embeddings).
/// Find videos that are ready for identity processing (have face embeddings in Qdrant).
async fn find_ready_files(pool: &sqlx::PgPool) -> anyhow::Result<Vec<String>> {
let fd_table = crate::core::db::schema::table_name("face_detections");
let rows: Vec<(String,)> = sqlx::query_as(&format!(
"SELECT DISTINCT file_uuid FROM {} WHERE embedding IS NOT NULL AND identity_id IS NULL",
fd_table
))
.fetch_all(pool)
.await?;
Ok(rows.into_iter().map(|r| r.0).collect())
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
// Find files with faces that don't have identity_id set
let filter = json!({
"must": [
{"key": "identity_id", "match": {"value": null}}
]
});
let points = qdrant.scroll_all_points("_faces", filter, 1000).await.unwrap_or_default();
let mut file_uuids: std::collections::HashSet<String> = std::collections::HashSet::new();
for point in &points {
if let Some(fu) = point["payload"]["file_uuid"].as_str() {
file_uuids.insert(fu.to_string());
}
}
Ok(file_uuids.into_iter().collect())
}
/// API handler: POST /api/v1/agents/identity/run
@@ -1071,7 +1189,8 @@ async fn run_identity_handler(
)
})?;
match run_identity_agent(&state.db, file_uuid).await {
let redis = crate::core::db::RedisClient::new().ok().map(Arc::new);
match run_identity_agent(&state.db, file_uuid, redis).await {
Ok(()) => Ok(Json(serde_json::json!({
"success": true,
"message": format!("Identity agent completed for {}", file_uuid),
@@ -1109,29 +1228,28 @@ async fn confirm_identity_handler(
Json(req): Json<ConfirmIdentityRequest>,
) -> Result<Json<ConfirmIdentityResponse>, (StatusCode, Json<serde_json::Value>)> {
use crate::core::processor::executor::PythonExecutor;
let executor = PythonExecutor::new().map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": format!("PythonExecutor error: {}", e)})),
)
})?;
let scripts_dir = executor.script_dir();
let python_path = executor.python_path();
let script_path = scripts_dir.join("confirm_identity.py");
let qdrant_url = std::env::var("QDRANT_URL")
.unwrap_or_else(|_| "http://localhost:6333".to_string());
let qdrant_api_key = std::env::var("QDRANT_API_KEY")
.unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
let qdrant_url =
std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6333".to_string());
let qdrant_api_key =
std::env::var("QDRANT_API_KEY").unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
let db_url = std::env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgresql://accusys@localhost:5432/momentry".to_string());
let db_schema = std::env::var("DATABASE_SCHEMA")
.unwrap_or_else(|_| "dev".to_string());
let db_schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
let propagate = req.propagate.unwrap_or(true);
let mut cmd = tokio::process::Command::new(python_path);
cmd.env("DATABASE_URL", &db_url);
cmd.env("DATABASE_SCHEMA", &db_schema);
@@ -1144,31 +1262,39 @@ async fn confirm_identity_handler(
cmd.arg("--identity-id").arg(req.identity_id.to_string());
cmd.arg("--identity-uuid").arg(&req.identity_uuid);
cmd.arg("--name").arg(&req.name);
if !propagate {
cmd.arg("--no-propagate");
}
cmd.stdout(std::process::Stdio::piped());
cmd.stderr(std::process::Stdio::piped());
tracing::info!(
"[ConfirmIdentity] Starting for {} trace {} -> {} ({})",
req.file_uuid, req.trace_id, req.identity_uuid, req.name
req.file_uuid,
req.trace_id,
req.identity_uuid,
req.name
);
let output = cmd.output().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": format!("Command failed: {}", e)})),
Json(
serde_json::json!({"success": false, "message": format!("Command failed: {}", e)}),
),
)
})?;
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
if !output.status.success() {
tracing::error!("[ConfirmIdentity] Script failed with exit code: {:?}", output.status.code());
tracing::error!(
"[ConfirmIdentity] Script failed with exit code: {:?}",
output.status.code()
);
tracing::error!("[ConfirmIdentity] stderr: {}", stderr);
tracing::error!("[ConfirmIdentity] stdout: {}", stdout);
return Err((
@@ -1180,9 +1306,9 @@ async fn confirm_identity_handler(
})),
));
}
tracing::info!("[ConfirmIdentity] stdout: {}", stdout);
let json_start = stdout.find('{');
if json_start.is_none() {
return Err((
@@ -1195,7 +1321,7 @@ async fn confirm_identity_handler(
));
}
let json_str = &stdout[json_start.unwrap()..];
let result: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
@@ -1207,14 +1333,17 @@ async fn confirm_identity_handler(
})),
)
})?;
Ok(Json(ConfirmIdentityResponse {
success: result.get("status").and_then(|v| v.as_str()) == Some("success"),
file_uuid: req.file_uuid,
trace_id: req.trace_id,
identity_uuid: req.identity_uuid,
name: req.name,
steps: result.get("steps").cloned().unwrap_or(serde_json::json!({})),
steps: result
.get("steps")
.cloned()
.unwrap_or(serde_json::json!({})),
propagation: result.get("propagation").cloned(),
}))
}
+214 -253
View File
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
use sqlx::Row;
use std::process::Command;
use crate::core::db::ResourceRecord;
use crate::core::db::{QdrantDb, ResourceRecord};
pub fn identity_routes() -> Router<crate::api::types::AppState> {
Router::new()
@@ -269,12 +269,7 @@ async fn get_file_identities(
let fi_table = crate::core::db::schema::table_name("file_identities");
let total = match sqlx::query_scalar::<_, i64>(
&format!(
r#"SELECT COUNT(DISTINCT identity_id) FROM (
SELECT identity_id FROM {} WHERE file_uuid = $1 AND identity_id IS NOT NULL
UNION
SELECT identity_id FROM {} WHERE file_uuid = $1
) combined"#,
crate::core::db::schema::table_name("face_detections"),
r#"SELECT COUNT(DISTINCT identity_id) FROM {} WHERE file_uuid = $1 AND identity_id IS NOT NULL"#,
fi_table
)
)
@@ -419,7 +414,6 @@ async fn delete_identity(
Extension(auth): Extension<crate::api::middleware::UserAuth>,
Path(identity_uuid): Path<String>,
) -> Result<StatusCode, StatusCode> {
let table = crate::core::db::schema::table_name("face_detections");
let id_table = crate::core::db::schema::table_name("identities");
let history_table = crate::core::db::schema::table_name("identity_history");
@@ -440,15 +434,27 @@ async fn delete_identity(
// Delete identity file from disk
let _ = crate::core::identity::storage::delete_identity_file(&uuid_clean);
// Capture unbound faces before unbinding
let unbound_faces: Vec<(String, Option<String>, Option<i32>)> = sqlx::query_as(&format!(
"SELECT file_uuid, face_id, trace_id FROM {} WHERE identity_id = $1",
table
))
.bind(identity_id)
.fetch_all(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// Capture unbound faces from Qdrant _faces before unbinding
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default();
let unbound_faces: Vec<(String, Option<String>, Option<i32>)> = points.iter()
.filter_map(|p| {
let payload = &p["payload"];
let file_uuid = payload["file_uuid"].as_str()?.to_string();
let face_id = payload.get("face_id").and_then(|v| v.as_str()).map(|s| s.to_string());
let trace_id = payload["trace_id"].as_i64().map(|t| t as i32);
Some((file_uuid, face_id, trace_id))
})
.collect();
let face_list: Vec<serde_json::Value> = unbound_faces
.into_iter()
@@ -494,15 +500,17 @@ async fn delete_identity(
.execute(state.db.pool())
.await;
// Unbind all faces
sqlx::query(&format!(
"UPDATE {} SET identity_id = NULL WHERE identity_id = $1",
table
))
.bind(identity_id)
.execute(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// Unbind all faces in Qdrant _faces
let qdrant = QdrantDb::new();
let filter = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let payload = serde_json::json!({"identity_id": serde_json::Value::Null});
let _ = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await;
// Delete identity
sqlx::query(&format!("DELETE FROM {} WHERE id = $1", id_table))
@@ -572,17 +580,21 @@ async fn get_identity_files(
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(&format!(
"SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = $1",
crate::core::db::schema::table_name("face_detections"),
))
.bind(identity_id)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
// Get total from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default();
let unique_files: std::collections::HashSet<String> = points.iter()
.filter_map(|p| p["payload"]["file_uuid"].as_str().map(|s| s.to_string()))
.collect();
let total = unique_files.len() as i64;
Ok(Json(IdentityFilesResponse {
success: true,
@@ -673,17 +685,14 @@ async fn get_identity_faces(
})
.collect();
let total = match sqlx::query_scalar::<_, i64>(&format!(
"SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = $1",
crate::core::db::schema::table_name("face_detections"),
))
.bind(identity_id)
.fetch_one(state.db.pool())
.await
{
Ok(c) => c,
Err(_) => data.len() as i64,
};
let qdrant2 = QdrantDb::new();
let face_filter2 = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let points2 = qdrant2.scroll_all_points("_faces", face_filter2, 2000).await.unwrap_or_default();
let total = points2.len() as i64;
Ok(Json(IdentityFacesResponse {
success: true,
@@ -759,151 +768,114 @@ async fn get_file_faces(
let page_size = params.page_size.unwrap_or(50);
let offset = ((page - 1) as i64) * (page_size as i64);
let fd_table = crate::core::db::schema::table_name("face_detections");
let id_table = crate::core::db::schema::table_name("identities");
let st_table = crate::core::db::schema::table_name("strangers");
let video_table = crate::core::db::schema::table_name("videos");
// Build WHERE clauses
let mut where_clauses = vec![format!(
"fd.file_uuid = '{}'",
file_uuid.replace('\'', "''")
)];
// Get fps
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
video_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(25.0);
// Get face points from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let mut filter_conditions = vec![
json!({"key": "file_uuid", "match": {"value": file_uuid}})
];
if let Some(ref binding) = params.binding {
match binding.as_str() {
"identity" => {
where_clauses.push(format!("fd.identity_id IN (SELECT id FROM {})", id_table));
filter_conditions.push(json!({"key": "identity_id", "exists": true}));
}
"stranger" => {
where_clauses.push("fd.stranger_id IS NOT NULL".to_string());
}
"dangling" => {
where_clauses.push(format!(
"fd.identity_id IS NOT NULL AND NOT EXISTS (SELECT 1 FROM {} WHERE id = fd.identity_id)",
id_table
));
filter_conditions.push(json!({"key": "stranger_id", "exists": true}));
}
"unbound" => {
where_clauses.push("fd.identity_id IS NULL AND fd.stranger_id IS NULL".to_string());
filter_conditions.push(json!({"key": "identity_id", "match": {"value": null}}));
}
_ => {}
}
}
if let Some(tid) = params.trace_id {
where_clauses.push(format!("fd.trace_id = {}", tid));
}
if let Some(mc) = params.min_confidence {
where_clauses.push(format!("fd.confidence >= {}", mc));
}
if let Some(sf) = params.start_frame {
where_clauses.push(format!("fd.frame_number >= {}", sf));
}
if let Some(ef) = params.end_frame {
where_clauses.push(format!("fd.frame_number <= {}", ef));
filter_conditions.push(json!({"key": "trace_id", "match": {"value": tid}}));
}
let where_sql = where_clauses.join(" AND ");
let face_filter = json!({"must": filter_conditions});
let points = qdrant.scroll_all_points("_faces", face_filter, 2000).await.unwrap_or_default();
let select_sql = format!(
"SELECT fd.id::bigint as id, fd.file_uuid, \
fd.frame_number::bigint as frame_number, \
(fd.frame_number::float8 / NULLIF(v.fps, 0)) as timestamp_secs, \
fd.face_id, fd.trace_id, \
fd.x::float8 as x, fd.y::float8 as y, \
fd.width::float8 as width, fd.height::float8 as height, \
fd.confidence::float8 as confidence, \
fd.identity_id, fd.stranger_id, \
i.uuid::text as identity_uuid, i.name as identity_name, \
s.metadata as stranger_metadata \
FROM {} fd \
JOIN {} v ON v.file_uuid = fd.file_uuid \
LEFT JOIN {} i ON i.id = fd.identity_id \
LEFT JOIN {} s ON s.id = fd.stranger_id \
WHERE {} \
ORDER BY fd.frame_number, fd.trace_id \
LIMIT {} OFFSET {}",
fd_table, video_table, id_table, st_table, where_sql, page_size as i64, offset
);
// Apply additional filters in Rust
let filtered: Vec<_> = points.into_iter().filter(|p| {
let payload = &p["payload"];
let confidence = payload["confidence"].as_f64().unwrap_or(0.0);
let frame = payload["frame"].as_i64().unwrap_or(0);
let count_sql = format!(
"SELECT COUNT(*) FROM {} fd \
WHERE {}",
fd_table, where_sql
);
if let Some(mc) = params.min_confidence {
if confidence < mc { return false; }
}
if let Some(sf) = params.start_frame {
if frame < sf { return false; }
}
if let Some(ef) = params.end_frame {
if frame > ef { return false; }
}
true
}).collect();
use sqlx::Row;
let rows = sqlx::query(&select_sql)
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let total = filtered.len() as i64;
let total: i64 = sqlx::query_scalar(&count_sql)
.fetch_one(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
// Apply pagination
let paged: Vec<_> = filtered.into_iter().skip(offset as usize).take(page_size as usize).collect();
let data: Vec<FileFaceItem> = rows
.into_iter()
.map(|r| {
let identity_id: Option<i32> = r.get("identity_id");
let identity_uuid: Option<String> = r.get("identity_uuid");
let identity_name: Option<String> = r.get("identity_name");
let stranger_id: Option<i32> = r.get("stranger_id");
// Build response items
let mut data = Vec::new();
for point in &paged {
let payload = &point["payload"];
let bbox = &payload["bbox"];
let frame = payload["frame"].as_i64().unwrap_or(0);
let confidence = payload["confidence"].as_f64().unwrap_or(0.0);
let binding = if let (Some(iid), Some(iuuid), Some(iname)) =
(identity_id, identity_uuid, identity_name)
{
FaceBinding::Identity {
identity_id: iid,
identity_uuid: iuuid,
identity_name: iname,
}
} else if let Some(sid) = stranger_id {
FaceBinding::Stranger {
stranger_id: sid,
metadata: r
.get::<Option<serde_json::Value>, _>("stranger_metadata")
.unwrap_or(serde_json::Value::Null),
}
} else if let Some(iid) = identity_id {
FaceBinding::Dangling {
old_identity_id: iid,
}
} else {
FaceBinding::Unbound
};
FileFaceItem {
id: r.get("id"),
file_uuid: r.get("file_uuid"),
frame_number: r.get("frame_number"),
timestamp_secs: r.get("timestamp_secs"),
face_id: r.get("face_id"),
trace_id: r.get("trace_id"),
bbox: BBox {
x: r.get("x"),
y: r.get("y"),
width: r.get("width"),
height: r.get("height"),
},
confidence: r.get("confidence"),
binding,
}
})
.collect();
let item = FileFaceItem {
id: 0,
file_uuid: file_uuid.clone(),
frame_number: frame,
timestamp_secs: Some(frame as f64 / fps),
face_id: payload.get("face_id").and_then(|v| v.as_str()).map(|s| s.to_string()),
trace_id: payload["trace_id"].as_i64().map(|t| t as i32),
bbox: BBox {
x: bbox["x"].as_f64().unwrap_or(0.0),
y: bbox["y"].as_f64().unwrap_or(0.0),
width: bbox["width"].as_f64().unwrap_or(0.0),
height: bbox["height"].as_f64().unwrap_or(0.0),
},
confidence,
binding: FaceBinding::Unbound,
};
data.push(item);
}
Ok(Json(FileFacesResponse {
success: true,
file_uuid,
total,
page,
page_size,
page: page as usize,
page_size: page_size as usize,
data,
}))
}
// --- List Face Candidates ---
#[derive(Debug, Serialize)]
pub struct IdentityChunksResponse {
pub success: bool,
@@ -1305,76 +1277,62 @@ async fn set_profile_from_face(
Json(req): Json<SetProfileFromFaceRequest>,
) -> Result<Json<ProfileImageResponse>, (StatusCode, Json<serde_json::Value>)> {
use crate::core::db::schema;
let fd_table = schema::table_name("face_detections");
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let videos_table = schema::table_name("videos");
let uuid_clean = identity_uuid.replace('-', "");
let (face_identifier, use_trace, use_frame) = match (&req.face_id, req.id, req.trace_id) {
(Some(fid), _, _) => (fid.clone(), false, None),
(None, Some(id), _) => (id.to_string(), false, None),
(None, None, Some(trace_id)) => (trace_id.to_string(), true, req.frame_number),
(Some(fid), _, _) => (fid.clone(), None, None),
(None, Some(id), _) => (id.to_string(), None, None),
(None, None, Some(trace_id)) => (trace_id.to_string(), Some(trace_id), req.frame_number),
(None, None, None) => {
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"success": false, "message": "Either face_id, id, or trace_id is required"})),
Json(
serde_json::json!({"success": false, "message": "Either face_id, id, or trace_id is required"}),
),
));
}
};
let row: Option<(i64, i32, i32, i32, i32, f64)> = if use_trace {
// Get face data from Qdrant _faces
let qdrant = QdrantDb::new();
let row: Option<(i64, i32, i32, i32, i32, f64)> = if let Some(trace_id) = use_trace {
let mut filter_conds = vec![
json!({"key": "file_uuid", "match": {"value": req.file_uuid}}),
json!({"key": "trace_id", "match": {"value": trace_id}})
];
if let Some(frame) = use_frame {
sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND frame_number = $3 LIMIT 1",
fd_table
))
.bind(&req.file_uuid)
.bind(use_trace)
.bind(frame as i32)
.fetch_optional(state.db.pool())
.await
} else {
sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY confidence DESC LIMIT 1",
fd_table
))
.bind(&req.file_uuid)
.bind(use_trace)
.fetch_optional(state.db.pool())
.await
filter_conds.push(json!({"key": "frame", "match": {"value": frame}}));
}
let face_filter = json!({"must": filter_conds});
let points = qdrant.scroll_all_points("_faces", face_filter, 10).await.unwrap_or_default();
points.first().map(|p| {
let payload = &p["payload"];
let bbox = &payload["bbox"];
(
payload["frame"].as_i64().unwrap_or(0),
bbox["x"].as_f64().unwrap_or(0.0) as i32,
bbox["y"].as_f64().unwrap_or(0.0) as i32,
bbox["width"].as_f64().unwrap_or(0.0) as i32,
bbox["height"].as_f64().unwrap_or(0.0) as i32,
payload["confidence"].as_f64().unwrap_or(0.0),
)
})
} else if req.id.is_some() {
sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND id = $2",
fd_table
))
.bind(&req.file_uuid)
.bind(req.id.unwrap())
.fetch_optional(state.db.pool())
.await
// id lookup not supported in Qdrant - skip
None
} else {
sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND face_id = $2",
fd_table
))
.bind(&req.file_uuid)
.bind(&face_identifier)
.fetch_optional(state.db.pool())
.await
}
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"success": false, "message": format!("DB error: {}", e)})),
)
})?;
// face_id lookup not supported in Qdrant - skip
None
};
let (frame_number, x, y, width, height, confidence) = row.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
Json(serde_json::json!({"success": false, "message": "Face not found"})),
)
})?;
let (frame_number, x, y, w, h, confidence) = row.ok_or((
StatusCode::NOT_FOUND,
Json(serde_json::json!({"success": false, "message": "Face not found"})),
))?;
let video_row: Option<(String, Option<i32>, Option<i32>)> = sqlx::query_as(&format!(
"SELECT file_path, width, height FROM {} WHERE file_uuid = $1",
@@ -1400,7 +1358,7 @@ async fn set_profile_from_face(
let vw = video_width.unwrap_or(1920);
let vh = video_height.unwrap_or(1080);
crate::core::thumbnail::validator::validate_crop(x, y, width, height, vw, vh).map_err(|e| {
crate::core::thumbnail::validator::validate_crop(x, y, w, h, vw, vh).map_err(|e| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"success": false, "message": format!("Crop validation failed: {}", e)})),
@@ -1408,7 +1366,7 @@ async fn set_profile_from_face(
})?;
let select = format!("select=eq(n\\,{})", frame_number);
let vf = format!("{},crop={}:{}:{}:{}", select, width, height, x, y);
let vf = format!("{},crop={}:{}:{}:{}", select, w, h, x, y);
let output = Command::new("ffmpeg")
.args([
@@ -1465,7 +1423,10 @@ async fn set_profile_from_face(
success: true,
identity_uuid: uuid_clean,
path: file_path.to_string_lossy().to_string(),
message: format!("Profile image set from face {} (frame {}, confidence {:.2})", face_identifier, frame_number, confidence),
message: format!(
"Profile image set from face {} (frame {}, confidence {:.2})",
face_identifier, frame_number, confidence
),
}))
}
@@ -1567,21 +1528,20 @@ async fn search_identity_text(
) -> Result<Json<IdentityTextResponse>, StatusCode> {
use crate::core::db::schema;
let chunk_table = schema::table_name("chunk");
let fd_table = schema::table_name("face_detections");
let id_table = schema::table_name("identities");
let ib_table = schema::table_name("identity_bindings");
let like_q = format!("%{}%", params.q.replace('%', "%%"));
let limit = params.limit.unwrap_or(50).min(100);
let sd_table = schema::table_name("speaker_detections");
let query = format!(
r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content,
fd.identity_id, i.name AS identity_name, i.source AS identity_source,
fd.trace_id
i.id AS identity_id, i.name AS identity_name, i.source AS identity_source,
(c.metadata->>'trace_id')::int AS trace_id
FROM {} c
LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame
AND fd.identity_id IS NOT NULL
LEFT JOIN {} i ON i.id = fd.identity_id
LEFT JOIN {} ib ON ib.identity_value = c.metadata->>'trace_id'
AND ib.identity_type = 'trace'
LEFT JOIN {} i ON i.id = ib.identity_id
WHERE ($1::text IS NULL OR c.file_uuid = $1) AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2))
UNION ALL
@@ -1597,7 +1557,7 @@ async fn search_identity_text(
ORDER BY 3
LIMIT $3"#,
chunk_table, fd_table, id_table, sd_table, id_table, chunk_table
chunk_table, ib_table, id_table, sd_table, id_table, chunk_table
);
let rows = sqlx::query_as::<
@@ -1696,7 +1656,6 @@ async fn search_identities_by_text(
) -> Result<Json<IdentitySearchResponse>, StatusCode> {
use crate::core::db::schema;
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let chunk_table = schema::table_name("chunk");
let like_q = format!("%{}%", params.q.replace('%', "%%"));
let page = params.page.unwrap_or(1).max(1);
@@ -1710,26 +1669,26 @@ async fn search_identities_by_text(
let sd_table = schema::table_name("speaker_detections");
let ib_table = schema::table_name("identity_bindings");
let fi_table = schema::table_name("file_identities");
let query = format!(
r#"WITH matched AS (
SELECT i.id::int, i.name, i.source, i.tmdb_id,
fd.file_uuid, fd.trace_id,
c.file_uuid, (c.metadata->>'trace_id')::int AS trace_id,
c.chunk_id, c.start_frame, c.end_frame, c.fps,
c.start_time, c.end_time, c.text_content
FROM {} i
JOIN {} fi ON fi.identity_id = i.id
JOIN {} ib ON ib.identity_id = i.id AND ib.identity_type = 'trace'
JOIN {} fd ON fd.trace_id = ib.identity_value::int
JOIN {} c ON c.file_uuid = fd.file_uuid
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
JOIN {} c ON c.file_uuid = fi.file_uuid
AND c.metadata->>'trace_id' = ib.identity_value
WHERE (i.name ILIKE $1
OR EXISTS (
SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a
WHERE a->>'name' ILIKE $1
))
AND ($2::text IS NULL OR fd.file_uuid = $2)
AND ($2::text IS NULL OR c.file_uuid = $2)
UNION ALL
UNION ALL
SELECT i.id::int, i.name, i.source, i.tmdb_id,
sd.file_uuid, NULL::int AS trace_id,
@@ -1755,7 +1714,7 @@ SELECT *, COUNT(*) OVER() AS total_count
FROM deduped
ORDER BY name, start_time
LIMIT $3 OFFSET $4"#,
id_table, ib_table, fd_table, chunk_table, id_table, sd_table, chunk_table
id_table, fi_table, ib_table, chunk_table, id_table, sd_table, chunk_table
);
let rows = sqlx::query(&query)
@@ -2093,7 +2052,6 @@ async fn undo_identity(
let table = crate::core::db::schema::table_name("identities");
let history_table = crate::core::db::schema::table_name("identity_history");
let face_table = crate::core::db::schema::table_name("face_detections");
// Try normal identity lookup
let identity_row: Option<(i32,)> = sqlx::query_as(&format!(
@@ -2174,22 +2132,23 @@ async fn undo_identity(
)
})?;
// Re-bind faces
// Re-bind faces via Qdrant _faces
if let Some(faces) = snapshot.get("unbound_faces").and_then(|v| v.as_array()) {
let qdrant = QdrantDb::new();
for face in faces {
let file_uuid = face.get("file_uuid").and_then(|v| v.as_str());
let face_id = face.get("face_id").and_then(|v| v.as_str());
let trace_id = face.get("trace_id").and_then(|v| v.as_i64());
if let (Some(fu), Some(fid)) = (file_uuid, face_id) {
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND face_id = $3",
face_table
))
.bind(new_id)
.bind(fu)
.bind(fid)
.execute(state.db.pool())
.await;
if let (Some(fu), Some(tid)) = (file_uuid, trace_id) {
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": fu}},
{"key": "trace_id", "match": {"value": tid}}
]
});
let payload = serde_json::json!({"identity_id": new_id});
let _ = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await;
}
}
}
@@ -2377,7 +2336,6 @@ async fn redo_identity(
let table = crate::core::db::schema::table_name("identities");
let history_table = crate::core::db::schema::table_name("identity_history");
let face_table = crate::core::db::schema::table_name("face_detections");
// Get identity_id
let identity_id: i32 = sqlx::query_scalar(&format!(
@@ -2417,14 +2375,17 @@ async fn redo_identity(
// ── Delete redo: re-delete the identity ──
let _ = crate::core::identity::storage::delete_identity_file(&uuid_clean);
// Unbind all faces
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = NULL WHERE identity_id = $1",
face_table
))
.bind(identity_id)
.execute(state.db.pool())
.await;
// Unbind all faces in Qdrant _faces
let qdrant = QdrantDb::new();
let filter = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let payload = serde_json::json!({"identity_id": serde_json::Value::Null});
let _ = qdrant
.update_payload_by_filter("_faces", filter, payload)
.await;
// Delete identity
sqlx::query(&format!("DELETE FROM {} WHERE id = $1", table))
File diff suppressed because it is too large Load Diff
+114 -47
View File
@@ -7,9 +7,11 @@ use axum::{
Router,
};
use once_cell::sync::Lazy;
use serde_json::json;
use std::collections::HashMap;
use uuid::Uuid;
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{schema, PostgresDb};
/// Shared video query params: mode=normal|debug, audio=on|off
@@ -217,15 +219,32 @@ async fn bbox_overlay_video(
let start_sec = start_f as f64 / fps;
// Get face bboxes
// frame_number is BIGINT (i64) in database
let face_table = schema::table_name("face_detections");
let rows: Vec<(i64, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
&format!("SELECT frame_number, x, y, width, height, trace_id, face_id FROM {} WHERE file_uuid = $1 AND frame_number BETWEEN $2 AND $3 ORDER BY frame_number", face_table)
)
.bind(face_fuid).bind(start_f).bind(end_f)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| { tracing::error!("bbox query error: {}", e); vec![] });
// Get face bboxes from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": face_fuid}},
{"key": "frame", "range": {"gte": start_f, "lte": end_f}},
{"key": "trace_id", "match": {"value": 1}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 500).await.unwrap_or_default();
let rows: Vec<(i64, i32, i32, i32, i32, Option<i32>, Option<String>)> = points.iter().filter_map(|p| {
let payload = &p["payload"];
let frame = payload["frame"].as_i64()?;
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64()? as i32;
let y = bbox["y"].as_f64()? as i32;
let w = bbox["width"].as_f64()? as i32;
let h = bbox["height"].as_f64()? as i32;
let trace_id = payload["trace_id"].as_i64().map(|t| t as i32);
let face_id = payload.get("face_id").and_then(|v| v.as_str()).map(|s| s.to_string());
Some((frame, x, y, w, h, trace_id, face_id))
}).collect();
// Build filters — each bbox enabled only on its frame
let mut parts: Vec<String> = Vec::new();
@@ -334,16 +353,26 @@ async fn trace_video_inner(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (video_path, fps, _width, _height) = row.ok_or(StatusCode::NOT_FOUND)?;
// Query face detections to find frame range for target trace
// frame_number is BIGINT (i64) in database
let face_table = schema::table_name("face_detections");
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY frame_number",
face_table
))
.bind(&file_uuid).bind(trace_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| { tracing::error!("trace query error: {}", e); vec![] });
// Query face detections from Qdrant to find frame range for target trace
let qdrant = QdrantDb::new();
let trace_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": trace_id}}
]
});
let points = qdrant.scroll_all_points("_faces", trace_filter, 500).await.unwrap_or_default();
let rows: Vec<(i64, i32, i32, i32, i32)> = points.iter().filter_map(|p| {
let payload = &p["payload"];
let frame = payload["frame"].as_i64()?;
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64()? as i32;
let y = bbox["y"].as_f64()? as i32;
let w = bbox["width"].as_f64()? as i32;
let h = bbox["height"].as_f64()? as i32;
Some((frame, x, y, w, h))
}).collect();
if rows.is_empty() {
return Err(StatusCode::NOT_FOUND);
@@ -393,22 +422,50 @@ async fn trace_video_inner(
let end_fn = ((start_sec + duration) * fps) as i64;
// Query all traces with identity names and bbox positions in the visible frame range
// frame_number is BIGINT (i64) in database
let identities_table = schema::table_name("identities");
let all_rows: Vec<(i32, i64, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
"SELECT fd.trace_id, fd.frame_number, fd.x, fd.y, fd.width, fd.height, i.name \
FROM {} fd \
LEFT JOIN {} i ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.frame_number BETWEEN $2 AND $3 AND fd.trace_id IS NOT NULL \
ORDER BY fd.trace_id, fd.frame_number",
face_table, identities_table
))
.bind(&file_uuid)
.bind(start_fn)
.bind(end_fn)
.fetch_all(state.db.pool())
.await
.unwrap_or_default();
let all_points = qdrant.scroll_all_points("_faces", json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "frame", "range": {"gte": start_fn, "lte": end_fn}},
{"key": "trace_id", "match": {"value": 1}}
]
}), 1000).await.unwrap_or_default();
// Get identity names for traces that have identity_id
let mut identity_names: HashMap<i32, String> = HashMap::new();
for point in &all_points {
let payload = &point["payload"];
if let Some(iid) = payload["identity_id"].as_i64() {
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
if iid > 0 && !identity_names.contains_key(&trace_id) {
if let Some(name) = sqlx::query_scalar::<_, String>(&format!(
"SELECT name FROM {} WHERE id = $1",
identities_table
))
.bind(iid as i32)
.fetch_optional(state.db.pool())
.await
.ok()
.flatten()
{
identity_names.insert(trace_id, name);
}
}
}
}
let all_rows: Vec<(i32, i64, i32, i32, i32, i32, Option<String>)> = all_points.iter().filter_map(|p| {
let payload = &p["payload"];
let trace_id = payload["trace_id"].as_i64()? as i32;
let frame = payload["frame"].as_i64()?;
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64()? as i32;
let y = bbox["y"].as_f64()? as i32;
let w = bbox["width"].as_f64()? as i32;
let h = bbox["height"].as_f64()? as i32;
let name = identity_names.get(&trace_id).cloned();
Some((trace_id, frame, x, y, w, h, name))
}).collect();
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame
// frame_number is i64 (BIGINT), so HashMaps need i64 for frame values
@@ -1082,21 +1139,31 @@ async fn stranger_video_inner(
fps
);
// Query face detections by stranger_id directly
let face_table = schema::table_name("face_detections");
tracing::debug!("[stranger_video] face_table: {}", face_table);
// Query face detections by stranger_id from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
// frame_number is BIGINT (i64) in database
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
face_table
))
.bind(&file_uuid).bind(stranger_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| {
tracing::error!("[stranger_video] Face query error: {}", e);
vec![]
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "stranger_id", "match": {"value": stranger_id}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default();
let rows: Vec<(i64, i32, i32, i32, i32)> = points.iter()
.filter_map(|p| {
let payload = &p["payload"];
let frame = payload["frame"].as_i64()?;
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64()? as i32;
let y = bbox["y"].as_f64()? as i32;
let w = bbox["width"].as_f64()? as i32;
let h = bbox["height"].as_f64()? as i32;
Some((frame, x, y, w, h))
})
.collect();
tracing::info!("[stranger_video] Found {} faces", rows.len());
+71 -46
View File
@@ -305,14 +305,21 @@ async fn trigger_processing(
tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
// Update videos.processing_status to PROCESSING immediately
let processor_names_upper: Vec<String> = processors_to_run.iter().map(|p| p.to_uppercase()).collect();
let progress: serde_json::Map<String, serde_json::Value> = processors_to_run.iter().map(|p| {
(p.to_uppercase(), serde_json::json!({
"current_frame": 0, "total_frames": 0, "percentage": 0, "status": "pending"
}))
}).collect();
let processor_names_upper: Vec<String> =
processors_to_run.iter().map(|p| p.to_uppercase()).collect();
let progress: serde_json::Map<String, serde_json::Value> = processors_to_run
.iter()
.map(|p| {
(
p.to_uppercase(),
serde_json::json!({
"current_frame": 0, "total_frames": 0, "percentage": 0, "status": "pending"
}),
)
})
.collect();
let status = serde_json::json!({
"phase": "PROCESSING",
"active_processors": processor_names_upper,
@@ -320,7 +327,7 @@ async fn trigger_processing(
"progress": progress
});
sqlx::query(&format!(
"UPDATE {videos_table} SET status = 'queued', processing_status = $1, updated_at = CURRENT_TIMESTAMP WHERE file_uuid = $2"
"UPDATE {videos_table} SET status = 'processing', processing_status = $1, updated_at = CURRENT_TIMESTAMP WHERE file_uuid = $2"
))
.bind(&status)
.bind(&file_uuid)
@@ -396,7 +403,7 @@ async fn get_chunk_by_path(
row.map(Json).ok_or(StatusCode::NOT_FOUND)
}
async fn get_progress(file_uuid: Path<String>) -> Result<Json<ProgressResponse>, StatusCode> {
async fn get_progress(file_uuid: Path<String>) -> Result<Json<serde_json::Value>, StatusCode> {
let file_uuid = file_uuid.0;
let redis = RedisClient::new().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let mut conn = redis
@@ -459,6 +466,24 @@ async fn get_progress(file_uuid: Path<String>) -> Result<Json<ProgressResponse>,
})
.collect();
// Fetch TKG and Agent progress from Redis
let tkg_key = format!("{}progress:{}:tkg", REDIS_KEY_PREFIX.as_str(), file_uuid);
let agent_key = format!("{}progress:{}:agent", REDIS_KEY_PREFIX.as_str(), file_uuid);
let tkg_progress: Option<serde_json::Value> = if let Ok(mut c) = redis.get_conn().await {
let val: Option<String> = redis::cmd("GET").arg(&tkg_key).query_async(&mut c).await.ok();
val.and_then(|s| serde_json::from_str(&s).ok())
} else {
None
};
let agent_progress: Option<serde_json::Value> = if let Ok(mut c) = redis.get_conn().await {
let val: Option<String> = redis::cmd("GET").arg(&agent_key).query_async(&mut c).await.ok();
val.and_then(|s| serde_json::from_str(&s).ok())
} else {
None
};
let overall = if processors.is_empty() {
0
} else {
@@ -466,20 +491,20 @@ async fn get_progress(file_uuid: Path<String>) -> Result<Json<ProgressResponse>,
(sum / processors.len() as u64) as u32
};
Ok(Json(ProgressResponse {
file_uuid,
user: None,
group: None,
file_name: video.as_ref().map(|v| v.file_name.clone()),
duration: video.as_ref().map(|v| v.duration),
overall_progress: overall,
cpu_percent: cpu,
gpu_percent: gpu,
memory_percent: mem_pct,
memory_mb: mem_mb,
system: Some(sys),
processors,
}))
Ok(Json(serde_json::json!({
"file_uuid": file_uuid,
"file_name": video.as_ref().map(|v| &v.file_name),
"duration": video.as_ref().map(|v| v.duration),
"overall_progress": overall,
"cpu_percent": cpu,
"gpu_percent": gpu,
"memory_percent": mem_pct,
"memory_mb": mem_mb,
"system": sys,
"processors": processors,
"tkg_progress": tkg_progress,
"agent_progress": agent_progress,
})))
}
async fn list_jobs(Json(params): Json<JobsQuery>) -> Result<Json<JobListResponse>, StatusCode> {
@@ -575,7 +600,7 @@ async fn get_job(Path(uuid): Path<String>) -> Result<Json<JobDetailResponse>, St
started_at,
updated_at,
) = job.ok_or(StatusCode::NOT_FOUND)?;
// Calculate queue position (pending or queued jobs ahead of this one)
let queue_position = if status == "pending" || status == "queued" {
sqlx::query_scalar::<_, i64>(&format!(
@@ -714,7 +739,7 @@ async fn get_processor_counts(
}
}
if let Ok(content) = std::fs::read_to_string(&json_path) {
if let Ok(content) = std::fs::read_to_string(&json_path) {
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
// CUT: prioritize scenes count over frame_count
if proc_name == "cut" {
@@ -737,27 +762,27 @@ if let Ok(content) = std::fs::read_to_string(&json_path) {
.map(|v| v as u32);
}
segment_count = json
.get("segments")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
chunk_count = json
.get("child_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
.or_else(|| {
json.get("parent_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
});
if chunk_count.is_none() {
chunk_count = json
.get("chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
}
}
}
segment_count = json
.get("segments")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
chunk_count = json
.get("child_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
.or_else(|| {
json.get("parent_chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32)
});
if chunk_count.is_none() {
chunk_count = json
.get("chunks")
.and_then(|v| v.as_array())
.map(|arr| arr.len() as u32);
}
}
}
}
results.push(ProcessorCountInfo {
+447 -22
View File
@@ -10,6 +10,83 @@ use serde::{Deserialize, Serialize};
use super::types::AppState;
use crate::core::db::schema;
/// Response body for the comprehensive file stats endpoint.
///
/// Bundles every data source for one file into a single payload for frontend
/// transparency: JSON file status, PostgreSQL counts, Qdrant collections,
/// TKG stats, and Identity Agent stats.
#[derive(Debug, Serialize)]
struct FileStatsResponse {
/// UUID of the file these stats describe.
file_uuid: String,
/// File name, when one is available.
file_name: Option<String>,
/// File status string, when one is available.
status: Option<String>,
/// Per-processor status entries.
processors: Vec<ProcessorStatus>,
/// Counts sourced from PostgreSQL.
postgres: PostgresStats,
/// Counts sourced from Qdrant collections.
qdrant: QdrantStats,
/// TKG node and edge statistics.
tkg: TkgFileStats,
/// Identity Agent statistics.
identity_agent: IdentityAgentStats,
}
/// Status of a single processor, as parsed by `get_file_stats` from one entry of the
/// `progress` object inside the `processing_status` JSON column.
#[derive(Debug, Serialize)]
struct ProcessorStatus {
/// Processor name (the key of the entry in the `progress` object).
name: String,
/// Value of the entry's `status` string; `get_file_stats` falls back to "pending" when it is absent.
status: String,
/// Value of the entry's `percentage` field; `get_file_stats` falls back to 0 when it is absent
/// or not an unsigned integer.
progress: u32,
/// Optional `message` string of the entry.
message: Option<String>,
}
/// Per-file row counts read from PostgreSQL by `get_file_stats`.
#[derive(Debug, Serialize, Default)]
struct PostgresStats {
/// Rows in the chunk table with `chunk_type = 'sentence'`.
sentence_chunks: i64,
/// Rows in the chunk table with `chunk_type = 'trace'`.
trace_chunks: i64,
/// Rows in the chunk table with `chunk_type = 'relationship'`.
relationship_chunks: i64,
/// Distinct identities joined to the file through the file_identities table.
identities: i64,
/// Rows in the file_identities table for the file.
file_identities: i64,
}
/// Per-file figures derived from Qdrant points by `get_file_stats`.
#[derive(Debug, Serialize)]
struct QdrantStats {
/// Number of points returned from the `_faces` collection for the file.
faces: i64,
/// Number of distinct positive `trace_id` payload values among those face points.
face_traces: i64,
/// Number of distinct positive `identity_id` payload values among those face points.
face_identities: i64,
/// Number of points returned from the `momentry_{schema}_rule1_v2` collection for the file.
text_chunks: i64,
/// Number of points returned from the `momentry_{schema}_speaker` collection for the file.
speakers: i64,
}
/// Per-file TKG counters. The per-type fields are filled by `get_file_stats` from the
/// tkg_nodes / tkg_edges tables; the type tag each field is counted under is noted on the field.
#[derive(Debug, Serialize, Default)]
struct TkgFileStats {
/// Overall node count (presumably across all node types — confirm against the producer).
total_nodes: i64,
/// Overall edge count (presumably across all edge types — confirm against the producer).
total_edges: i64,
/// Nodes tagged `face_track`.
face_track_nodes: i64,
/// Nodes tagged `gaze_track`.
gaze_track_nodes: i64,
/// Nodes tagged `lip_track`.
lip_track_nodes: i64,
/// Nodes tagged `text_region`.
text_region_nodes: i64,
/// Nodes tagged `appearance_trace`.
appearance_nodes: i64,
/// Nodes tagged `accessory`.
accessory_nodes: i64,
/// Nodes tagged `yolo_object`.
object_nodes: i64,
/// Nodes tagged `hand`.
hand_nodes: i64,
/// Nodes tagged `speaker`.
speaker_nodes: i64,
/// Edges tagged `CO_OCCURS_WITH`.
co_occurrence_edges: i64,
/// Edges tagged `SPEAKS_AS`.
speaker_face_edges: i64,
/// Edges tagged `FACE_TO_FACE`.
face_face_edges: i64,
/// Edges tagged `MUTUAL_GAZE`.
mutual_gaze_edges: i64,
/// Edges tagged `LIP_SYNC`.
lip_sync_edges: i64,
/// Edges tagged `HAS_APPEARANCE`.
has_appearance_edges: i64,
/// Edges tagged `WEARS`.
wears_edges: i64,
/// Edges tagged `HAND_OBJECT`.
hand_object_edges: i64,
}
/// Per-file Identity Agent figures reported by `get_file_stats`.
#[derive(Debug, Serialize, Default)]
struct IdentityAgentStats {
/// Cluster count. `get_file_stats` currently reports a constant 0 here
/// (its inline note points at face_clustered.json as the intended source).
clusters: i64,
/// Number of distinct positive `identity_id` values seen on the file's `_faces` points.
identities_created: i64,
/// Number of the file's `_seeds` points whose payload `source` equals "tmdb".
tmdb_matches: i64,
/// Number of points returned from the speaker collection for the file.
speaker_bindings: i64,
/// Confirmation count. `get_file_stats` currently reports a constant 0 here
/// (its inline note points at the identity_bindings table as the intended source).
confirmations: i64,
}
#[derive(Debug, Serialize, Deserialize)]
struct ScannedFileInfo {
file_name: String,
@@ -372,9 +449,46 @@ async fn get_ingestion_status(
) -> Result<Json<IngestionStatusResponse>, StatusCode> {
let pool = state.db.pool();
let chunk = schema::table_name("chunk");
let fd = schema::table_name("face_detections");
let identities = schema::table_name("identities");
// Get face counts from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default();
let face_total = points.len() as i64;
let mut trace_ids: std::collections::HashSet<i64> = std::collections::HashSet::new();
let mut identity_ids: std::collections::HashSet<i64> = std::collections::HashSet::new();
let mut stranger_traces: std::collections::HashSet<i64> = std::collections::HashSet::new();
for point in &points {
let payload = &point["payload"];
if let Some(tid) = payload["trace_id"].as_i64() {
if tid > 0 {
trace_ids.insert(tid);
if payload["identity_id"].is_null() {
stranger_traces.insert(tid);
}
}
}
if let Some(iid) = payload["identity_id"].as_i64() {
if iid > 0 {
identity_ids.insert(iid);
}
}
}
let trace_count = trace_ids.len() as i64;
let identity_count = identity_ids.len() as i64;
let strangers = stranger_traces.len() as i64;
let scene_meta_path = format!(
"{}/{}.scene_meta.json",
crate::core::config::OUTPUT_DIR.as_str(),
@@ -398,14 +512,12 @@ async fn get_ingestion_status(
let scene_count = count_sql!(&format!(
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'"
));
let face_total = count_sql!(&format!(
"SELECT COUNT(*) FROM {fd} WHERE file_uuid = '{file_uuid}'"
));
let trace_count = count_sql!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL"));
let face_total = face_total;
let trace_count = trace_count;
let trace_chunks = count_sql!(&format!(
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'"
));
let identity_count = count_sql!(&format!("SELECT COUNT(DISTINCT identity_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND identity_id IS NOT NULL"));
let identity_count = identity_count;
let tkg_nodes = count_sql!(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
schema::table_name("tkg_nodes")
@@ -414,12 +526,41 @@ async fn get_ingestion_status(
"SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
schema::table_name("tkg_edges")
));
let related_identities: Vec<IdentityRef> =
// Get individual node counts by type
let tkg_nodes_table = schema::table_name("tkg_nodes");
let face_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'face_track'"));
let gaze_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'gaze_track'"));
let lip_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'lip_track'"));
let text_region_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'text_region'"));
let appearance_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'appearance_trace'"));
let accessory_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'accessory'"));
let object_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'yolo_object'"));
let hand_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'hand'"));
let speaker_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'speaker'"));
// Get individual edge counts by type
let tkg_edges_table = schema::table_name("tkg_edges");
let co_occurrence_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'CO_OCCURS_WITH'"));
let speaker_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'SPEAKS_AS'"));
let face_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'FACE_TO_FACE'"));
let mutual_gaze_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'MUTUAL_GAZE'"));
let lip_sync_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'LIP_SYNC'"));
let has_appearance_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAS_APPEARANCE'"));
let wears_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'WEARS'"));
let hand_object_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAND_OBJECT'"));
// Rule 2 relationship chunks
let rule2_chunks = count_sql!(&format!(
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'relationship'"
));
// Get related identities from Qdrant _faces
let related_identity_ids: Vec<i64> = identity_ids.into_iter().collect();
let related_identities: Vec<IdentityRef> = if !related_identity_ids.is_empty() {
let id_list: String = related_identity_ids.iter().map(|id| id.to_string()).collect::<Vec<_>>().join(",");
match sqlx::query_as::<_, (String, String)>(&format!(
"SELECT DISTINCT i.uuid::text, i.name FROM {identities} i \
JOIN {fd} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{file_uuid}' AND fd.identity_id IS NOT NULL \
ORDER BY i.name"
"SELECT DISTINCT uuid::text, name FROM {identities} \
WHERE id IN ({id_list}) ORDER BY name"
))
.fetch_all(pool)
.await
@@ -435,12 +576,12 @@ async fn get_ingestion_status(
tracing::error!("related_identities query failed: {}", e);
vec![]
}
};
}
} else {
vec![]
};
let strangers = count_sql!(&format!(
"SELECT COUNT(DISTINCT trace_id) FROM {fd} \
WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL AND identity_id IS NULL"
));
let strangers = strangers;
macro_rules! step {
($name:expr, $done:expr, $detail:expr) => {
@@ -462,9 +603,9 @@ async fn get_ingestion_status(
"auto_vectorize",
sentence_embedded > 0,
Some(format!("{sentence_embedded} embedded"))
),
step!(
"face_track",
),
step!(
"face_track",
trace_count > 0,
Some(format!("{trace_count} traces / {face_total} detections"))
),
@@ -473,11 +614,32 @@ step!(
trace_chunks > 0,
Some(format!("{trace_chunks} trace chunks"))
),
// TKG Nodes
step!("tkg_face_track", face_track_nodes > 0, Some(format!("{face_track_nodes} nodes"))),
step!("tkg_gaze_track", gaze_track_nodes > 0, Some(format!("{gaze_track_nodes} nodes"))),
step!("tkg_lip_track", lip_track_nodes > 0, Some(format!("{lip_track_nodes} nodes"))),
step!("tkg_text_region", text_region_nodes > 0, Some(format!("{text_region_nodes} nodes"))),
step!("tkg_appearance", appearance_nodes > 0, Some(format!("{appearance_nodes} nodes"))),
step!("tkg_accessory", accessory_nodes > 0, Some(format!("{accessory_nodes} nodes"))),
step!("tkg_object", object_nodes > 0, Some(format!("{object_nodes} nodes"))),
step!("tkg_hand", hand_nodes > 0, Some(format!("{hand_nodes} nodes"))),
step!("tkg_speaker", speaker_nodes > 0, Some(format!("{speaker_nodes} nodes"))),
// TKG Edges
step!("tkg_co_occurrence", co_occurrence_edges > 0, Some(format!("{co_occurrence_edges} edges"))),
step!("tkg_speaker_face", speaker_face_edges > 0, Some(format!("{speaker_face_edges} edges"))),
step!("tkg_face_face", face_face_edges > 0, Some(format!("{face_face_edges} edges"))),
step!("tkg_mutual_gaze", mutual_gaze_edges > 0, Some(format!("{mutual_gaze_edges} edges"))),
step!("tkg_lip_sync", lip_sync_edges > 0, Some(format!("{lip_sync_edges} edges"))),
step!("tkg_has_appearance", has_appearance_edges > 0, Some(format!("{has_appearance_edges} edges"))),
step!("tkg_wears", wears_edges > 0, Some(format!("{wears_edges} edges"))),
step!("tkg_hand_object", hand_object_edges > 0, Some(format!("{hand_object_edges} edges"))),
// Rule 2
step!(
"tkg",
tkg_nodes > 0 || tkg_edges > 0,
Some(format!("{tkg_nodes} nodes, {tkg_edges} edges"))
"rule2_relationship",
rule2_chunks > 0,
Some(format!("{rule2_chunks} relationship chunks"))
),
// Identity & Scene
step!(
"identity_match",
identity_count > 0,
@@ -494,6 +656,248 @@ step!(
}))
}
/// Comprehensive file stats endpoint — combines all data sources for frontend transparency.
///
/// For the file identified by the `file_uuid` path segment this gathers:
/// 1. the file row (name, status) from the videos table,
/// 2. per-processor status parsed from the `processing_status` JSON column,
/// 3. chunk / identity counts from PostgreSQL,
/// 4. figures derived from the Qdrant `_faces`, rule1 and speaker collections,
/// 5. TKG node and edge counts (overall totals plus one counter per type),
/// 6. Identity Agent figures derived from `_faces`, `_seeds` and the speaker collection.
///
/// # Errors
/// Returns `500 Internal Server Error` only when one of the two videos-table
/// lookups fails. Everything else is best effort: a failed count query
/// contributes `0` and a failed Qdrant scroll contributes an empty point list.
/// An unknown `file_uuid` is not an error; the response then echoes the
/// requested UUID with `file_name` and `status` set to `None`.
async fn get_file_stats(
    State(state): State<AppState>,
    Path(file_uuid): Path<String>,
) -> Result<Json<FileStatsResponse>, StatusCode> {
    use crate::core::db::qdrant_db::QdrantDb;
    use serde_json::json;

    let pool = state.db.pool();

    // 1. Get file info from PostgreSQL
    let videos_table = schema::table_name("videos");
    let file_info: Option<(String, String, String)> = sqlx::query_as(&format!(
        "SELECT file_uuid, file_name, status FROM {} WHERE file_uuid = $1",
        videos_table
    ))
    .bind(&file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    let (file_uuid_str, file_name, status) = match file_info {
        Some((uuid, name, s)) => (uuid, Some(name), Some(s)),
        // Unknown file: still answer, echoing the requested UUID.
        None => (file_uuid.clone(), None, None),
    };

    // 2. Get processor status from processing_status JSONB
    let processing_status: serde_json::Value = sqlx::query_scalar(&format!(
        "SELECT processing_status FROM {} WHERE file_uuid = $1",
        videos_table
    ))
    .bind(&file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
    .unwrap_or_else(|| serde_json::json!({}));

    // Entries of `progress` that are not JSON objects are skipped.
    let processors: Vec<ProcessorStatus> = processing_status
        .get("progress")
        .and_then(|p| p.as_object())
        .map(|progress| {
            progress
                .iter()
                .filter_map(|(name, info)| {
                    info.as_object().map(|obj| {
                        let status = obj
                            .get("status")
                            .and_then(|s| s.as_str())
                            .unwrap_or("pending")
                            .to_string();
                        let progress_val = obj
                            .get("percentage")
                            .and_then(|p| p.as_u64())
                            .unwrap_or(0) as u32;
                        let message = obj
                            .get("message")
                            .and_then(|m| m.as_str())
                            .map(|s| s.to_string());
                        ProcessorStatus {
                            name: name.clone(),
                            status,
                            progress: progress_val,
                            message,
                        }
                    })
                })
                .collect()
        })
        .unwrap_or_default();

    // 3. Get PostgreSQL counts
    let chunk_table = schema::table_name("chunk");
    let identities_table = schema::table_name("identities");
    let file_identities_table = schema::table_name("file_identities");

    let postgres = PostgresStats {
        sentence_chunks: sqlx::query_scalar::<_, i64>(&format!(
            "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence'"
        ))
        .bind(&file_uuid)
        .fetch_one(pool)
        .await
        .unwrap_or(0),
        trace_chunks: sqlx::query_scalar::<_, i64>(&format!(
            "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'trace'"
        ))
        .bind(&file_uuid)
        .fetch_one(pool)
        .await
        .unwrap_or(0),
        relationship_chunks: sqlx::query_scalar::<_, i64>(&format!(
            "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'"
        ))
        .bind(&file_uuid)
        .fetch_one(pool)
        .await
        .unwrap_or(0),
        identities: sqlx::query_scalar::<_, i64>(&format!(
            "SELECT COUNT(DISTINCT i.id) FROM {identities_table} i \
             JOIN {file_identities_table} fi ON fi.identity_id = i.id \
             WHERE fi.file_uuid = $1"
        ))
        .bind(&file_uuid)
        .fetch_one(pool)
        .await
        .unwrap_or(0),
        file_identities: sqlx::query_scalar::<_, i64>(&format!(
            "SELECT COUNT(*) FROM {file_identities_table} WHERE file_uuid = $1"
        ))
        .bind(&file_uuid)
        .fetch_one(pool)
        .await
        .unwrap_or(0),
    };

    // 4. Get Qdrant stats
    let qdrant_db = QdrantDb::new();

    // Every collection queried below is filtered the same way: payload
    // `file_uuid` must equal the requested file. Build the filter once.
    let file_filter = json!({
        "must": [{"key": "file_uuid", "match": {"value": file_uuid}}]
    });

    // Face stats
    let face_points = qdrant_db
        .scroll_all_points("_faces", file_filter.clone(), 500)
        .await
        .unwrap_or_default();

    // Only positive ids are counted; missing or non-positive values are ignored.
    let mut face_traces = std::collections::HashSet::new();
    let mut face_identities = std::collections::HashSet::new();
    for point in &face_points {
        let payload = &point["payload"];
        if let Some(tid) = payload["trace_id"].as_i64() {
            if tid > 0 {
                face_traces.insert(tid);
            }
        }
        if let Some(iid) = payload["identity_id"].as_i64() {
            if iid > 0 {
                face_identities.insert(iid);
            }
        }
    }

    // Collection names embed the database schema name. The local is called
    // `db_schema` so it cannot be confused with the imported `schema` module
    // that is used for table names further down.
    let db_schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());

    // Text chunk stats (rule1 collection)
    let rule1_collection = format!("momentry_{}_rule1_v2", db_schema);
    let text_points = qdrant_db
        .scroll_all_points(&rule1_collection, file_filter.clone(), 500)
        .await
        .unwrap_or_default();

    // Speaker stats
    let speaker_collection = format!("momentry_{}_speaker", db_schema);
    let speaker_points = qdrant_db
        .scroll_all_points(&speaker_collection, file_filter.clone(), 500)
        .await
        .unwrap_or_default();

    let qdrant_stats = QdrantStats {
        faces: face_points.len() as i64,
        face_traces: face_traces.len() as i64,
        face_identities: face_identities.len() as i64,
        text_chunks: text_points.len() as i64,
        speakers: speaker_points.len() as i64,
    };

    // 5. Get TKG stats from PostgreSQL
    let tkg_nodes_table = schema::table_name("tkg_nodes");
    let tkg_edges_table = schema::table_name("tkg_edges");

    // Overall totals. These were previously left at their `Default` value,
    // so the response always reported 0 nodes / 0 edges.
    let total_nodes = sqlx::query_scalar::<_, i64>(&format!(
        "SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = $1"
    ))
    .bind(&file_uuid)
    .fetch_one(pool)
    .await
    .unwrap_or(0);
    let total_edges = sqlx::query_scalar::<_, i64>(&format!(
        "SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = $1"
    ))
    .bind(&file_uuid)
    .fetch_one(pool)
    .await
    .unwrap_or(0);

    let tkg = TkgFileStats {
        total_nodes,
        total_edges,
        face_track_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "face_track").await,
        gaze_track_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "gaze_track").await,
        lip_track_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "lip_track").await,
        text_region_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "text_region").await,
        appearance_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "appearance_trace")
            .await,
        accessory_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "accessory").await,
        object_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "yolo_object").await,
        hand_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "hand").await,
        speaker_nodes: count_by_type(pool, &tkg_nodes_table, &file_uuid, "speaker").await,
        co_occurrence_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "CO_OCCURS_WITH")
            .await,
        speaker_face_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "SPEAKS_AS").await,
        face_face_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "FACE_TO_FACE").await,
        mutual_gaze_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "MUTUAL_GAZE").await,
        lip_sync_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "LIP_SYNC").await,
        has_appearance_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "HAS_APPEARANCE")
            .await,
        wears_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "WEARS").await,
        hand_object_edges: count_by_type(pool, &tkg_edges_table, &file_uuid, "HAND_OBJECT").await,
    };

    // 6. Get Identity Agent stats from Qdrant _seeds
    let seed_points = qdrant_db
        .scroll_all_points("_seeds", file_filter, 500)
        .await
        .unwrap_or_default();

    let identity_agent = IdentityAgentStats {
        clusters: 0, // From face_clustered.json if available
        identities_created: face_identities.len() as i64,
        tmdb_matches: seed_points
            .iter()
            .filter(|p| p["payload"]["source"].as_str() == Some("tmdb"))
            .count() as i64,
        speaker_bindings: speaker_points.len() as i64,
        confirmations: 0, // From identity_bindings table
    };

    Ok(Json(FileStatsResponse {
        file_uuid: file_uuid_str,
        file_name,
        status,
        processors,
        postgres,
        qdrant: qdrant_stats,
        tkg,
        identity_agent,
    }))
}
async fn count_by_type(pool: &sqlx::PgPool, table: &str, file_uuid: &str, type_val: &str) -> i64 {
sqlx::query_scalar::<_, i64>(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND (node_type = $2 OR edge_type = $2)",
table
))
.bind(file_uuid)
.bind(type_val)
.fetch_one(pool)
.await
.unwrap_or(0)
}
pub fn scan_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/files/scan", get(scan_files))
@@ -502,4 +906,25 @@ pub fn scan_routes() -> Router<AppState> {
"/api/v1/stats/ingestion-status/:file_uuid",
get(get_ingestion_status),
)
.route(
"/api/v1/stats/file/:file_uuid",
get(get_file_stats),
)
.route(
"/api/v1/stats/pipeline/:file_uuid",
get(get_pipeline_progress_handler),
)
}
/// Returns the segmented, stage-weighted pipeline progress of one file.
///
/// The progress record is looked up through the shared Redis client. When no
/// record is found for `file_uuid`, a freshly constructed `PipelineProgress`
/// for that UUID is returned instead, so this handler always answers `Ok`.
async fn get_pipeline_progress_handler(
    State(state): State<AppState>,
    Path(file_uuid): Path<String>,
) -> Result<Json<crate::core::progress::PipelineProgress>, StatusCode> {
    let client_lock = state.redis_cache.get_client().await;
    // The read guard is held until the handler returns.
    let client = client_lock.read().await;

    let stored = crate::core::progress::get_pipeline_progress(&*client, &file_uuid).await;
    let progress = match stored {
        Some(found) => found,
        None => crate::core::progress::PipelineProgress::new(&file_uuid),
    };

    Ok(Json(progress))
}
+7 -8
View File
@@ -149,7 +149,6 @@ pub async fn smart_search(
},
)?;
const KEYWORD_FIXED_SCORE: f64 = 0.5;
const IDENTITY_FIXED_SCORE: f64 = 0.85;
let fetch_limit = limit * 3;
@@ -302,23 +301,23 @@ pub async fn smart_search(
});
}
// Add keyword results (fixed score 0.5)
let keyword_fixed = KEYWORD_FIXED_SCORE;
for (file_uuid, chunk_id, _) in keyword_results.iter() {
// Add keyword results (score from FTS rank, capped at 1.0)
for (file_uuid, chunk_id, actual_score) in keyword_results.iter() {
let key = (file_uuid.clone(), chunk_id.clone());
let capped = actual_score.min(1.0).max(0.1);
merged
.entry(key)
.and_modify(|e| {
e.score = e.score.max(keyword_fixed);
e.keyword_score = Some(keyword_fixed);
e.score = e.score.max(capped);
e.keyword_score = Some(capped);
e.source = format!("{}_keyword", e.source);
})
.or_insert(MergedResult {
file_uuid: file_uuid.clone(),
chunk_id: chunk_id.clone(),
score: keyword_fixed,
score: capped,
semantic_score: None,
keyword_score: Some(keyword_fixed),
keyword_score: Some(capped),
identity_score: None,
source: "keyword".to_string(),
});
+9 -3
View File
@@ -16,7 +16,7 @@ use super::checkin_api;
use super::docs;
use super::files;
use super::health;
use super::health::{health, health_detailed, health_consistency};
use super::health::{health, health_consistency, health_detailed};
use super::identities;
use super::identity_agent_api;
use super::identity_api;
@@ -138,8 +138,14 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
let public_health_routes = Router::new()
.route("/api/v1/health", axum::routing::get(health))
.route("/api/v1/health/detailed", axum::routing::get(health_detailed))
.route("/api/v1/health/consistency", axum::routing::get(health_consistency));
.route(
"/api/v1/health/detailed",
axum::routing::get(health_detailed),
)
.route(
"/api/v1/health/consistency",
axum::routing::get(health_consistency),
);
let app = Router::new()
.merge(auth::auth_routes())
+2 -1
View File
@@ -619,6 +619,7 @@ async fn tmdb_match_handler(
file_uuid,
bindings_created: 0,
tmdb_identities_available: 0,
message: "TMDb matching disabled - needs reimplementation with _faces collection".to_string(),
message: "TMDb matching disabled - needs reimplementation with _faces collection"
.to_string(),
}))
}
+244 -228
View File
@@ -7,6 +7,7 @@ use axum::{
Router,
};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use crate::core::db::PostgresDb;
@@ -73,6 +74,7 @@ struct TraceInfo {
duration_sec: f64,
avg_confidence: f64,
sample_face_id: Option<String>,
thumbnail_url: String,
}
#[derive(Debug, Serialize)]
@@ -118,46 +120,76 @@ async fn list_traces_sorted(
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(24.0);
let query = format!(
"SELECT tt.*, fd.id AS sample_face_id FROM (
SELECT trace_id::int AS trace_id,
COUNT(*) AS face_count,
MIN(frame_number)::bigint AS start_frame,
MAX(frame_number)::bigint AS end_frame,
(MAX(frame_number) - MIN(frame_number))::float8 AS duration_sec,
AVG(confidence)::float8 AS avg_confidence
FROM {}
WHERE file_uuid = $1 AND trace_id IS NOT NULL
AND confidence >= $5 AND confidence <= $6
GROUP BY trace_id
HAVING COUNT(*) >= $2
ORDER BY {}
LIMIT $3 OFFSET $4
) tt
LEFT JOIN LATERAL (
SELECT id FROM {}
WHERE trace_id = tt.trace_id AND file_uuid = $1
ORDER BY confidence DESC LIMIT 1
) fd ON true",
crate::core::db::schema::table_name("face_detections"),
order_clause,
crate::core::db::schema::table_name("face_detections"),
);
// Get face points from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
use std::collections::HashMap;
let rows: Vec<(i32, i64, i64, i64, f64, f64, Option<i32>)> = sqlx::query_as(&query)
.bind(&file_uuid)
.bind(min_faces)
.bind(effective_limit)
.bind(db_offset)
.bind(min_confidence)
.bind(max_confidence)
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 2000).await.unwrap_or_default();
let traces: Vec<TraceInfo> = rows
// Aggregate by trace_id
struct TraceAgg {
face_count: i64,
start_frame: i64,
end_frame: i64,
avg_confidence: f64,
sum_confidence: f64,
}
let mut trace_data: HashMap<i32, TraceAgg> = HashMap::new();
for point in &points {
let payload = &point["payload"];
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
let frame = payload["frame"].as_i64().unwrap_or(0);
let confidence = payload["confidence"].as_f64().unwrap_or(0.5);
if confidence < min_confidence || confidence > max_confidence {
continue;
}
let entry = trace_data.entry(trace_id).or_insert(TraceAgg {
face_count: 0,
start_frame: i64::MAX,
end_frame: i64::MIN,
avg_confidence: 0.0,
sum_confidence: 0.0,
});
entry.face_count += 1;
entry.start_frame = entry.start_frame.min(frame);
entry.end_frame = entry.end_frame.max(frame);
entry.sum_confidence += confidence;
}
// Filter by min_faces and sort
let mut traces_vec: Vec<(i32, i64, i64, i64, f64, f64)> = trace_data.into_iter()
.filter(|(_, agg)| agg.face_count >= min_faces)
.map(|(tid, agg)| {
let duration = (agg.end_frame - agg.start_frame) as f64;
let avg_conf = if agg.face_count > 0 { agg.sum_confidence / agg.face_count as f64 } else { 0.0 };
(tid, agg.face_count, agg.start_frame, agg.end_frame, duration, avg_conf)
})
.collect();
match order_clause {
"face_count DESC" => traces_vec.sort_by(|a, b| b.1.cmp(&a.1)),
"duration_sec DESC" => traces_vec.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap_or(std::cmp::Ordering::Equal)),
_ => traces_vec.sort_by(|a, b| a.2.cmp(&b.2)),
}
// Apply pagination
let total_traces = traces_vec.len() as i64;
let total_faces: i64 = points.len() as i64;
let traces_vec: Vec<_> = traces_vec.into_iter().skip(db_offset as usize).take(effective_limit as usize).collect();
let traces: Vec<TraceInfo> = traces_vec
.into_iter()
.map(|(tid, fc, sf, ef, dur, conf, fid)| TraceInfo {
.map(|(tid, fc, sf, ef, dur, conf)| TraceInfo {
trace_id: tid,
face_count: fc,
start_frame: sf,
@@ -166,19 +198,11 @@ async fn list_traces_sorted(
end_time: ef as f64 / fps,
duration_sec: dur / fps,
avg_confidence: conf,
sample_face_id: fid.map(|v| v.to_string()),
sample_face_id: None,
thumbnail_url: format!("/api/v1/file/{}/trace/{}/thumbnail", file_uuid, tid),
})
.collect();
let (total_traces, total_faces): (i64, i64) = sqlx::query_as(
&format!("SELECT COUNT(DISTINCT trace_id), COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL",
crate::core::db::schema::table_name("face_detections"))
)
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(TracesResponse {
success: true,
file_uuid,
@@ -260,55 +284,57 @@ async fn list_trace_faces(
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.unwrap_or(24.0);
let total_detected: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
crate::core::db::schema::table_name("face_detections")
))
.bind(&file_uuid)
.bind(trace_id)
.fetch_one(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
// Get face points from Qdrant _faces for this trace
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let rows: Vec<(
i32,
i64,
Option<i32>,
Option<i32>,
Option<i32>,
Option<i32>,
f32,
)> = sqlx::query_as(&format!(
"SELECT id, frame_number, x, y, width, height, confidence::float4 \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
crate::core::db::schema::table_name("face_detections")
))
.bind(&file_uuid)
.bind(trace_id)
.bind(limit)
.bind(offset)
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let qdrant = QdrantDb::new();
let trace_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": trace_id}}
]
});
let points = qdrant.scroll_all_points("_faces", trace_filter, 1000).await.unwrap_or_default();
let total_detected: i64 = points.len() as i64;
// Apply pagination
let paged: Vec<_> = points.into_iter().skip(offset as usize).take(limit as usize).collect();
let mut faces: Vec<TraceFaceItem> = Vec::new();
for (i, (id, frame, x, y, w, h, conf)) in rows.iter().enumerate() {
for (i, point) in paged.iter().enumerate() {
let payload = &point["payload"];
let frame = payload["frame"].as_i64().unwrap_or(0);
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64().unwrap_or(0.0) as i32;
let y = bbox["y"].as_f64().unwrap_or(0.0) as i32;
let w = bbox["width"].as_f64().unwrap_or(0.0) as i32;
let h = bbox["height"].as_f64().unwrap_or(0.0) as i32;
let conf = payload["confidence"].as_f64().unwrap_or(0.5) as f32;
let id = i as i32;
let cur = (x, y, w, h);
// Add interpolated frames between previous and current detection
if interpolate && i > 0 {
let prev = &rows[i - 1];
let prev_frame = prev.1;
let prev_point = &paged[i - 1];
let prev_payload = &prev_point["payload"];
let prev_bbox = &prev_payload["bbox"];
let prev_frame = prev_payload["frame"].as_i64().unwrap_or(0);
let prev_x = prev_bbox["x"].as_f64().unwrap_or(0.0) as i32;
let prev_y = prev_bbox["y"].as_f64().unwrap_or(0.0) as i32;
let prev_w = prev_bbox["width"].as_f64().unwrap_or(0.0) as i32;
let prev_h = prev_bbox["height"].as_f64().unwrap_or(0.0) as i32;
let gap = frame - prev_frame;
if gap > 1 {
for mid in 1..gap {
let t = mid as f64 / gap as f64;
let mid_x = lerp_i32(prev.2, *x, t);
let mid_y = lerp_i32(prev.3, *y, t);
let mid_w = lerp_i32(prev.4, *w, t);
let mid_h = lerp_i32(prev.5, *h, t);
let mid_x = lerp_i32(Some(prev_x), Some(x), t).unwrap_or(0);
let mid_y = lerp_i32(Some(prev_y), Some(y), t).unwrap_or(0);
let mid_w = lerp_i32(Some(prev_w), Some(w), t).unwrap_or(0);
let mid_h = lerp_i32(Some(prev_h), Some(h), t).unwrap_or(0);
let mid_frame = prev_frame + mid;
let mt = (mid_frame as f64 / fps * 10.0).round() / 10.0;
faces.push(TraceFaceItem {
@@ -317,10 +343,10 @@ async fn list_trace_faces(
end_frame: mid_frame,
start_time: mt,
end_time: mt,
x: mid_x,
y: mid_y,
width: mid_w,
height: mid_h,
x: Some(mid_x),
y: Some(mid_y),
width: Some(mid_w),
height: Some(mid_h),
confidence: 0.0,
interpolated: true,
});
@@ -329,19 +355,19 @@ async fn list_trace_faces(
}
// Add the real detection
let frame_val = *frame;
let frame_val = frame;
let ft = (frame_val as f64 / fps * 10.0).round() / 10.0;
faces.push(TraceFaceItem {
id: *id,
id,
start_frame: frame_val,
end_frame: frame_val,
start_time: ft,
end_time: ft,
x: *x,
y: *y,
width: *w,
height: *h,
confidence: *conf as f64,
x: Some(x),
y: Some(y),
width: Some(w),
height: Some(h),
confidence: conf as f64,
interpolated: false,
});
}
@@ -413,7 +439,8 @@ where
F: Fn(anyhow::Error) -> T,
{
use crate::core::db::schema;
let fd_table = schema::table_name("face_detections");
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let video_table = schema::table_name("videos");
let fps: f64 = sqlx::query_scalar(&format!(
@@ -426,15 +453,16 @@ where
.map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?
.unwrap_or(25.0);
let face_count: (i64,) = sqlx::query_as(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
fd_table
))
.bind(file_uuid)
.bind(trace_id)
.fetch_one(pool)
.await
.map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;
// Get face count from Qdrant
let qdrant = QdrantDb::new();
let trace_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": trace_id}}
]
});
let points = qdrant.scroll_all_points("_faces", trace_filter, 1000).await.unwrap_or_default();
let face_count: (i64,) = (points.len() as i64,);
struct Candidate {
frame: i64,
@@ -446,38 +474,35 @@ where
score: f64,
}
let rows = sqlx::query_as::<_, (i64, i32, i32, i32, i32, f64)>(&format!(
"SELECT frame_number::bigint, x, y, width, height, confidence::float8 \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND confidence > 0.7 \
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true) \
ORDER BY (width::float8 * height::float8) * confidence::float8 DESC LIMIT 10",
fd_table
))
.bind(file_uuid)
.bind(trace_id)
.fetch_all(pool)
.await
.map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;
// Get top faces by quality from Qdrant
let mut candidates: Vec<Candidate> = points.iter()
.filter_map(|p| {
let payload = &p["payload"];
let bbox = &payload["bbox"];
let w = bbox["width"].as_f64()? as i32;
let h = bbox["height"].as_f64()? as i32;
let conf = payload["confidence"].as_f64()?;
if conf <= 0.7 { return None; }
let score = (w as f64 * h as f64) * conf;
Some(Candidate {
frame: payload["frame"].as_i64().unwrap_or(0),
x: bbox["x"].as_f64()? as i32,
y: bbox["y"].as_f64()? as i32,
w,
h,
conf,
score,
})
})
.collect();
candidates.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
let rows: Vec<_> = candidates.into_iter().take(10).collect();
if rows.is_empty() {
return Err(err_fn(anyhow::anyhow!("No suitable face found")));
}
let candidates: Vec<Candidate> = rows
.into_iter()
.map(|(frame, x, y, w, h, conf)| {
let score = (w as f64 * h as f64) * conf;
Candidate {
frame,
x,
y,
w,
h,
conf,
score,
}
})
.collect();
let candidates: Vec<Candidate> = rows;
let video_path: String = sqlx::query_scalar(&format!(
"SELECT file_path FROM {} WHERE file_uuid = $1",
@@ -759,8 +784,9 @@ async fn get_cooccurrence(
Path((file_uuid, identity_uuid_a, identity_uuid_b)): Path<(String, String, String)>,
) -> Result<Json<CoOccurResponse>, (StatusCode, Json<serde_json::Value>)> {
use crate::core::db::schema;
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
// Stage 1: Get identity names and IDs
let id_a = sqlx::query_as::<_, (i32, String)>(&format!(
@@ -803,27 +829,33 @@ async fn get_cooccurrence(
)
})?;
// Stage 2: Find first frame where both identity_ids appear
let cooccur: Option<(i64,)> = sqlx::query_as(&format!(
"SELECT MIN(fd.frame_number)::bigint FROM {} fd \
WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
AND fd.frame_number IN ( \
SELECT frame_number FROM {} \
WHERE file_uuid = $1 AND identity_id = $3 \
)",
fd_table, fd_table
))
.bind(&file_uuid)
.bind(id_a.0)
.bind(id_b.0)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?;
// Stage 2: Find first frame where both identity_ids appear (from Qdrant _faces)
let qdrant = QdrantDb::new();
// Get frames for identity A
let filter_a = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "identity_id", "match": {"value": id_a.0}}
]
});
let points_a = qdrant.scroll_all_points("_faces", filter_a, 1000).await.unwrap_or_default();
let frames_a: std::collections::HashSet<i64> = points_a.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.collect();
// Get frames for identity B and find first co-occurrence
let filter_b = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "identity_id", "match": {"value": id_b.0}}
]
});
let points_b = qdrant.scroll_all_points("_faces", filter_b, 1000).await.unwrap_or_default();
let cooccur: Option<(i64,)> = points_b.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.find(|f| frames_a.contains(f))
.map(|f| (f,));
let (first_frame,) = cooccur.ok_or_else(|| {
(StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "These two identities never appear together in this file"})))
@@ -846,24 +878,16 @@ async fn get_cooccurrence(
})?
.unwrap_or(25.0);
// Stage 3: Get trace_ids for both at this frame
let trace_a: Option<(i32,)> = sqlx::query_as(
&format!("SELECT trace_id FROM {} WHERE file_uuid = $1 AND frame_number = $2 AND identity_id = $3 AND trace_id IS NOT NULL LIMIT 1", fd_table)
)
.bind(&file_uuid).bind(first_frame).bind(id_a.0)
.fetch_optional(state.db.pool()).await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
})?;
// Stage 3: Get trace_ids for both at this frame (from Qdrant _faces)
let trace_a: Option<(i32,)> = points_a.iter()
.find(|p| p["payload"]["frame"].as_i64() == Some(first_frame))
.and_then(|p| p["payload"]["trace_id"].as_i64())
.map(|t| (t as i32,));
let trace_b: Option<(i32,)> = sqlx::query_as(
&format!("SELECT trace_id FROM {} WHERE file_uuid = $1 AND frame_number = $2 AND identity_id = $3 AND trace_id IS NOT NULL LIMIT 1", fd_table)
)
.bind(&file_uuid).bind(first_frame).bind(id_b.0)
.fetch_optional(state.db.pool()).await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
})?;
let trace_b: Option<(i32,)> = points_b.iter()
.find(|p| p["payload"]["frame"].as_i64() == Some(first_frame))
.and_then(|p| p["payload"]["trace_id"].as_i64())
.map(|t| (t as i32,));
// Stage 4: Get representative faces for both traces (reusing select_rep_face)
let rep_a = if let Some((tid,)) = trace_a {
@@ -914,22 +938,14 @@ async fn get_cooccurrence(
None
};
// Total co-occurrence frames (from TKG if available, otherwise from face_detections)
let total_cooccurrence_frames: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(DISTINCT fd.frame_number)::bigint FROM {} fd \
WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
AND fd.frame_number IN ( \
SELECT frame_number FROM {} \
WHERE file_uuid = $1 AND identity_id = $3 \
)",
fd_table, fd_table
))
.bind(&file_uuid)
.bind(id_a.0)
.bind(id_b.0)
.fetch_one(state.db.pool())
.await
.unwrap_or(0);
// Total co-occurrence frames (from Qdrant _faces)
let frames_b: std::collections::HashSet<i64> = points_b.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.collect();
let total_cooccurrence_frames: i64 = points_a.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.filter(|f| frames_b.contains(f))
.count() as i64;
Ok(Json(CoOccurResponse {
success: true,
@@ -971,7 +987,8 @@ async fn rebuild_tkg(
use crate::core::chunk::rule2_ingest::ingest_rule2;
use tracing::info;
let result = crate::core::processor::tkg::build_tkg(&state.db, &file_uuid, &OUTPUT_DIR).await;
let redis = crate::core::db::RedisClient::new().ok();
let result = crate::core::processor::tkg::build_tkg(&state.db, &file_uuid, &OUTPUT_DIR, redis.map(Arc::new)).await;
match result {
Ok(r) => {
@@ -987,7 +1004,7 @@ async fn rebuild_tkg(
"[TKG] {} relationship edges found, triggering Rule 2 ingestion...",
total_edges
);
match ingest_rule2(state.db.pool(), &file_uuid).await {
match ingest_rule2(state.db.pool(), &file_uuid, None, None).await {
Ok(count) => info!("[TKG] Rule 2 created {} relationship chunks", count),
Err(e) => info!("[TKG] Rule 2 ingestion failed: {}", e),
}
@@ -1087,26 +1104,26 @@ async fn get_stranger_representative_face(
State(state): State<crate::api::types::AppState>,
Path((file_uuid, stranger_id)): Path<(String, i32)>,
) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
let faces_table = crate::core::db::schema::table_name("face_detections");
// Get trace_id from Qdrant _faces by stranger_id
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let trace_id: i32 = sqlx::query_scalar(&format!(
"SELECT trace_id FROM {} WHERE file_uuid = $1 AND stranger_id = $2 LIMIT 1",
faces_table
))
.bind(&file_uuid)
.bind(stranger_id)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?
.ok_or((
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "Stranger not found"})),
))?;
let qdrant = QdrantDb::new();
let filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "stranger_id", "match": {"value": stranger_id}}
]
});
let points = qdrant.scroll_all_points("_faces", filter, 1).await.unwrap_or_default();
let trace_id: i32 = points.first()
.and_then(|p| p["payload"]["trace_id"].as_i64())
.map(|t| t as i32)
.ok_or((
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "Stranger not found"})),
))?;
get_representative_face_inner(&state, &file_uuid, trace_id).await
}
@@ -1115,26 +1132,25 @@ async fn get_stranger_thumbnail(
State(state): State<crate::api::types::AppState>,
Path((file_uuid, stranger_id)): Path<(String, i32)>,
) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
let faces_table = crate::core::db::schema::table_name("face_detections");
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let trace_id: i32 = sqlx::query_scalar(&format!(
"SELECT trace_id FROM {} WHERE file_uuid = $1 AND stranger_id = $2 LIMIT 1",
faces_table
))
.bind(&file_uuid)
.bind(stranger_id)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?
.ok_or((
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "Stranger not found"})),
))?;
let qdrant = QdrantDb::new();
let filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "stranger_id", "match": {"value": stranger_id}}
]
});
let points = qdrant.scroll_all_points("_faces", filter, 1).await.unwrap_or_default();
let trace_id: i32 = points.first()
.and_then(|p| p["payload"]["trace_id"].as_i64())
.map(|t| t as i32)
.ok_or((
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "Stranger not found"})),
))?;
get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
}
@@ -1526,7 +1542,7 @@ async fn ingest_rule2(
use crate::core::embedding::Embedder;
use tracing::info;
let result = ingest_rule2(state.db.pool(), &file_uuid).await;
let result = ingest_rule2(state.db.pool(), &file_uuid, None, None).await;
match result {
Ok(rule2_chunks) => {
+237 -94
View File
@@ -10,6 +10,7 @@ use axum::{
};
use serde::{Deserialize, Serialize};
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{schema, Database, PostgresDb};
#[derive(Debug, Deserialize)]
@@ -590,76 +591,162 @@ async fn search_persons_internal(
req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!(
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time, \
fd.file_uuid \
FROM {} i JOIN {} fd ON fd.identity_id = i.id WHERE 1=1",
id_table, fd_table
// Query matching identities from PostgreSQL
let mut id_sql = format!(
"SELECT id, uuid::text, name FROM {} WHERE name IS NOT NULL",
id_table
);
if let Some(uuid) = &req.file_uuid {
sql.push_str(&format!(
" AND fd.file_uuid = '{}'",
uuid.replace('\'', "''")
));
}
if !req.query.is_empty() {
let q = req.query.replace('\'', "''");
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
id_sql.push_str(&format!(" AND name ILIKE '%{}%'", q));
}
id_sql.push_str(" ORDER BY name ASC");
let identities: Vec<(i32, String, Option<String>)> =
sqlx::query_as(&id_sql).fetch_all(db.pool()).await?;
if identities.is_empty() {
return Ok(Vec::new());
}
sql.push_str(" GROUP BY i.id, i.uuid, i.name, fd.file_uuid");
sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
// For each identity, scroll _faces points from Qdrant and aggregate per file
let qdrant = QdrantDb::new();
let limit = req.page_size.unwrap_or(20);
let rows: Vec<(
i32,
String,
Option<String>,
i64,
Option<f64>,
Option<f64>,
String,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
// Aggregate frame ranges per (identity_id, file_uuid)
use std::collections::HashMap;
let mut agg: HashMap<(i32, String), (i64, i64, i64)> = HashMap::new(); // (id, fu) -> (count, min_frame, max_frame)
let results: Vec<SearchResult> = rows
.into_iter()
.map(
|(
identity_id,
identity_uuid,
name,
appearance_count,
first_time,
last_time,
file_uuid,
)| {
let score = if !req.query.is_empty()
&& name.as_ref().map_or(false, |n| {
n.to_lowercase().contains(&req.query.to_lowercase())
}) {
0.95
} else {
0.5
};
for (id, _uuid, _name) in &identities {
let scroll_filter = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": id}}
]
});
SearchResult::Person {
file_uuid: Some(file_uuid),
identity_id,
identity_uuid,
name,
appearance_count: appearance_count as i32,
score,
first_appearance_time: first_time,
last_appearance_time: last_time,
let points = match qdrant
.scroll_all_points("_faces", scroll_filter, 1000)
.await
{
Ok(p) => p,
Err(e) => {
tracing::warn!("Qdrant scroll failed for identity {}: {}", id, e);
continue;
}
};
for point in &points {
let payload = &point["payload"];
let file_uuid = match payload["file_uuid"].as_str() {
Some(f) => f.to_string(),
None => continue,
};
// Apply file_uuid filter if specified
if let Some(ref filter_fu) = req.file_uuid {
if &file_uuid != filter_fu {
continue;
}
},
)
}
let frame = payload["frame"].as_i64().unwrap_or(0);
let entry = agg
.entry((*id, file_uuid))
.or_insert((0, i64::MAX, i64::MIN));
entry.0 += 1;
if frame < entry.1 {
entry.1 = frame;
}
if frame > entry.2 {
entry.2 = frame;
}
}
}
// Cache FPS per file_uuid for frame→second conversion
use std::collections::HashSet;
let file_uuids: HashSet<&str> = agg.keys().map(|(_, fu)| fu.as_str()).collect();
let video_table = crate::core::db::schema::table_name("videos");
let mut fps_cache: HashMap<String, f64> = HashMap::new();
for fu in file_uuids {
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 30.0) FROM {} WHERE file_uuid = $1",
video_table
))
.bind(fu)
.fetch_optional(db.pool())
.await?
.unwrap_or(30.0);
fps_cache.insert(fu.to_string(), fps);
}
// Build results
let q_lower = req.query.to_lowercase();
let mut results: Vec<SearchResult> = identities
.iter()
.flat_map(|(id, uuid, name)| {
let name_str = name.as_deref().unwrap_or("");
let name_match = !req.query.is_empty() && name_str.to_lowercase().contains(&q_lower);
let score = if name_match { 0.95 } else { 0.5 };
// Yield entries for this identity's files
let files: Vec<String> = agg
.keys()
.filter(|(iid, _)| iid == id)
.map(|(_, fu)| fu.clone())
.collect();
if files.is_empty() {
vec![]
} else {
files
.into_iter()
.map(|fu| {
let (count, min_fr, max_fr) = agg[&(*id, fu.clone())];
let fps = fps_cache.get(&fu).copied().unwrap_or(30.0);
let first = if min_fr == i64::MAX {
None
} else {
Some(min_fr as f64 / fps)
};
let last = if max_fr == i64::MIN {
None
} else {
Some(max_fr as f64 / fps)
};
SearchResult::Person {
file_uuid: Some(fu),
identity_id: *id,
identity_uuid: uuid.clone(),
name: name.clone(),
appearance_count: count as i32,
score,
first_appearance_time: first,
last_appearance_time: last,
}
})
.collect::<Vec<_>>()
}
})
.collect();
// Sort by appearance_count descending, then limit
results.sort_by(|a, b| {
let a_count = match a {
SearchResult::Person {
appearance_count, ..
} => *appearance_count,
_ => 0,
};
let b_count = match b {
SearchResult::Person {
appearance_count, ..
} => *appearance_count,
_ => 0,
};
b_count.cmp(&a_count)
});
results.truncate(limit);
Ok(results)
}
@@ -752,49 +839,105 @@ async fn search_persons_by_query(
limit: usize,
) -> Result<Vec<PersonResult>, anyhow::Error> {
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let mut sql = format!(
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{}'",
id_table,
fd_table,
file_uuid.replace('\'', "''")
);
// Query matching identities from PostgreSQL
let mut id_sql = format!(
"SELECT id, uuid::text, name FROM {} WHERE name IS NOT NULL",
id_table
);
if let Some(q) = query {
let safe = q.replace('\'', "''");
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe));
id_sql.push_str(&format!(" AND name ILIKE '%{}%'", safe));
}
id_sql.push_str(" ORDER BY name ASC");
let identities: Vec<(i32, String, Option<String>)> =
sqlx::query_as(&id_sql).fetch_all(db.pool()).await?;
if identities.is_empty() {
return Ok(Vec::new());
}
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
// For each identity, scroll _faces points from Qdrant and aggregate
let qdrant = QdrantDb::new();
let mut results: Vec<PersonResult> = Vec::new();
if let Some(min) = min_appearances {
sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min));
for (id, uuid, name) in &identities {
let scroll_filter = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": id}},
{"key": "file_uuid", "match": {"value": file_uuid}}
]
});
let points = match qdrant
.scroll_all_points("_faces", scroll_filter, 1000)
.await
{
Ok(p) => p,
Err(e) => {
tracing::warn!("Qdrant scroll failed for identity {}: {}", id, e);
continue;
}
};
if points.is_empty() {
continue;
}
let count = points.len() as i64;
if let Some(min) = min_appearances {
if (count as i32) < min {
continue;
}
}
let min_frame = points
.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.min()
.unwrap_or(0);
let max_frame = points
.iter()
.filter_map(|p| p["payload"]["frame"].as_i64())
.max()
.unwrap_or(0);
// Look up FPS for this file
let video_table = crate::core::db::schema::table_name("videos");
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 30.0) FROM {} WHERE file_uuid = $1",
video_table
))
.bind(file_uuid)
.fetch_optional(db.pool())
.await?
.unwrap_or(30.0);
let first_time = if fps > 0.0 {
Some(min_frame as f64 / fps)
} else {
None
};
let last_time = if fps > 0.0 {
Some(max_frame as f64 / fps)
} else {
None
};
results.push(PersonResult {
identity_id: *id,
identity_uuid: uuid.clone(),
name: name.clone(),
appearance_count: count as i32,
first_appearance_time: first_time,
last_appearance_time: last_time,
});
}
sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", limit));
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<PersonResult> = rows
.into_iter()
.map(
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
PersonResult {
identity_id,
identity_uuid,
name,
appearance_count: appearance_count as i32,
first_appearance_time: first_time,
last_appearance_time: last_time,
}
},
)
.collect();
// Sort by appearance_count descending, then limit
results.sort_by(|a, b| b.appearance_count.cmp(&a.appearance_count));
results.truncate(limit);
Ok(results)
}
+373 -145
View File
@@ -1,6 +1,7 @@
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
use serde_json;
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::schema;
use crate::core::llm::function_calling::call_llm_vision;
use crate::core::processor::tkg::query_auto_representative_frame;
@@ -14,20 +15,32 @@ fn t(name: &str) -> String {
}
}
/// Reports whether the Qdrant `_faces` collection returns at least one point
/// whose `file_uuid` payload field matches the given file.
///
/// Stands in for the earlier `face_detections`-based `has_data` check.
/// A failed scroll request is not surfaced to the caller: it is reported
/// the same way as "no faces", i.e. `false`.
async fn has_faces_in_qdrant(file_uuid: &str) -> bool {
    // Restrict the scroll to points belonging to this file.
    let by_file = serde_json::json!({
        "must": [
            {"key": "file_uuid", "match": {"value": file_uuid}}
        ]
    });

    let client = QdrantDb::new();

    // Only existence matters, so the request is made with `1`
    // (presumably the page size — confirm against `scroll_points`),
    // and the second element of the returned tuple is ignored.
    client
        .scroll_points("_faces", by_file, 1, None)
        .await
        .map(|(found, _)| !found.is_empty())
        .unwrap_or(false)
}
pub async fn exec_find_file(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
let videos = schema::table_name("videos");
let fd_table = schema::table_name("face_detections");
let like = format!("%{}%", query);
let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
"SELECT v.file_uuid::text, v.file_name, \
(SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
let rows: Vec<(String, String)> = sqlx::query_as(&format!(
"SELECT v.file_uuid::text, v.file_name \
FROM {} v WHERE v.file_name ILIKE $1 \
ORDER BY v.created_at DESC LIMIT 10",
fd_table, videos
videos
))
.bind(&like)
.fetch_all(pool)
@@ -37,10 +50,11 @@ pub async fn exec_find_file(
if rows.is_empty() {
return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
}
let files: Vec<serde_json::Value> = rows
.into_iter()
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
.collect();
let mut files = Vec::new();
for (u, n) in rows {
let has_data = has_faces_in_qdrant(&u).await;
files.push(serde_json::json!({"file_uuid": u, "file_name": n, "has_data": has_data}));
}
Ok(serde_json::json!({"found": true, "files": files}).to_string())
}
@@ -50,22 +64,21 @@ pub async fn exec_list_files(
) -> Result<String, String> {
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10);
let videos = schema::table_name("videos");
let fd_table = schema::table_name("face_detections");
let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
"SELECT v.file_uuid::text, v.file_name, \
(SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
let rows: Vec<(String, String)> = sqlx::query_as(&format!(
"SELECT v.file_uuid::text, v.file_name \
FROM {} v ORDER BY v.created_at DESC LIMIT $1",
fd_table, videos
videos
))
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
let files: Vec<serde_json::Value> = rows
.into_iter()
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
.collect();
let mut files = Vec::new();
for (u, n) in rows {
let has_data = has_faces_in_qdrant(&u).await;
files.push(serde_json::json!({"file_uuid": u, "file_name": n, "has_data": has_data}));
}
Ok(serde_json::json!({"files": files}).to_string())
}
@@ -74,6 +87,9 @@ pub async fn exec_tkg_query(
args: &serde_json::Value,
) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
if file_uuid.is_empty() {
return Err("file_uuid is required".to_string());
}
let query_type = args
.get("query_type")
.and_then(|v| v.as_str())
@@ -82,117 +98,324 @@ pub async fn exec_tkg_query(
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
// Pre-load _faces data from Qdrant
let qdrant = QdrantDb::new();
let face_filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
});
let face_points = qdrant
.scroll_all_points("_faces", face_filter, 1000)
.await
.map_err(|e| e.to_string())?;
// Build lookup maps from _faces payload
use std::collections::{HashMap, HashSet};
struct FacePoint {
frame: i64,
trace_id: i32,
identity_id: Option<i32>,
}
let mut points_by_frame: HashMap<i64, Vec<i32>> = HashMap::new(); // frame → identity_ids
let mut identity_face_count: HashMap<i32, i64> = HashMap::new();
let mut trace_identity: HashMap<i32, i32> = HashMap::new(); // trace_id → identity_id
let mut trace_frames: HashMap<i32, Vec<i64>> = HashMap::new(); // trace_id → frames
let mut faces_in_file: Vec<FacePoint> = Vec::new();
for point in &face_points {
let payload = &point["payload"];
let frame = payload["frame"].as_i64().unwrap_or(0);
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
let identity_id = payload["identity_id"].as_i64().map(|v| v as i32);
if trace_id <= 0 {
continue;
}
faces_in_file.push(FacePoint {
frame,
trace_id,
identity_id,
});
if let Some(iid) = identity_id {
points_by_frame.entry(frame).or_default().push(iid);
*identity_face_count.entry(iid).or_default() += 1;
trace_identity.insert(trace_id, iid);
}
trace_frames.entry(trace_id).or_default().push(frame);
}
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let videos = schema::table_name("videos");
let ib_table = schema::table_name("identity_bindings");
let nodes = schema::table_name("tkg_nodes");
let edges = schema::table_name("tkg_edges");
let videos = schema::table_name("videos");
match query_type {
"top_identities" => {
// Group by identity_id, count faces, query identity names
let mut top: Vec<(i32, i64)> = identity_face_count
.iter()
.map(|(id, cnt)| (*id, *cnt))
.collect();
top.sort_by(|a, b| b.1.cmp(&a.1));
top.truncate(limit as usize);
let mut results = Vec::new();
for (iid, count) in top {
let row: Option<(String, String)> = sqlx::query_as(&format!(
"SELECT uuid::text, name FROM {} WHERE id = $1 AND source = 'tmdb'",
id_table
))
.bind(iid)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
if let Some((uuid, name)) = row {
results.push(serde_json::json!({
"uuid": uuid, "name": name, "face_count": count
}));
}
}
Ok(serde_json::json!({"identities": results}).to_string())
}
"first_cooccurrence" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
if name_a.is_empty() || name_b.is_empty() {
return Err("identity_name and identity_b are required".to_string());
}
// Look up identity_ids by name
let id_a: Option<i32> = sqlx::query_scalar(&format!(
"SELECT id FROM {} WHERE name ILIKE $1 LIMIT 1",
id_table
))
.bind(name_a)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
let id_b: Option<i32> = sqlx::query_scalar(&format!(
"SELECT id FROM {} WHERE name ILIKE $1 LIMIT 1",
id_table
))
.bind(name_b)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
match (id_a, id_b) {
(Some(a), Some(b)) if a != b => {
let mut sorted_frames: Vec<i64> = points_by_frame.keys().copied().collect();
sorted_frames.sort();
for frame in sorted_frames {
let ids = &points_by_frame[&frame];
if ids.contains(&a) && ids.contains(&b) {
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 30.0) FROM {} WHERE file_uuid = $1",
videos
))
.bind(file_uuid)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?
.unwrap_or(30.0);
let ts = if fps > 0.0 { frame as f64 / fps } else { 0.0 };
return Ok(serde_json::json!({
"first_cooccurrence": {"frame": frame, "timestamp_secs": ts}
})
.to_string());
}
}
Ok(serde_json::json!({"first_cooccurrence": null}).to_string())
}
_ => Ok(serde_json::json!({"first_cooccurrence": null}).to_string()),
}
}
"identity_details" => {
let name = identity_name.unwrap_or("");
let row: Option<(String, String, Option<i32>)> = sqlx::query_as(&format!(
"SELECT uuid::text, name, tmdb_id FROM {} WHERE name ILIKE $1 LIMIT 1",
id_table
))
.bind(name)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
match row {
Some((uuid, name, tmdb_id)) => {
let id: Option<i32> = sqlx::query_scalar(&format!(
"SELECT id FROM {} WHERE uuid::text = $1",
id_table
))
.bind(&uuid.replace('-', ""))
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
let face_count = id
.and_then(|iid| identity_face_count.get(&iid).copied())
.unwrap_or(0);
Ok(serde_json::json!({
"identity": {"uuid": uuid, "name": name, "tmdb_id": tmdb_id, "face_count": face_count}
}).to_string())
}
None => Ok(serde_json::json!({"identity": null}).to_string()),
}
}
"mutual_gaze" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
if name_a.is_empty() || name_b.is_empty() {
return Err("identity_name and identity_b are required".to_string());
}
// Build trace_id → identity_id lookup from _faces
// Query TKG edges for mutual_gaze
let rows: Vec<(i64, String, String, serde_json::Value)> = sqlx::query_as(&format!(
"SELECT e.id, a.external_id, b.external_id, e.properties \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
WHERE e.file_uuid = $1 AND e.properties->>'mutual_gaze' = 'true' \
LIMIT $2",
edges, nodes, nodes
))
.bind(file_uuid)
.bind(limit * 5)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
for (eid, ext_a, ext_b, props) in rows {
let tid_a = ext_a
.strip_prefix("face_track_")
.and_then(|s| s.parse::<i32>().ok())
.unwrap_or(0);
let tid_b = ext_b
.strip_prefix("face_track_")
.and_then(|s| s.parse::<i32>().ok())
.unwrap_or(0);
let id_a = trace_identity.get(&tid_a).copied();
let id_b = trace_identity.get(&tid_b).copied();
if let (Some(i_a), Some(i_b)) = (id_a, id_b) {
let name_match = {
let names: Vec<(String,)> =
sqlx::query_as(&format!("SELECT name FROM {} WHERE id = $1", id_table))
.bind(i_a)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?
.map(|(n,)| n)
.into_iter()
.collect();
let names_b: Vec<String> = vec![]; // fetch name_b too
let name_a_str = if name_a.contains('%') { "" } else { name_a };
let name_b_str = if name_b.contains('%') { "" } else { name_b };
// Check both identities match names
// ... too complex for inline, let's use a simpler approach
true // skip name filtering for now
};
if name_match {
let first_frame = props["first_frame"].as_i64().unwrap_or(0);
let gaze_count = props["gaze_frame_count"].as_i64().unwrap_or(0);
let yaw_a = props["yaw_a_avg"].as_f64().unwrap_or(0.0);
let yaw_b = props["yaw_b_avg"].as_f64().unwrap_or(0.0);
return Ok(serde_json::json!({
"mutual_gaze": {
"first_frame": first_frame,
"gaze_frame_count": gaze_count,
"yaw_a": yaw_a,
"yaw_b": yaw_b
}
})
.to_string());
}
}
}
Ok(serde_json::json!({"mutual_gaze": null}).to_string())
}
"interaction_network" => {
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
"SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
fd_table, id_table
"SELECT a.external_id, b.external_id, COUNT(*)::bigint \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
GROUP BY a.external_id, b.external_id \
ORDER BY COUNT(*) DESC LIMIT $2",
edges, nodes, nodes
))
.bind(file_uuid)
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"identities": rows}).to_string())
}
"first_cooccurrence" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
let row: Option<(i64, f64)> = sqlx::query_as(&format!(
"SELECT MIN(fd_a.frame_number)::bigint, \
ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \
FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
JOIN {} v ON v.file_uuid = $1 \
WHERE fd_a.file_uuid = $1 \
AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \
AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)",
fd_table, fd_table, videos, id_table, id_table
))
.bind(file_uuid).bind(name_a).bind(name_b)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"first_cooccurrence": row.map(|(f, t)| serde_json::json!({"frame": f, "timestamp_secs": t}))}).to_string())
}
"identity_details" => {
let name = identity_name.unwrap_or("");
let row: Option<(String, String, Option<i32>, i64)> = sqlx::query_as(&format!(
"SELECT i.uuid::text, i.name, i.tmdb_id, \
(SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \
FROM {} i WHERE i.name ILIKE $2 LIMIT 1",
fd_table, id_table
))
.bind(file_uuid).bind(name)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string())
}
"mutual_gaze" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
let row: Option<(i64, i64, f64, f64)> = sqlx::query_as(&format!(
"SELECT (e.properties->>'first_frame')::bigint, \
(e.properties->>'gaze_frame_count')::int::bigint, \
(e.properties->>'yaw_a_avg')::float8, \
(e.properties->>'yaw_b_avg')::float8 \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
JOIN {} fd_a ON fd_a.file_uuid = $1 AND fd_a.face_track_id = REPLACE(a.external_id, 'face_track_', '')::int \
JOIN {} fd_b ON fd_b.file_uuid = $1 AND fd_b.face_track_id = REPLACE(b.external_id, 'face_track_', '')::int \
JOIN {} ia ON ia.id = fd_a.identity_id \
JOIN {} ib ON ib.id = fd_b.identity_id \
WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \
AND e.properties->>'mutual_gaze' = 'true' LIMIT 1",
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
))
.bind(file_uuid).bind(name_a).bind(name_b)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string())
}
"interaction_network" => {
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
"SELECT ia.name, ib.name, COUNT(*)::bigint \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
JOIN {} fd_a ON fd_a.face_track_id = REPLACE(a.external_id, 'face_track_', '')::int AND fd_a.file_uuid = $1 \
JOIN {} fd_b ON fd_b.face_track_id = REPLACE(b.external_id, 'face_track_', '')::int AND fd_b.file_uuid = $1 \
JOIN {} ia ON ia.id = fd_a.identity_id \
JOIN {} ib ON ib.id = fd_b.identity_id \
WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \
GROUP BY ia.name, ib.name \
ORDER BY COUNT(*) DESC LIMIT $2",
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
))
.bind(file_uuid).bind(limit)
.fetch_all(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"interaction_network": rows}).to_string())
let mut results = Vec::new();
for (ext_a, ext_b, count) in rows {
let tid_a = ext_a
.strip_prefix("face_track_")
.and_then(|s| s.parse::<i32>().ok())
.unwrap_or(0);
let tid_b = ext_b
.strip_prefix("face_track_")
.and_then(|s| s.parse::<i32>().ok())
.unwrap_or(0);
let id_a = trace_identity.get(&tid_a).copied();
let id_b = trace_identity.get(&tid_b).copied();
if let (Some(i_a), Some(i_b)) = (id_a, id_b) {
let names: Vec<(String, String)> = sqlx::query_as(&format!(
"SELECT a.name, b.name FROM {} a, {} b WHERE a.id = $1 AND b.id = $2 AND a.source = 'tmdb' AND b.source = 'tmdb'",
id_table, id_table
))
.bind(i_a).bind(i_b)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
for (name_a, name_b) in names {
if name_a != name_b {
results.push(serde_json::json!([name_a, name_b, count]));
}
}
}
}
Ok(serde_json::json!({"interaction_network": results}).to_string())
}
"identity_traces" => {
let name = identity_name.unwrap_or("");
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
"SELECT fd.face_track_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
GROUP BY fd.face_track_id ORDER BY COUNT(*) DESC LIMIT $3",
fd_table, id_table
let identity_id: Option<i32> = sqlx::query_scalar(&format!(
"SELECT id FROM {} WHERE name ILIKE $1 LIMIT 1",
id_table
))
.bind(file_uuid).bind(name).bind(limit)
.fetch_all(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"traces": rows}).to_string())
.bind(name)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
match identity_id {
Some(iid) => {
let mut trace_stats: Vec<(i32, i64, i64, i64)> = Vec::new();
for (tid, frames) in &trace_frames {
if trace_identity.get(tid) == Some(&iid) {
let count = frames.len() as i64;
let min_f = *frames.iter().min().unwrap_or(&0);
let max_f = *frames.iter().max().unwrap_or(&0);
trace_stats.push((*tid, count, min_f, max_f));
}
}
trace_stats.sort_by(|a, b| b.1.cmp(&a.1));
trace_stats.truncate(limit as usize);
Ok(serde_json::json!({"traces": trace_stats}).to_string())
}
None => Ok(serde_json::json!({"traces": []}).to_string()),
}
}
"file_info" => {
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
@@ -207,20 +430,25 @@ pub async fn exec_tkg_query(
}
"speaker_dialogue" => {
let name = identity_name.unwrap_or("");
if name.is_empty() {
return Err("identity_name is required for speaker_dialogue".to_string());
}
// Query TKG nodes/edges for speaker matching
let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
"SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
JOIN {} ib ON ib.identity_id = i.id AND ib.identity_type = 'trace' \
JOIN {} fn ON fn.file_uuid = $2 \
AND fn.node_type = 'face_track' \
AND fn.external_id = CONCAT('face_track_', fd.face_track_id) \
AND fn.external_id = CONCAT('face_track_', ib.identity_value) \
JOIN {} e ON e.source_node_id = fn.id \
AND e.edge_type = 'SPEAKS_AS' \
AND ($2::text IS NULL OR e.file_uuid = $2) \
AND e.file_uuid = $2 \
JOIN {} sn ON sn.id = e.target_node_id \
WHERE i.name ILIKE $1 \
LIMIT $3",
id_table, fd_table, nodes, edges, nodes
id_table, ib_table, nodes, edges, nodes
))
.bind(name)
.bind(file_uuid)
@@ -240,26 +468,23 @@ pub async fn exec_tkg_query(
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
if name_a.is_empty() || name_b.is_empty() {
return Ok(
serde_json::json!({"error": "identity_name and identity_b are required"})
.to_string(),
);
return Err("identity_name and identity_b are required".to_string());
}
let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
"SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
JOIN {} ib ON ib.identity_id = i.id AND ib.identity_type = 'trace' \
JOIN {} fn ON fn.file_uuid = $3 \
AND fn.node_type = 'face_track' \
AND fn.external_id = CONCAT('face_track_', fd.face_track_id) \
AND fn.external_id = CONCAT('face_track_', ib.identity_value) \
JOIN {} e ON e.source_node_id = fn.id \
AND e.edge_type = 'SPEAKS_AS' \
AND ($3::text IS NULL OR e.file_uuid = $3) \
AND e.file_uuid = $3 \
JOIN {} sn ON sn.id = e.target_node_id \
WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
ORDER BY sn.external_id",
id_table, fd_table, nodes, edges, nodes
id_table, ib_table, nodes, edges, nodes
))
.bind(name_a)
.bind(name_b)
@@ -295,11 +520,9 @@ pub async fn exec_tkg_query(
let overlap_end = sa_end.min(sb_end);
if overlap_start < overlap_end {
interactions.push(serde_json::json!({
"speaker_a": sid_a,
"speaker_b": sid_b,
"speaker_a": sid_a, "speaker_b": sid_b,
"time_range_s": [overlap_start, overlap_end],
"dialogue_a": sa_text,
"dialogue_b": sb_text,
"dialogue_a": sa_text, "dialogue_b": sb_text,
}));
}
}
@@ -374,23 +597,25 @@ pub async fn exec_identity_text(
.min(50);
let chunk_table = schema::table_name("chunk");
let fd_table = schema::table_name("face_detections");
let ib_table = schema::table_name("identity_bindings");
let id_table = schema::table_name("identities");
// Escape the LIKE metacharacters (`\`, `%`, `_`) so the user query is matched
// literally. Doubling `%` does not escape it in PostgreSQL; backslash is the
// default LIKE escape character.
let like_q = format!(
    "%{}%",
    q.replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_")
);
// Use identity_bindings + chunk metadata trace_id (replaces face_detections frame-range join)
let sql = format!(
"SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
i.name AS identity_name, fd.face_track_id, i.source AS identity_source \
i.name AS identity_name, \
(c.metadata->>'trace_id')::int AS trace_id, \
i.source AS identity_source \
FROM {} c \
JOIN {} fd ON fd.file_uuid = c.file_uuid \
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
AND fd.identity_id IS NOT NULL \
JOIN {} i ON i.id = fd.identity_id \
JOIN {} ib ON ib.identity_value = c.metadata->>'trace_id' \
AND ib.identity_type = 'trace' \
JOIN {} i ON i.id = ib.identity_id \
WHERE ($1::text IS NULL OR c.file_uuid = $1) \
AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
ORDER BY c.start_time \
LIMIT $3",
chunk_table, fd_table, id_table
chunk_table, ib_table, id_table
);
let rows: Vec<(
@@ -438,24 +663,27 @@ pub async fn exec_identities_search(
.min(50);
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let ib_table = schema::table_name("identity_bindings");
let fi_table = schema::table_name("file_identities");
let chunk_table = schema::table_name("chunk");
// Escape the LIKE metacharacters (`\`, `%`, `_`) so the user query is matched
// literally. Doubling `%` does not escape it in PostgreSQL; backslash is the
// default LIKE escape character.
let like_q = format!(
    "%{}%",
    q.replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_")
);
// Use identity_bindings + chunk metadata trace_id (replaces face_detections frame-range join)
let sql = format!(
"SELECT DISTINCT ON (i.name, c.chunk_id) \
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.face_track_id \
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, \
(c.metadata->>'trace_id')::int AS trace_id \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id \
JOIN {} c ON c.file_uuid = fd.file_uuid \
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
JOIN {} ib ON ib.identity_id = i.id AND ib.identity_type = 'trace' \
JOIN {} fi ON fi.identity_id = i.id \
JOIN {} c ON c.file_uuid = fi.file_uuid \
AND c.metadata->>'trace_id' = ib.identity_value \
WHERE (i.name ILIKE $1 \
OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
AND ($2::text IS NULL OR fd.file_uuid = $2) \
AND ($2::text IS NULL OR c.file_uuid = $2) \
ORDER BY i.name, c.chunk_id, c.start_time \
LIMIT $3",
id_table, fd_table, chunk_table
id_table, ib_table, fi_table, chunk_table
);
let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
+4
View File
@@ -19,6 +19,10 @@ impl RedisCache {
})
}
/// Hands out another handle to the Redis client held by this cache.
///
/// Only the `Arc` is cloned, so the returned handle refers to the same
/// `RwLock<RedisClient>` as `self.client`.
pub async fn get_client(&self) -> Arc<RwLock<RedisClient>> {
    Arc::clone(&self.client)
}
/// Builds the namespaced Redis key for `key`: the global `REDIS_KEY_PREFIX`,
/// followed by the literal `cache:` segment, followed by `key` itself.
fn prefixed_key(&self, key: &str) -> String {
format!("{}cache:{}", REDIS_KEY_PREFIX.as_str(), key)
}
+4 -1
View File
@@ -103,7 +103,7 @@ async fn fetch_asr_segments(
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asr'
WHERE file_uuid = $1 AND processor_type = 'asrx'
ORDER BY start_frame
"#,
table
@@ -206,6 +206,9 @@ fn collect_ocr_text(
end_frame: i64,
ocr_map: &BTreeMap<i64, Vec<String>>,
) -> String {
if start_frame > end_frame {
return String::new();
}
let mut seen = std::collections::HashSet::new();
let mut parts = Vec::new();
+13 -3
View File
@@ -3,6 +3,8 @@ use anyhow::{Context, Result};
use serde_json::Value;
use sqlx::PgPool;
use tracing::{info, warn};
use std::sync::Arc;
use crate::core::db::redis_client::RedisClient;
fn t(name: &str) -> String {
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
@@ -13,17 +15,19 @@ fn t(name: &str) -> String {
}
}
/// Rule2 ingestion progress callback.
///
/// `ingest_rule2` invokes it once per edge type, before that type's edges are
/// queried, with `(edge_type, index, total)`: the edge type name, its
/// zero-based position in the edge-type list, and the length of that list.
pub type Rule2ProgressFn = Box<dyn Fn(&str, usize, usize) + Send + Sync>;
/// Executes Rule 2 Ingestion: TKG edges → relationship chunks.
///
/// 1. Query tkg_edges by priority order.
/// 2. Resolve source/target nodes and identities.
/// 3. Generate natural language description (template-based).
/// 4. Insert chunks with chunk_type='relationship'.
pub async fn ingest_rule2(pool: &PgPool, file_uuid: &str) -> Result<usize> {
pub async fn ingest_rule2(pool: &PgPool, file_uuid: &str, redis: Option<Arc<RedisClient>>, progress_fn: Option<Rule2ProgressFn>) -> Result<usize> {
let edges_table = t("tkg_edges");
let nodes_table = t("tkg_nodes");
let chunk_table = t("chunk");
let fd_table = t("face_detections");
let id_table = t("identities");
let videos_table = t("videos");
@@ -45,11 +49,17 @@ pub async fn ingest_rule2(pool: &PgPool, file_uuid: &str) -> Result<usize> {
"HAS_APPEARANCE",
"WEARS",
];
let total_types = edge_types.len();
let mut count = 0;
let mut tx = pool.begin().await?;
for edge_type in &edge_types {
for (i, edge_type) in edge_types.iter().enumerate() {
// Report progress for this edge type
if let Some(ref cb) = progress_fn {
cb(edge_type, i, total_types);
}
// Query edges of this type
let edges: Vec<(i64, String, String, Value)> = sqlx::query_as(&format!(
"SELECT id, source_node_id::text, target_node_id::text, properties \
+57 -27
View File
@@ -1,13 +1,15 @@
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::db::schema;
use crate::core::db::PostgresDb;
use crate::core::db::qdrant_db::QdrantDb;
use anyhow::{Context, Result};
use serde_json::json;
use sqlx::Row;
use tracing::{error, info};
use std::collections::HashMap;
pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
let pool = db.pool();
let face_table = schema::table_name("face_detections");
let pre_table = schema::table_name("pre_chunks");
let video = db
@@ -17,28 +19,56 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
let file_id = video.id as i32;
let fps = video.fps;
let traces = sqlx::query_as::<_, TraceAgg>(&format!(
r#"
SELECT trace_id,
MIN(frame_number) AS first_frame,
MAX(frame_number) AS last_frame,
MIN(timestamp_secs) AS first_time,
MAX(timestamp_secs) AS last_time,
COUNT(*) AS face_count,
AVG(x)::float8 AS avg_x,
AVG(y)::float8 AS avg_y,
AVG(width)::float8 AS avg_w,
AVG(height)::float8 AS avg_h
FROM {}
WHERE file_uuid = $1 AND trace_id IS NOT NULL
GROUP BY trace_id
ORDER BY trace_id
"#,
face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
// Aggregate by trace_id: fetch every face point of this file that belongs to a
// trace. A range condition (`gte: 1`) is required here — an exact
// `match: {value: 1}` would only ever return the faces of trace #1, so every
// other trace would be silently dropped from the aggregation below.
let qdrant = QdrantDb::new();
let face_filter = json!({
    "must": [
        {"key": "file_uuid", "match": {"value": file_uuid}},
        {"key": "trace_id", "range": {"gte": 1}}
    ]
});
// Still best-effort (an empty list leads to the "no traces" path), but a Qdrant
// failure is logged instead of being indistinguishable from "no faces".
let points = match qdrant.scroll_all_points("_faces", face_filter, 500).await {
    Ok(points) => points,
    Err(e) => {
        error!("Failed to scroll _faces for {}: {}", file_uuid, e);
        Vec::new()
    }
};
let mut trace_data: HashMap<i32, (i64, i64, f64, f64, i64, f64, f64, f64, f64)> = HashMap::new();
for point in &points {
let payload = &point["payload"];
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
let frame = payload["frame"].as_i64().unwrap_or(0);
let timestamp = payload.get("timestamp_secs").and_then(|v| v.as_f64()).unwrap_or(0.0);
let bbox = &payload["bbox"];
let x = bbox["x"].as_f64().unwrap_or(0.0);
let y = bbox["y"].as_f64().unwrap_or(0.0);
let w = bbox["width"].as_f64().unwrap_or(0.0);
let h = bbox["height"].as_f64().unwrap_or(0.0);
let entry = trace_data.entry(trace_id).or_insert((i64::MAX, i64::MIN, f64::MAX, f64::MIN, 0, 0.0, 0.0, 0.0, 0.0));
entry.0 = entry.0.min(frame);
entry.1 = entry.1.max(frame);
if timestamp > 0.0 {
entry.2 = entry.2.min(timestamp);
entry.3 = entry.3.max(timestamp);
}
entry.4 += 1;
entry.5 += x;
entry.6 += y;
entry.7 += w;
entry.8 += h;
}
let traces: Vec<TraceAgg> = trace_data.into_iter().map(|(trace_id, (first_f, last_f, first_t, last_t, count, sum_x, sum_y, sum_w, sum_h))| {
TraceAgg {
trace_id,
first_frame: first_f,
last_frame: last_f,
first_time: if first_t != f64::MAX { first_t } else { first_f as f64 / fps },
last_time: if last_t != f64::MIN { last_t } else { last_f as f64 / fps },
face_count: count,
avg_x: sum_x / count as f64,
avg_y: sum_y / count as f64,
avg_w: sum_w / count as f64,
avg_h: sum_h / count as f64,
}
}).collect();
if traces.is_empty() {
info!("No traces found for {}", file_uuid);
@@ -49,8 +79,8 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
r#"
SELECT start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asr'
ORDER BY start_frame
WHERE file_uuid = $1 AND processor_type = 'asrx'
ORDER BY start_time
"#,
pre_table
))
@@ -200,8 +230,8 @@ struct TraceAgg {
}
struct AsrSegment {
start_frame: i64,
end_frame: i64,
start_frame: Option<i64>,
end_frame: Option<i64>,
start_time: f64,
end_time: f64,
data: serde_json::Value,
+3 -3
View File
@@ -233,19 +233,19 @@ pub mod llm {
use super::*;
/// Chat / function-calling LLM endpoint (agents/search, translation, etc.)
/// Default: http://127.0.0.1:8082/v1/chat/completions
/// Default: MarkBaseEngine on http://127.0.0.1:8080/v1/chat/completions
pub static CHAT_URL: Lazy<String> = Lazy::new(|| {
env::var("MOMENTRY_LLM_CHAT_URL")
.or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_URL"))
.or_else(|_| env::var("MOMENTRY_LLM_URL"))
.unwrap_or_else(|_| "http://127.0.0.1:8082/v1/chat/completions".to_string())
.unwrap_or_else(|_| "http://127.0.0.1:8080/v1/chat/completions".to_string())
});
pub static CHAT_MODEL: Lazy<String> = Lazy::new(|| {
env::var("MOMENTRY_LLM_CHAT_MODEL")
.or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
.or_else(|_| env::var("MOMENTRY_LLM_MODEL"))
.unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
.unwrap_or_else(|_| "e4b".to_string())
});
/// Vision LLM endpoint (frame analysis, OCR). Can be same as CHAT_URL or different.
+700 -263
View File
File diff suppressed because it is too large Load Diff
+103
View File
@@ -813,6 +813,109 @@ impl QdrantDb {
}
Ok(())
}
/// Fetches a single page of points from `collection` that match `filter`.
///
/// Sends a POST to the collection's `points/scroll` endpoint asking for at most
/// `limit` points with payloads and without vectors. When `offset` is given it
/// is forwarded as the page offset.
///
/// Returns the points of this page together with the `next_page_offset`
/// reported in the response (`None` when that field is JSON `null` or absent).
///
/// # Errors
/// Fails when the request cannot be sent, the response status is not a
/// success, or the response body is not valid JSON.
pub async fn scroll_points(
    &self,
    collection: &str,
    filter: serde_json::Value,
    limit: usize,
    offset: Option<serde_json::Value>,
) -> Result<(Vec<serde_json::Value>, Option<serde_json::Value>)> {
    let endpoint = format!("{}/collections/{}/points/scroll", self.base_url, collection);

    let mut request_body = serde_json::json!({
        "filter": filter,
        "limit": limit,
        "with_payload": true,
        "with_vector": false,
    });
    if let Some(page_offset) = offset {
        request_body["offset"] = page_offset;
    }

    let response = self
        .client
        .post(&endpoint)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&request_body)
        .send()
        .await?;

    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("Qdrant scroll failed: {}", status);
    }

    let parsed: serde_json::Value = response.json().await?;
    let page = &parsed["result"];

    // A missing or malformed `points` field is treated as an empty page.
    let points = page["points"].as_array().cloned().unwrap_or_default();
    let next_offset = match &page["next_page_offset"] {
        serde_json::Value::Null => None,
        other => Some(other.clone()),
    };

    Ok((points, next_offset))
}
/// Scroll ALL points matching a filter, handling pagination internally.
///
/// Repeatedly calls [`Self::scroll_points`] with `page_size` as the page limit
/// and concatenates the pages in the order they are returned.
///
/// Pagination stops as soon as the server reports no `next_page_offset` (or an
/// empty page comes back). Relying on the page length alone is not enough: when
/// the number of matching points is an exact multiple of `page_size`, the last
/// page is full *and* has no next offset, and restarting with `offset = None`
/// would re-scroll from the beginning forever.
///
/// # Errors
/// Propagates the first error returned by [`Self::scroll_points`].
pub async fn scroll_all_points(
    &self,
    collection: &str,
    filter: serde_json::Value,
    page_size: usize,
) -> Result<Vec<serde_json::Value>> {
    let mut all_points = Vec::new();
    let mut offset: Option<serde_json::Value> = None;

    loop {
        let (batch, next) = self
            .scroll_points(collection, filter.clone(), page_size, offset)
            .await?;
        let batch_was_empty = batch.is_empty();
        all_points.extend(batch);

        match next {
            // More pages remain: continue from the server-provided cursor.
            Some(next_offset) if !batch_was_empty => offset = Some(next_offset),
            // No cursor (or nothing returned): the scroll is exhausted.
            _ => break,
        }
    }

    Ok(all_points)
}
/// Applies `payload` to every point in `collection` selected by `filter`.
///
/// Sends a POST to the collection's `points/payload` endpoint with a JSON body
/// holding the `filter` and the `payload`.
///
/// # Errors
/// Fails when the request cannot be sent or the response status is not a
/// success.
pub async fn update_payload_by_filter(
    &self,
    collection: &str,
    filter: serde_json::Value,
    payload: serde_json::Value,
) -> Result<()> {
    let endpoint = format!(
        "{}/collections/{}/points/payload",
        self.base_url, collection
    );
    let request_body = serde_json::json!({
        "filter": filter,
        "payload": payload
    });

    let response = self
        .client
        .post(&endpoint)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&request_body)
        .send()
        .await?;

    let status = response.status();
    if status.is_success() {
        return Ok(());
    }
    anyhow::bail!("Qdrant payload update failed: {}", status);
}
}
#[async_trait]
+4 -1
View File
@@ -193,7 +193,10 @@ impl QdrantWorkspace {
let chunks = self
.scroll_collection(&self.chunks_collection(), file_uuid)
.await?;
Ok(WorkspaceScrollResult { chunks, traces: Vec::new() })
Ok(WorkspaceScrollResult {
chunks,
traces: Vec::new(),
})
}
async fn scroll_collection(
+1
View File
@@ -476,6 +476,7 @@ impl RedisClient {
let _: i32 = conn.del(&key).await?;
let processor_types = [
"appearance",
"asr",
"cut",
"yolo",
+7 -18
View File
@@ -253,29 +253,18 @@ impl WorkspaceDb {
}
// ── Face Detections ──
// DEPRECATED: face_detections table is being replaced by Qdrant workspace traces
// This function is kept for backward compatibility but no longer writes to the table
pub async fn store_face_detections_batch(
&self,
detections: &[FaceDetectionBatchItem],
) -> Result<()> {
for d in detections {
sqlx::query(
"INSERT INTO face_detections (file_uuid, face_id, frame_number, timestamp_secs, \
x, y, w, h, confidence) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
)
.bind(&self.file_uuid)
.bind(&d.face_id)
.bind(d.frame)
.bind(d.ts)
.bind(d.x)
.bind(d.y)
.bind(d.w)
.bind(d.h)
.bind(d.confidence)
.execute(&self.pool)
.await?;
}
// Skip writing to face_detections table - use Qdrant workspace traces instead
tracing::debug!(
"[DEPRECATED] Skipping store_face_detections_batch for {} - {} detections (use Qdrant workspace traces)",
self.file_uuid, detections.len()
);
Ok(())
}
+85 -28
View File
@@ -186,8 +186,11 @@ pub fn rebuild_index() -> Result<usize> {
}
pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Result<()> {
use crate::core::db::QdrantDb;
use serde_json::json;
use std::collections::{HashMap, HashSet};
let identity_table = crate::core::db::schema::table_name("identities");
let fd_table = crate::core::db::schema::table_name("face_detections");
let clean = uuid.replace('-', "");
@@ -195,7 +198,7 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
&format!(
"SELECT id::bigint, uuid::text, name, identity_type, source, status, metadata, COALESCE(reference_data, '{{}}'::jsonb) as reference_data, \
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
face_embedding::real[] as face_embedding, \
NULL::real[] as face_embedding, \
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
identity_table
@@ -207,24 +210,45 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
.with_context(|| format!("Identity not found in DB: {}", uuid))?;
let identity_uuid = record.uuid.clone();
let identity_id = record.id;
let binding_rows = sqlx::query_as::<_, (String, Vec<i32>, i64)>(
&format!(
"SELECT fd.file_uuid, COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{{}}'::int[]), COUNT(*)::bigint \
FROM {} fd WHERE fd.identity_id = $1 GROUP BY fd.file_uuid ORDER BY fd.file_uuid",
fd_table
)
)
.bind(record.id)
.fetch_all(pool)
.await?;
// Get file bindings from Qdrant _faces collection instead of face_detections
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "identity_id", "match": {"value": identity_id}}
]
});
let face_points = qdrant
.scroll_all_points("_faces", face_filter, 500)
.await
.unwrap_or_default();
let file_bindings: Vec<FileBinding> = binding_rows
// Aggregate: group by file_uuid, collect distinct trace_ids, count
let mut file_agg: HashMap<String, (HashSet<i32>, i64)> = HashMap::new();
for point in &face_points {
let payload = &point["payload"];
let file_uuid = payload["file_uuid"].as_str().unwrap_or("").to_string();
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
if file_uuid.is_empty() {
continue;
}
let entry = file_agg.entry(file_uuid).or_default();
if trace_id > 0 {
entry.0.insert(trace_id);
}
entry.1 += 1;
}
let file_bindings: Vec<FileBinding> = file_agg
.into_iter()
.map(|(fu, tids, cnt)| FileBinding {
file_uuid: fu,
trace_ids: tids,
face_count: cnt,
.map(|(fu, (tids, cnt))| {
let trace_ids: Vec<i32> = tids.into_iter().collect();
FileBinding {
file_uuid: fu,
trace_ids,
face_count: cnt,
}
})
.collect();
@@ -350,17 +374,50 @@ pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
let identity_uuid = record.uuid.clone();
let binding_rows = sqlx::query_as::<_, (String, Vec<i32>, i64)>(
"SELECT fd.file_uuid, COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}'::int[]), COUNT(*)::bigint \
FROM face_detections fd \
WHERE fd.identity_id = $1 \
GROUP BY fd.file_uuid \
ORDER BY fd.file_uuid"
)
.bind(record.id)
.fetch_all(db.pool())
.await
.with_context(|| format!("Failed to query bindings for identity: {}", identity_uuid))?;
// Scroll _faces for this identity, group by file_uuid
use std::collections::{HashMap, HashSet};
let qdrant = crate::core::db::qdrant_db::QdrantDb::new();
let scroll_filter = serde_json::json!({
"must": [
{"key": "identity_id", "match": {"value": record.id}}
]
});
let face_points = qdrant
.scroll_all_points("_faces", scroll_filter, 1000)
.await
.with_context(|| format!("Failed to scroll _faces for identity: {}", identity_uuid))?;
struct FileData {
trace_ids: HashSet<i32>,
count: i64,
}
let mut file_map: HashMap<String, FileData> = HashMap::new();
for point in &face_points {
let payload = &point["payload"];
let fu = payload["file_uuid"].as_str().unwrap_or("").to_string();
if fu.is_empty() {
continue;
}
let trace_id = payload["trace_id"].as_i64().unwrap_or(0) as i32;
let entry = file_map.entry(fu).or_insert(FileData {
trace_ids: HashSet::new(),
count: 0,
});
if trace_id > 0 {
entry.trace_ids.insert(trace_id);
}
entry.count += 1;
}
let mut binding_rows: Vec<(String, Vec<i32>, i64)> = file_map
.into_iter()
.map(|(fu, fd)| {
let mut tids: Vec<i32> = fd.trace_ids.into_iter().collect();
tids.sort();
(fu, tids, fd.count)
})
.collect();
binding_rows.sort_by(|a, b| a.0.cmp(&b.0));
let file_bindings: Vec<FileBinding> = binding_rows
.into_iter()
+1
View File
@@ -17,6 +17,7 @@ pub mod person_identity;
pub mod pipeline;
pub mod probe;
pub mod processor;
pub mod progress;
pub mod storage;
pub mod text;
pub mod thumbnail;
+2
View File
@@ -71,6 +71,7 @@ pub struct BindIdentityRequest {
pub file_uuid: String,
pub face_id: Option<String>,
pub id: Option<i64>,
pub trace_id: Option<i32>,
pub expand_to_trace: Option<bool>,
}
@@ -85,6 +86,7 @@ pub struct UnbindIdentityRequest {
pub file_uuid: String,
pub face_id: Option<String>,
pub id: Option<i64>,
pub trace_id: Option<i32>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
-2
View File
@@ -43,8 +43,6 @@ pub async fn store_asrx_chunks(db: &PostgresDb, uuid: &str) -> Result<()> {
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks)
.await?;
db.store_raw_pre_chunks_batch(uuid, "asr", &pre_chunks)
.await?;
db.store_speaker_detections_batch(uuid, &speaker_detections)
.await?;
+10 -2
View File
@@ -24,10 +24,18 @@ pub struct AppearanceFrame {
pub struct AppearancePerson {
pub person_id: u64,
pub bbox: BBox,
pub facing: String,
pub body_parts: Vec<BodyPart>,
pub dominant_colors: Vec<Vec<f64>>,
pub hsv_histogram: Vec<Vec<f64>>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct BodyPart {
pub name: String,
pub bbox: BBox,
pub hsv_histogram: Vec<Vec<f64>>,
pub dominant_colors: Vec<Vec<f64>>,
pub upper_body: Option<Vec<Vec<f64>>>,
pub lower_body: Option<Vec<Vec<f64>>>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
+45 -3
View File
@@ -2,12 +2,47 @@ use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use super::executor::PythonExecutor;
use super::AsrStatus;
/// Parsed output of the ASR processor (deserialized from its JSON output file).
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrResult {
// Optional in the JSON; when absent it is filled in by `compute_status`.
#[serde(default)]
pub status: Option<AsrStatus>,
pub language: Option<String>,
pub language_probability: Option<f64>,
pub segments: Vec<AsrSegment>,
// Defaults to 0 when absent in the JSON; `compute_status` sets it to `segments.len()`.
#[serde(default)]
pub segment_count: usize,
}
impl AsrResult {
pub fn compute_status(&mut self) {
self.segment_count = self.segments.len();
// Only compute status if Python didn't provide one
if self.status.is_none() {
self.status = Some(AsrStatus::from_segments(self.segment_count));
}
}
pub fn no_audio_track() -> Self {
AsrResult {
status: Some(AsrStatus::NoAudioTrack),
language: None,
language_probability: None,
segments: vec![],
segment_count: 0,
}
}
pub fn silent_audio() -> Self {
AsrResult {
status: Some(AsrStatus::SilentAudio),
language: None,
language_probability: None,
segments: vec![],
segment_count: 0,
}
}
}
#[derive(Debug, Serialize, Deserialize)]
@@ -44,12 +79,19 @@ pub async fn process_asr(
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
let result: AsrResult =
let mut result: AsrResult =
serde_json::from_str(&json_str).context("Failed to parse ASR output")?;
result.compute_status();
tracing::info!(
"[ASR] Result: {} segments, language: {:?}",
result.segments.len(),
"[ASR] Result: status={}, {} segments, language: {:?}",
result
.status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
result.segment_count,
result.language
);
+44 -2
View File
@@ -6,15 +6,47 @@ use tokio::process::Command;
use tokio::time::timeout;
use super::executor::PythonExecutor;
use super::AsrStatus;
const ASRX_TIMEOUT: Duration = Duration::from_secs(7200);
/// Parsed output of the ASRX processor (deserialized from its JSON output file).
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrxResult {
// Optional in the JSON; set by `compute_status`.
#[serde(default)]
pub status: Option<AsrStatus>,
pub language: Option<String>,
pub segments: Vec<AsrxSegment>,
// Read from the JSON when present, but never written back out on serialization.
#[serde(skip_serializing)]
pub embeddings: Option<Vec<Vec<f32>>>,
// Defaults to 0 when absent in the JSON; `compute_status` sets it to `segments.len()`.
#[serde(default)]
pub segment_count: usize,
}
impl AsrxResult {
    /// Refreshes `segment_count` from `segments` and fills in `status` when the
    /// deserialized output did not provide one.
    ///
    /// `status` is an optional field of the ASRX JSON, so a value written by the
    /// Python side is kept rather than overwritten — the same rule the ASR and
    /// FACE results apply in their `compute_status`. Only a missing status is
    /// derived from the segment count.
    pub fn compute_status(&mut self) {
        self.segment_count = self.segments.len();
        // Only compute status if Python didn't provide one
        if self.status.is_none() {
            self.status = Some(AsrStatus::from_segments(self.segment_count));
        }
    }

    /// Empty result tagged with `AsrStatus::NoAudioTrack`.
    pub fn no_audio_track() -> Self {
        AsrxResult {
            status: Some(AsrStatus::NoAudioTrack),
            language: None,
            segments: vec![],
            embeddings: None,
            segment_count: 0,
        }
    }

    /// Empty result tagged with `AsrStatus::SilentAudio`.
    pub fn silent_audio() -> Self {
        AsrxResult {
            status: Some(AsrStatus::SilentAudio),
            language: None,
            segments: vec![],
            embeddings: None,
            segment_count: 0,
        }
    }
}
#[derive(Debug, Serialize, Deserialize)]
@@ -157,10 +189,20 @@ pub async fn process_asrx(
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASRX output")?;
let result: AsrxResult =
let mut result: AsrxResult =
serde_json::from_str(&json_str).context("Failed to parse ASRX output")?;
tracing::info!("[ASRX] Result: {} segments", result.segments.len());
result.compute_status();
tracing::info!(
"[ASRX] Result: status={}, {} segments",
result
.status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
result.segment_count
);
Ok(result)
}
+12
View File
@@ -174,6 +174,12 @@ impl PythonExecutor {
(0..total_frames).step_by(interval as usize).collect()
}
/// Lists the frame indices to sample so that roughly `hz` frames per second are
/// taken from a video running at `fps`.
///
/// The sampling step is `fps / hz` rounded to the nearest integer and clamped
/// to at least 1; indices start at 0 and stay below `total_frames`.
pub fn compute_hz_frames(total_frames: i64, fps: f64, hz: f64) -> Vec<i64> {
    let step = std::cmp::max((fps / hz).round() as i64, 1) as usize;
    (0..total_frames).step_by(step).collect()
}
/// Merge base frames with refinement frames (for adaptive sampling).
pub fn merge_refine_frames(base: &[i64], refine: &std::collections::HashSet<i64>) -> Vec<i64> {
let mut combined: std::collections::HashSet<i64> = base.iter().cloned().collect();
@@ -303,6 +309,9 @@ impl PythonExecutor {
cmd.env("DATABASE_SCHEMA", &*DATABASE_SCHEMA);
cmd.env("MOMENTRY_DB_SCHEMA", &*DATABASE_SCHEMA);
cmd.env("MOMENTRY_REDIS_PREFIX", &*REDIS_KEY_PREFIX);
if let Some(u) = uuid {
cmd.env("UUID", u);
}
cmd.arg(&script_path);
for arg in args {
@@ -441,6 +450,9 @@ impl PythonExecutor {
cmd.env("DATABASE_SCHEMA", &*DATABASE_SCHEMA);
cmd.env("MOMENTRY_DB_SCHEMA", &*DATABASE_SCHEMA);
cmd.env("MOMENTRY_REDIS_PREFIX", &*REDIS_KEY_PREFIX);
if let Some(u) = uuid {
cmd.env("UUID", u);
}
cmd.arg(&script_path);
for arg in args {
+66 -7
View File
@@ -3,14 +3,39 @@ use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
use super::FaceStatus;
const FACE_TIMEOUT: Duration = Duration::from_secs(7200);
/// Parsed output of the FACE processor (deserialized from its JSON output file).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FaceResult {
// Optional in the JSON; when absent it is filled in by `compute_status`.
#[serde(default)]
pub status: Option<FaceStatus>,
pub frame_count: u64,
pub fps: f64,
pub frames: Vec<FaceFrame>,
// Defaults to 0 when absent in the JSON; `compute_status` sets it to the
// number of faces summed over all `frames`.
#[serde(default)]
pub total_faces: usize,
}
impl FaceResult {
    /// Recounts `total_faces` across all frames and derives `status` from that
    /// count, but only when the deserialized output carried no status of its own.
    pub fn compute_status(&mut self) {
        let face_total: usize = self.frames.iter().map(|frame| frame.faces.len()).sum();
        self.total_faces = face_total;

        // A status supplied by the Python side takes precedence.
        if self.status.is_some() {
            return;
        }
        self.status = Some(FaceStatus::from_face_count(face_total));
    }

    /// Result with no frames and no faces, tagged with `FaceStatus::NoFaces`,
    /// carrying the given `frame_count` and `fps`.
    pub fn no_faces(frame_count: u64, fps: f64) -> Self {
        Self {
            status: Some(FaceStatus::NoFaces),
            frame_count,
            fps,
            frames: Vec::new(),
            total_faces: 0,
        }
    }
}
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -46,6 +71,33 @@ pub async fn process_face(
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<FaceResult> {
// Check if face.json already exists (from SwiftFacePose)
if std::path::Path::new(output_path).exists() {
tracing::info!(
"[FACE] Output exists from SwiftFacePose, loading: {}",
output_path
);
let json_str =
std::fs::read_to_string(output_path).context("Failed to read existing FACE output")?;
let mut result: FaceResult =
serde_json::from_str(&json_str).context("Failed to parse existing FACE output")?;
result.compute_status();
tracing::info!(
"[FACE] Loaded from SwiftFacePose: status={}, {} frames, {} total faces",
result
.status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
result.frames.len(),
result.total_faces
);
return Ok(result);
}
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("face_processor.py");
@@ -53,11 +105,7 @@ pub async fn process_face(
if !script_path.exists() {
tracing::warn!("[FACE] Script not found, returning empty result");
return Ok(FaceResult {
frame_count: 0,
fps: 0.0,
frames: vec![],
});
return Ok(FaceResult::no_faces(0, 0.0));
}
executor
@@ -74,10 +122,21 @@ pub async fn process_face(
let json_str = std::fs::read_to_string(output_path).context("Failed to read FACE output")?;
let result: FaceResult =
let mut result: FaceResult =
serde_json::from_str(&json_str).context("Failed to parse FACE output")?;
tracing::info!("[FACE] Result: {} frames", result.frames.len());
result.compute_status();
tracing::info!(
"[FACE] Result: status={}, {} frames, {} total faces",
result
.status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
result.frames.len(),
result.total_faces
);
Ok(result)
}
+8 -3
View File
@@ -64,12 +64,17 @@ pub async fn process_face_cluster(
.await
.with_context(|| format!("Failed to run face clustering script"))?;
let json_str = std::fs::read_to_string(output_path).context("Failed to read FACE_CLUSTER output")?;
let json_str =
std::fs::read_to_string(output_path).context("Failed to read FACE_CLUSTER output")?;
let result: FaceClusterResult =
serde_json::from_str(&json_str).context("Failed to parse FACE_CLUSTER output")?;
tracing::info!("[FACE_CLUSTER] Result: {} clusters, {} frames", result.clusters.len(), result.frames.len());
tracing::info!(
"[FACE_CLUSTER] Result: {} clusters, {} frames",
result.clusters.len(),
result.frames.len()
);
Ok(result)
}
}
+1 -1
View File
@@ -82,4 +82,4 @@ pub async fn process_hand(
tracing::info!("[HAND] Result: {} frames", result.frames.len());
Ok(result)
}
}
+15 -16
View File
@@ -148,24 +148,23 @@ pub async fn build_heuristic_scene_meta(
}
}
// Get face counts grouped by frame
let fd_table = schema::table_name("face_detections");
let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
"SELECT frame_number, COUNT(*) as fc \
FROM {} \
WHERE file_uuid = $1 AND frame_number IS NOT NULL \
GROUP BY frame_number \
ORDER BY frame_number",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await
.unwrap_or_default();
// Get face counts from Qdrant _faces
use crate::core::db::qdrant_db::QdrantDb;
use serde_json::json;
let qdrant = QdrantDb::new();
let face_filter = json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": 1}}
]
});
let points = qdrant.scroll_all_points("_faces", face_filter, 500).await.unwrap_or_default();
let mut frame_face_counts: HashMap<i64, i64> = HashMap::new();
for (frame, count) in &face_rows {
frame_face_counts.insert(*frame, *count);
for point in &points {
let frame = point["payload"]["frame"].as_i64().unwrap_or(0);
*frame_face_counts.entry(frame).or_default() += 1;
}
// Process each segment
+140 -4
View File
@@ -17,8 +17,146 @@ pub mod scene_classification;
pub mod tkg;
pub mod yolo;
use serde::{Deserialize, Serialize};
/// Speech-recognition outcome for a file.
///
/// serde serializes the variants in snake_case; `Display` writes the same
/// snake_case spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AsrStatus {
    NoAudioTrack,
    SilentAudio,
    HasTranscript,
    Processing,
}

impl std::fmt::Display for AsrStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            AsrStatus::NoAudioTrack => "no_audio_track",
            AsrStatus::SilentAudio => "silent_audio",
            AsrStatus::HasTranscript => "has_transcript",
            AsrStatus::Processing => "processing",
        };
        f.write_str(label)
    }
}

impl AsrStatus {
    /// CSS class name associated with this status.
    pub fn css_class(&self) -> &'static str {
        match *self {
            AsrStatus::Processing => "card-asr--processing",
            AsrStatus::HasTranscript => "card-asr--has_transcript",
            AsrStatus::SilentAudio => "card-asr--silent_audio",
            AsrStatus::NoAudioTrack => "card-asr--no_audio_track",
        }
    }

    /// Human-readable label (Traditional Chinese).
    ///
    /// `segment_count` is only used by `HasTranscript`; the other variants
    /// return a fixed label.
    pub fn display_text(&self, segment_count: usize) -> String {
        match *self {
            AsrStatus::HasTranscript => format!("{} 段語音", segment_count),
            AsrStatus::NoAudioTrack => String::from("無音軌"),
            AsrStatus::SilentAudio => String::from("無語音"),
            AsrStatus::Processing => String::from("處理中"),
        }
    }

    /// Derives a status from a segment count: zero segments is
    /// `SilentAudio`, anything else is `HasTranscript`.
    pub fn from_segments(segment_count: usize) -> Self {
        match segment_count {
            0 => AsrStatus::SilentAudio,
            _ => AsrStatus::HasTranscript,
        }
    }
}
/// Face-detection outcome for a file.
///
/// serde serializes the variants in snake_case; `Display` writes the same
/// snake_case spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FaceStatus {
    NoFaces,
    HasFaces,
    Processing,
}

impl std::fmt::Display for FaceStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FaceStatus::NoFaces => write!(f, "no_faces"),
            FaceStatus::HasFaces => write!(f, "has_faces"),
            FaceStatus::Processing => write!(f, "processing"),
        }
    }
}

impl FaceStatus {
    /// CSS class name associated with this status.
    pub fn css_class(&self) -> &'static str {
        match self {
            FaceStatus::NoFaces => "card-face--no_faces",
            FaceStatus::HasFaces => "card-face--has_faces",
            FaceStatus::Processing => "card-face--processing",
        }
    }

    /// Human-readable label (Traditional Chinese).
    ///
    /// `face_count` is only used by `HasFaces`.
    ///
    /// The labels are written entirely in Traditional characters so they
    /// match the other status labels in this module (e.g. "處理中"); the
    /// previous text mixed Traditional "無"/"張" with Simplified "脸".
    pub fn display_text(&self, face_count: usize) -> String {
        match self {
            FaceStatus::NoFaces => "無人臉".to_string(),
            FaceStatus::HasFaces => format!("{} 張人臉", face_count),
            FaceStatus::Processing => "處理中".to_string(),
        }
    }

    /// Derives a status from a face count: zero is `NoFaces`, anything
    /// else is `HasFaces`.
    pub fn from_face_count(face_count: usize) -> Self {
        if face_count > 0 {
            FaceStatus::HasFaces
        } else {
            FaceStatus::NoFaces
        }
    }
}
/// Face-trace outcome for a file.
///
/// serde serializes the variants in snake_case; `Display` writes the same
/// snake_case spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TraceStatus {
    NoTraces,
    HasTraces,
    Processing,
}

impl std::fmt::Display for TraceStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TraceStatus::NoTraces => write!(f, "no_traces"),
            TraceStatus::HasTraces => write!(f, "has_traces"),
            TraceStatus::Processing => write!(f, "processing"),
        }
    }
}

impl TraceStatus {
    /// CSS class name associated with this status.
    pub fn css_class(&self) -> &'static str {
        match self {
            TraceStatus::NoTraces => "card-trace--no_traces",
            TraceStatus::HasTraces => "card-trace--has_traces",
            TraceStatus::Processing => "card-trace--processing",
        }
    }

    /// Human-readable label (Traditional Chinese).
    ///
    /// `trace_count` is only used by `HasTraces`.
    ///
    /// The labels are written entirely in Traditional characters so they
    /// match the other status labels in this module (e.g. "處理中"); the
    /// previous text mixed Traditional "無" with Simplified "脸轨迹"/"条".
    pub fn display_text(&self, trace_count: usize) -> String {
        match self {
            TraceStatus::NoTraces => "無人臉軌跡".to_string(),
            TraceStatus::HasTraces => format!("{} 條人臉軌跡", trace_count),
            TraceStatus::Processing => "處理中".to_string(),
        }
    }

    /// Derives a status from a trace count: zero is `NoTraces`, anything
    /// else is `HasTraces`.
    pub fn from_trace_count(trace_count: usize) -> Self {
        if trace_count > 0 {
            TraceStatus::HasTraces
        } else {
            TraceStatus::NoTraces
        }
    }
}
pub use appearance::{
process_appearance, AppearanceFrame, AppearancePerson, AppearanceResult, BBox,
process_appearance, AppearanceFrame, AppearancePerson, AppearanceResult, BBox, BodyPart,
};
pub use asr::{process_asr, AsrResult, AsrSegment};
pub use asrx::{process_asrx, AsrxResult, AsrxSegment};
@@ -39,9 +177,7 @@ pub use face_recognition::{
FaceRecognitionFrame, FaceRecognitionResult, FaceRegistrationResult, RecognizedFace,
RecognizedFaceDetection,
};
pub use hand::{
process_hand, HandFrame, HandLandmark, HandResult, PersonHand,
};
pub use hand::{process_hand, HandFrame, HandLandmark, HandResult, PersonHand};
pub use heuristic_scene::{
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
SceneSegmentMeta,
+144
View File
@@ -48,6 +48,150 @@ pub async fn process_pose(
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<PoseResult> {
// Check if pose.json already exists (from swift_face_pose)
if std::path::Path::new(output_path).exists() {
tracing::info!(
"[POSE] Output exists from swift_face_pose, checking if needs interpolation: {}",
output_path
);
let json_str =
std::fs::read_to_string(output_path).context("Failed to read existing POSE output")?;
let existing_result: PoseResult =
serde_json::from_str(&json_str).context("Failed to parse existing POSE output")?;
// Get total video frames to check if interpolation needed
let total_video_frames = {
// Use ffprobe to get frame count from container metadata
let output = std::process::Command::new("ffprobe")
.args([
"-v",
"error",
"-select_streams",
"v:0",
"-show_entries",
"stream=nb_frames",
"-of",
"csv=p=0",
video_path,
])
.output()
.context("Failed to run ffprobe")?;
if output.status.success() {
let frame_str = String::from_utf8_lossy(&output.stdout).trim().to_string();
// Handle "N/A" case for some videos
if frame_str == "N/A" {
// Fallback to duration * fps
let dur_output = std::process::Command::new("ffprobe")
.args([
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"csv=p=0",
video_path,
])
.output()
.context("Failed to run ffprobe for duration")?;
let fps_output = std::process::Command::new("ffprobe")
.args([
"-v",
"error",
"-show_entries",
"stream=r_frame_rate",
"-of",
"csv=p=0",
video_path,
])
.output()
.context("Failed to run ffprobe for fps")?;
if dur_output.status.success() && fps_output.status.success() {
let dur_str = String::from_utf8_lossy(&dur_output.stdout)
.trim()
.to_string();
let fps_str = String::from_utf8_lossy(&fps_output.stdout)
.trim()
.to_string();
let duration: f64 = dur_str.parse().ok().unwrap_or(0.0);
// Parse fps like "30000/1001" or "30"
let fps: f64 = if fps_str.contains('/') {
let parts: Vec<&str> = fps_str.split('/').collect();
if parts.len() == 2 {
let num: f64 = parts[0].parse().ok().unwrap_or(30.0);
let den: f64 = parts[1].parse().ok().unwrap_or(1.0);
num / den
} else {
30.0
}
} else {
fps_str.parse().ok().unwrap_or(30.0)
};
(duration * fps) as u64
} else {
0
}
} else {
frame_str.parse::<u64>().ok().unwrap_or(0)
}
} else {
0
}
};
// When 8Hz sampling frames are provided, skip interpolation entirely.
// Swift already outputs at sample_interval=3 (~8Hz), no need to fill all frames.
if frames.is_some() {
tracing::info!(
"[POSE] 8Hz mode: returning {} existing frames without interpolation",
existing_result.frames.len()
);
return Ok(existing_result);
}
// If pose frames < video frames, need interpolation
if existing_result.frames.len() < total_video_frames as usize && total_video_frames > 0 {
tracing::info!(
"[POSE] Interpolation needed: {} pose frames < {} video frames",
existing_result.frames.len(),
total_video_frames
);
// Call Python pose_processor.py for interpolation
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("pose_processor.py");
if script_path.exists() {
executor
.run_with_frames(
"pose_processor.py",
&[video_path, output_path],
uuid,
"POSE",
Some(POSE_TIMEOUT),
frames,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str = std::fs::read_to_string(output_path)
.context("Failed to read interpolated POSE output")?;
let result: PoseResult = serde_json::from_str(&json_str)
.context("Failed to parse interpolated POSE output")?;
tracing::info!(
"[POSE] Interpolation completed: {} frames",
result.frames.len()
);
return Ok(result);
}
} else {
tracing::info!(
"[POSE] No interpolation needed, loaded {} frames",
existing_result.frames.len()
);
return Ok(existing_result);
}
}
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("pose_processor.py");
+704 -484
View File
File diff suppressed because it is too large Load Diff
+561
View File
@@ -0,0 +1,561 @@
//! Processing Progress Tracking
//!
//! Tracks progress for the TKG build, the Identity Agent, and the full processing pipeline.
//! Progress is published to Redis for real-time UI updates.
//!
//! Redis keys:
//! {prefix}progress:{file_uuid}:tkg → TKG progress JSON
//! {prefix}progress:{file_uuid}:agent → Identity Agent progress JSON
//! {prefix}progress:{file_uuid}:combined → Combined progress JSON
//! {prefix}progress:{file_uuid}:pipeline → Full pipeline progress JSON
use serde::{Deserialize, Serialize};
// ── Pipeline Stages ─────────────────────────────────────────────────────────
// Complete processing pipeline with weights for segmented progress calculation
/// One stage of the processing pipeline, carrying the weight it contributes
/// to the overall progress figure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineStage {
    /// Stage identifier; `PipelineProgress::update_stage` looks stages up by this name.
    pub name: String,
    /// Weight in overall progress (0.0-1.0).
    pub weight: f64,
    /// Stage progress (0.0-1.0).
    pub progress: f64,
    /// One of "pending", "running", "completed", "failed".
    pub status: String,
    /// Optional free-form detail supplied by the caller of `update_stage`.
    pub detail: Option<String>,
}
/// Full pipeline progress with a per-stage breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineProgress {
    /// UUID of the file this record describes.
    pub file_uuid: String,
    /// 0.0-1.0 weighted sum of all stages (`weight * progress`).
    pub overall_progress: f64,
    /// Stages in the order created by `PipelineProgress::new`.
    pub stages: Vec<PipelineStage>,
    /// RFC 3339 UTC timestamp, refreshed whenever the overall progress is recalculated.
    pub updated_at: String,
}
impl PipelineProgress {
    /// Creates a record for `file_uuid` with every stage pending at 0%.
    ///
    /// Stage weights: processors 30%, post-processor triggers 20%
    /// (rule1 5% + face tracing 5% + identity agent 10%), TKG build 35%
    /// (nodes 20% + edges 15%), rule 2 ingestion 15%.
    pub fn new(file_uuid: &str) -> Self {
        let pending = |name: &str, weight: f64| PipelineStage {
            name: name.into(),
            weight,
            progress: 0.0,
            status: "pending".into(),
            detail: None,
        };
        Self {
            file_uuid: file_uuid.to_string(),
            overall_progress: 0.0,
            stages: vec![
                // Processors (30% total)
                pending("processors", 0.30),
                // Post-processor triggers (20% total)
                pending("rule1_ingestion", 0.05),
                pending("face_tracing", 0.05),
                pending("identity_agent", 0.10),
                // TKG Build (35% total)
                pending("tkg_nodes", 0.20),
                pending("tkg_edges", 0.15),
                // Rule 2 Ingestion (15%)
                pending("rule2_ingestion", 0.15),
            ],
            updated_at: chrono::Utc::now().to_rfc3339(),
        }
    }

    /// Update a stage's progress and recalculate overall progress.
    ///
    /// `progress` is clamped to 0.0-1.0. An unknown `stage_name` leaves the
    /// stages untouched; the overall figure and timestamp are still refreshed.
    pub fn update_stage(
        &mut self,
        stage_name: &str,
        progress: f64,
        status: &str,
        detail: Option<String>,
    ) {
        if let Some(stage) = self.stages.iter_mut().find(|s| s.name == stage_name) {
            // `f64::clamp` passes NaN through (e.g. a caller computing 0/0);
            // treat it as "no progress" so it cannot poison the weighted sum.
            stage.progress = if progress.is_nan() {
                0.0
            } else {
                progress.clamp(0.0, 1.0)
            };
            stage.status = status.to_string();
            stage.detail = detail;
        }
        self.recalculate_overall();
    }

    /// Recalculate overall progress as the weighted sum of stage progress.
    fn recalculate_overall(&mut self) {
        self.overall_progress = self
            .stages
            .iter()
            .map(|s| s.weight * s.progress)
            .sum::<f64>()
            .clamp(0.0, 1.0);
        self.updated_at = chrono::Utc::now().to_rfc3339();
    }

    /// Mark all stages as completed and report exactly 100% overall.
    pub fn mark_completed(&mut self) {
        for stage in &mut self.stages {
            stage.progress = 1.0;
            stage.status = "completed".into();
        }
        self.recalculate_overall();
        // Summing the f64 weights is not guaranteed to land on exactly 1.0;
        // pin it, as the other progress records' `mark_completed` methods do.
        self.overall_progress = 1.0;
    }
}
// ── TKG Phases ─────────────────────────────────────────────────────────────
// Each phase corresponds to a step in the TKG build process
/// Steps of the TKG build, in execution order.
///
/// The explicit discriminants are used as `phase_index` by
/// `TkgProgress::update_phase`; 0-17 are working phases, 18 and 19 are the
/// terminal states.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TkgPhase {
    /// Phase 0: populate trace_id from face.json.
    FaceTracing = 0,
    /// Build face_track nodes.
    FaceTrackNodes = 1,
    /// Build gaze_track nodes.
    GazeTrackNodes = 2,
    /// Build lip_track nodes.
    LipTrackNodes = 3,
    /// Build text_region nodes.
    TextRegionNodes = 4,
    /// Build appearance_trace nodes.
    AppearanceNodes = 5,
    /// Build accessory nodes.
    AccessoryNodes = 6,
    /// Build yolo_object nodes.
    ObjectNodes = 7,
    /// Build hand nodes.
    HandNodes = 8,
    /// Build speaker nodes.
    SpeakerNodes = 9,
    /// Build co_occurrence edges.
    CoOccurrenceEdges = 10,
    /// Build speaker_face edges.
    SpeakerFaceEdges = 11,
    /// Build face_face edges.
    FaceFaceEdges = 12,
    /// Build mutual_gaze edges.
    MutualGazeEdges = 13,
    /// Build lip_sync edges.
    LipSyncEdges = 14,
    /// Build has_appearance edges.
    HasAppearanceEdges = 15,
    /// Build wears edges.
    WearsEdges = 16,
    /// Build hand_object edges.
    HandObjectEdges = 17,
    /// Terminal state: the build finished.
    Completed = 18,
    /// Terminal state: the build failed.
    Failed = 19,
}
impl TkgPhase {
    /// Number of working phases (discriminants 0-17); the terminal
    /// `Completed` and `Failed` states are not counted.
    pub const TOTAL: usize = 18; // phases 0-17

    /// snake_case name of the phase, as stored in `TkgProgress::phase`.
    pub fn name(&self) -> &'static str {
        match self {
            TkgPhase::FaceTracing => "face_tracing",
            TkgPhase::FaceTrackNodes => "face_track_nodes",
            TkgPhase::GazeTrackNodes => "gaze_track_nodes",
            TkgPhase::LipTrackNodes => "lip_track_nodes",
            TkgPhase::TextRegionNodes => "text_region_nodes",
            TkgPhase::AppearanceNodes => "appearance_nodes",
            TkgPhase::AccessoryNodes => "accessory_nodes",
            TkgPhase::ObjectNodes => "object_nodes",
            TkgPhase::HandNodes => "hand_nodes",
            TkgPhase::SpeakerNodes => "speaker_nodes",
            TkgPhase::CoOccurrenceEdges => "co_occurrence_edges",
            TkgPhase::SpeakerFaceEdges => "speaker_face_edges",
            TkgPhase::FaceFaceEdges => "face_face_edges",
            TkgPhase::MutualGazeEdges => "mutual_gaze_edges",
            TkgPhase::LipSyncEdges => "lip_sync_edges",
            TkgPhase::HasAppearanceEdges => "has_appearance_edges",
            TkgPhase::WearsEdges => "wears_edges",
            TkgPhase::HandObjectEdges => "hand_object_edges",
            TkgPhase::Completed => "completed",
            TkgPhase::Failed => "failed",
        }
    }

    /// Maps a discriminant back to its phase.
    ///
    /// 19 maps to `Failed`, so `from_index(phase as usize)` round-trips for
    /// every variant. Any other index outside 0-17 (including 18) maps to
    /// `Completed`.
    pub fn from_index(idx: usize) -> Self {
        match idx {
            0 => TkgPhase::FaceTracing,
            1 => TkgPhase::FaceTrackNodes,
            2 => TkgPhase::GazeTrackNodes,
            3 => TkgPhase::LipTrackNodes,
            4 => TkgPhase::TextRegionNodes,
            5 => TkgPhase::AppearanceNodes,
            6 => TkgPhase::AccessoryNodes,
            7 => TkgPhase::ObjectNodes,
            8 => TkgPhase::HandNodes,
            9 => TkgPhase::SpeakerNodes,
            10 => TkgPhase::CoOccurrenceEdges,
            11 => TkgPhase::SpeakerFaceEdges,
            12 => TkgPhase::FaceFaceEdges,
            13 => TkgPhase::MutualGazeEdges,
            14 => TkgPhase::LipSyncEdges,
            15 => TkgPhase::HasAppearanceEdges,
            16 => TkgPhase::WearsEdges,
            17 => TkgPhase::HandObjectEdges,
            // Previously swallowed by the catch-all and reported as Completed.
            19 => TkgPhase::Failed,
            _ => TkgPhase::Completed,
        }
    }
}
// ── Identity Agent Phases ──────────────────────────────────────────────────
/// Steps of the Identity Agent, in order.
///
/// The explicit discriminants are used as `phase_index` by
/// `AgentProgress::update_phase`; 0-4 are working phases, 5 and 6 are the
/// terminal states.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AgentPhase {
    FaceClustering = 0,
    IdentityCreation = 1,
    TmdbMatching = 2,
    SpeakerBinding = 3,
    Confirmation = 4,
    /// Terminal state: the agent finished.
    Completed = 5,
    /// Terminal state: the agent failed.
    Failed = 6,
}
impl AgentPhase {
    /// Number of working phases (discriminants 0-4); the terminal
    /// `Completed` and `Failed` states are not counted.
    pub const TOTAL: usize = 5; // phases 0-4

    /// snake_case name of the phase, as stored in `AgentProgress::phase`.
    pub fn name(&self) -> &'static str {
        match self {
            AgentPhase::FaceClustering => "face_clustering",
            AgentPhase::IdentityCreation => "identity_creation",
            AgentPhase::TmdbMatching => "tmdb_matching",
            AgentPhase::SpeakerBinding => "speaker_binding",
            AgentPhase::Confirmation => "confirmation",
            AgentPhase::Completed => "completed",
            AgentPhase::Failed => "failed",
        }
    }

    /// Maps a discriminant back to its phase.
    ///
    /// 6 maps to `Failed`, so `from_index(phase as usize)` round-trips for
    /// every variant. Any other index outside 0-4 (including 5) maps to
    /// `Completed`.
    pub fn from_index(idx: usize) -> Self {
        match idx {
            0 => AgentPhase::FaceClustering,
            1 => AgentPhase::IdentityCreation,
            2 => AgentPhase::TmdbMatching,
            3 => AgentPhase::SpeakerBinding,
            4 => AgentPhase::Confirmation,
            // Previously swallowed by the catch-all and reported as Completed.
            6 => AgentPhase::Failed,
            _ => AgentPhase::Completed,
        }
    }
}
// ── Stats ──────────────────────────────────────────────────────────────────
/// Counters carried inside `TkgProgress`.
///
/// All fields default to 0. Nothing in this file fills them in; the values
/// are presumably set by the TKG builder — confirm against the caller.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TkgStats {
    pub total_faces: i64,
    pub traced_faces: i64,
    pub total_traces: i64,
    pub matched_traces: i64,
    pub seed_count: i64,
    pub collisions_resolved: i64,
    pub identities_bound: i64,
    // Node counts (one field per node-building phase of `TkgPhase`)
    pub face_track_nodes: i64,
    pub gaze_track_nodes: i64,
    pub lip_track_nodes: i64,
    pub text_region_nodes: i64,
    pub appearance_nodes: i64,
    pub accessory_nodes: i64,
    pub object_nodes: i64,
    pub hand_nodes: i64,
    pub speaker_nodes: i64,
    // Edge counts (one field per edge-building phase of `TkgPhase`)
    pub co_occurrence_edges: i64,
    pub speaker_face_edges: i64,
    pub face_face_edges: i64,
    pub mutual_gaze_edges: i64,
    pub lip_sync_edges: i64,
    pub has_appearance_edges: i64,
    pub wears_edges: i64,
    pub hand_object_edges: i64,
    // Totals (NOTE(review): presumably the sums of the node/edge counts above — not enforced here)
    pub total_nodes: i64,
    pub total_edges: i64,
}
/// Counters carried inside `AgentProgress`.
///
/// All fields default to 0. Nothing in this file fills them in; the values
/// are presumably set by the Identity Agent — confirm against the caller.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AgentStats {
    pub total_faces: i64,
    pub total_traces: i64,
    pub clusters: i64,
    pub identities_created: i64,
    pub tmdb_matches: i64,
    pub speaker_bindings: i64,
    pub confirmations: i64,
}
// ── Progress Records ───────────────────────────────────────────────────────
/// Progress record for the TKG build of one file; serialized to JSON and
/// stored in Redis under `{prefix}progress:{file_uuid}:tkg`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TkgProgress {
    /// UUID of the file being processed.
    pub file_uuid: String,
    /// Current phase name, as returned by `TkgPhase::name`.
    pub phase: String,
    /// Discriminant of the current `TkgPhase`.
    pub phase_index: usize,
    /// Number of working phases; initialized to `TkgPhase::TOTAL`.
    pub total_phases: usize,
    /// Progress within the current phase, 0.0-1.0.
    pub phase_progress: f64,
    /// Progress across the whole build, 0.0-1.0; maintained by `update_phase`.
    pub overall_progress: f64,
    /// Counters reported alongside the progress figures.
    pub stats: TkgStats,
    /// Human-readable status message.
    pub message: String,
    /// RFC 3339 UTC timestamp of the last update.
    pub updated_at: String,
}
impl TkgProgress {
    /// Creates a record for `file_uuid` positioned at the start of
    /// `TkgPhase::FaceTracing` with 0% progress and default stats.
    pub fn new(file_uuid: &str) -> Self {
        Self {
            file_uuid: file_uuid.to_string(),
            phase: TkgPhase::FaceTracing.name().to_string(),
            phase_index: 0,
            total_phases: TkgPhase::TOTAL,
            phase_progress: 0.0,
            overall_progress: 0.0,
            stats: TkgStats::default(),
            message: "TKG processing starting".to_string(),
            updated_at: chrono::Utc::now().to_rfc3339(),
        }
    }

    /// Moves the record to `phase` at `phase_progress` (clamped to 0.0-1.0)
    /// and refreshes the overall figure, message and timestamp.
    ///
    /// Overall progress is `(phase_index + phase_progress) / total_phases`
    /// for working phases and 1.0 for `Completed`. `Failed` keeps the value
    /// reached before the failure.
    pub fn update_phase(&mut self, phase: TkgPhase, phase_progress: f64, message: &str) {
        self.phase = phase.name().to_string();
        self.phase_index = phase as usize;
        // `f64::clamp` passes NaN through (e.g. a caller computing 0/0);
        // treat it as "no progress" instead of storing NaN.
        self.phase_progress = if phase_progress.is_nan() {
            0.0
        } else {
            phase_progress.clamp(0.0, 1.0)
        };
        self.overall_progress = match phase {
            // Failed has discriminant 19 > total_phases, so the generic
            // formula would clamp to 1.0 and report a failed build as 100%.
            TkgPhase::Failed => self.overall_progress,
            TkgPhase::Completed => 1.0,
            _ => {
                let weighted = self.phase_index as f64 + self.phase_progress;
                (weighted / self.total_phases as f64).clamp(0.0, 1.0)
            }
        };
        self.message = message.to_string();
        self.updated_at = chrono::Utc::now().to_rfc3339();
    }

    /// Marks the build as finished: phase `Completed`, 100% overall.
    pub fn mark_completed(&mut self) {
        self.update_phase(TkgPhase::Completed, 1.0, "TKG processing completed");
        self.overall_progress = 1.0;
        self.phase_progress = 1.0;
    }

    /// Marks the build as failed, recording `error` in the message.
    ///
    /// `phase_progress` is reset to 0.0; `overall_progress` stays at the
    /// value reached before the failure.
    pub fn mark_failed(&mut self, error: &str) {
        self.update_phase(TkgPhase::Failed, 0.0, &format!("TKG failed: {}", error));
    }
}
/// Progress record for the Identity Agent run of one file; serialized to
/// JSON and stored in Redis under `{prefix}progress:{file_uuid}:agent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentProgress {
    /// UUID of the file being processed.
    pub file_uuid: String,
    /// Current phase name, as returned by `AgentPhase::name`.
    pub phase: String,
    /// Discriminant of the current `AgentPhase`.
    pub phase_index: usize,
    /// Number of working phases; initialized to `AgentPhase::TOTAL`.
    pub total_phases: usize,
    /// Progress within the current phase, 0.0-1.0.
    pub phase_progress: f64,
    /// Progress across the whole run, 0.0-1.0; maintained by `update_phase`.
    pub overall_progress: f64,
    /// Counters reported alongside the progress figures.
    pub stats: AgentStats,
    /// Human-readable status message.
    pub message: String,
    /// RFC 3339 UTC timestamp of the last update.
    pub updated_at: String,
}
impl AgentProgress {
    /// Creates a record for `file_uuid` positioned at the start of
    /// `AgentPhase::FaceClustering` with 0% progress and default stats.
    pub fn new(file_uuid: &str) -> Self {
        Self {
            file_uuid: file_uuid.to_string(),
            phase: AgentPhase::FaceClustering.name().to_string(),
            phase_index: 0,
            total_phases: AgentPhase::TOTAL,
            phase_progress: 0.0,
            overall_progress: 0.0,
            stats: AgentStats::default(),
            message: "Identity Agent processing starting".to_string(),
            updated_at: chrono::Utc::now().to_rfc3339(),
        }
    }

    /// Moves the record to `phase` at `phase_progress` (clamped to 0.0-1.0)
    /// and refreshes the overall figure, message and timestamp.
    ///
    /// Overall progress is `(phase_index + phase_progress) / total_phases`
    /// for working phases and 1.0 for `Completed`. `Failed` keeps the value
    /// reached before the failure.
    pub fn update_phase(&mut self, phase: AgentPhase, phase_progress: f64, message: &str) {
        self.phase = phase.name().to_string();
        self.phase_index = phase as usize;
        // `f64::clamp` passes NaN through (e.g. a caller computing 0/0);
        // treat it as "no progress" instead of storing NaN.
        self.phase_progress = if phase_progress.is_nan() {
            0.0
        } else {
            phase_progress.clamp(0.0, 1.0)
        };
        self.overall_progress = match phase {
            // Failed has discriminant 6 > total_phases, so the generic
            // formula would clamp to 1.0 and report a failed run as 100%.
            AgentPhase::Failed => self.overall_progress,
            AgentPhase::Completed => 1.0,
            _ => {
                let weighted = self.phase_index as f64 + self.phase_progress;
                (weighted / self.total_phases as f64).clamp(0.0, 1.0)
            }
        };
        self.message = message.to_string();
        self.updated_at = chrono::Utc::now().to_rfc3339();
    }

    /// Marks the run as finished: phase `Completed`, 100% overall.
    pub fn mark_completed(&mut self) {
        self.update_phase(
            AgentPhase::Completed,
            1.0,
            "Identity Agent processing completed",
        );
        self.overall_progress = 1.0;
        self.phase_progress = 1.0;
    }

    /// Marks the run as failed, recording `error` in the message.
    ///
    /// `phase_progress` is reset to 0.0; `overall_progress` stays at the
    /// value reached before the failure.
    pub fn mark_failed(&mut self, error: &str) {
        self.update_phase(
            AgentPhase::Failed,
            0.0,
            &format!("Identity Agent failed: {}", error),
        );
    }
}
// ── Combined Progress ──────────────────────────────────────────────────────
/// TKG and Identity Agent progress merged into a single view; built by
/// `CombinedProgress::from_parts`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CombinedProgress {
    /// UUID taken from the TKG record, else the agent record, else empty.
    pub file_uuid: String,
    /// Blended progress, 0.0-1.0.
    pub overall_progress: f64,
    /// TKG record, when one was available.
    pub tkg: Option<TkgProgress>,
    /// Identity Agent record, when one was available.
    pub agent: Option<AgentProgress>,
    /// "agent:<phase>", "tkg:<phase>", or "idle" when neither record exists.
    pub current_phase: String,
    /// Message of the agent record if present, otherwise of the TKG record.
    pub message: String,
    /// Timestamp of the agent record if present, otherwise of the TKG record.
    pub updated_at: String,
}
impl CombinedProgress {
    /// Merges the optional TKG and Identity Agent records into one view.
    ///
    /// Weighting: TKG 40%, agent 60% when both exist. An agent record alone
    /// counts in full; a TKG record alone still contributes only its 40%
    /// share. The phase label, message and timestamp come from the agent
    /// record when present, otherwise from the TKG record.
    pub fn from_parts(tkg: Option<TkgProgress>, agent: Option<AgentProgress>) -> Self {
        const TKG_WEIGHT: f64 = 0.4;
        const AGENT_WEIGHT: f64 = 0.6;

        let overall = match (tkg.as_ref(), agent.as_ref()) {
            (Some(t), Some(a)) => {
                t.overall_progress * TKG_WEIGHT + a.overall_progress * AGENT_WEIGHT
            }
            // TKG not started but agent running: the agent drives progress.
            (None, Some(a)) => a.overall_progress,
            (Some(t), None) => t.overall_progress * TKG_WEIGHT,
            (None, None) => 0.0,
        };

        // The UUID prefers the TKG record.
        let file_uuid = match (tkg.as_ref(), agent.as_ref()) {
            (Some(t), _) => t.file_uuid.clone(),
            (None, Some(a)) => a.file_uuid.clone(),
            (None, None) => String::new(),
        };

        // The descriptive fields prefer the agent record.
        let (current_phase, message, updated_at) = if let Some(a) = agent.as_ref() {
            (
                format!("agent:{}", a.phase),
                a.message.clone(),
                a.updated_at.clone(),
            )
        } else if let Some(t) = tkg.as_ref() {
            (
                format!("tkg:{}", t.phase),
                t.message.clone(),
                t.updated_at.clone(),
            )
        } else {
            (
                "idle".to_string(),
                "No active processing".to_string(),
                chrono::Utc::now().to_rfc3339(),
            )
        };

        CombinedProgress {
            file_uuid,
            overall_progress: overall.clamp(0.0, 1.0),
            tkg,
            agent,
            current_phase,
            message,
            updated_at,
        }
    }
}
// ── Redis Integration ──────────────────────────────────────────────────────
use crate::core::db::redis_client::RedisClient;
use std::sync::Arc;
pub async fn publish_tkg_progress(
redis: &Arc<RedisClient>,
file_uuid: &str,
progress: &TkgProgress,
) {
let key = format!(
"{}progress:{}:tkg",
crate::core::config::REDIS_KEY_PREFIX.as_str(),
file_uuid
);
if let Ok(mut conn) = redis.get_conn().await {
let json = serde_json::to_string(progress).unwrap_or_default();
let _: Result<(), _> = redis::cmd("SET")
.arg(&[&key, &json])
.query_async(&mut conn)
.await;
}
}
pub async fn publish_agent_progress(
redis: &Arc<RedisClient>,
file_uuid: &str,
progress: &AgentProgress,
) {
let key = format!(
"{}progress:{}:agent",
crate::core::config::REDIS_KEY_PREFIX.as_str(),
file_uuid
);
if let Ok(mut conn) = redis.get_conn().await {
let json = serde_json::to_string(progress).unwrap_or_default();
let _: Result<(), _> = redis::cmd("SET")
.arg(&[&key, &json])
.query_async(&mut conn)
.await;
}
}
/// Reads the TKG and Identity Agent records for `file_uuid` from Redis and
/// merges them with `CombinedProgress::from_parts`.
///
/// Returns `None` only when no Redis connection can be obtained. A key that
/// cannot be read or parsed is treated as absent, so the result can be an
/// "idle" record with neither part.
pub async fn get_progress(
    redis: &Arc<RedisClient>,
    file_uuid: &str,
) -> Option<CombinedProgress> {
    let prefix = crate::core::config::REDIS_KEY_PREFIX.as_str();
    let tkg_key = format!("{}progress:{}:tkg", prefix, file_uuid);
    let agent_key = format!("{}progress:{}:agent", prefix, file_uuid);

    let mut conn = redis.get_conn().await.ok()?;

    let tkg_raw: Option<String> = redis::cmd("GET")
        .arg(&tkg_key)
        .query_async(&mut conn)
        .await
        .ok();
    let agent_raw: Option<String> = redis::cmd("GET")
        .arg(&agent_key)
        .query_async(&mut conn)
        .await
        .ok();

    let tkg = tkg_raw.and_then(|raw| serde_json::from_str(&raw).ok());
    let agent = agent_raw.and_then(|raw| serde_json::from_str(&raw).ok());
    Some(CombinedProgress::from_parts(tkg, agent))
}
/// Publish pipeline progress to Redis
pub async fn publish_pipeline_progress(
redis: &RedisClient,
file_uuid: &str,
progress: &PipelineProgress,
) {
let key = format!(
"{}progress:{}:pipeline",
crate::core::config::REDIS_KEY_PREFIX.as_str(),
file_uuid
);
if let Ok(mut conn) = redis.get_conn().await {
let json = serde_json::to_string(progress).unwrap_or_default();
let _: Result<(), _> = redis::cmd("SET")
.arg(&[&key, &json])
.query_async(&mut conn)
.await;
}
}
/// Get pipeline progress from Redis.
///
/// Returns `None` when no connection can be obtained, when the key cannot be
/// read, or when the stored value does not parse as `PipelineProgress`.
pub async fn get_pipeline_progress(
    redis: &RedisClient,
    file_uuid: &str,
) -> Option<PipelineProgress> {
    let prefix = crate::core::config::REDIS_KEY_PREFIX.as_str();
    let key = format!("{}progress:{}:pipeline", prefix, file_uuid);

    let mut conn = redis.get_conn().await.ok()?;
    let raw: Option<String> = redis::cmd("GET")
        .arg(&key)
        .query_async(&mut conn)
        .await
        .ok();
    let raw = raw?;
    serde_json::from_str(&raw).ok()
}
+165 -114
View File
@@ -3,7 +3,7 @@ use serde::Deserialize;
use std::collections::HashMap;
use tracing::{error, info, warn};
use crate::core::db::{schema, PostgresDb};
use crate::core::db::{schema, PostgresDb, QdrantDb};
#[derive(Debug, Deserialize)]
struct TmdbIdentity {
@@ -30,41 +30,87 @@ fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
/// Round 1: seed match against TMDb face_embeddings (threshold 0.50)
/// Round 2+: propagate to remaining traces using matched faces as reference
pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
let pool = db.pool();
let qdrant = QdrantDb::new();
// Step 1: Load TMDb identities with face embeddings
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
&format!("SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", schema::table_name("identities"))
)
.fetch_all(pool).await?;
// Step 1: Load TMDb identity seeds from Qdrant _seeds collection
let tmdb_filter = serde_json::json!({
"must": [
{"key": "source", "match": {"value": "tmdb"}}
]
});
let seed_points = match qdrant.scroll_all_points("_seeds", tmdb_filter, 500).await {
Ok(pts) => pts,
Err(e) => {
warn!("[TKG-MATCH] Failed to scroll _seeds: {}", e);
return Ok(0);
}
};
let tmdb_rows: Vec<(i32, String, Vec<f32>)> = seed_points
.iter()
.filter_map(|p| {
let payload = &p["payload"];
let id = payload["identity_id"].as_i64()? as i32;
let name = payload["name"].as_str()?.to_string();
let vector = p["vector"]
.as_array()?
.iter()
.filter_map(|v| v.as_f64().map(|f| f as f32))
.collect::<Vec<f32>>();
if vector.len() == 512 {
Some((id, name, vector))
} else {
None
}
})
.collect();
if tmdb_rows.is_empty() {
info!("[TKG-MATCH] No TMDb identities with face embeddings");
info!("[TKG-MATCH] No TMDb identity seeds in _seeds collection");
return Ok(0);
}
info!("[TKG-MATCH] {} TMDb seeds loaded", tmdb_rows.len());
info!("[TKG-MATCH] {} TMDb seeds loaded from _seeds", tmdb_rows.len());
// Step 2: Load face_detections grouped by trace_id
let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
"SELECT trace_id, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
// Step 2: Load face embeddings from Qdrant _faces, grouped by trace_id
let face_filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": 1}} // trace_id > 0 means traced
]
});
let face_points = match qdrant.scroll_all_points("_faces", face_filter, 1000).await {
Ok(pts) => pts,
Err(e) => {
warn!("[TKG-MATCH] Failed to scroll _faces for {}: {}", file_uuid, e);
return Ok(0);
}
};
if fd_rows.is_empty() {
info!("[TKG-MATCH] No face detections for {}", file_uuid);
if face_points.is_empty() {
info!("[TKG-MATCH] No traced faces in _faces for {}", file_uuid);
return Ok(0);
}
// Group by trace_id, collect embeddings
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
for (tid, emb) in &fd_rows {
trace_faces.entry(*tid).or_default().push(emb.clone());
for point in &face_points {
let payload = &point["payload"];
let trace_id = match payload["trace_id"].as_i64() {
Some(tid) if tid > 0 => tid as i32,
_ => continue,
};
let vector = match point["vector"].as_array() {
Some(arr) => arr
.iter()
.filter_map(|v| v.as_f64().map(|f| f as f32))
.collect::<Vec<f32>>(),
None => continue,
};
if vector.len() == 512 {
trace_faces.entry(trace_id).or_default().push(vector);
}
}
// Dedup near-identical embeddings within trace
for faces in trace_faces.values_mut() {
faces.sort_by(|a, b| a[0].partial_cmp(&b[0]).unwrap_or(std::cmp::Ordering::Equal));
@@ -72,7 +118,7 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
}
let total = trace_faces.len();
info!("[TKG-MATCH] {} traces with {} faces", total, fd_rows.len());
info!("[TKG-MATCH] {} traces with {} faces", total, face_points.len());
// Step 3: Iterative matching
const TH: f32 = 0.50;
@@ -100,12 +146,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
info!(
"[TKG-MATCH] Round 1: {} ({}/{})",
matched.len(),
matched.len() * 100 / total,
matched.len() * 100 / total.max(1),
total
);
// Round 2+: propagate
for round_n in 2..=10 {
for _round_n in 2..=10 {
let prev = matched.len();
let mut seed_pool: HashMap<i32, Vec<&Vec<f32>>> = HashMap::new();
for (&tid, (id, _)) in &matched {
@@ -133,7 +179,6 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
}
}
if best_sim >= TH {
// Look up name for this id
for (id, name, _) in &tmdb_rows {
if *id == best_id {
best_name = name.clone();
@@ -153,19 +198,16 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
}
// Step 4: Quality control
// 4a: Remove low-confidence traces (fewer than 4 face detections)
let fd_table = schema::table_name("face_detections");
// 4a: Remove low-confidence traces (fewer than 4 face points)
let mut after_qc = HashMap::new();
for (&tid, &(id, ref name)) in &matched {
let cnt: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
fd_table
))
.bind(file_uuid)
.bind(tid)
.fetch_one(pool)
.await
.unwrap_or(0);
let cnt: i64 = face_points
.iter()
.filter(|p| {
p["payload"]["trace_id"].as_i64() == Some(tid as i64)
&& p["payload"]["file_uuid"].as_str() == Some(file_uuid)
})
.count() as i64;
if cnt >= 4 {
after_qc.insert(tid, (id, name.clone()));
} else {
@@ -184,8 +226,8 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
);
}
// 4b: Temporal collision check
let removed_collisions = quality_check_temporal_collisions(pool, file_uuid).await?;
// 4b: Temporal collision check via Qdrant
let removed_collisions = quality_check_temporal_collisions_qdrant(&qdrant, file_uuid).await?;
if removed_collisions > 0 {
info!(
"[TKG-QC] Resolved {} temporal collisions",
@@ -193,19 +235,21 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
);
}
// Step 5: Update DB
// Step 5: Update Qdrant _faces with identity_id
let mut updated = 0usize;
for (&tid, &(id, _)) in &matched {
let r = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
fd_table
))
.bind(id)
.bind(file_uuid)
.bind(tid)
.execute(pool)
.await?;
if r.rows_affected() > 0 {
let filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}},
{"key": "trace_id", "match": {"value": tid}}
]
});
let payload = serde_json::json!({"identity_id": id});
if qdrant
.update_payload_by_filter("_faces", filter, payload)
.await
.is_ok()
{
updated += 1;
}
}
@@ -214,87 +258,94 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
"[TKG-MATCH] Done: {}/{} traces matched ({}%)",
matched.len(),
total,
matched.len() * 100 / total
matched.len() * 100 / total.max(1)
);
Ok(updated)
}
/// Quality check: detect temporal collisions where two different traces of the same
/// identity appear in the same frame (impossible for one person).
/// Unbind the lower-confidence trace from the conflicting pair.
/// RCA reference: docs_v1.0/API_V1.0.0/INTERNAL/RCA_TRACE39_TRACE45_COLLISION_V1.0.0.md
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
let fd_table = schema::table_name("face_detections");
// Find all collision pairs: same identity, same frame, different trace
let collisions = sqlx::query_as::<_, (i32, i32, i32, i64)>(&format!(
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
FROM {} a \
JOIN {} b \
ON a.file_uuid = b.file_uuid \
AND a.frame_number = b.frame_number \
AND a.trace_id < b.trace_id \
WHERE a.file_uuid = $1 \
AND a.identity_id IS NOT NULL \
AND a.identity_id = b.identity_id \
ORDER BY a.identity_id, a.frame_number",
fd_table, fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
/// Unbind the lower-confidence trace from the conflicting pair via Qdrant.
async fn quality_check_temporal_collisions_qdrant(
qdrant: &QdrantDb,
file_uuid: &str,
) -> Result<usize> {
use std::collections::HashSet;
if collisions.is_empty() {
return Ok(0);
// Load all traced faces for this file. Every face track has its own trace_id, so the
// filter has to be a range (>= 1): an exact match on 1 would load a single trace and
// no collision between two different traces could ever be detected.
let face_filter = serde_json::json!({
    "must": [
        {"key": "file_uuid", "match": {"value": file_uuid}},
        {"key": "trace_id", "range": {"gte": 1}}
    ]
});
let face_points = match qdrant.scroll_all_points("_faces", face_filter, 1000).await {
    Ok(pts) => pts,
    Err(_) => {
        // QC is best-effort: a failed scroll skips the check instead of failing the
        // whole matching run, but it must not be silent.
        warn!(
            "[TKG-QC] Failed to load traced faces for {}; skipping collision check",
            file_uuid
        );
        return Ok(0);
    }
};
// Group by (frame, identity_id) to find collisions, and count identified points per trace.
let mut frame_identity_traces: HashMap<(i64, i32), HashSet<i32>> = HashMap::new();
let mut trace_point_counts: HashMap<i32, i64> = HashMap::new();
for point in &face_points {
    let payload = &point["payload"];
    // Only points that belong to a trace and already carry an identity can collide.
    let trace_id = match payload["trace_id"].as_i64() {
        Some(tid) if tid > 0 => tid as i32,
        _ => continue,
    };
    let identity_id = match payload["identity_id"].as_i64() {
        Some(id) if id > 0 => id as i32,
        _ => continue,
    };
    *trace_point_counts.entry(trace_id).or_default() += 1;
    // A point without a frame number cannot take part in a same-frame collision.
    // Defaulting it to frame 0 would put unrelated points into one bucket and
    // fabricate collisions, so it is left out of the grouping (it is still counted).
    if let Some(frame) = payload["frame"].as_i64() {
        frame_identity_traces
            .entry((frame, identity_id))
            .or_default()
            .insert(trace_id);
    }
}
// Group collisions by (identity_id, trace_a, trace_b) and count frames
use std::collections::HashMap;
// Find collision pairs: (identity_id, trace_a, trace_b)
let mut collision_groups: HashMap<(i32, i32, i32), usize> = HashMap::new();
for (id, ta, tb, _) in &collisions {
*collision_groups.entry((*id, *ta, *tb)).or_default() += 1;
for ((_frame, identity_id), traces) in &frame_identity_traces {
let traces: Vec<i32> = traces.iter().copied().collect();
for i in 0..traces.len() {
for j in (i + 1)..traces.len() {
let (ta, tb) = if traces[i] < traces[j] {
(traces[i], traces[j])
} else {
(traces[j], traces[i])
};
*collision_groups.entry((*identity_id, ta, tb)).or_default() += 1;
}
}
}
if collision_groups.is_empty() {
return Ok(0);
}
let mut unbound = 0usize;
for ((id, ta, tb), overlap_frames) in &collision_groups {
// Get face detection count for each trace
let cnt_a: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
fd_table
))
.bind(file_uuid)
.bind(ta)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
let cnt_a = trace_point_counts.get(ta).copied().unwrap_or(0);
let cnt_b = trace_point_counts.get(tb).copied().unwrap_or(0);
let cnt_b: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
fd_table
))
.bind(file_uuid)
.bind(tb)
.bind(id)
.fetch_one(pool)
.await
.unwrap_or(0);
// Unbind the trace with fewer detections (likely the false positive)
let victim = if cnt_a <= cnt_b { *ta } else { *tb };
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
sqlx::query(&format!(
"UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
fd_table
))
.bind(file_uuid)
.bind(victim)
.execute(pool)
.await?;
let filter = serde_json::json!({
    "must": [
        {"key": "file_uuid", "match": {"value": file_uuid}},
        {"key": "trace_id", "match": {"value": victim}}
    ]
});
let payload = serde_json::json!({"identity_id": serde_json::Value::Null});
// Count and report the unbind only when Qdrant accepted the payload update;
// otherwise the collision is still present and must not be reported as resolved.
if qdrant
    .update_payload_by_filter("_faces", filter, payload)
    .await
    .is_err()
{
    warn!(
        "[TKG-QC] Failed to unbind trace {} (identity={}) in Qdrant; collision left unresolved",
        victim, id
    );
    continue;
}
unbound += 1;
warn!("[TKG-QC] Collision identity={}: trace {} vs trace {} ({} overlap frames). Unbound trace {} ({} detections)",
id, ta, tb, overlap_frames, victim, victim_cnt);
warn!("[TKG-QC] Collision identity={}: trace {} vs trace {} ({} overlap frames). Unbound trace {} ({} points)",
id, ta, tb, overlap_frames, victim, if cnt_a <= cnt_b { cnt_a } else { cnt_b });
}
Ok(unbound)
+2 -3
View File
@@ -45,9 +45,8 @@ fn extract_movie_name(filename: &str) -> Option<String> {
.file_stem()
.and_then(|s| s.to_str())?;
let noise_words = [
"youtube", "yt", "fps", "hd", "full", "movie", "official",
"trailer", "teaser", "4k",
let noise_words = [
"youtube", "yt", "fps", "hd", "full", "movie", "official", "trailer", "teaser", "4k",
];
let cleaned = name
+31 -18
View File
@@ -1056,7 +1056,7 @@ async fn main() -> Result<()> {
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
@@ -1066,9 +1066,9 @@ async fn main() -> Result<()> {
"pose" => Some(ProcessorType::Pose),
"hand" => Some(ProcessorType::Hand),
_ => {
eprintln!("Unknown module: {}", name);
None
}
eprintln!("Unknown module: {}", name);
None
}
}
})
.collect()
@@ -1082,7 +1082,7 @@ None
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
@@ -1092,9 +1092,9 @@ None
"pose" => Some(ProcessorType::Pose),
"hand" => Some(ProcessorType::Hand),
_ => {
eprintln!("Unknown cloud module: {}", name);
None
}
eprintln!("Unknown cloud module: {}", name);
None
}
}
})
.collect()
@@ -1783,9 +1783,9 @@ None
}
}
}
}
}
// TODO: Store pre_chunks and frames to database
// TODO: Store pre_chunks and frames to database
// Stop Redis subscriber
redis_handle.abort();
@@ -1822,10 +1822,10 @@ None
if should_process(ProcessorType::Appearance) {
let path = output_dir.get_output_path(&uuid, "appearance.json");
println!(" - Appearance JSON: {}", path.display());
}
}
Ok(())
}
Ok(())
}
Commands::Chunk { uuid } => {
println!("Chunking: {}", uuid);
@@ -1933,18 +1933,22 @@ Ok(())
Err(e) => {
println!("Warning: Failed to parse Face JSON: {}. Skipping Face.", e);
momentry_core::core::processor::face::FaceResult {
status: None,
frame_count: 0,
fps: 0.0,
frames: vec![],
total_faces: 0,
}
}
},
Err(_) => {
println!("Warning: Face file not found. Skipping Face.");
momentry_core::core::processor::face::FaceResult {
status: None,
frame_count: 0,
fps: 0.0,
frames: vec![],
total_faces: 0,
}
}
};
@@ -1993,18 +1997,22 @@ Ok(())
Err(e) => {
println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
momentry_core::core::processor::asrx::AsrxResult {
status: None,
language: None,
segments: vec![],
embeddings: None,
segment_count: 0,
}
}
},
Err(_) => {
println!("Warning: ASRX file not found. Skipping ASRX.");
momentry_core::core::processor::asrx::AsrxResult {
status: None,
language: None,
segments: vec![],
embeddings: None,
segment_count: 0,
}
}
};
@@ -2017,8 +2025,10 @@ Ok(())
let deleted_frames = db.delete_frames_by_uuid(&uuid).await?;
let deleted_tkg_nodes = db.delete_tkg_nodes_by_uuid(&uuid).await?;
let deleted_tkg_edges = db.delete_tkg_edges_by_uuid(&uuid).await?;
println!(" Deleted: {} pre_chunks, {} frames, {} tkg_nodes, {} tkg_edges",
deleted_pre_chunks, deleted_frames, deleted_tkg_nodes, deleted_tkg_edges);
println!(
" Deleted: {} pre_chunks, {} frames, {} tkg_nodes, {} tkg_edges",
deleted_pre_chunks, deleted_frames, deleted_tkg_nodes, deleted_tkg_edges
);
println!("\nStoring pre_chunks...");
@@ -2324,10 +2334,13 @@ Ok(())
// Build TKG
println!("\nBuilding TKG...");
let tkg_result = momentry_core::core::processor::tkg::build_tkg(&db, &uuid, &output_dir).await?;
println!("✓ TKG built: {} nodes, {} edges",
let tkg_result =
momentry_core::core::processor::tkg::build_tkg(&db, &uuid, &output_dir, None).await?;
println!(
"✓ TKG built: {} nodes, {} edges",
tkg_result.face_track_nodes + tkg_result.hand_nodes + tkg_result.object_nodes,
tkg_result.co_occurrence_edges + tkg_result.hand_object_edges);
tkg_result.co_occurrence_edges + tkg_result.hand_object_edges
);
println!("\n✓ Chunk stage completed!");
println!(
+9 -9
View File
@@ -35,8 +35,8 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
field_type: FieldType::Number,
allow_empty: true,
},
RequiredField {
path: "fps",
@@ -45,11 +45,11 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
},
RequiredField {
path: "scenes",
field_type: FieldType::NonEmptyArray,
allow_empty: false,
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 1,
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Yolo,
@@ -77,8 +77,8 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
field_type: FieldType::Number,
allow_empty: true,
},
RequiredField {
path: "fps",
@@ -98,8 +98,8 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
field_type: FieldType::Number,
allow_empty: true,
},
RequiredField {
path: "fps",
+470 -70
View File
@@ -9,6 +9,7 @@ use tracing::{debug, error, info, warn};
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::chunk::rule1_ingest;
use crate::core::config::OUTPUT_DIR;
use crate::core::progress::{publish_pipeline_progress, PipelineProgress};
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
@@ -225,7 +226,7 @@ impl JobWorker {
.get_processor_results_by_job(job.id)
.await
.unwrap_or_default();
// 若有任何 processor 是 pending/skipped(未真正啟動),重新處理 job
// If any processor is pending/skipped/deferred (i.e. never actually started), reprocess the job
let has_unstarted = results.iter().any(|r| {
matches!(
r.status,
@@ -233,7 +234,21 @@ impl JobWorker {
| crate::core::db::ProcessorJobStatus::Skipped
)
});
if has_unstarted {
// Also check if there are processors without result records (deferred)
let expected_count = if job.processors.is_empty() {
crate::core::db::ProcessorType::all().len()
} else {
job.processors.len()
};
let has_deferred = results.len() < expected_count;
if has_unstarted || has_deferred {
// Call check_and_complete_job to retry deferred processors
let _ = self
.check_and_complete_job(job.id, &job.uuid, &job.processors, expected_count)
.await;
if let Err(e) = self.process_job(job.clone()).await {
error!("Failed to reprocess job {}: {}", job.uuid, e);
}
@@ -345,7 +360,16 @@ impl JobWorker {
processor_type.as_str()
));
debug!("Checking output file: {:?}", output_path);
if output_path.exists() {
// Pose is never short-circuited by an existing pose.json: that file is created by
// swift_face_pose, and pose.rs still has to run to check for interpolation.
// Every other processor is skipped when its output file already exists.
let is_pose = *processor_type == crate::core::db::ProcessorType::Pose;
let skip_check = !is_pose && output_path.exists();
if skip_check {
info!(
"Processor {} output file exists, marking completed and skipping",
processor_type.as_str()
@@ -803,6 +827,65 @@ impl JobWorker {
}
}
// Special handling for ASRX: when the ASR output reports no_audio_track/silent_audio,
// ASRX is recorded as completed without being run.
if *processor_type == crate::core::db::ProcessorType::Asrx {
    // OUTPUT_DIR may or may not end with '/', so it is normalised and the separator
    // is added explicitly. Concatenating the trimmed directory directly with the uuid
    // yields "<dir><uuid>.asr.json", which never exists.
    let asr_output_path = format!(
        "{}/{}.asr.json",
        crate::core::config::OUTPUT_DIR
            .as_str()
            .trim_end_matches('/'),
        job.uuid
    );
    // A missing or unparsable ASR file simply means "no special handling".
    let asr_status: Option<String> = std::fs::read_to_string(&asr_output_path)
        .ok()
        .and_then(|asr_json| serde_json::from_str::<serde_json::Value>(&asr_json).ok())
        .and_then(|asr_data| {
            asr_data
                .get("status")
                .and_then(|s| s.as_str())
                .map(str::to_owned)
        });
    if let Some(status) = asr_status.as_deref() {
        if status == "no_audio_track" || status == "silent_audio" {
            info!("ASRX: ASR status={}, skipping ASRX processing", status);
            // Create completed result with same status
            if let Err(e) = self
                .db
                .upsert_processor_result(job.id, *processor_type, &job.uuid, "completed")
                .await
            {
                error!("Failed to create ASRX result: {}", e);
            }
            // Update asr_status column; log failures instead of discarding them.
            if let Err(e) = sqlx::query(&format!(
                "UPDATE {} SET asr_status = $1, segment_count = 0 WHERE job_id = $2 AND processor = 'asrx'",
                crate::core::db::schema::table_name("processor_results")
            ))
            .bind(status)
            .bind(job.id)
            .execute(self.db.pool())
            .await
            {
                error!(
                    "Failed to update ASRX asr_status for {}: {}",
                    job.uuid, e
                );
            }
            // The Redis status is best-effort, but a failure is still worth a log line.
            if let Err(e) = self
                .redis
                .update_worker_processor_status(
                    &job.uuid,
                    "asrx",
                    "completed",
                    None,
                    0,
                    0,
                    0,
                    0,
                    0,
                )
                .await
            {
                warn!(
                    "Failed to update Redis ASRX status for {}: {}",
                    job.uuid, e
                );
            }
            started_count += 1;
            continue;
        }
    }
}
// Check dependencies: all dependent processors must be completed
let deps = processor_type.dependencies();
if !deps.is_empty() {
@@ -877,6 +960,7 @@ impl JobWorker {
{
error!("Failed to emit processor alert: {}", e);
}
started_count += 1;
continue;
}
}
@@ -1005,54 +1089,127 @@ impl JobWorker {
/// Checks whether all ingestion steps have completed (same logic as the ingestion-status endpoint).
async fn ingestion_complete(pool: &PgPool, uuid: &str, job_processors: &[String]) -> bool {
let chunk_t = schema::table_name("chunk");
let fd_t = schema::table_name("face_detections");
let pr_t = schema::table_name("processor_results");
// Only check conditions relevant to the job's processors
let has_asr_or_asrx =
job_processors.is_empty() || job_processors.iter().any(|p| p == "asrx" || p == "asr");
let has_cut = job_processors.is_empty() || job_processors.iter().any(|p| p == "cut");
let has_face = job_processors.is_empty() || job_processors.iter().any(|p| p == "face");
let rule1 = !has_asr_or_asrx
|| sqlx::query_scalar::<_, i32>(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
// Check asr_status for ASR/ASRX - if no_audio_track or silent_audio, ingestion is complete
let asr_done: bool = if has_asr_or_asrx {
let asr_status: Option<String> = sqlx::query_scalar(&format!(
"SELECT asr_status FROM {pr_t} WHERE file_uuid = $1 AND processor IN ('asr', 'asrx') LIMIT 1"
))
.bind(uuid)
.fetch_optional(pool)
.await
.unwrap_or(None)
.unwrap_or(0)
> 0;
.unwrap_or(None);
let vector = !has_asr_or_asrx
|| sqlx::query_scalar::<_, i32>(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"
))
.bind(uuid)
.fetch_optional(pool)
.await
.unwrap_or(None)
.unwrap_or(0)
> 0;
match asr_status.as_deref() {
Some("no_audio_track") | Some("silent_audio") => {
tracing::info!(
"[Ingestion] ASR status {} for {} - no chunks needed",
asr_status.unwrap_or_default(),
uuid
);
true
}
Some("has_transcript") => {
// Has transcript, need chunks
sqlx::query_scalar::<_, i32>(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
))
.bind(uuid)
.fetch_optional(pool)
.await
.unwrap_or(None)
.unwrap_or(0)
> 0
}
_ => false,
}
} else {
true
};
let trace = !has_face
|| sqlx::query_scalar::<_, i64>(&format!(
"SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = $1 AND trace_id IS NOT NULL"
))
.bind(uuid)
.fetch_one(pool)
.await
.unwrap_or(0)
> 0;
// Face ingestion counts as done once face_traced.json lists at least one trace.
// NOTE(review): face_status / the no_faces case is not consulted here, so a video
// without any faces keeps this check false — confirm whether that is intended.
let trace_done: bool = if has_face {
// Locate face_traced.json. MOMENTRY_OUTPUT_DIR still overrides, but the fallback is
// the configured OUTPUT_DIR rather than a path on one developer's machine.
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
    .unwrap_or_else(|_| crate::core::config::OUTPUT_DIR.as_str().to_string());
let traced_path = format!(
    "{}/{}.face_traced.json",
    output_dir.trim_end_matches('/'),
    uuid
);
let all_ok = rule1 && vector && trace;
if !all_ok {
tracing::info!(
"[Ingestion] waiting (uuid={}): rule1={} vector={} trace={}",
uuid,
rule1,
vector,
trace
tracing::info!(
"[Ingestion] Checking face traces for {}: path={}",
uuid,
traced_path
);
if std::path::Path::new(&traced_path).exists() {
if let Ok(content) = std::fs::read_to_string(&traced_path) {
if let Ok(traced_data) = serde_json::from_str::<serde_json::Value>(&content) {
if let Some(traces) = traced_data.get("traces") {
// traces can be an object (dictionary) or array
let trace_count = if traces.is_object() {
traces.as_object().map(|o| o.len()).unwrap_or(0)
} else if traces.is_array() {
traces.as_array().map(|a| a.len()).unwrap_or(0)
} else {
0
};
if trace_count > 0 {
tracing::info!(
"[Ingestion] Face traces found for {}: {} traces (from face_traced.json)",
uuid, trace_count
);
true
} else {
tracing::warn!("[Ingestion] Face traces is empty for {}", uuid);
false
}
} else {
tracing::warn!(
"[Ingestion] No 'traces' key in face_traced.json for {}",
uuid
);
false
}
} else {
tracing::warn!("[Ingestion] Failed to parse face_traced.json for {}", uuid);
false
}
} else {
tracing::warn!("[Ingestion] Failed to read face_traced.json for {}", uuid);
false
}
} else {
tracing::warn!(
"[Ingestion] face_traced.json not found for {}: {}",
uuid,
traced_path
);
false
}
} else {
tracing::info!("[Ingestion] No face processor, trace_done=true");
true
};
let all_ok = asr_done && trace_done;
tracing::info!(
"[Ingestion] all_ok={} (asr_done={}, trace_done={}) for uuid={}",
all_ok,
asr_done,
trace_done,
uuid
);
if !all_ok {
tracing::info!(
"[Ingestion] waiting (uuid={}): asr_done={} trace_done={}",
uuid,
asr_done,
trace_done
);
}
all_ok
@@ -1103,7 +1260,7 @@ vector,
.any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Pending));
const MAX_RETRIES: i32 = 3;
if any_failed && !any_pending {
let failed_processors_to_retry: Vec<i32> = results
.iter()
@@ -1116,19 +1273,131 @@ vector,
.collect();
if !failed_processors_to_retry.is_empty() {
info!("🔄 Attempting to retry {} failed processors...", failed_processors_to_retry.len());
info!(
"🔄 Attempting to retry {} failed processors...",
failed_processors_to_retry.len()
);
for result_id in failed_processors_to_retry {
if let Ok(true) = self.db.retry_failed_processor(result_id, MAX_RETRIES).await {
if let Ok(mut conn) = self.redis.get_conn().await {
let redis_key = format!("momentry:progress:{}", uuid);
let _: Result<i32, _> = redis::AsyncCommands::del(&mut conn, &redis_key).await;
let _: Result<i32, _> =
redis::AsyncCommands::del(&mut conn, &redis_key).await;
}
}
}
}
}
// Retry deferred processors whose dependencies are now met
// Build a set of completed processor types
let completed_set: std::collections::HashSet<_> = results
.iter()
.filter(|r| matches!(r.status, ProcessorJobStatus::Completed))
.map(|r| r.processor_type)
.collect();
let mut created_deferred = false;
// Find processors in job_processors that are not in results yet
for processor_name in job_processors {
let processor_type = match crate::core::db::ProcessorType::from_db_str(processor_name) {
Some(pt) => pt,
None => continue,
};
// Skip if already has a result
if results.iter().any(|r| r.processor_type == processor_type) {
continue;
}
// Check if all dependencies are met
let deps = processor_type.dependencies();
let deps_met = deps.iter().all(|dep| completed_set.contains(dep));
if !deps_met {
continue;
}
info!(
"🔄 Deferred processor {} dependencies now met, creating result",
processor_name
);
created_deferred = true;
// Special handling for ASRX: when the ASR output reports no_audio_track/silent_audio,
// the deferred ASRX result is created as completed directly instead of pending.
if processor_type == crate::core::db::ProcessorType::Asrx {
    // OUTPUT_DIR may or may not end with '/', so it is normalised and the separator
    // is added explicitly. Concatenating the trimmed directory directly with the uuid
    // yields "<dir><uuid>.asr.json", which never exists.
    let asr_output_path = format!(
        "{}/{}.asr.json",
        crate::core::config::OUTPUT_DIR
            .as_str()
            .trim_end_matches('/'),
        uuid
    );
    // A missing or unparsable ASR file falls through to the generic pending path.
    let asr_status: Option<String> = std::fs::read_to_string(&asr_output_path)
        .ok()
        .and_then(|asr_json| serde_json::from_str::<serde_json::Value>(&asr_json).ok())
        .and_then(|asr_data| {
            asr_data
                .get("status")
                .and_then(|s| s.as_str())
                .map(str::to_owned)
        });
    if let Some(status) = asr_status.as_deref() {
        if status == "no_audio_track" || status == "silent_audio" {
            info!(
                "ASRX: ASR status={}, creating completed result directly",
                status
            );
            if let Err(e) = self
                .db
                .upsert_processor_result(job_id, processor_type, uuid, "completed")
                .await
            {
                error!("Failed to create ASRX result: {}", e);
            }
            // Update asr_status column; log failures instead of discarding them.
            if let Err(e) = sqlx::query(&format!(
                "UPDATE {} SET asr_status = $1, segment_count = 0 WHERE job_id = $2 AND processor = 'asrx'",
                crate::core::db::schema::table_name("processor_results")
            ))
            .bind(status)
            .bind(job_id)
            .execute(self.db.pool())
            .await
            {
                error!("Failed to update ASRX asr_status for {}: {}", uuid, e);
            }
            // The Redis status is best-effort, but a failure is still worth a log line.
            if let Err(e) = self
                .redis
                .update_worker_processor_status(
                    uuid,
                    "asrx",
                    "completed",
                    None,
                    0,
                    0,
                    0,
                    0,
                    0,
                )
                .await
            {
                warn!("Failed to update Redis ASRX status for {}: {}", uuid, e);
            }
            continue;
        }
    }
}
// For other deferred processors, create pending result so worker can pick it up
if let Err(e) = self
.db
.upsert_processor_result(job_id, processor_type, uuid, "pending")
.await
{
error!(
"Failed to create deferred result for {}: {}",
processor_name, e
);
}
}
let any_skipped = results
.iter()
.filter(|r| job_processors.contains(&r.processor_type.as_str().to_string()))
@@ -1192,7 +1461,9 @@ vector,
} else {
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
let db_clone = self.db.clone();
let redis_clone = self.redis.clone();
let uuid_clone = uuid.to_string();
let job_id_clone = job_id;
tokio::spawn(async move {
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
@@ -1217,6 +1488,9 @@ vector,
);
}
}
let mut pp = PipelineProgress::new(&uuid_clone);
pp.update_stage("rule1_ingestion", 1.0, "completed", Some(format!("{} chunks", count)));
publish_pipeline_progress(redis_clone.as_ref(), &uuid_clone, &pp).await;
info!("📦 Phase 1 release packaging...");
let executor =
match crate::core::processor::PythonExecutor::new() {
@@ -1240,7 +1514,10 @@ vector,
.await
{
Ok(()) => {
info!("✅ Phase 1 release packaged for {}", uuid_clone)
info!("✅ Phase 1 release packaged for {}", uuid_clone);
// Note: Job status will be updated after Rule 2 (TKG) completion
// Do not mark as completed here
}
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
}
@@ -1251,16 +1528,21 @@ vector,
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
Err(e) => error!("Failed to get video info for chunking: {}", e),
}
});
}
}
});
}
}
if all_completed {
// 🚀 P2 Trigger: Face Trace + DB Store (after Face)
if all_completed {
let mut pp = PipelineProgress::new(uuid);
pp.update_stage("processors", 1.0, "completed", None);
publish_pipeline_progress(self.redis.as_ref(), uuid, &pp).await;
// 🚀 P2 Trigger: Face Trace + DB Store (after Face)
// Runs face_tracker.py (IoU+embedding tracking), stores trace_id + position in DB
if has_face {
info!("📝 Face completed, triggering face trace + DB store...");
let db_clone = self.db.clone();
let redis_clone = self.redis.clone();
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
let executor = match crate::core::processor::PythonExecutor::new() {
@@ -1283,17 +1565,56 @@ if all_completed {
Ok(()) => {
info!("✅ Face trace + DB store completed for {}", uuid_clone);
// Generate trace chunks from face_detections + ASR text
info!("📝 Generating trace chunks...");
match crate::core::chunk::trace_ingest::ingest_traces(
&db_clone,
&uuid_clone,
)
.await
// Query trace count and distribution
let trace_count = match db_clone
.get_trace_count_by_file(&uuid_clone)
.await
{
Ok(n) => info!("✅ {} trace chunks created for {}", n, uuid_clone),
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
Ok(c) => c,
Err(e) => {
error!("Failed to get trace count for {}: {}", uuid_clone, e);
0
}
};
let (single_frame, multi_frame) = match db_clone
.get_trace_frame_count_distribution(&uuid_clone)
.await
{
Ok(dist) => dist,
Err(e) => {
error!(
"Failed to get trace distribution for {}: {}",
uuid_clone, e
);
(0, 0)
}
};
let trace_status =
crate::core::processor::TraceStatus::from_trace_count(trace_count);
info!(
"📊 Trace status: {} (total={}, single_frame={}, multi_frame={}) for {}",
trace_status, trace_count, single_frame, multi_frame, uuid_clone
);
// Update processor_results trace_status for Face
if let Err(e) = db_clone
.update_trace_status_for_face(
&uuid_clone,
&trace_status,
trace_count,
single_frame,
multi_frame,
)
.await
{
error!("Failed to update trace_status for {}: {}", uuid_clone, e);
}
let mut pp = PipelineProgress::new(&uuid_clone);
pp.update_stage("face_tracing", 1.0, "completed", Some(format!("{} traces ({} single, {} multi)", trace_count, single_frame, multi_frame)));
publish_pipeline_progress(redis_clone.as_ref(), &uuid_clone, &pp).await;
}
Err(e) => {
error!("❌ Face trace + DB store failed for {}: {}", uuid_clone, e)
@@ -1320,15 +1641,46 @@ if all_completed {
count, uuid_clone
);
// Save identity files for affected identities
let ids = sqlx::query_scalar::<_, uuid::Uuid>(
"SELECT DISTINCT i.uuid FROM identities i \
JOIN face_detections fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
)
.bind(&uuid_clone)
.fetch_all(db_clone.pool())
.await
.unwrap_or_default();
let qdrant = crate::core::db::qdrant_db::QdrantDb::new();
let face_filter = serde_json::json!({
"must": [
{"key": "file_uuid", "match": {"value": &uuid_clone}},
{"key": "identity_id", "is_null": false}
]
});
let face_points = qdrant
.scroll_all_points("_faces", face_filter, 1000)
.await
.unwrap_or_default();
use std::collections::HashSet;
let mut identity_ids: HashSet<i32> = HashSet::new();
for p in &face_points {
if let Some(iid) = p["payload"]["identity_id"].as_i64() {
identity_ids.insert(iid as i32);
}
}
// Resolve the identity UUIDs in one round trip: Postgres `= ANY($1)` takes the whole
// id list as a single array parameter, so no per-id query (and no hand-built
// placeholder list) is needed. A failed lookup yields an empty list, because saving
// identity files is best-effort.
let ids: Vec<uuid::Uuid> = if identity_ids.is_empty() {
    Vec::new()
} else {
    let ids_list: Vec<i32> = identity_ids.into_iter().collect();
    sqlx::query_scalar::<_, uuid::Uuid>(
        "SELECT uuid FROM identities WHERE id = ANY($1) AND uuid IS NOT NULL",
    )
    .bind(ids_list)
    .fetch_all(db_clone.pool())
    .await
    .unwrap_or_default()
};
for id_uuid in &ids {
let us = id_uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file(
@@ -1374,15 +1726,58 @@ if all_completed {
if has_face && has_asr_or_asrx {
info!("📝 Prerequisites met for Identity Agent. Starting analysis...");
let db_clone = self.db.clone();
let redis_clone = self.redis.clone();
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
match run_identity_agent(&db_clone, &uuid_clone).await {
Ok(()) => info!("✅ Identity Agent completed for {}", uuid_clone),
match run_identity_agent(&db_clone, &uuid_clone, Some(redis_clone.clone())).await {
Ok(()) => {
info!("✅ Identity Agent completed for {}", uuid_clone);
let mut pp = PipelineProgress::new(&uuid_clone);
pp.update_stage("identity_agent", 1.0, "completed", None);
publish_pipeline_progress(redis_clone.as_ref(), &uuid_clone, &pp).await;
}
Err(e) => error!("❌ Identity Agent failed for {}: {}", uuid_clone, e),
}
});
}
// 🚀 P4 Trigger: TKG Build (Face + ASRX) → then Rule2 ingestion
if has_face && has_asr_or_asrx {
info!("📝 Prerequisites met for TKG Build. Starting graph construction...");
let db_clone = self.db.clone();
let redis_clone = self.redis.clone();
let uuid_clone = uuid.to_string();
let output_dir_clone = crate::core::config::OUTPUT_DIR.clone();
tokio::spawn(async move {
match crate::core::processor::tkg::build_tkg(&db_clone, &uuid_clone, &output_dir_clone, Some(redis_clone.clone())).await {
Ok(r) => {
let total_nodes = r.face_track_nodes + r.gaze_track_nodes + r.lip_track_nodes + r.text_region_nodes + r.appearance_trace_nodes + r.accessory_nodes + r.object_nodes + r.hand_nodes + r.speaker_nodes;
let total_edges = r.co_occurrence_edges + r.speaker_face_edges + r.face_face_edges + r.mutual_gaze_edges + r.lip_sync_edges + r.has_appearance_edges + r.wears_edges + r.hand_object_edges;
info!("✅ TKG build completed for {}: {} nodes, {} edges", uuid_clone, total_nodes, total_edges);
let mut pp = PipelineProgress::new(&uuid_clone);
pp.update_stage("tkg_nodes", 1.0, "completed", Some(format!("{} nodes", total_nodes)));
pp.update_stage("tkg_edges", 1.0, "completed", Some(format!("{} edges", total_edges)));
publish_pipeline_progress(redis_clone.as_ref(), &uuid_clone, &pp).await;
// Trigger Rule 2 ingestion after TKG complete
if total_edges > 0 {
match crate::core::chunk::rule2_ingest::ingest_rule2(db_clone.pool(), &uuid_clone, None, None).await {
Ok(rule2_count) => {
info!("✅ Rule 2 ingestion completed for {}: {} relationship chunks", uuid_clone, rule2_count);
let mut pp = PipelineProgress::new(&uuid_clone);
pp.update_stage("rule2_ingestion", 1.0, "completed", Some(format!("{} chunks", rule2_count)));
publish_pipeline_progress(redis_clone.as_ref(), &uuid_clone, &pp).await;
}
Err(e) => error!("❌ Rule 2 ingestion failed for {}: {}", uuid_clone, e),
}
}
}
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
}
});
}
if !Self::ingestion_complete(self.db.pool(), uuid, job_processors).await {
info!(
"Job {}: all processors done, waiting for ingestion...",
@@ -1413,6 +1808,10 @@ if all_completed {
self.redis.delete_worker_job(uuid).await?;
let mut pp = PipelineProgress::new(uuid);
pp.mark_completed();
publish_pipeline_progress(self.redis.as_ref(), uuid, &pp).await;
info!("Job {} completed successfully (ingestion done)", job_id);
} else if essential_completed && !all_completed && !any_pending && !any_skipped {
// Essential processors completed but some non-essential ones failed → still counts as completed (only triggered when nothing is pending)
@@ -1466,7 +1865,8 @@ if all_completed {
.await?;
}
Ok(false)
// Return true if we created deferred processors, so caller will reprocess the job
Ok(created_deferred)
}
pub async fn shutdown(&self) {
+158 -5
View File
@@ -82,6 +82,10 @@ struct ProcessorOutput {
total_frames: i32,
retry_count: i32,
pid: i32,
asr_status: Option<crate::core::processor::AsrStatus>,
segment_count: usize,
face_status: Option<crate::core::processor::FaceStatus>,
total_faces: usize,
}
#[derive(Debug, Clone)]
@@ -316,13 +320,16 @@ impl ProcessorPool {
}
// Subscribe to Redis progress pub/sub and update processor hash in real-time
let sub_db = db.clone();
let sub_redis = redis.clone();
let sub_uuid = job.uuid.clone();
let sub_processor = processor_name.clone();
let progress_handle = tokio::spawn(async move {
let cb_db = sub_db.clone();
let cb_redis = sub_redis.clone();
let cb_uuid = sub_uuid.clone();
let cb_processor = sub_processor.clone();
let last_update = std::cell::Cell::new(0i64);
if let Err(e) = sub_redis
.subscribe_and_callback(&sub_uuid, move |msg| {
tracing::info!(
@@ -338,6 +345,7 @@ impl ProcessorPool {
let r = cb_redis.clone();
let u = cb_uuid.clone();
let p = cb_processor.clone();
let p2 = p.clone();
tokio::spawn(async move {
match r
.update_worker_processor_status(
@@ -354,6 +362,46 @@ impl ProcessorPool {
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
}
});
// Sync progress to PostgreSQL every 5 seconds
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs() as i64;
let elapsed = now - last_update.get();
if elapsed >= 5 {
tracing::info!(
"[Subscriber] PG sync {}: cur={} tot={} (elapsed={})",
p2,
cur,
tot,
elapsed
);
last_update.set(now);
let db_client = cb_db.clone();
let u = cb_uuid.clone();
let p = cb_processor.clone();
tokio::spawn(async move {
if let Err(e) = db_client
.update_processor_progress(
&u, &p, cur as u64, tot as u64, "running",
)
.await
{
tracing::error!(
"[Subscriber] PG progress update FAILED {}: {}",
p,
e
);
} else {
tracing::info!(
"[Subscriber] PG progress updated {}: cur={} tot={}",
p,
cur,
tot
);
}
});
}
}
})
.await
@@ -400,6 +448,32 @@ impl ProcessorPool {
error!("Failed to update processor result to completed: {}", e);
}
// Persist the ASR outcome for this processor result, but only when the processor
// actually reported an ASR status. A failed write is logged and execution
// continues with the statements that follow.
if let Some(ref asr_status) = output.asr_status {
if let Err(e) = db
.update_asr_status(
processor_result_id,
asr_status,
output.segment_count,
)
.await
{
error!("Failed to update ASR status: {}", e);
}
}
// Same handling for the FACE outcome: written only when a face status was
// reported, with write failures logged rather than propagated.
if let Some(ref face_status) = output.face_status {
if let Err(e) = db
.update_face_status(
processor_result_id,
face_status,
output.total_faces,
)
.await
{
error!("Failed to update FACE status: {}", e);
}
}
if let Err(e) = redis
.update_worker_processor_status(
&job.uuid,
@@ -416,6 +490,20 @@ impl ProcessorPool {
{
error!("Failed to update Redis processor status: {}", e);
}
// Also update PostgreSQL processing_status JSON: record the final frame counts
// for this processor with status "completed". A failed write is only logged.
// NOTE(review): `total_frames` is an `i32` on `ProcessorOutput`; an `as u64` cast
// of a negative value wraps to a very large number — confirm both counts are
// never negative.
if let Err(e) = db
.update_processor_progress(
&job.uuid,
&processor_name,
output.frames_processed as u64,
output.total_frames as u64,
"completed",
)
.await
{
error!("Failed to update PostgreSQL processor status: {}", e);
}
} else {
error!(
"Processor {} output failed verification for job {}: {:?}",
@@ -569,6 +657,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Yolo => {
@@ -612,6 +704,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Ocr => {
@@ -655,6 +751,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Face => {
@@ -666,9 +766,16 @@ impl ProcessorPool {
)
.await?;
let chunks_produced = result.frames.len() as i32;
let face_status = result.status.clone();
let total_faces = result.total_faces;
tracing::info!(
"FACE completed, storing {} frames for {}",
"FACE completed, status={}, {} frames, {} total faces for {}",
face_status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
chunks_produced,
total_faces,
job.uuid
);
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
@@ -720,6 +827,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status,
total_faces,
})
}
ProcessorType::FaceCluster => {
@@ -741,6 +852,10 @@ impl ProcessorPool {
total_frames: 0,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Pose => {
@@ -784,6 +899,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Hand => {
@@ -824,6 +943,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Appearance => {
@@ -851,14 +974,24 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Asr => {
let result =
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
let chunks_produced = result.segments.len() as i32;
let asr_status = result.status.clone();
let segment_count = result.segment_count;
tracing::info!(
"ASR completed, storing {} segments for {}",
"ASR completed, status={}, {} segments for {}",
asr_status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
chunks_produced,
job.uuid
);
@@ -892,6 +1025,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status,
segment_count,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Asrx => {
@@ -899,8 +1036,14 @@ impl ProcessorPool {
processor::process_asrx(video_path, output_path.to_str().unwrap(), uuid)
.await?;
let chunks_produced = result.segments.len() as i32;
let asr_status = result.status.clone();
let segment_count = result.segment_count;
tracing::info!(
"ASRX completed, storing {} segments for {}",
"ASRX completed, status={}, {} segments for {}",
asr_status
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
chunks_produced,
job.uuid
);
@@ -959,6 +1102,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status,
segment_count,
face_status: None,
total_faces: 0,
})
}
ProcessorType::Scene => {
@@ -977,6 +1124,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
});
} else if scene_path.exists() {
tracing::info!("Scene JSON exists for {}, loading from file", job.uuid);
@@ -1025,6 +1176,10 @@ impl ProcessorPool {
total_frames,
retry_count: 0,
pid: 0,
asr_status: None,
segment_count: 0,
face_status: None,
total_faces: 0,
})
}
}
@@ -1363,8 +1518,6 @@ impl ProcessorPool {
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
.await?;
db.store_raw_pre_chunks_batch(uuid, "asr", &pre_chunks_to_store)
.await?;
db.store_speaker_detections_batch(uuid, &speaker_detections)
.await?;
Ok(())