feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
@@ -640,8 +640,9 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
);
|
||||
|
||||
// Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i64, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id, frame_number",
|
||||
@@ -658,7 +659,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// 分組:trace_id → (frame_number, embedding)
|
||||
use std::collections::HashMap;
|
||||
let mut trace_faces_raw: HashMap<i32, Vec<(i32, Vec<f32>)>> = HashMap::new();
|
||||
let mut trace_faces_raw: HashMap<i32, Vec<(i64, Vec<f32>)>> = HashMap::new();
|
||||
for (tid, frame, emb) in &fd_rows {
|
||||
trace_faces_raw
|
||||
.entry(*tid)
|
||||
@@ -723,6 +724,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// Step 5: 寫入 DB — Round 1 結果先存
|
||||
let identities_table = schema::table_name("identities");
|
||||
let strangers_table = schema::table_name("strangers");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
@@ -805,13 +807,28 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = trace_id
|
||||
// trace_id 在同一個 file 內是 sequential integer,直接複用為 stranger_id
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = strangers.id (FK)
|
||||
// First: ensure strangers records exist
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, trace_id) \
|
||||
SELECT $1, fd.trace_id FROM {} fd \
|
||||
WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \
|
||||
AND fd.identity_id IS NULL \
|
||||
ON CONFLICT (file_uuid, trace_id) DO NOTHING",
|
||||
strangers_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Then: update face_detections.stranger_id = strangers.id
|
||||
let stranger_update = sqlx::query(&format!(
|
||||
"UPDATE {} SET stranger_id = trace_id \
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
|
||||
AND (stranger_id IS NULL OR stranger_id != trace_id)",
|
||||
fd_table
|
||||
"UPDATE {} fd SET stranger_id = s.id \
|
||||
FROM {} s \
|
||||
WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \
|
||||
AND fd.file_uuid = $1 AND fd.identity_id IS NULL \
|
||||
AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL",
|
||||
fd_table, strangers_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
@@ -971,16 +988,30 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
|
||||
|
||||
let ib_table = schema::table_name("identity_bindings");
|
||||
let _ = sqlx::query(
|
||||
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
|
||||
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, file_uuid, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4, $5::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value, file_uuid) \
|
||||
DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(&best_speaker)
|
||||
.bind(file_uuid)
|
||||
.bind(overlap_ratio)
|
||||
.bind(&metadata)
|
||||
.execute(pool).await;
|
||||
|
||||
// Also update speaker_detections with the identity_id
|
||||
let sd_table = schema::table_name("speaker_detections");
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id = $1, confidence = $2 \
|
||||
WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL", sd_table)
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(overlap_ratio)
|
||||
.bind(file_uuid)
|
||||
.bind(&best_speaker)
|
||||
.execute(pool).await;
|
||||
|
||||
bindings += 1;
|
||||
}
|
||||
}
|
||||
@@ -1028,31 +1059,31 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
|
||||
let speakers = extract_speakers_from_asrx_data(&asrx_data);
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
for (idx, id_result) in identities.iter().enumerate() {
|
||||
let identity_name = format!("stranger_{}", idx);
|
||||
let _ = identities.len();
|
||||
if !identities.is_empty() {
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"speaker_ids": identities[0].speaker_ids,
|
||||
"confidence": identities[0].confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
"speaker_overlap": identities[0].evidence.speaker_overlap,
|
||||
"frame_ratio": identities[0].evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
"reasoning": identities[0].reasoning,
|
||||
});
|
||||
let _ = sqlx::query(
|
||||
&format!("INSERT INTO {} (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING", schema::table_name("identities"))
|
||||
)
|
||||
.bind(&identity_name)
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, trace_id, metadata) \
|
||||
VALUES ($1, NULL, $2::jsonb) ON CONFLICT DO NOTHING",
|
||||
schema::table_name("strangers")
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
let _created = identities.len();
|
||||
tracing::info!(
|
||||
"[IdentityAgent] Created {} auto identities from face_clustered for {}",
|
||||
_created,
|
||||
"[IdentityAgent] Analyzed {} face clusters from face_clustered for {}",
|
||||
identities.len(),
|
||||
file_uuid
|
||||
);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user