feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system

This commit is contained in:
Accusys
2026-06-02 07:13:23 +08:00
parent e3066c3f49
commit e1572907ae
198 changed files with 43705 additions and 8910 deletions

View File

@@ -640,8 +640,9 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
);
// Step 2: 載入所有 face_detections含 frame_number按 trace_id 分組
// frame_number is BIGINT (i64) in database
let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
let fd_rows = sqlx::query_as::<_, (i32, i64, Vec<f32>)>(&format!(
"SELECT trace_id, frame_number, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id, frame_number",
@@ -658,7 +659,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// 分組trace_id → (frame_number, embedding)
use std::collections::HashMap;
let mut trace_faces_raw: HashMap<i32, Vec<(i32, Vec<f32>)>> = HashMap::new();
let mut trace_faces_raw: HashMap<i32, Vec<(i64, Vec<f32>)>> = HashMap::new();
for (tid, frame, emb) in &fd_rows {
trace_faces_raw
.entry(*tid)
@@ -723,6 +724,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 5: 寫入 DB — Round 1 結果先存
let identities_table = schema::table_name("identities");
let strangers_table = schema::table_name("strangers");
let fd_table = schema::table_name("face_detections");
let mut updated = 0usize;
for (tid, name) in &matched {
@@ -805,13 +807,28 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
}
}
// Step 6: 未匹配的 trace 設 stranger_id = trace_id
// trace_id 在同一個 file 內是 sequential integer直接複用為 stranger_id
// Step 6: 未匹配的 trace 設 stranger_id = strangers.id (FK)
// First: ensure strangers records exist
let _ = sqlx::query(&format!(
"INSERT INTO {} (file_uuid, trace_id) \
SELECT $1, fd.trace_id FROM {} fd \
WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \
AND fd.identity_id IS NULL \
ON CONFLICT (file_uuid, trace_id) DO NOTHING",
strangers_table, fd_table
))
.bind(file_uuid)
.execute(pool)
.await?;
// Then: update face_detections.stranger_id = strangers.id
let stranger_update = sqlx::query(&format!(
"UPDATE {} SET stranger_id = trace_id \
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
AND (stranger_id IS NULL OR stranger_id != trace_id)",
fd_table
"UPDATE {} fd SET stranger_id = s.id \
FROM {} s \
WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \
AND fd.file_uuid = $1 AND fd.identity_id IS NULL \
AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL",
fd_table, strangers_table
))
.bind(file_uuid)
.execute(pool)
@@ -971,16 +988,30 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
let ib_table = schema::table_name("identity_bindings");
let _ = sqlx::query(
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, confidence, metadata) \
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, file_uuid, confidence, metadata) \
VALUES ($1, 'speaker', $2, $3, $4, $5::jsonb) \
ON CONFLICT (identity_id, identity_type, identity_value, file_uuid) \
DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
)
.bind(identity_id)
.bind(&best_speaker)
.bind(file_uuid)
.bind(overlap_ratio)
.bind(&metadata)
.execute(pool).await;
// Also update speaker_detections with the identity_id
let sd_table = schema::table_name("speaker_detections");
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1, confidence = $2 \
WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL", sd_table)
)
.bind(identity_id)
.bind(overlap_ratio)
.bind(file_uuid)
.bind(&best_speaker)
.execute(pool).await;
bindings += 1;
}
}
@@ -1028,31 +1059,31 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let speakers = extract_speakers_from_asrx_data(&asrx_data);
let identities = analyze_person_speaker_overlap(&persons, &speakers);
for (idx, id_result) in identities.iter().enumerate() {
let identity_name = format!("stranger_{}", idx);
let _ = identities.len();
if !identities.is_empty() {
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,
"speaker_ids": id_result.speaker_ids,
"confidence": id_result.confidence,
"speaker_ids": identities[0].speaker_ids,
"confidence": identities[0].confidence,
"evidence": {
"speaker_overlap": id_result.evidence.speaker_overlap,
"frame_ratio": id_result.evidence.frame_ratio,
"speaker_overlap": identities[0].evidence.speaker_overlap,
"frame_ratio": identities[0].evidence.frame_ratio,
},
"reasoning": id_result.reasoning,
"reasoning": identities[0].reasoning,
});
let _ = sqlx::query(
&format!("INSERT INTO {} (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING", schema::table_name("identities"))
)
.bind(&identity_name)
let _ = sqlx::query(&format!(
"INSERT INTO {} (file_uuid, trace_id, metadata) \
VALUES ($1, NULL, $2::jsonb) ON CONFLICT DO NOTHING",
schema::table_name("strangers")
))
.bind(file_uuid)
.bind(&metadata)
.execute(pool)
.await;
}
let _created = identities.len();
tracing::info!(
"[IdentityAgent] Created {} auto identities from face_clustered for {}",
_created,
"[IdentityAgent] Analyzed {} face clusters from face_clustered for {}",
identities.len(),
file_uuid
);
} else {