Files
momentry_core/src/core/checkin.rs
Accusys 074cdcdbed refactor: remove face embedding architecture - single Qdrant _faces collection
- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need to be reimplemented with _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
2026-06-24 22:27:09 +08:00

299 lines
9.5 KiB
Rust

use anyhow::{Context, Result};
use tracing::{info, warn};
use crate::core::db::{
workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb},
PostgresDb, QdrantDb, QdrantWorkspace,
};
/// Summary returned by [`checkin`] describing what was moved out of a file's workspace.
///
/// Derives `Clone`/`PartialEq`/`Eq` in addition to `Debug` so callers and tests can
/// duplicate and compare results directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckinResult {
    /// UUID of the file whose workspace was checked in.
    pub file_uuid: String,
    /// Number of pre-chunk rows read from the workspace and stored in Postgres.
    pub pre_chunks_moved: usize,
    /// Number of speaker-detection rows read from the workspace and stored in Postgres.
    pub speaker_detections_moved: usize,
    /// Number of chunk vectors successfully upserted into the production Qdrant collection.
    pub vectors_moved: usize,
}
/// Summary returned by [`checkout`] describing what was removed for a file.
///
/// Derives `Clone`/`PartialEq`/`Eq` in addition to `Debug` so callers and tests can
/// duplicate and compare results directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// UUID of the file that was checked out.
    pub file_uuid: String,
    /// Total number of Postgres rows deleted across all tables touched by checkout.
    /// Qdrant deletions are not included in this count.
    pub rows_deleted: usize,
}
/// Maps a workspace point id string to the numeric id used for the production upsert.
///
/// Ids that parse as `u64` are used verbatim; any other id is hashed with the
/// standard library's `DefaultHasher`.
///
/// NOTE(review): the std documentation states that `DefaultHasher`'s algorithm is
/// unspecified and may change between Rust releases, so ids derived through the
/// hashing path are not guaranteed to stay the same across toolchain upgrades.
/// The hashing is kept unchanged here because switching algorithms would re-key
/// points that already exist in production.
fn workspace_point_id(raw_id: &str) -> u64 {
    raw_id.parse::<u64>().unwrap_or_else(|_| {
        use std::hash::{Hash, Hasher};
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        raw_id.hash(&mut hasher);
        hasher.finish()
    })
}

/// Moves everything staged in a file's workspace into the production stores.
///
/// Steps, in order:
/// 1. Read all pre-chunks and speaker detections from the workspace database.
/// 2. Store each pre-chunk in Postgres, routed by its `processor_type`
///    (`"asr"`, `"cut"`, `"scene"`, or the raw fallback for anything else).
/// 3. Store the speaker detections in Postgres as one batch.
/// 4. Copy the workspace chunk vectors into the production Qdrant collection.
/// 5. Clear the workspace database and, if step 4 was complete, the workspace vectors.
///
/// # Errors
///
/// Returns an error if the workspace cannot be opened, if reading its rows fails,
/// or if any Postgres store call fails. Qdrant failures and cleanup failures are
/// logged as warnings and do not fail the checkin.
pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult> {
    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    info!("Checkin starting for {} (schema={})", file_uuid, schema);

    let workspace = WorkspaceDb::open(file_uuid)
        .await
        .context("No workspace found for checkin")?;
    let qdrant_ws = QdrantWorkspace::new();

    let pre_chunks = workspace.get_all_pre_chunks().await?;
    let spk_dets = workspace.get_all_speaker_detections().await?;
    info!(
        "Checkin {} workspace: {} pre_chunks, {} spk_dets",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
    );

    // ── Pre-chunks ──
    for chunk in &pre_chunks {
        // Missing or unparseable JSON is stored as JSON null rather than failing the checkin.
        let data_value: serde_json::Value = chunk
            .data
            .as_ref()
            .and_then(|d| serde_json::from_str(d).ok())
            .unwrap_or(serde_json::Value::Null);

        match chunk.processor_type.as_str() {
            // The three timed processors share one row shape; only the target differs.
            kind @ ("asr" | "cut" | "scene") => {
                let row = [(
                    chunk.id as i64,
                    chunk.start_frame.unwrap_or(0),
                    chunk.end_frame.unwrap_or(0),
                    chunk.start_time.unwrap_or(0.0),
                    chunk.end_time.unwrap_or(0.0),
                    data_value,
                )];
                match kind {
                    "asr" => {
                        db.store_asr_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    "cut" => {
                        db.store_cut_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    // Only "scene" remains: the outer pattern admits nothing else.
                    _ => {
                        db.store_scene_pre_chunks_batch(file_uuid, &row).await?;
                    }
                }
            }
            // Every other processor type goes through the raw pre-chunk path.
            _ => {
                let frame = chunk.start_frame.unwrap_or(0);
                let ts = chunk.start_time;
                let text = chunk.text_content.clone();
                db.store_raw_pre_chunks_batch(
                    file_uuid,
                    &chunk.processor_type,
                    &[(frame, ts, data_value, text, None)],
                )
                .await?;
            }
        }
    }

    // ── Speaker detections ──
    if !spk_dets.is_empty() {
        let batch: Vec<(String, f64, f64, String, Option<String>, f32)> = spk_dets
            .iter()
            .map(|s| {
                (
                    s.speaker_id.clone().unwrap_or_default(),
                    s.start_time.unwrap_or(0.0),
                    s.end_time.unwrap_or(0.0),
                    s.text_content.clone().unwrap_or_default(),
                    s.chunk_id.clone(),
                    s.confidence.unwrap_or(0.0) as f32,
                )
            })
            .collect();
        db.store_speaker_detections_batch(file_uuid, &batch).await?;
    }

    // ── Qdrant vectors ──
    let mut vectors_moved = 0usize;
    let mut vectors_failed = 0usize;
    let scroll_ok = match qdrant_ws.scroll_by_file_uuid(file_uuid).await {
        Ok(ws_data) => {
            let qdrant = QdrantDb::new();
            // Chunks → production collection. Points without a vector are skipped.
            for point in &ws_data.chunks {
                if let Some(ref vector) = point.vector {
                    let payload_val: serde_json::Value =
                        serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
                    let point_id = workspace_point_id(&point.id);
                    match qdrant
                        .upsert_vector_to_collection(
                            &qdrant.collection_name,
                            point_id,
                            vector,
                            Some(payload_val),
                        )
                        .await
                    {
                        Ok(_) => vectors_moved += 1,
                        Err(e) => {
                            vectors_failed += 1;
                            warn!("Failed to checkin chunk vector {}: {}", point.id, e);
                        }
                    }
                }
            }
            true
        }
        Err(e) => {
            warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
            false
        }
    };

    // ── Cleanup workspace ──
    // The relational rows are in Postgres at this point (any store failure returned
    // early above), so the workspace database can always be cleared.
    if let Err(e) = workspace.clear().await {
        warn!("Failed to clear workspace for {}: {}", file_uuid, e);
    }
    // Only drop the workspace vectors once every one of them has been copied.
    // Deleting them after a failed scroll or a failed upsert would destroy the
    // only remaining copy of those vectors.
    if scroll_ok && vectors_failed == 0 {
        if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await {
            warn!(
                "Failed to delete workspace vectors for {}: {}",
                file_uuid, e
            );
        }
    } else {
        warn!(
            "Keeping workspace vectors for {}: vector migration incomplete (scroll_ok={}, failed_upserts={})",
            file_uuid, scroll_ok, vectors_failed
        );
    }

    info!(
        "Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
        vectors_moved,
    );
    Ok(CheckinResult {
        file_uuid: file_uuid.to_string(),
        pre_chunks_moved: pre_chunks.len(),
        speaker_detections_moved: spk_dets.len(),
        vectors_moved,
    })
}
/// Removes all production data stored for `file_uuid`.
///
/// Deletes the file's rows from the `face_detections`, `speaker_detections`,
/// `pre_chunks`, `chunk` and `processor_results` tables (in that order), then
/// removes the file's vectors from the production Qdrant collection and from the
/// `{file_uuid}_voice` collection.
///
/// # Errors
///
/// Returns an error as soon as any Postgres `DELETE` fails; deletes that already
/// ran are not rolled back. Qdrant failures are logged as warnings and do not
/// fail the checkout.
pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult> {
    // Logical table names, in deletion order.
    const TABLES: [&str; 5] = [
        "face_detections",
        "speaker_detections",
        "pre_chunks",
        "chunk",
        "processor_results",
    ];

    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    let table = crate::core::db::schema::table_name;
    info!("Checkout starting for {} (schema={})", file_uuid, schema);

    // ── Postgres rows ──
    let mut rows_deleted = 0;
    for &name in &TABLES {
        // Table names come from the fixed list above, never from caller input, so
        // formatting them into the SQL text is safe; the UUID itself is bound.
        let qualified = table(name);
        let outcome = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", qualified))
            .bind(file_uuid)
            .execute(db.pool())
            .await?;
        info!(
            "Checkout {}: deleted {} rows from {}",
            file_uuid,
            outcome.rows_affected(),
            qualified
        );
        rows_deleted += outcome.rows_affected();
    }

    // ── Qdrant vectors (production collection) ──
    let qdrant = QdrantDb::new();
    if let Err(e) = qdrant.delete_by_uuid(file_uuid).await {
        warn!(
            "Failed to delete chunk vectors from Qdrant for {}: {}",
            file_uuid, e
        );
    }

    // ── Qdrant vectors (voice collection) ──
    // NOTE(review): this function previously computed an unused local from
    // `REDIS_KEY_PREFIX` right before building this name, which suggests the
    // collection may have been meant to be `{prefix}_voice` rather than
    // `{file_uuid}_voice`. The existing name is kept; confirm against the code
    // that writes voice vectors.
    let voice_coll = format!("{}_voice", file_uuid);
    if let Err(e) = QdrantDb::delete_by_uuid_from_collection(
        &qdrant.client,
        &qdrant.base_url,
        &qdrant.api_key,
        &voice_coll,
        file_uuid,
    )
    .await
    {
        warn!(
            "Failed to delete vectors from {} for {}: {}",
            voice_coll, file_uuid, e
        );
    }

    info!(
        "Checkout complete for {}: {} PG rows deleted",
        file_uuid, rows_deleted,
    );
    Ok(CheckoutResult {
        file_uuid: file_uuid.to_string(),
        rows_deleted: rows_deleted as usize,
    })
}