refactor: remove face embedding architecture - single Qdrant _faces collection
- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need to be reimplemented on top of _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
This commit is contained in:
+69
-66
@@ -12,7 +12,7 @@ use std::collections::HashMap;
|
||||
use super::types::AppState;
|
||||
use crate::core::config;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient};
|
||||
use crate::core::db::{Database, PostgresDb, QdrantDb, QdrantWorkspace, RedisClient};
|
||||
use crate::core::storage::content_hash;
|
||||
use crate::FileManager;
|
||||
|
||||
@@ -463,7 +463,6 @@ async fn register_single_file(
|
||||
.execute(db.pool()).await;
|
||||
|
||||
let mut cut_done = false;
|
||||
let mut scene_done = false;
|
||||
if has_video && total_frames > 0 && fps > 0.0 {
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
|
||||
@@ -511,31 +510,6 @@ async fn register_single_file(
|
||||
}
|
||||
}
|
||||
|
||||
let scene_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.json", file_uuid));
|
||||
if !scene_path.exists() {
|
||||
let scene_script = std::path::Path::new(&scripts_dir).join("scene_classifier.py");
|
||||
if scene_script.exists() {
|
||||
let scene_output = std::process::Command::new(&python_path)
|
||||
.arg(&scene_script)
|
||||
.arg(&canonical_path)
|
||||
.arg(&scene_path)
|
||||
.arg("--sample-interval")
|
||||
.arg("2")
|
||||
.output();
|
||||
if let Ok(output) = scene_output {
|
||||
if output.status.success() {
|
||||
scene_done = true;
|
||||
tracing::info!(
|
||||
"[REGISTER] Scene classification completed for {}",
|
||||
file_uuid
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
scene_done = true;
|
||||
}
|
||||
}
|
||||
|
||||
let audio_tracks: Vec<serde_json::Value> = temp_probe_json
|
||||
@@ -584,9 +558,9 @@ async fn register_single_file(
|
||||
}
|
||||
}
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET cut_done = $1, scene_done = $2, audio_tracks = $3, cut_count = $4, cut_max_duration = $5 WHERE file_uuid = $6", videos_table)
|
||||
&format!("UPDATE {} SET cut_done = $1, scene_done = false, audio_tracks = $3, cut_count = $4, cut_max_duration = $5 WHERE file_uuid = $6", videos_table)
|
||||
)
|
||||
.bind(cut_done).bind(scene_done).bind(&audio_tracks_json).bind(cut_count).bind(cut_max_duration).bind(&file_uuid)
|
||||
.bind(cut_done).bind(&audio_tracks_json).bind(cut_count).bind(cut_max_duration).bind(&file_uuid)
|
||||
.execute(db.pool()).await;
|
||||
|
||||
if let Some(json_val) = probe_json {
|
||||
@@ -599,41 +573,6 @@ async fn register_single_file(
|
||||
let _ = std::fs::write(&probe_path, json_str);
|
||||
}
|
||||
|
||||
if final_file_type.as_deref() == Some("video") {
|
||||
let auto_file_uuid = file_uuid.clone();
|
||||
let auto_db = db.clone();
|
||||
tokio::spawn(async move {
|
||||
let identities_dir =
|
||||
std::path::Path::new(&*crate::core::config::OUTPUT_DIR).join("identities");
|
||||
let index_path = identities_dir.join("_index.json");
|
||||
let cache_path = format!(
|
||||
"{}/{}.tmdb.json",
|
||||
*crate::core::config::OUTPUT_DIR,
|
||||
auto_file_uuid
|
||||
);
|
||||
let cache_file = std::path::Path::new(&cache_path);
|
||||
|
||||
if index_path.exists() && cache_file.exists() {
|
||||
tracing::info!(
|
||||
"[AUTO-TMDB] Offline cache found for {}, running probe",
|
||||
auto_file_uuid
|
||||
);
|
||||
if let Err(e) =
|
||||
crate::core::tmdb::probe::probe_from_cache(&auto_db, &auto_file_uuid).await
|
||||
{
|
||||
tracing::warn!("[AUTO-TMDB] Probe failed for {}: {}", auto_file_uuid, e);
|
||||
} else {
|
||||
tracing::info!("[AUTO-TMDB] Probe completed for {}", auto_file_uuid);
|
||||
}
|
||||
} else {
|
||||
tracing::info!(
|
||||
"[AUTO-TMDB] No offline cache for {}, skipping",
|
||||
auto_file_uuid
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
RegisterFileResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
@@ -978,8 +917,16 @@ struct UnregisterResponse {
|
||||
deleted_chunks: u64,
|
||||
deleted_tkg_nodes: u64,
|
||||
deleted_qdrant_vectors: Option<u64>,
|
||||
deleted_qdrant_workspace: Option<u64>,
|
||||
deleted_redis_keys: Option<u64>,
|
||||
deleted_output_files: u64,
|
||||
deleted_file_identities: u64,
|
||||
deleted_speaker_detections: u64,
|
||||
deleted_face_clusters: u64,
|
||||
deleted_face_recognition_results: u64,
|
||||
deleted_characters: u64,
|
||||
deleted_chunks_rule1: u64,
|
||||
deleted_processor_alerts: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -1011,6 +958,15 @@ fn delete_output_files(uuid: &str) -> u64 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let workspace_sqlite = format!("{}.workspace.sqlite", uuid);
|
||||
for output_dir in &output_dirs {
|
||||
let path = std::path::Path::new(output_dir).join(&workspace_sqlite);
|
||||
if path.exists() && std::fs::remove_file(&path).is_ok() {
|
||||
deleted_count += 1;
|
||||
tracing::info!("[UNREGISTER] Deleted workspace SQLite: {}", path.display());
|
||||
}
|
||||
}
|
||||
deleted_count
|
||||
}
|
||||
|
||||
@@ -1037,6 +993,13 @@ async fn unregister(
|
||||
let chunk_vectors_table = schema::table_name("chunk_vectors");
|
||||
let monitor_jobs_table = schema::table_name("monitor_jobs");
|
||||
let frames_table = schema::table_name("frames");
|
||||
let file_identities_table = schema::table_name("file_identities");
|
||||
let speaker_detections_table = schema::table_name("speaker_detections");
|
||||
let face_clusters_table = schema::table_name("face_clusters");
|
||||
let face_recognition_results_table = schema::table_name("face_recognition_results");
|
||||
let characters_table = schema::table_name("characters");
|
||||
let chunks_rule1_table = schema::table_name("chunks_rule1");
|
||||
let processor_alerts_table = schema::table_name("processor_alerts");
|
||||
|
||||
let mut tx = state.db.pool().begin().await.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to start transaction: {}", e);
|
||||
@@ -1082,6 +1045,21 @@ async fn unregister(
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
|
||||
let deleted_file_identities =
|
||||
delete_safe!(file_identities_table, "file_uuid = $1", &uuid, "file identities");
|
||||
let deleted_speaker_detections =
|
||||
delete_safe!(speaker_detections_table, "file_uuid = $1", &uuid, "speaker detections");
|
||||
let deleted_face_clusters =
|
||||
delete_safe!(face_clusters_table, "file_uuid = $1", &uuid, "face clusters");
|
||||
let deleted_face_recognition =
|
||||
delete_safe!(face_recognition_results_table, "file_uuid = $1", &uuid, "face recognition results");
|
||||
let deleted_characters =
|
||||
delete_safe!(characters_table, "file_uuid = $1", &uuid, "characters");
|
||||
let deleted_chunks_rule1 =
|
||||
delete_safe!(chunks_rule1_table, "uuid = $1", &uuid, "chunks rule1");
|
||||
let deleted_processor_alerts =
|
||||
delete_safe!(processor_alerts_table, "file_uuid = $1", &uuid, "processor alerts");
|
||||
|
||||
sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
@@ -1100,10 +1078,13 @@ async fn unregister(
|
||||
})?;
|
||||
|
||||
tracing::info!(
|
||||
"[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames",
|
||||
"[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames, {} file_identities, {} speaker_detections, {} face_clusters, {} face_recognition_results, {} characters, {} chunks_rule1, {} processor_alerts",
|
||||
deleted_faces, deleted_processors, deleted_parent_chunks, deleted_chunks,
|
||||
deleted_pre_chunks, deleted_tkg_nodes, deleted_cuts, deleted_strangers,
|
||||
deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames
|
||||
deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames,
|
||||
deleted_file_identities, deleted_speaker_detections, deleted_face_clusters,
|
||||
deleted_face_recognition, deleted_characters, deleted_chunks_rule1,
|
||||
deleted_processor_alerts
|
||||
);
|
||||
|
||||
let deleted_output_files = delete_output_files(&uuid);
|
||||
@@ -1141,6 +1122,20 @@ async fn unregister(
|
||||
}
|
||||
};
|
||||
|
||||
let deleted_qdrant_workspace = {
|
||||
let workspace = QdrantWorkspace::new();
|
||||
match workspace.delete_by_file_uuid(&uuid).await {
|
||||
Ok(_) => {
|
||||
tracing::info!("[UNREGISTER] Deleted Qdrant workspace vectors for {}", uuid);
|
||||
Some(1)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[UNREGISTER] Failed to delete Qdrant workspace vectors: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Json(UnregisterResponse {
|
||||
success: true,
|
||||
message: format!("File {} unregistered successfully.", uuid),
|
||||
@@ -1150,8 +1145,16 @@ async fn unregister(
|
||||
deleted_chunks: (deleted_chunks + deleted_parent_chunks + deleted_pre_chunks) as u64,
|
||||
deleted_tkg_nodes: deleted_tkg_nodes as u64,
|
||||
deleted_qdrant_vectors,
|
||||
deleted_qdrant_workspace,
|
||||
deleted_redis_keys,
|
||||
deleted_output_files,
|
||||
deleted_file_identities: deleted_file_identities as u64,
|
||||
deleted_speaker_detections: deleted_speaker_detections as u64,
|
||||
deleted_face_clusters: deleted_face_clusters as u64,
|
||||
deleted_face_recognition_results: deleted_face_recognition as u64,
|
||||
deleted_characters: deleted_characters as u64,
|
||||
deleted_chunks_rule1: deleted_chunks_rule1 as u64,
|
||||
deleted_processor_alerts: deleted_processor_alerts as u64,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -1,807 +0,0 @@
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::core::llm::function_calling::LLM_CLIENT;
|
||||
use sqlx::Row;
|
||||
|
||||
use crate::api::types::AppState;
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::db::{PostgresDb, VectorPayload};
|
||||
use crate::core::embedding::Embedder;
|
||||
|
||||
pub fn five_w1h_agent_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/agents/5w1h/analyze", post(analyze_5w1h))
|
||||
.route("/api/v1/agents/5w1h/batch", post(batch_analyze_5w1h))
|
||||
.route("/api/v1/agents/5w1h/status", get(get_5w1h_status))
|
||||
}
|
||||
|
||||
// ── Data Structures ──
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct Analyze5W1HRequest {
|
||||
pub file_uuid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct Analyze5W1HResponse {
|
||||
pub success: bool,
|
||||
pub file_uuid: String,
|
||||
pub scenes_processed: usize,
|
||||
pub scenes_total: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct BatchAnalyze5W1HRequest {
|
||||
pub file_uuids: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct BatchAnalyze5W1HResponse {
|
||||
pub success: bool,
|
||||
pub jobs: Vec<BatchJobStatus>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct BatchJobStatus {
|
||||
pub file_uuid: String,
|
||||
pub status: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CutScene {
|
||||
chunk_id: String,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
content: serde_json::Value,
|
||||
metadata: serde_json::Value,
|
||||
summary_text: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct SentenceChunk {
|
||||
chunk_id: String,
|
||||
text: String,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
content: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ChildSummary {
|
||||
chunk_id: String,
|
||||
enhanced: String,
|
||||
five_w1h: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SceneSummaryResult {
|
||||
parent_summary: String,
|
||||
five_w1h: serde_json::Value,
|
||||
child_summaries: Vec<ChildSummary>,
|
||||
}
|
||||
|
||||
// ── LLM Endpoint ──
|
||||
|
||||
fn llm_base_url() -> String {
|
||||
crate::core::config::llm::SUMMARY_URL.clone()
|
||||
}
|
||||
|
||||
fn llm_model() -> String {
|
||||
crate::core::config::llm::SUMMARY_MODEL.clone()
|
||||
}
|
||||
|
||||
// ── Data Fetching ──
|
||||
|
||||
async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
|
||||
let table = schema::table_name("chunk");
|
||||
sqlx::query_as::<_, (String, i64, i64, f64, Option<f64>, Option<f64>, serde_json::Value, Option<serde_json::Value>, Option<String>)>(&format!(
|
||||
r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(db.pool()).await?
|
||||
.into_iter().map(|r| Ok(CutScene {
|
||||
chunk_id: r.0, start_frame: r.1, end_frame: r.2,
|
||||
fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0),
|
||||
content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8,
|
||||
})).collect()
|
||||
}
|
||||
|
||||
async fn fetch_sentences_in_scene(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<SentenceChunk>> {
|
||||
let table = schema::table_name("chunk");
|
||||
sqlx::query_as::<_, (String, String, Option<f64>, Option<f64>, i64, i64, serde_json::Value)>(&format!(
|
||||
r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
|
||||
AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table
|
||||
))
|
||||
.bind(file_uuid).bind(cut.start_time).bind(cut.end_time)
|
||||
.fetch_all(db.pool()).await?
|
||||
.into_iter().map(|r| Ok(SentenceChunk {
|
||||
chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0),
|
||||
start_frame: r.4, end_frame: r.5, content: r.6,
|
||||
})).collect()
|
||||
}
|
||||
|
||||
/// Fetch actor names present in this scene from face_detections + identity_bindings + identities
|
||||
async fn fetch_identity_names_for_scene(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<String>> {
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let ib_table = schema::table_name("identity_bindings");
|
||||
let id_table = schema::table_name("identities");
|
||||
let rows = sqlx::query_scalar::<_, String>(&format!(
|
||||
r#"SELECT DISTINCT i.name
|
||||
FROM {} fd
|
||||
JOIN {} ib ON ib.identity_value = fd.trace_id::text AND ib.identity_type = 'trace'
|
||||
JOIN {} i ON i.id = ib.identity_id
|
||||
WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
|
||||
AND fd.trace_id IS NOT NULL
|
||||
ORDER BY i.name"#,
|
||||
fd_table, ib_table, id_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(cut.start_frame)
|
||||
.bind(cut.end_frame)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
/// Fetch YOLO object labels detected in this scene from pre_chunks
|
||||
async fn fetch_yolo_objects_for_scene(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<String>> {
|
||||
let table = schema::table_name("pre_chunks");
|
||||
let rows = sqlx::query_scalar::<_, String>(&format!(
|
||||
r#"SELECT DISTINCT data->>'label'
|
||||
FROM {} WHERE file_uuid = $1 AND processor_type = 'yolo'
|
||||
AND frame_number >= $2 AND frame_number <= $3
|
||||
AND data->>'label' IS NOT NULL
|
||||
ORDER BY data->>'label'"#,
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(cut.start_frame)
|
||||
.bind(cut.end_frame)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
/// Fetch active speakers + their actor names for a scene's frame range
|
||||
/// Uses identity_bindings to map SPEAKER_X to actor names
|
||||
async fn fetch_speakers_for_scene(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<String>> {
|
||||
let pc_table = schema::table_name("pre_chunks");
|
||||
let speakers = sqlx::query_scalar::<_, String>(&format!(
|
||||
r#"SELECT DISTINCT data->>'speaker_id'
|
||||
FROM {} WHERE file_uuid = $1 AND processor_type = 'asrx'
|
||||
AND data->>'speaker_id' IS NOT NULL
|
||||
AND start_frame <= $3 AND end_frame >= $2
|
||||
ORDER BY data->>'speaker_id'"#,
|
||||
pc_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(cut.start_frame)
|
||||
.bind(cut.end_frame)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
if speakers.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Map speaker_ids to actor names via identity_bindings
|
||||
let ib_table = schema::table_name("identity_bindings");
|
||||
let id_table = schema::table_name("identities");
|
||||
let mut result = Vec::new();
|
||||
for spk in &speakers {
|
||||
let name: Option<String> = sqlx::query_scalar(&format!(
|
||||
r#"SELECT i.name FROM {} ib JOIN {} i ON i.id = ib.identity_id
|
||||
WHERE ib.identity_type = 'speaker' AND ib.identity_value = $1 AND i.name IS NOT NULL
|
||||
LIMIT 1"#,
|
||||
ib_table, id_table
|
||||
))
|
||||
.bind(spk)
|
||||
.fetch_optional(db.pool())
|
||||
.await?;
|
||||
match name {
|
||||
Some(n) => result.push(format!("{} ({})", spk, n)),
|
||||
None => result.push(spk.clone()),
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Fetch trace IDs with identity names for a scene's frame range
|
||||
async fn fetch_trace_info(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
) -> anyhow::Result<Vec<String>> {
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let ib_table = schema::table_name("identity_bindings");
|
||||
let id_table = schema::table_name("identities");
|
||||
let rows = sqlx::query_as::<_, (i32, Option<String>)>(&format!(
|
||||
r#"SELECT DISTINCT fd.trace_id, i.name
|
||||
FROM {} fd
|
||||
LEFT JOIN {} ib ON ib.identity_value = fd.trace_id::text AND ib.identity_type = 'trace'
|
||||
LEFT JOIN {} i ON i.id = ib.identity_id
|
||||
WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
|
||||
AND fd.trace_id IS NOT NULL
|
||||
ORDER BY fd.trace_id"#,
|
||||
fd_table, ib_table, id_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(cut.start_frame)
|
||||
.bind(cut.end_frame)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
Ok(rows
|
||||
.iter()
|
||||
.map(|(trace, name)| {
|
||||
if let Some(n) = name {
|
||||
format!("trace_{} ({})", trace, n)
|
||||
} else {
|
||||
format!("trace_{}", trace)
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
// ── LLM Prompt (Embedding-Optimized) ──
|
||||
|
||||
async fn summarize_one_scene(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
cut: &CutScene,
|
||||
sentences: &[SentenceChunk],
|
||||
prev_context: &str,
|
||||
) -> anyhow::Result<SceneSummaryResult> {
|
||||
if sentences.is_empty() {
|
||||
return Ok(SceneSummaryResult {
|
||||
parent_summary: String::new(),
|
||||
five_w1h: serde_json::Value::Null,
|
||||
child_summaries: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let faces = fetch_identity_names_for_scene(db, file_uuid, cut)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let traces = fetch_trace_info(db, file_uuid, cut)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let speakers = fetch_speakers_for_scene(db, file_uuid, cut)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut dialogue = String::new();
|
||||
for (i, s) in sentences.iter().enumerate() {
|
||||
let t = s.text.trim();
|
||||
if !t.is_empty() {
|
||||
dialogue.push_str(&format!("[{}] {}\n", i + 1, t));
|
||||
}
|
||||
}
|
||||
|
||||
let story_so_far = if prev_context.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("\nStory so far (previous scenes):\n{}\n", prev_context)
|
||||
};
|
||||
|
||||
let prompt = format!(
|
||||
r#"Analyze this movie scene and produce a structured summary. Be specific — quote actual dialogue. Avoid template phrases like "within the established dramatic setting."
|
||||
|
||||
Scene time: {:.0}s–{:.0}s
|
||||
|
||||
Dialogue:
|
||||
{}Actors: {}
|
||||
Objects: {}
|
||||
Face traces: {}
|
||||
Speakers: {}
|
||||
{}
|
||||
Output EXACTLY this JSON format:
|
||||
{{
|
||||
"scene_summary": "5 flowing sentences: who+what+where+when+why+how. Quote actual lines.",
|
||||
"5w1h": {{
|
||||
"who": "1 sentence with actor/character name",
|
||||
"what": "1 sentence describing the action, quote the line",
|
||||
"where": "1 sentence about setting",
|
||||
"when": "1 sentence about timing in story",
|
||||
"why": "1 sentence explaining why this moment matters",
|
||||
"how": "1 sentence about delivery, emotion, tone"
|
||||
}},
|
||||
"sentences": [
|
||||
{{
|
||||
"index": 1,
|
||||
"who": "1 sentence",
|
||||
"what": "1 sentence referencing the actual line",
|
||||
"where": "1 sentence",
|
||||
"when": "1 sentence",
|
||||
"why": "1 sentence why this is said",
|
||||
"how": "1 sentence describing delivery",
|
||||
"enhanced": "1 sentence with actual dialogue, self-contained for search"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
Rules:
|
||||
- scene_summary: 5 sentences, natural paragraph. Use quotes. No template phrases.
|
||||
- Each 5w1h field: exactly 1 sentence. Specific details. Character names. Quotes.
|
||||
- Each sentence.enhanced: self-contained for search, include actual spoken words.
|
||||
- Return ONLY valid JSON. No markdown.
|
||||
- A short scene with 1-2 lines should have a short summary."#,
|
||||
cut.start_time,
|
||||
cut.end_time,
|
||||
dialogue,
|
||||
faces.join(", "),
|
||||
objects.join(", "),
|
||||
traces.join(", "),
|
||||
speakers.join(", "),
|
||||
story_so_far,
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"model": llm_model(),
|
||||
"messages": [
|
||||
{"role": "system", "content": "You output JSON only. Be specific. Quote actual dialogue. Avoid template phrases."},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
"temperature": 0.1,
|
||||
"max_tokens": 4096,
|
||||
"stream": false
|
||||
});
|
||||
|
||||
let resp = LLM_CLIENT
|
||||
.post(llm_base_url())
|
||||
.json(&body)
|
||||
.timeout(std::time::Duration::from_secs(180))
|
||||
.send()
|
||||
.await?
|
||||
.json::<serde_json::Value>()
|
||||
.await?;
|
||||
|
||||
let content = resp["choices"][0]["message"]["content"]
|
||||
.as_str()
|
||||
.unwrap_or("{}");
|
||||
// Strip markdown code fences if present
|
||||
let cleaned = content
|
||||
.trim_start_matches("```json")
|
||||
.trim_start_matches("```")
|
||||
.trim_end_matches("```")
|
||||
.trim();
|
||||
let parsed: serde_json::Value =
|
||||
serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null);
|
||||
|
||||
let parent_summary = parsed["scene_summary"].as_str().unwrap_or("").to_string();
|
||||
let five_w1h = parsed
|
||||
.get("5w1h")
|
||||
.cloned()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
let mut child_summaries = Vec::new();
|
||||
|
||||
if let Some(arr) = parsed["sentences"].as_array() {
|
||||
for entry in arr {
|
||||
let idx = entry["index"].as_u64().unwrap_or(0).saturating_sub(1) as usize;
|
||||
if let Some(enhanced) = entry["enhanced"].as_str() {
|
||||
if idx < sentences.len() {
|
||||
let child_5w1h = serde_json::json!({
|
||||
"who": entry["who"].as_str().unwrap_or(""),
|
||||
"what": entry["what"].as_str().unwrap_or(""),
|
||||
"where": entry["where"].as_str().unwrap_or(""),
|
||||
"when": entry["when"].as_str().unwrap_or(""),
|
||||
"why": entry["why"].as_str().unwrap_or(""),
|
||||
"how": entry["how"].as_str().unwrap_or(""),
|
||||
});
|
||||
child_summaries.push(ChildSummary {
|
||||
chunk_id: sentences[idx].chunk_id.clone(),
|
||||
enhanced: enhanced.to_string(),
|
||||
five_w1h: child_5w1h,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback
|
||||
if child_summaries.is_empty() && !parent_summary.is_empty() {
|
||||
for s in sentences {
|
||||
let text = s.text.trim();
|
||||
if !text.is_empty() {
|
||||
child_summaries.push(ChildSummary {
|
||||
chunk_id: s.chunk_id.clone(),
|
||||
enhanced: format!("{} Scene: {}", text, parent_summary),
|
||||
five_w1h: serde_json::Value::Null,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(SceneSummaryResult {
|
||||
parent_summary,
|
||||
five_w1h,
|
||||
child_summaries,
|
||||
})
|
||||
}
|
||||
|
||||
// ── DB Storage ──
|
||||
|
||||
async fn store_parent_summary(
|
||||
db: &PostgresDb,
|
||||
cut_chunk_id: &str,
|
||||
file_uuid: &str,
|
||||
summary: &str,
|
||||
five_w1h: &serde_json::Value,
|
||||
sentences: &[SentenceChunk],
|
||||
) -> anyhow::Result<()> {
|
||||
let table = schema::table_name("chunk");
|
||||
let meta = serde_json::json!({
|
||||
"5w1h": five_w1h,
|
||||
"sentence_ids": sentences.iter().map(|s| s.chunk_id.clone()).collect::<Vec<_>>(),
|
||||
"sentence_count": sentences.len(),
|
||||
});
|
||||
sqlx::query(&format!(
|
||||
r#"UPDATE {} SET summary_text = $1, metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $2::jsonb)
|
||||
WHERE chunk_id = $3 AND file_uuid = $4"#,
|
||||
table
|
||||
))
|
||||
.bind(summary)
|
||||
.bind(&meta)
|
||||
.bind(cut_chunk_id)
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn store_child_summaries(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
children: &[ChildSummary],
|
||||
) -> anyhow::Result<()> {
|
||||
let table = schema::table_name("chunk");
|
||||
for c in children {
|
||||
let text = c.enhanced.trim();
|
||||
if text.is_empty() || text.len() < 10 {
|
||||
continue;
|
||||
}
|
||||
// Update text_content (for embedding) + merge 5w1h into content
|
||||
let merge = serde_json::json!({ "5w1h": c.five_w1h });
|
||||
sqlx::query(&format!(
|
||||
r#"UPDATE {} SET text_content = $1, content = content || $2::jsonb, embedding = NULL
|
||||
WHERE chunk_id = $3 AND file_uuid = $4"#,
|
||||
table
|
||||
))
|
||||
.bind(text)
|
||||
.bind(&merge)
|
||||
.bind(&c.chunk_id)
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ── API Handlers ──
|
||||
|
||||
async fn analyze_5w1h(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<Analyze5W1HRequest>,
|
||||
) -> Result<Json<Analyze5W1HResponse>, (StatusCode, String)> {
|
||||
let db = PostgresDb::from_pool(state.db.pool().clone());
|
||||
|
||||
let cuts = fetch_cut_scenes(&db, &req.file_uuid)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let total = cuts.len();
|
||||
let mut processed = 0usize;
|
||||
let mut prev_context: Vec<String> = Vec::new();
|
||||
|
||||
for cut in &cuts {
|
||||
// Skip already-summarized scenes but preserve context
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let sentences = match fetch_sentences_in_scene(&db, &req.file_uuid, cut).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::error!("[5W1H] fetch sentences failed: {}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if sentences.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let context = prev_context.join("\n");
|
||||
let result = match summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::error!("[5W1H] scene {} failed: {}", cut.chunk_id, e);
|
||||
processed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if !result.parent_summary.is_empty() {
|
||||
if let Err(e) = store_parent_summary(
|
||||
&db,
|
||||
&cut.chunk_id,
|
||||
&req.file_uuid,
|
||||
&result.parent_summary,
|
||||
&result.five_w1h,
|
||||
&sentences,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("[5W1H] parent: {}", e);
|
||||
}
|
||||
if let Err(e) =
|
||||
store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await
|
||||
{
|
||||
tracing::error!("[5W1H] child: {}", e);
|
||||
}
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, result.parent_summary
|
||||
));
|
||||
}
|
||||
processed += 1;
|
||||
}
|
||||
|
||||
Ok(Json(Analyze5W1HResponse {
|
||||
success: true,
|
||||
file_uuid: req.file_uuid,
|
||||
scenes_processed: processed,
|
||||
scenes_total: total,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn batch_analyze_5w1h(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<BatchAnalyze5W1HRequest>,
|
||||
) -> Result<Json<BatchAnalyze5W1HResponse>, (StatusCode, String)> {
|
||||
let db = PostgresDb::from_pool(state.db.pool().clone());
|
||||
let mut jobs = Vec::new();
|
||||
|
||||
for uuid in &req.file_uuids {
|
||||
let cuts = fetch_cut_scenes(&db, uuid).await.unwrap_or_default();
|
||||
let total = cuts.len();
|
||||
let mut processed = 0usize;
|
||||
let mut prev_context: Vec<String> = Vec::new();
|
||||
|
||||
for cut in &cuts {
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let sentences = fetch_sentences_in_scene(&db, uuid, cut)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
if sentences.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let context = prev_context.join("\n");
|
||||
if let Ok(result) = summarize_one_scene(&db, uuid, cut, &sentences, &context).await {
|
||||
if !result.parent_summary.is_empty() {
|
||||
let _ = store_parent_summary(
|
||||
&db,
|
||||
&cut.chunk_id,
|
||||
uuid,
|
||||
&result.parent_summary,
|
||||
&result.five_w1h,
|
||||
&sentences,
|
||||
)
|
||||
.await;
|
||||
let _ = store_child_summaries(&db, uuid, &result.child_summaries).await;
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, result.parent_summary
|
||||
));
|
||||
}
|
||||
}
|
||||
processed += 1;
|
||||
}
|
||||
|
||||
jobs.push(BatchJobStatus {
|
||||
file_uuid: uuid.clone(),
|
||||
status: if processed > 0 {
|
||||
"completed".to_string()
|
||||
} else {
|
||||
"no_cut_scenes".to_string()
|
||||
},
|
||||
message: format!("{}/{} scenes processed", processed, total),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Json(BatchAnalyze5W1HResponse {
|
||||
success: true,
|
||||
jobs,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn get_5w1h_status(
|
||||
State(state): State<AppState>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||||
let table = schema::table_name("videos");
|
||||
let rows = sqlx::query(&format!(
|
||||
r#"SELECT file_uuid, processing_status->'agents'->'five_w1h' as s
|
||||
FROM {} WHERE processing_status->'agents'->'five_w1h' IS NOT NULL
|
||||
ORDER BY updated_at DESC LIMIT 50"#,
|
||||
table
|
||||
))
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let videos: Vec<serde_json::Value> = rows
|
||||
.iter()
|
||||
.map(|r| {
|
||||
serde_json::json!({
|
||||
"uuid": r.try_get::<String,_>("file_uuid").unwrap_or_default(),
|
||||
"five_w1h_status": r.try_get::<Option<serde_json::Value>,_>("s").ok().flatten(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(
|
||||
serde_json::json!({ "success": true, "videos": videos }),
|
||||
))
|
||||
}
|
||||
|
||||
/// Pipeline-triggered entry point: run 5W1H agent for a file.
|
||||
pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<()> {
|
||||
let cuts = fetch_cut_scenes(db, file_uuid).await?;
|
||||
let total = cuts.len();
|
||||
let mut processed = 0usize;
|
||||
let mut prev_context: Vec<String> = Vec::new();
|
||||
|
||||
for cut in &cuts {
|
||||
if let Some(ref t) = cut.summary_text {
|
||||
if t.len() > 20 {
|
||||
processed += 1;
|
||||
prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?;
|
||||
if sentences.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let context = prev_context.join("\n");
|
||||
match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await {
|
||||
Ok(result) => {
|
||||
if !result.parent_summary.is_empty() {
|
||||
let _ = store_parent_summary(
|
||||
db,
|
||||
&cut.chunk_id,
|
||||
file_uuid,
|
||||
&result.parent_summary,
|
||||
&result.five_w1h,
|
||||
&sentences,
|
||||
)
|
||||
.await;
|
||||
let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await;
|
||||
prev_context.push(format!(
|
||||
"Scene (t={:.0}s): {}",
|
||||
cut.start_time, result.parent_summary
|
||||
));
|
||||
}
|
||||
processed += 1;
|
||||
}
|
||||
Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e),
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[5W1H] Done for {}: {}/{} scenes",
|
||||
file_uuid,
|
||||
processed,
|
||||
total
|
||||
);
|
||||
|
||||
// Auto-vectorize sentences with EmbeddingGemma (768D)
|
||||
tracing::info!("[5W1H] Starting vectorize for sentence chunks...");
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
let qdrant = QdrantDb::new();
|
||||
qdrant.init_collection(768).await?;
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!(
|
||||
"SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \
|
||||
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id",
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(db.pool())
|
||||
.await?;
|
||||
|
||||
let total_vec = rows.len();
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows {
|
||||
let text = text.trim();
|
||||
if text.is_empty() || text.len() < 5 {
|
||||
continue;
|
||||
}
|
||||
match embedder.embed_document(text).await {
|
||||
Ok(vector) => {
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&vector as &[f32])
|
||||
.bind(chunk_id)
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await
|
||||
{
|
||||
tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
file_uuid: file_uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.to_string()),
|
||||
};
|
||||
if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
|
||||
tracing::error!("[Vectorize] Qdrant failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
stored += 1;
|
||||
if stored % 50 == 0 {
|
||||
tracing::info!("[Vectorize] {}/{}", stored, total_vec);
|
||||
}
|
||||
}
|
||||
Err(e) => tracing::error!("[Vectorize] Embed failed for {}: {}", chunk_id, e),
|
||||
}
|
||||
}
|
||||
tracing::info!("[5W1H] Vectorize done: {}/{} stored", stored, total_vec);
|
||||
Ok(())
|
||||
}
|
||||
+15
-7
@@ -180,11 +180,11 @@ async fn list_identities(
|
||||
})?;
|
||||
|
||||
let sql = format!(
|
||||
"SELECT id::int, uuid, name, metadata FROM {} WHERE status IS NULL OR status != 'merged' ORDER BY id DESC LIMIT $1 OFFSET $2",
|
||||
"SELECT id::int, uuid, name, metadata, status, starred FROM {} WHERE status IS NULL OR status != 'merged' ORDER BY id DESC LIMIT $1 OFFSET $2",
|
||||
id_table
|
||||
);
|
||||
|
||||
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql)
|
||||
let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>, Option<String>, Option<bool>)> = match sqlx::query_as(&sql)
|
||||
.bind(page_size as i64)
|
||||
.bind(offset)
|
||||
.fetch_all(db.pool())
|
||||
@@ -201,11 +201,16 @@ let sql = format!(
|
||||
|
||||
let identities: Vec<IdentityResponse> = rows
|
||||
.into_iter()
|
||||
.map(|r| IdentityResponse {
|
||||
id: r.0,
|
||||
identity_uuid: r.1.to_string().replace('-', ""),
|
||||
name: r.2,
|
||||
metadata: r.3,
|
||||
.map(|r| {
|
||||
IdentityResponse {
|
||||
id: r.0,
|
||||
identity_uuid: r.1.to_string().replace('-', ""),
|
||||
name: r.2,
|
||||
metadata: r.3,
|
||||
status: r.4,
|
||||
starred: r.5.unwrap_or(false),
|
||||
file_uuids: vec![], // Removed N+1 query
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -281,6 +286,9 @@ pub struct IdentityResponse {
|
||||
pub identity_uuid: String,
|
||||
pub name: String,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub status: Option<String>,
|
||||
pub starred: bool,
|
||||
pub file_uuids: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
||||
+13
-706
@@ -661,597 +661,21 @@ fn average_embeddings<'a>(embeddings: impl Iterator<Item = &'a Vec<f32>>) -> Vec
|
||||
/// Unknown: greedy stranger clustering (TH=0.40)
|
||||
/// Writes identity_ref/stranger_ref to Qdrant payload, TKG nodes, and face_detections.
|
||||
async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
|
||||
use std::collections::HashMap;
|
||||
|
||||
let face_db = FaceEmbeddingDb::new();
|
||||
|
||||
// Step 1: Load seeds from Qdrant (type=identity_seed)
|
||||
let seeds = face_db.get_seed_embeddings().await?;
|
||||
tracing::info!(
|
||||
"[FaceMatch] Loaded {} seeds from Qdrant",
|
||||
seeds.len()
|
||||
);
|
||||
|
||||
// Step 2: Preload identity internal IDs (uuid → (id, name))
|
||||
let id_table = schema::table_name("identities");
|
||||
let seed_identity_map: HashMap<String, (i32, String)> = if !seeds.is_empty() {
|
||||
let uuids: Vec<String> = seeds.iter().map(|(uuid, _, _)| uuid.clone()).collect();
|
||||
if uuids.is_empty() {
|
||||
HashMap::new()
|
||||
} else {
|
||||
let rows = sqlx::query_as::<_, (i32, String, String)>(&format!(
|
||||
"SELECT id, uuid::text, name FROM {} WHERE uuid::text = ANY($1)",
|
||||
id_table
|
||||
))
|
||||
.bind(&uuids)
|
||||
.fetch_all(pool)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(id, uuid, name)| (uuid, (id, name)))
|
||||
.collect();
|
||||
rows
|
||||
}
|
||||
} else {
|
||||
HashMap::new()
|
||||
};
|
||||
|
||||
// Step 3: Load face embeddings from Qdrant for this file
|
||||
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
|
||||
|
||||
if qdrant_embeddings.is_empty() {
|
||||
tracing::warn!("[FaceMatch] No face embeddings in Qdrant for {}", file_uuid);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Step 4: Group embeddings by trace_id, keeping confidence
|
||||
let mut trace_faces: HashMap<i32, Vec<(i64, Vec<f32>, f64)>> = HashMap::new();
|
||||
for (_, emb, payload) in &qdrant_embeddings {
|
||||
trace_faces
|
||||
.entry(payload.trace_id)
|
||||
.or_default()
|
||||
.push((payload.frame, emb.clone(), payload.confidence));
|
||||
}
|
||||
|
||||
// Step 5: Progressive multi-round matching with derived seeds
|
||||
// Each round: choose a face with best seed sim for matching; separately,
|
||||
// collect the highest-confidence face per trace for building derived seeds.
|
||||
const TH_MIN: f32 = 0.35;
|
||||
const DERIVED_CONF: f64 = 0.90;
|
||||
const MAX_DERIVED_PER_ID: usize = 9;
|
||||
const MAX_FACES_PER_TRACE: usize = 3;
|
||||
const ANGLE_SIM_THRESHOLD: f32 = 0.90;
|
||||
const TH_STRANGER: f32 = 0.40;
|
||||
|
||||
let total_traces = trace_faces.len();
|
||||
let total_embeddings: usize = trace_faces.values().map(|v| v.len()).sum();
|
||||
tracing::info!(
|
||||
"[FaceMatch] Loaded {} traces ({} face embeddings) from Qdrant for {}",
|
||||
total_traces,
|
||||
total_embeddings,
|
||||
tracing::warn!(
|
||||
"[FaceMatch] Face matching disabled - FaceEmbeddingDb removed. \
|
||||
TODO: Reimplement with _faces collection for {}",
|
||||
file_uuid
|
||||
);
|
||||
|
||||
let mut matched: HashMap<i32, (String, i32)> = HashMap::new();
|
||||
let mut trace_face_count: HashMap<i32, usize> = HashMap::new();
|
||||
|
||||
// All reference embeddings: start with original TMDb seeds
|
||||
let mut all_refs: Vec<(String, String, Vec<f32>)> = seeds.clone();
|
||||
let thresholds = [0.55f32, 0.50, 0.45, 0.40, 0.35];
|
||||
let mut prev_total = 0usize;
|
||||
|
||||
for (round_idx, &th) in thresholds.iter().enumerate() {
|
||||
if th < TH_MIN {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut new_matches: HashMap<i32, (String, i32)> = HashMap::new();
|
||||
let mut seed_candidates: Vec<(i32, String, i32, Vec<f32>, f64)> = Vec::new();
|
||||
|
||||
for (&tid, faces) in &trace_faces {
|
||||
if matched.contains_key(&tid) {
|
||||
continue;
|
||||
}
|
||||
trace_face_count.entry(tid).or_insert(faces.len());
|
||||
|
||||
let mut best_sim = 0.0f32;
|
||||
let mut best_name = String::new();
|
||||
let mut best_id = 0i32;
|
||||
// Collect all high-confidence faces in this trace for derived seeds
|
||||
let mut trace_candidates: Vec<(Vec<f32>, f64)> = Vec::new();
|
||||
|
||||
for (_, emb, conf) in faces {
|
||||
for (ref_uuid, ref_name, ref_emb) in &all_refs {
|
||||
let s = cosine_similarity(emb, ref_emb);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
best_name = ref_name.clone();
|
||||
if let Some(id_str) = ref_uuid.strip_prefix("derived:") {
|
||||
if let Ok(parsed) = id_str.parse::<i32>() {
|
||||
best_id = parsed;
|
||||
}
|
||||
} else if let Some((id, _)) = seed_identity_map.get(ref_uuid) {
|
||||
best_id = *id;
|
||||
}
|
||||
}
|
||||
}
|
||||
if *conf >= DERIVED_CONF {
|
||||
trace_candidates.push((emb.clone(), *conf));
|
||||
}
|
||||
}
|
||||
|
||||
if best_sim >= th && best_id > 0 {
|
||||
new_matches.insert(tid, (best_name.clone(), best_id));
|
||||
|
||||
// Top MAX_FACES_PER_TRACE highest-confidence faces with angular diversity
|
||||
trace_candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
|
||||
let mut selected: Vec<Vec<f32>> = Vec::new();
|
||||
for (emb, conf) in trace_candidates {
|
||||
if selected.len() >= MAX_FACES_PER_TRACE {
|
||||
break;
|
||||
}
|
||||
if selected.iter().any(|e| cosine_similarity(e, &emb) >= ANGLE_SIM_THRESHOLD) {
|
||||
continue;
|
||||
}
|
||||
selected.push(emb.clone());
|
||||
seed_candidates.push((best_id, best_name.clone(), tid, emb, conf));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let new_count = new_matches.len();
|
||||
if new_count == 0 && round_idx > 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
matched.extend(new_matches);
|
||||
|
||||
// Build derived seeds: pick up to MAX_DERIVED_PER_ID per identity
|
||||
// (max MAX_FACES_PER_TRACE from each trace), sorted by confidence descending
|
||||
seed_candidates.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap());
|
||||
let mut per_id: HashMap<i32, usize> = HashMap::new();
|
||||
let mut trace_used_faces: HashMap<i32, usize> = HashMap::new();
|
||||
let mut added_seeds = 0usize;
|
||||
for (id, name, tid, emb, _) in &seed_candidates {
|
||||
let cnt = per_id.entry(*id).or_insert(0);
|
||||
if *cnt >= MAX_DERIVED_PER_ID {
|
||||
continue;
|
||||
}
|
||||
let trace_cnt = trace_used_faces.entry(*tid).or_insert(0);
|
||||
if *trace_cnt >= MAX_FACES_PER_TRACE {
|
||||
continue;
|
||||
}
|
||||
*trace_cnt += 1;
|
||||
*cnt += 1;
|
||||
all_refs.push((format!("derived:{}", id), name.clone(), emb.clone()));
|
||||
added_seeds += 1;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceMatch] Round {}: matched {}+{}={} total (TH={}, {} new derived seeds)",
|
||||
round_idx + 1,
|
||||
prev_total,
|
||||
new_count,
|
||||
matched.len(),
|
||||
th,
|
||||
added_seeds
|
||||
);
|
||||
|
||||
prev_total = matched.len();
|
||||
}
|
||||
|
||||
// Step 7: Stranger clustering for unmatched traces
|
||||
let unmatched_ids: Vec<i32> = trace_faces
|
||||
.keys()
|
||||
.filter(|tid| !matched.contains_key(tid))
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
let mut stranger_map: HashMap<i32, String> = HashMap::new();
|
||||
let mut assigned_stranger: std::collections::HashSet<i32> = std::collections::HashSet::new();
|
||||
let mut stranger_count = 0usize;
|
||||
|
||||
// Sort by face count descending (most reliable first)
|
||||
let mut sorted_unmatched: Vec<i32> = unmatched_ids.clone();
|
||||
sorted_unmatched.sort_by(|a, b| {
|
||||
trace_face_count
|
||||
.get(b)
|
||||
.unwrap_or(&0)
|
||||
.cmp(trace_face_count.get(a).unwrap_or(&0))
|
||||
});
|
||||
|
||||
for &tid in &sorted_unmatched {
|
||||
if assigned_stranger.contains(&tid) {
|
||||
continue;
|
||||
}
|
||||
let centroid_a = if let Some(faces) = trace_faces.get(&tid) {
|
||||
average_embeddings(faces.iter().map(|(_, emb, _)| emb))
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
stranger_count += 1;
|
||||
let stranger_id = format!("{}:stranger_{}", file_uuid, stranger_count);
|
||||
assigned_stranger.insert(tid);
|
||||
stranger_map.insert(tid, stranger_id.clone());
|
||||
|
||||
for &other_tid in &sorted_unmatched {
|
||||
if assigned_stranger.contains(&other_tid) || other_tid == tid {
|
||||
continue;
|
||||
}
|
||||
if let Some(faces_b) = trace_faces.get(&other_tid) {
|
||||
let centroid_b = average_embeddings(faces_b.iter().map(|(_, emb, _)| emb));
|
||||
let s = cosine_similarity(¢roid_a, ¢roid_b);
|
||||
if s >= TH_STRANGER {
|
||||
assigned_stranger.insert(other_tid);
|
||||
stranger_map.insert(other_tid, stranger_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let stranger_trace_count = stranger_map.len();
|
||||
tracing::info!(
|
||||
"[FaceMatch] Stranger clusters: {} groups, {} traces",
|
||||
stranger_count,
|
||||
stranger_trace_count
|
||||
);
|
||||
|
||||
// Step 8: Write results to TKG nodes + Qdrant payload + face_detections
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let nodes_table = schema::table_name("tkg_nodes");
|
||||
let mut pg_updated = 0usize;
|
||||
|
||||
// Clear old identity assignments before writing new ones
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = NULL WHERE file_uuid = $1",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
// 8a: Matched traces → identity_ref
|
||||
for (&tid, (name, identity_id)) in &matched {
|
||||
// Skip if identity_id is invalid (FK constraint would fail)
|
||||
if *identity_id <= 0 {
|
||||
tracing::warn!(
|
||||
"[FaceMatch] Skipping trace {}: invalid identity_id={}",
|
||||
tid, identity_id
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let identity_ref = format!("{}:{}", file_uuid, identity_id);
|
||||
|
||||
// TKG node
|
||||
let external_id = format!("face_track_{}", tid);
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET properties = jsonb_set(\
|
||||
jsonb_set(properties, '{{identity_ref}}', to_jsonb($1), true),\
|
||||
'{{identity_name}}', to_jsonb($2), true)\
|
||||
WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4",
|
||||
nodes_table
|
||||
))
|
||||
.bind(&identity_ref)
|
||||
.bind(name)
|
||||
.bind(file_uuid)
|
||||
.bind(&external_id)
|
||||
.execute(pool)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[FaceMatch] TKG update failed for trace {}: {:?}", tid, e);
|
||||
}
|
||||
|
||||
// Qdrant payload
|
||||
let _ = face_db
|
||||
.update_identity_ref_by_trace(file_uuid, tid, &identity_ref)
|
||||
.await;
|
||||
|
||||
// PostgreSQL face_detections (backward compat)
|
||||
let rows = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.execute(pool)
|
||||
.await
|
||||
.map(|r| r.rows_affected())
|
||||
.unwrap_or(0);
|
||||
pg_updated += rows as usize;
|
||||
}
|
||||
|
||||
// 8b: Stranger traces → stranger_ref
|
||||
for (&tid, stranger_ref) in &stranger_map {
|
||||
// TKG node
|
||||
let external_id = format!("face_track_{}", tid);
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET properties = jsonb_set(\
|
||||
properties, '{{stranger_ref}}', to_jsonb($1), true)\
|
||||
WHERE file_uuid = $2 AND node_type = 'face_track' AND external_id = $3",
|
||||
nodes_table
|
||||
))
|
||||
.bind(stranger_ref)
|
||||
.bind(file_uuid)
|
||||
.bind(&external_id)
|
||||
.execute(pool)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[FaceMatch] TKG stranger update failed for trace {}: {:?}", tid, e);
|
||||
}
|
||||
|
||||
// Qdrant payload
|
||||
let _ = face_db
|
||||
.update_stranger_ref_by_trace(file_uuid, tid, stranger_ref)
|
||||
.await;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceMatch] Done: {} matched, {} strangers — {} face_detections updated",
|
||||
matched.len(),
|
||||
stranger_trace_count,
|
||||
pg_updated
|
||||
);
|
||||
Ok(pg_updated)
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Fallback: PostgreSQL-based matching (original implementation)
|
||||
/// Fallback: PostgreSQL-based matching (disabled - embedding column removed)
|
||||
async fn match_faces_iterative_pg(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Step 1: 載入 TMDb identities (source='tmdb' 且有 face_embedding)
|
||||
let identities_table = schema::table_name("identities");
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
&format!("SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", identities_table)
|
||||
)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch-PG] No TMDb identities with face embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
tracing::info!(
|
||||
"[FaceMatch-PG] Loaded {} TMDb seed identities",
|
||||
tmdb_rows.len()
|
||||
tracing::warn!(
|
||||
"[FaceMatch-PG] PostgreSQL matching disabled - embedding column removed for {}",
|
||||
file_uuid
|
||||
);
|
||||
|
||||
// Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i64, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id, frame_number",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if fd_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch-PG] No face detections with embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// 分組:trace_id → (frame_number, embedding)
|
||||
use std::collections::HashMap;
|
||||
let mut face_track_faces_raw: HashMap<i32, Vec<(i64, Vec<f32>)>> = HashMap::new();
|
||||
for (tid, frame, emb) in &fd_rows {
|
||||
face_track_faces_raw
|
||||
.entry(*tid)
|
||||
.or_insert_with(Vec::new)
|
||||
.push((*frame, emb.clone()));
|
||||
}
|
||||
|
||||
// 從每個 trace 選取不同角度的 3 個 face embedding
|
||||
let mut face_track_samples: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, mut faces) in face_track_faces_raw {
|
||||
faces.sort_by_key(|(frame, _)| *frame);
|
||||
let n = faces.len();
|
||||
let indices = if n <= 3 {
|
||||
(0..n).collect()
|
||||
} else {
|
||||
let mid = n / 2;
|
||||
vec![0, mid, n - 1]
|
||||
};
|
||||
let samples: Vec<Vec<f32>> = indices.iter().map(|&i| faces[i].1.clone()).collect();
|
||||
face_track_samples.insert(tid, samples);
|
||||
}
|
||||
|
||||
let total_traces = face_track_samples.len();
|
||||
let sample_count: usize = face_track_samples.values().map(|v| v.len()).sum();
|
||||
tracing::info!(
|
||||
"[FaceMatch-PG] Loaded {} traces, sampled {} embeddings (3-angle)",
|
||||
total_traces,
|
||||
sample_count
|
||||
);
|
||||
|
||||
// Step 3: 建立 TMDb 查找表
|
||||
let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
|
||||
|
||||
// Step 4: 迭代匹配
|
||||
const TH: f32 = 0.50;
|
||||
let mut matched: HashMap<i32, String> = HashMap::new(); // trace_id → identity_name
|
||||
|
||||
// Round 1: 用 3-angle samples 比對 TMDb
|
||||
for (&tid, samples) in &face_track_samples {
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
|
||||
for face_emb in samples {
|
||||
let s = cosine_similarity(face_emb, tmdb_emb);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
best_name = name.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
matched.insert(tid, best_name);
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
"[FaceMatch] Round 1: {} matched ({}%) — writing to DB",
|
||||
matched.len(),
|
||||
matched.len() * 100 / total_traces
|
||||
);
|
||||
|
||||
// Step 5: 寫入 DB — Round 1 結果先存 (Phase 3: update both face_detections AND tkg_nodes)
|
||||
let identities_table = schema::table_name("identities");
|
||||
let strangers_table = schema::table_name("strangers");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let nodes_table = schema::table_name("tkg_nodes");
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
|
||||
"SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
|
||||
identities_table
|
||||
))
|
||||
.bind(name)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
if let Some(identity_id) = id_opt {
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
// Phase 3: Also update TKG node
|
||||
let external_id = format!("face_track_{}", tid);
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET properties = jsonb_set(\
|
||||
jsonb_set(properties, '{{identity_id}}', $1::jsonb, false),\
|
||||
'{{identity_name}}', $2::jsonb, false)\
|
||||
WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4",
|
||||
nodes_table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(name.as_str())
|
||||
.bind(file_uuid)
|
||||
.bind(&external_id)
|
||||
.execute(pool)
|
||||
.await;
|
||||
|
||||
updated += 1;
|
||||
}
|
||||
}
|
||||
tracing::info!("[FaceMatch] Round 1: updated {} face_detections", updated);
|
||||
|
||||
// Round 2+: 用已匹配的 face 作為 seed 傳播(剩餘未匹配的 trace)
|
||||
let initial_matched = matched.len();
|
||||
for round_n in 2..=5 {
|
||||
let prev = matched.len();
|
||||
// 建立 seed pool: name → Vec<embedding>
|
||||
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
|
||||
for (&tid, name) in &matched {
|
||||
if let Some(samples) = face_track_samples.get(&tid) {
|
||||
seed_pool
|
||||
.entry(name.clone())
|
||||
.or_default()
|
||||
.extend(samples.iter());
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_matches: Vec<(i32, String)> = Vec::new();
|
||||
for (&tid, samples) in &face_track_samples {
|
||||
if matched.contains_key(&tid) {
|
||||
continue;
|
||||
}
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
if samples.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// 用 3-angle samples 分別比對 seed,取最高 similarity
|
||||
for (name, seed_faces) in &seed_pool {
|
||||
for face_emb in samples {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(face_emb, seed);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
best_name = name.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
new_matches.push((tid, best_name));
|
||||
}
|
||||
}
|
||||
for (tid, name) in new_matches {
|
||||
matched.insert(tid, name);
|
||||
}
|
||||
let new = matched.len() - prev;
|
||||
tracing::info!(
|
||||
"[FaceMatch] Round {}: +{} matched (total {}, {}%)",
|
||||
round_n,
|
||||
new,
|
||||
matched.len(),
|
||||
matched.len() * 100 / total_traces
|
||||
);
|
||||
if new < 5 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = strangers.id (FK)
|
||||
// First: ensure strangers records exist
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, trace_id) \
|
||||
SELECT $1, fd.trace_id FROM {} fd \
|
||||
WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \
|
||||
AND fd.identity_id IS NULL \
|
||||
ON CONFLICT (file_uuid, trace_id) DO NOTHING",
|
||||
strangers_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Then: update face_detections.stranger_id = strangers.id
|
||||
let stranger_update = sqlx::query(&format!(
|
||||
"UPDATE {} fd SET stranger_id = s.id \
|
||||
FROM {} s \
|
||||
WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \
|
||||
AND fd.file_uuid = $1 AND fd.identity_id IS NULL \
|
||||
AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL",
|
||||
fd_table, strangers_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
let stranger_count = stranger_update.rows_affected();
|
||||
|
||||
// Step 7: Save identity files for all affected identities
|
||||
let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
|
||||
"SELECT DISTINCT i.uuid FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
|
||||
identities_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
for uuid in &affected {
|
||||
let us = uuid.to_string().replace('-', "");
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
|
||||
{
|
||||
tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
"[FaceMatch] Done: {}/{} traces matched ({}%), {} strangers, {} identity files",
|
||||
matched.len(),
|
||||
total_traces,
|
||||
matched.len() * 100 / total_traces,
|
||||
stranger_count,
|
||||
affected.len()
|
||||
);
|
||||
Ok(updated)
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Bind ASRX speakers to face traces based on temporal overlap.
|
||||
@@ -1589,126 +1013,9 @@ async fn run_identity_handler(
|
||||
|
||||
/// Read all TMDb identities with profile photos, extract face embeddings, store in Qdrant as seeds.
|
||||
pub async fn generate_seed_embeddings(db: &PostgresDb) -> anyhow::Result<usize> {
|
||||
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
|
||||
use std::path::Path;
|
||||
|
||||
let pool = db.pool();
|
||||
let id_table = schema::table_name("identities");
|
||||
|
||||
let rows = sqlx::query_as::<_, (i32, String, String, i32, String)>(&format!(
|
||||
"SELECT id, name, uuid::text, tmdb_id, tmdb_profile FROM {} \
|
||||
WHERE source='tmdb' AND tmdb_profile IS NOT NULL",
|
||||
id_table
|
||||
))
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
tracing::warn!("[GenerateSeeds] No TMDb identities with profile photos");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
|
||||
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
|
||||
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
|
||||
|
||||
let extract_script = Path::new(&scripts_dir).join("extract_face_embedding.py");
|
||||
let face_db = FaceEmbeddingDb::new();
|
||||
|
||||
let mut success = 0usize;
|
||||
for (id, name, uuid, tmdb_id, profile_url) in &rows {
|
||||
tracing::info!("[GenerateSeeds] Processing {} ({})", name, uuid);
|
||||
|
||||
// Download profile image
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_else(|_| reqwest::Client::new());
|
||||
let resp = client.get(profile_url).send().await;
|
||||
let image_bytes = match resp {
|
||||
Ok(r) if r.status().is_success() => r.bytes().await.unwrap_or_default(),
|
||||
_ => {
|
||||
tracing::warn!("[GenerateSeeds] Failed to download: {} from {}", name, profile_url);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if image_bytes.is_empty() {
|
||||
tracing::warn!("[GenerateSeeds] Empty image for {}", name);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Save to temp file
|
||||
let temp_dir = std::env::temp_dir().join("momentry_seed_faces");
|
||||
std::fs::create_dir_all(&temp_dir)?;
|
||||
let temp_img = temp_dir.join(format!("{}.jpg", uuid));
|
||||
std::fs::write(&temp_img, &image_bytes)?;
|
||||
|
||||
// Extract embedding with timeout
|
||||
use tokio::time::timeout;
|
||||
let output = timeout(
|
||||
std::time::Duration::from_secs(180),
|
||||
tokio::process::Command::new(&python_path)
|
||||
.arg(&extract_script)
|
||||
.arg(&temp_img)
|
||||
.output(),
|
||||
)
|
||||
.await
|
||||
.map_err(|_| anyhow::anyhow!("Extract embedding timed out for {}", name))??;
|
||||
|
||||
let _ = std::fs::remove_file(&temp_img);
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
tracing::warn!(
|
||||
"[GenerateSeeds] Extraction failed for {}: {}",
|
||||
name,
|
||||
stderr.trim()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let extract_result: serde_json::Value = match serde_json::from_str(&stdout) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!("[GenerateSeeds] Parse error for {}: {}", name, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let embedding: Vec<f64> = match serde_json::from_value(
|
||||
extract_result.get("embedding").ok_or_else(|| anyhow::anyhow!("No embedding"))?.clone(),
|
||||
) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!("[GenerateSeeds] Embedding format error for {}: {}", name, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();
|
||||
|
||||
// Store in Qdrant
|
||||
match face_db
|
||||
.upsert_seed_embedding(uuid, name, *tmdb_id, &embedding_f32)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
success += 1;
|
||||
tracing::info!("[GenerateSeeds] Stored seed for {}", name);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[GenerateSeeds] Qdrant error for {}: {}", name, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[GenerateSeeds] Done: {}/{} seeds generated",
|
||||
success,
|
||||
rows.len()
|
||||
tracing::warn!(
|
||||
"[GenerateSeeds] Seed embedding generation disabled - FaceEmbeddingDb removed. \
|
||||
TODO: Reimplement with _faces collection"
|
||||
);
|
||||
Ok(success)
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
+32
-29
@@ -67,11 +67,13 @@ pub async fn bind_identity(
|
||||
Path(identity_uuid): Path<String>,
|
||||
Json(req): Json<BindIdentityRequest>,
|
||||
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, Json<serde_json::Value>)> {
|
||||
tracing::info!("[bind_identity] req: {:?}", req);
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
let id_table = crate::core::db::schema::table_name("identities");
|
||||
let history_table = crate::core::db::schema::table_name("identity_history");
|
||||
|
||||
let uuid_clean = identity_uuid.replace('-', "");
|
||||
tracing::info!("[bind_identity] uuid_clean={}, expand_to_trace={:?}", uuid_clean, req.expand_to_trace);
|
||||
let identity_row: Option<(i32, String)> = sqlx::query_as(&format!(
|
||||
"SELECT id, name FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
id_table
|
||||
@@ -188,21 +190,32 @@ pub async fn bind_identity(
|
||||
})?
|
||||
.flatten();
|
||||
|
||||
// Update Qdrant + TKG if trace_id exists
|
||||
if let Some(tid) = trace_id {
|
||||
// 1. Update Qdrant payload
|
||||
let face_db = crate::core::db::FaceEmbeddingDb::new();
|
||||
if let Err(e) = face_db
|
||||
.update_identity_by_trace(&req.file_uuid, tid, &uuid_clean)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[bind] Failed to update Qdrant identity_uuid for trace {}: {}",
|
||||
tid, e
|
||||
);
|
||||
// Expand to entire trace if requested
|
||||
tracing::info!("[bind_identity] trace_id={:?}, expand_to_trace={:?}", trace_id, req.expand_to_trace);
|
||||
if req.expand_to_trace.unwrap_or(false) && trace_id.is_some() {
|
||||
let tid = trace_id.unwrap();
|
||||
tracing::info!("[bind_identity] Expanding to trace {} for file {}", tid, req.file_uuid);
|
||||
let expand_result = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
|
||||
table
|
||||
))
|
||||
.bind(identity_id)
|
||||
.bind(&req.file_uuid)
|
||||
.bind(tid)
|
||||
.execute(state.db.pool())
|
||||
.await;
|
||||
if let Ok(r) = expand_result {
|
||||
tracing::info!("[bind] Expanded to trace {}: {} rows", tid, r.rows_affected());
|
||||
} else {
|
||||
tracing::error!("[bind] Failed to expand to trace {}: {:?}", tid, expand_result.err());
|
||||
}
|
||||
} else {
|
||||
tracing::info!("[bind_identity] NOT expanding: expand_to_trace={:?}, trace_id={:?}", req.expand_to_trace, trace_id);
|
||||
}
|
||||
|
||||
// 2. Update TKG face_track node (dual-field design)
|
||||
// Update TKG if trace_id exists
|
||||
if let Some(tid) = trace_id {
|
||||
// Update TKG face_track node (dual-field design)
|
||||
let tkg_table = crate::core::db::schema::table_name("tkg_nodes");
|
||||
let ext_id = format!("face_track_{}", tid);
|
||||
let identity_ref = format!("{}:identity_{}", req.file_uuid, identity_id);
|
||||
@@ -380,21 +393,9 @@ pub async fn unbind_identity(
|
||||
})?
|
||||
.flatten();
|
||||
|
||||
// Clear Qdrant + TKG if trace_id exists
|
||||
// Clear TKG if trace_id exists
|
||||
if let Some(tid) = trace_id {
|
||||
// 1. Clear Qdrant payload
|
||||
let face_db = crate::core::db::FaceEmbeddingDb::new();
|
||||
if let Err(e) = face_db
|
||||
.clear_identity_by_trace(&req.file_uuid, tid)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[unbind] Failed to clear Qdrant identity_uuid for trace {}: {}",
|
||||
tid, e
|
||||
);
|
||||
}
|
||||
|
||||
// 2. Update TKG face_track node (restore stranger_ref)
|
||||
// Update TKG face_track node (restore stranger_ref)
|
||||
let tkg_table = crate::core::db::schema::table_name("tkg_nodes");
|
||||
let ext_id = format!("face_track_{}", tid);
|
||||
let stranger_ref = format!("{}:stranger_trace_{}", req.file_uuid, tid);
|
||||
@@ -2199,8 +2200,10 @@ pub async fn list_pending_persons(
|
||||
let fd_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let rows: Vec<(i32, String, String, chrono::NaiveDateTime)> = sqlx::query_as(&format!(
|
||||
"SELECT id, uuid::text, name, created_at FROM {} WHERE file_uuid = $1 AND status = 'pending' ORDER BY created_at DESC",
|
||||
id_table
|
||||
"SELECT DISTINCT i.id, i.uuid::text, i.name, i.created_at FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = $1 AND i.status = 'pending' ORDER BY i.created_at DESC",
|
||||
id_table, fd_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_all(state.db.pool())
|
||||
|
||||
@@ -4,7 +4,6 @@ pub mod auth;
|
||||
pub mod checkin_api;
|
||||
pub mod docs;
|
||||
pub mod files;
|
||||
pub mod five_w1h_agent_api;
|
||||
pub mod health;
|
||||
pub mod identities;
|
||||
pub mod identity_agent_api;
|
||||
|
||||
+19
-1
@@ -260,7 +260,25 @@ async fn trigger_processing(
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
if existing_id.is_none() {
|
||||
if let Some(job_id) = existing_id {
|
||||
// Clean up stale processor_results from previous runs
|
||||
// Old entries with status='running' from a dead worker session
|
||||
// would block the worker from actually running processors.
|
||||
let pr_table = schema::table_name("processor_results");
|
||||
sqlx::query(&format!("DELETE FROM {pr_table} WHERE job_id = $1"))
|
||||
.bind(job_id)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!(
|
||||
"[TRIGGER] Failed to clean processor_results for job {}: {}",
|
||||
job_id,
|
||||
e
|
||||
);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
tracing::info!("[TRIGGER] Cleaned processor_results for job {}", job_id);
|
||||
} else {
|
||||
state
|
||||
.db
|
||||
.create_monitor_job(&file_uuid, Some(&file_path))
|
||||
|
||||
@@ -14,7 +14,6 @@ use super::auth;
|
||||
use super::checkin_api;
|
||||
use super::docs;
|
||||
use super::files;
|
||||
use super::five_w1h_agent_api;
|
||||
use super::health;
|
||||
use super::identities;
|
||||
use super::identity_agent_api;
|
||||
@@ -116,7 +115,6 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(agent_search::agent_search_routes())
|
||||
.merge(processing::processing_routes())
|
||||
.merge(identity_agent_api::identity_agent_routes())
|
||||
.merge(five_w1h_agent_api::five_w1h_agent_routes())
|
||||
.merge(media_api::bbox_routes())
|
||||
.merge(media_api::media_proxy_routes())
|
||||
.merge(trace_agent_api::trace_agent_routes())
|
||||
|
||||
+7
-112
@@ -608,122 +608,17 @@ async fn tmdb_match_handler(
|
||||
));
|
||||
}
|
||||
|
||||
// Get all TMDb identities with face_embedding
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
&format!(
|
||||
"SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL",
|
||||
crate::core::db::schema::table_name("identities")
|
||||
)
|
||||
)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
return Ok(Json(TmdbMatchResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
bindings_created: 0,
|
||||
tmdb_identities_available: 0,
|
||||
message: "No TMDb identities with face embeddings".to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
let face_collection = format!(
|
||||
"{}_faces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
tracing::warn!(
|
||||
"[TKG-MATCH] TMDb matching disabled - sync_trace_embeddings removed. \
|
||||
TODO: Reimplement with _faces collection for {}",
|
||||
file_uuid
|
||||
);
|
||||
|
||||
let qdrant = QdrantDb::new();
|
||||
let _ = qdrant.ensure_collection(&face_collection, 512).await;
|
||||
|
||||
let trace_collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
let _ = qdrant.ensure_collection(&trace_collection, 512).await;
|
||||
|
||||
// Sync trace embeddings (idempotent)
|
||||
if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await {
|
||||
tracing::error!("[TKG-MATCH] Trace sync failed: {}", e);
|
||||
}
|
||||
|
||||
let mut total_bindings = 0usize;
|
||||
|
||||
for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows {
|
||||
// Search Qdrant trace collection with this TMDb embedding
|
||||
let results = match qdrant
|
||||
.search_face_collection(
|
||||
&trace_collection,
|
||||
tmdb_embedding,
|
||||
100,
|
||||
"source",
|
||||
"tmdb",
|
||||
Some(&file_uuid),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Filter results by threshold and file_uuid
|
||||
let filtered: Vec<_> = results
|
||||
.into_iter()
|
||||
.filter(|(score, payload)| {
|
||||
*score >= 0.50
|
||||
&& payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if filtered.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Bind matched traces directly
|
||||
let mut bound_count = 0usize;
|
||||
for (_score, payload) in &filtered {
|
||||
if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) {
|
||||
let r = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
))
|
||||
.bind(tmdb_id)
|
||||
.bind(&file_uuid)
|
||||
.bind(tid as i32)
|
||||
.execute(state.db.pool())
|
||||
.await;
|
||||
if let Ok(result) = r {
|
||||
bound_count += result.rows_affected() as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if bound_count > 0 {
|
||||
tracing::info!(
|
||||
"[TKG-MATCH] {}: bound {} traces to TMDb identity {}",
|
||||
tmdb_name,
|
||||
bound_count,
|
||||
tmdb_id
|
||||
);
|
||||
}
|
||||
total_bindings += bound_count;
|
||||
}
|
||||
|
||||
Ok(Json(TmdbMatchResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
bindings_created: total_bindings,
|
||||
tmdb_identities_available: tmdb_rows.len(),
|
||||
message: format!("{} traces matched to TMDb identities", total_bindings),
|
||||
bindings_created: 0,
|
||||
tmdb_identities_available: 0,
|
||||
message: "TMDb matching disabled - needs reimplementation with _faces collection".to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -45,11 +45,6 @@ pub enum Commands {
|
||||
/// File UUID
|
||||
uuid: String,
|
||||
},
|
||||
/// Generate story for cut scenes
|
||||
Story {
|
||||
/// UUID
|
||||
uuid: String,
|
||||
},
|
||||
/// Detect objects in an image using CLIP or Qwen3-VL
|
||||
Detect {
|
||||
/// Image path
|
||||
|
||||
+1
-38
@@ -145,42 +145,6 @@ pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Traces → production traces collection
|
||||
let traces_coll = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
for point in &ws_data.traces {
|
||||
if let Some(ref vector) = point.vector {
|
||||
let payload_val: serde_json::Value =
|
||||
serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
|
||||
let point_id: u64 = match point.id.parse::<u64>() {
|
||||
Ok(id) => id,
|
||||
Err(_) => {
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
point.id.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
};
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&traces_coll,
|
||||
point_id,
|
||||
vector,
|
||||
Some(payload_val),
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("Failed to checkin trace vector {}: {}", point.id, e);
|
||||
} else {
|
||||
vectors_moved += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
|
||||
@@ -297,10 +261,9 @@ pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult
|
||||
let prefix = crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':');
|
||||
let traces_coll = format!("{}_traces", prefix);
|
||||
let voice_coll = format!("{}_voice", file_uuid);
|
||||
|
||||
for coll in &[traces_coll, voice_coll] {
|
||||
for coll in &[voice_coll] {
|
||||
if let Err(e) = QdrantDb::delete_by_uuid_from_collection(
|
||||
&qdrant.client,
|
||||
&qdrant.base_url,
|
||||
|
||||
@@ -1,950 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub struct FaceEmbeddingDb {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
api_key: String,
|
||||
collection_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FaceEmbeddingPayload {
|
||||
pub file_uuid: String,
|
||||
pub trace_id: i32,
|
||||
pub frame: i64,
|
||||
pub bbox_x: f64,
|
||||
pub bbox_y: f64,
|
||||
pub bbox_w: f64,
|
||||
pub bbox_h: f64,
|
||||
pub confidence: f64,
|
||||
pub yaw: f64,
|
||||
pub pitch: f64,
|
||||
pub roll: f64,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub identity_uuid: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub identity_ref: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stranger_ref: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none", rename = "type")]
|
||||
pub r#type: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FaceEmbeddingPoint {
|
||||
pub id: String,
|
||||
pub vector: Vec<f32>,
|
||||
pub payload: FaceEmbeddingPayload,
|
||||
pub score: f64,
|
||||
}
|
||||
|
||||
impl FaceEmbeddingDb {
|
||||
pub fn new() -> Self {
|
||||
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
|
||||
let collection_name = format!("{}_face_embeddings", schema);
|
||||
|
||||
let base_url =
|
||||
std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6333".to_string());
|
||||
let api_key = std::env::var("QDRANT_API_KEY")
|
||||
.unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
|
||||
|
||||
Self {
|
||||
client: Client::new(),
|
||||
base_url,
|
||||
api_key,
|
||||
collection_name,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn init_collection(&self) -> Result<()> {
|
||||
let url = format!("{}/collections/{}", self.base_url, self.collection_name);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.get(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if response.status().is_success() {
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Collection {} already exists",
|
||||
self.collection_name
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let create_url = format!("{}/collections/{}", self.base_url, self.collection_name);
|
||||
let body = serde_json::json!({
|
||||
"vectors": {
|
||||
"size": 512,
|
||||
"distance": "Cosine"
|
||||
}
|
||||
});
|
||||
|
||||
self.client
|
||||
.put(&create_url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to create face embeddings collection")?;
|
||||
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Created collection {} (dim=512)",
|
||||
self.collection_name
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_embedding(
|
||||
&self,
|
||||
point_id: &str,
|
||||
embedding: &[f32],
|
||||
payload: &FaceEmbeddingPayload,
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": embedding,
|
||||
"payload": payload
|
||||
}]
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to upsert face embedding")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant upsert failed: {}", text);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn batch_upsert(
|
||||
&self,
|
||||
points: Vec<(String, Vec<f32>, FaceEmbeddingPayload)>,
|
||||
) -> Result<usize> {
|
||||
if points.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"points": points.iter().map(|(id, vec, payload)| {
|
||||
// Parse id as u64 for Qdrant (requires integer or UUID)
|
||||
let id_num: u64 = id.parse().unwrap_or(0);
|
||||
serde_json::json!({
|
||||
"id": id_num,
|
||||
"vector": vec,
|
||||
"payload": payload
|
||||
})
|
||||
}).collect::<Vec<_>>()
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to batch upsert face embeddings")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant batch upsert failed (HTTP {}): {}", status, text);
|
||||
}
|
||||
|
||||
Ok(points.len())
|
||||
}
|
||||
|
||||
pub async fn update_identity_by_trace(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
identity_uuid: &str,
|
||||
) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
},
|
||||
{
|
||||
"key": "trace_id",
|
||||
"match": { "value": trace_id }
|
||||
}
|
||||
]
|
||||
},
|
||||
"payload": {
|
||||
"identity_uuid": identity_uuid
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to update identity_uuid in Qdrant")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant identity update failed: {}", text);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Updated identity_uuid={} for file={}, trace={}",
|
||||
identity_uuid, file_uuid, trace_id
|
||||
);
|
||||
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
pub async fn clear_identity_by_trace(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
},
|
||||
{
|
||||
"key": "trace_id",
|
||||
"match": { "value": trace_id }
|
||||
}
|
||||
]
|
||||
},
|
||||
"payload": {
|
||||
"identity_uuid": null
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to clear identity_uuid in Qdrant")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant identity clear failed: {}", text);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Cleared identity_uuid for file={}, trace={}",
|
||||
file_uuid, trace_id
|
||||
);
|
||||
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
pub async fn search_similar(
|
||||
&self,
|
||||
query_embedding: &[f32],
|
||||
file_uuid: Option<&str>,
|
||||
limit: usize,
|
||||
threshold: f64,
|
||||
) -> Result<Vec<FaceEmbeddingPoint>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/search",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let mut filter = serde_json::json!({});
|
||||
if let Some(fu) = file_uuid {
|
||||
filter = serde_json::json!({
|
||||
"must": [{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": fu }
|
||||
}]
|
||||
});
|
||||
}
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_embedding,
|
||||
"limit": limit,
|
||||
"with_payload": true,
|
||||
"with_vector": false,
|
||||
"filter": filter
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search face embeddings")?;
|
||||
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
if !status.is_success() {
|
||||
anyhow::bail!("Qdrant search failed: {} - {}", status, text);
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct SearchResult {
|
||||
result: Vec<PointResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PointResult {
|
||||
id: serde_json::Value,
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
let parsed: SearchResult =
|
||||
serde_json::from_str(&text).context("Failed to parse Qdrant search response")?;
|
||||
|
||||
let results: Vec<FaceEmbeddingPoint> = parsed
|
||||
.result
|
||||
.into_iter()
|
||||
.filter(|r| r.score >= threshold)
|
||||
.map(|r| {
|
||||
let id = match r.id {
|
||||
serde_json::Value::String(s) => s,
|
||||
serde_json::Value::Number(n) => n.to_string(),
|
||||
_ => "unknown".to_string(),
|
||||
};
|
||||
let payload = FaceEmbeddingPayload {
|
||||
file_uuid: r
|
||||
.payload
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
trace_id: r
|
||||
.payload
|
||||
.get("trace_id")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0) as i32,
|
||||
frame: r.payload.get("frame").and_then(|v| v.as_i64()).unwrap_or(0),
|
||||
bbox_x: r
|
||||
.payload
|
||||
.get("bbox_x")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_y: r
|
||||
.payload
|
||||
.get("bbox_y")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_w: r
|
||||
.payload
|
||||
.get("bbox_w")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_h: r
|
||||
.payload
|
||||
.get("bbox_h")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
confidence: r
|
||||
.payload
|
||||
.get("confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
yaw: r.payload.get("yaw").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
pitch: r
|
||||
.payload
|
||||
.get("pitch")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
roll: r
|
||||
.payload
|
||||
.get("roll")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
identity_uuid: r
|
||||
.payload
|
||||
.get("identity_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
identity_ref: r
|
||||
.payload
|
||||
.get("identity_ref")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
stranger_ref: r
|
||||
.payload
|
||||
.get("stranger_ref")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
r#type: r
|
||||
.payload
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
};
|
||||
FaceEmbeddingPoint {
|
||||
id,
|
||||
vector: vec![], // Not returned with_vector=false
|
||||
payload,
|
||||
score: r.score,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn get_embeddings_by_trace(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
) -> Result<Vec<(String, Vec<f32>)>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/scroll",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"limit": 1000,
|
||||
"with_payload": true,
|
||||
"with_vector": true,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "file_uuid", "match": { "value": file_uuid }},
|
||||
{"key": "trace_id", "match": { "value": trace_id }}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to scroll face embeddings")?;
|
||||
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
if !status.is_success() {
|
||||
anyhow::bail!("Qdrant scroll failed: {} - {}", status, text);
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollResult {
|
||||
result: ScrollPoints,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollPoints {
|
||||
points: Vec<PointResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PointResult {
|
||||
id: serde_json::Value,
|
||||
vector: Vec<f32>,
|
||||
}
|
||||
|
||||
let parsed: ScrollResult =
|
||||
serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?;
|
||||
|
||||
let results: Vec<(String, Vec<f32>)> = parsed
|
||||
.result
|
||||
.points
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let id = match r.id {
|
||||
serde_json::Value::String(s) => s,
|
||||
serde_json::Value::Number(n) => n.to_string(),
|
||||
_ => "unknown".to_string(),
|
||||
};
|
||||
(id, r.vector)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn get_all_embeddings_for_file(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
) -> Result<Vec<(String, Vec<f32>, FaceEmbeddingPayload)>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/scroll",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"limit": 10000,
|
||||
"with_payload": true,
|
||||
"with_vector": true,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "file_uuid", "match": { "value": file_uuid }}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to scroll face embeddings")?;
|
||||
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
if !status.is_success() {
|
||||
anyhow::bail!("Qdrant scroll failed: {} - {}", status, text);
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollResult {
|
||||
result: ScrollPoints,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollPoints {
|
||||
points: Vec<PointResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PointResult {
|
||||
id: serde_json::Value,
|
||||
vector: Vec<f32>,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
let parsed: ScrollResult =
|
||||
serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?;
|
||||
|
||||
let results: Vec<(String, Vec<f32>, FaceEmbeddingPayload)> = parsed
|
||||
.result
|
||||
.points
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let id = match r.id {
|
||||
serde_json::Value::String(s) => s,
|
||||
serde_json::Value::Number(n) => n.to_string(),
|
||||
_ => "unknown".to_string(),
|
||||
};
|
||||
let payload = FaceEmbeddingPayload {
|
||||
file_uuid: r
|
||||
.payload
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
trace_id: r
|
||||
.payload
|
||||
.get("trace_id")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0) as i32,
|
||||
frame: r.payload.get("frame").and_then(|v| v.as_i64()).unwrap_or(0),
|
||||
bbox_x: r
|
||||
.payload
|
||||
.get("bbox_x")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_y: r
|
||||
.payload
|
||||
.get("bbox_y")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_w: r
|
||||
.payload
|
||||
.get("bbox_w")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
bbox_h: r
|
||||
.payload
|
||||
.get("bbox_h")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
confidence: r
|
||||
.payload
|
||||
.get("confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
yaw: r.payload.get("yaw").and_then(|v| v.as_f64()).unwrap_or(0.0),
|
||||
pitch: r
|
||||
.payload
|
||||
.get("pitch")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
roll: r
|
||||
.payload
|
||||
.get("roll")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
identity_uuid: r
|
||||
.payload
|
||||
.get("identity_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
identity_ref: r
|
||||
.payload
|
||||
.get("identity_ref")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
stranger_ref: r
|
||||
.payload
|
||||
.get("stranger_ref")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
r#type: r
|
||||
.payload
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string()),
|
||||
};
|
||||
(id, r.vector, payload)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn delete_file_embeddings(&self, file_uuid: &str) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/delete?wait=true",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "file_uuid", "match": { "value": file_uuid }}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to delete face embeddings")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant delete failed: {}", text);
|
||||
}
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub async fn upsert_seed_embedding(
|
||||
&self,
|
||||
identity_uuid: &str,
|
||||
identity_name: &str,
|
||||
tmdb_id: i32,
|
||||
embedding: &[f32],
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let point_id = identity_uuid.to_string();
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": "",
|
||||
"trace_id": 0,
|
||||
"frame": 0,
|
||||
"bbox_x": 0.0,
|
||||
"bbox_y": 0.0,
|
||||
"bbox_w": 0.0,
|
||||
"bbox_h": 0.0,
|
||||
"confidence": 0.0,
|
||||
"yaw": 0.0,
|
||||
"pitch": 0.0,
|
||||
"roll": 0.0,
|
||||
"identity_uuid": identity_uuid,
|
||||
"identity_ref": serde_json::Value::Null,
|
||||
"stranger_ref": serde_json::Value::Null,
|
||||
"identity_name": identity_name,
|
||||
"tmdb_id": tmdb_id,
|
||||
"type": "identity_seed",
|
||||
});
|
||||
|
||||
let body = serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": embedding,
|
||||
"payload": payload
|
||||
}]
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to upsert seed embedding")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant seed upsert failed: {}", text);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[SeedEmbedding] Stored seed for identity_uuid={}, name={}",
|
||||
identity_uuid, identity_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_seed_embeddings(
|
||||
&self,
|
||||
) -> Result<Vec<(String, String, Vec<f32>)>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/scroll",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"limit": 10000,
|
||||
"with_payload": true,
|
||||
"with_vector": true,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "type", "match": { "value": "identity_seed" }}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to scroll seed embeddings")?;
|
||||
|
||||
let status = response.status();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
if !status.is_success() {
|
||||
anyhow::bail!("Qdrant scroll failed: {} - {}", status, text);
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollResult {
|
||||
result: ScrollPoints,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ScrollPoints {
|
||||
points: Vec<PointResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PointResult {
|
||||
id: serde_json::Value,
|
||||
vector: Vec<f32>,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
let parsed: ScrollResult =
|
||||
serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?;
|
||||
|
||||
let results: Vec<(String, String, Vec<f32>)> = parsed
|
||||
.result
|
||||
.points
|
||||
.into_iter()
|
||||
.filter_map(|r| {
|
||||
let identity_uuid = r
|
||||
.payload
|
||||
.get("identity_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let identity_name = r
|
||||
.payload
|
||||
.get("identity_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
if identity_uuid.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some((identity_uuid, identity_name, r.vector))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn update_identity_ref_by_trace(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
identity_ref: &str,
|
||||
) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/payload",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
},
|
||||
{
|
||||
"key": "trace_id",
|
||||
"match": { "value": trace_id }
|
||||
}
|
||||
]
|
||||
},
|
||||
"payload": {
|
||||
"identity_ref": identity_ref
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to update identity_ref in Qdrant")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant identity_ref update failed: {}", text);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Updated identity_ref={} for file={}, trace={}",
|
||||
identity_ref, file_uuid, trace_id
|
||||
);
|
||||
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
pub async fn update_stranger_ref_by_trace(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
stranger_ref: &str,
|
||||
) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/payload",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
},
|
||||
{
|
||||
"key": "trace_id",
|
||||
"match": { "value": trace_id }
|
||||
}
|
||||
]
|
||||
},
|
||||
"payload": {
|
||||
"stranger_ref": stranger_ref
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to update stranger_ref in Qdrant")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant stranger_ref update failed: {}", text);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[FaceEmbedding] Updated stranger_ref={} for file={}, trace={}",
|
||||
stranger_ref, file_uuid, trace_id
|
||||
);
|
||||
|
||||
Ok(1)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for FaceEmbeddingDb {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
@@ -32,14 +32,12 @@ pub trait VectorStore: Send + Sync {
|
||||
async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>>;
|
||||
}
|
||||
|
||||
pub mod face_embedding_db;
|
||||
pub mod identity_merge_history;
|
||||
pub mod mongodb_db;
|
||||
pub mod postgres_db;
|
||||
pub mod qdrant_db;
|
||||
pub mod redis_client;
|
||||
pub mod redis_db;
|
||||
pub use face_embedding_db::{FaceEmbeddingDb, FaceEmbeddingPayload, FaceEmbeddingPoint};
|
||||
pub use identity_merge_history::{
|
||||
AliasEntry, FacesTransferred, IdentityMergeHistory, IdentityMergeHistoryStore,
|
||||
IdentitySnapshot, MergeHistoryEntry, MergeHistoryQuery, MergeParams, TargetIdentitySnapshot,
|
||||
|
||||
+17
-92
@@ -448,10 +448,7 @@ pub enum ProcessorType {
|
||||
Hand,
|
||||
Asrx,
|
||||
Scene,
|
||||
Story,
|
||||
FiveW1H,
|
||||
Appearance,
|
||||
MediaPipe,
|
||||
FaceCluster,
|
||||
}
|
||||
|
||||
@@ -488,10 +485,7 @@ impl ProcessorType {
|
||||
ProcessorType::Hand => "hand",
|
||||
ProcessorType::Asrx => "asrx",
|
||||
ProcessorType::Scene => "scene",
|
||||
ProcessorType::Story => "story",
|
||||
ProcessorType::FiveW1H => "5w1h",
|
||||
ProcessorType::Appearance => "appearance",
|
||||
ProcessorType::MediaPipe => "mediapipe",
|
||||
ProcessorType::FaceCluster => "face_cluster",
|
||||
}
|
||||
}
|
||||
@@ -507,10 +501,7 @@ impl ProcessorType {
|
||||
"hand" => Some(ProcessorType::Hand),
|
||||
"asrx" => Some(ProcessorType::Asrx),
|
||||
"scene" => Some(ProcessorType::Scene),
|
||||
"story" => Some(ProcessorType::Story),
|
||||
"5w1h" => Some(ProcessorType::FiveW1H),
|
||||
"appearance" => Some(ProcessorType::Appearance),
|
||||
"mediapipe" => Some(ProcessorType::MediaPipe),
|
||||
"face_cluster" => Some(ProcessorType::FaceCluster),
|
||||
_ => None,
|
||||
}
|
||||
@@ -527,10 +518,7 @@ impl ProcessorType {
|
||||
ProcessorType::Hand => 0.4,
|
||||
ProcessorType::Asrx => 0.8,
|
||||
ProcessorType::Scene => 0.3,
|
||||
ProcessorType::Story => 0.1,
|
||||
ProcessorType::FiveW1H => 0.1,
|
||||
ProcessorType::Appearance => 0.3,
|
||||
ProcessorType::MediaPipe => 0.3,
|
||||
ProcessorType::FaceCluster => 0.7,
|
||||
}
|
||||
}
|
||||
@@ -538,7 +526,6 @@ impl ProcessorType {
|
||||
pub fn uses_gpu(&self) -> bool {
|
||||
match self {
|
||||
ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose | ProcessorType::Hand => true,
|
||||
ProcessorType::MediaPipe | ProcessorType::FaceCluster => false,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -554,10 +541,7 @@ impl ProcessorType {
|
||||
ProcessorType::Hand => 1024,
|
||||
ProcessorType::Asrx => 2048,
|
||||
ProcessorType::Scene => 512,
|
||||
ProcessorType::Story => 256,
|
||||
ProcessorType::FiveW1H => 256,
|
||||
ProcessorType::Appearance => 512,
|
||||
ProcessorType::MediaPipe => 1024,
|
||||
ProcessorType::FaceCluster => 1024,
|
||||
}
|
||||
}
|
||||
@@ -573,10 +557,7 @@ impl ProcessorType {
|
||||
ProcessorType::Hand => Some("vision/hand_pose"),
|
||||
ProcessorType::Asrx => Some("speechbrain/ecapa-tdnn"),
|
||||
ProcessorType::Scene => Some("places365"),
|
||||
ProcessorType::Story => None,
|
||||
ProcessorType::FiveW1H => Some("gemma4"),
|
||||
ProcessorType::Appearance => None,
|
||||
ProcessorType::MediaPipe => Some("mediapipe/holistic"),
|
||||
ProcessorType::FaceCluster => Some("sklearn/agglomerative"),
|
||||
}
|
||||
}
|
||||
@@ -585,17 +566,8 @@ impl ProcessorType {
|
||||
match self {
|
||||
ProcessorType::Asrx => vec![ProcessorType::Cut, ProcessorType::Asr],
|
||||
ProcessorType::Scene => vec![ProcessorType::Cut],
|
||||
ProcessorType::Story => vec![
|
||||
ProcessorType::Asrx,
|
||||
ProcessorType::Cut,
|
||||
ProcessorType::Yolo,
|
||||
ProcessorType::Face,
|
||||
],
|
||||
ProcessorType::FiveW1H => vec![ProcessorType::Story],
|
||||
ProcessorType::Appearance => vec![ProcessorType::Pose],
|
||||
ProcessorType::FaceCluster => vec![ProcessorType::Face],
|
||||
ProcessorType::Hand => vec![],
|
||||
ProcessorType::MediaPipe => vec![],
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
@@ -623,15 +595,12 @@ impl ProcessorType {
|
||||
| ProcessorType::Pose
|
||||
| ProcessorType::Hand
|
||||
| ProcessorType::Appearance
|
||||
| ProcessorType::MediaPipe
|
||||
| ProcessorType::FaceCluster => PipelineType::Frame,
|
||||
|
||||
ProcessorType::Cut
|
||||
| ProcessorType::Asr
|
||||
| ProcessorType::Asrx
|
||||
| ProcessorType::Scene
|
||||
| ProcessorType::Story
|
||||
| ProcessorType::FiveW1H => PipelineType::Time,
|
||||
| ProcessorType::Scene => PipelineType::Time,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2612,76 +2581,32 @@ sqlx::query(
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Face clustering: group unregistered faces within same trace by embedding similarity
|
||||
/// Face clustering: disabled - embedding column no longer used
|
||||
pub async fn cluster_face_embeddings(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
similarity_threshold: f64,
|
||||
_similarity_threshold: f64,
|
||||
) -> Result<Vec<FaceClusterGroup>> {
|
||||
let table = schema::table_name("face_detections");
|
||||
let rows = sqlx::query_as::<_, (String, i64)>(&format!(
|
||||
r#"
|
||||
SELECT trace_id::text, COUNT(DISTINCT frame_number) as frame_count
|
||||
FROM {}
|
||||
WHERE file_uuid = $1
|
||||
AND embedding IS NOT NULL
|
||||
AND identity_id IS NULL
|
||||
GROUP BY trace_id
|
||||
ORDER BY frame_count DESC
|
||||
"#,
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|(trace_id, frame_count)| FaceClusterGroup {
|
||||
trace_id,
|
||||
frame_count: frame_count as i32,
|
||||
})
|
||||
.collect())
|
||||
tracing::warn!(
|
||||
"[cluster_face_embeddings] Disabled - embedding column removed for {}",
|
||||
file_uuid
|
||||
);
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
/// Search similar faces by embedding via pgvector cosine distance
|
||||
/// Search similar faces: disabled - embedding column no longer used
|
||||
pub async fn search_similar_faces(
|
||||
&self,
|
||||
query_embedding: &[f32],
|
||||
_query_embedding: &[f32],
|
||||
file_uuid: &str,
|
||||
limit: i64,
|
||||
threshold: f64,
|
||||
_limit: i64,
|
||||
_threshold: f64,
|
||||
) -> Result<Vec<SimilarFaceResult>> {
|
||||
let table = schema::table_name("face_detections");
|
||||
let rows = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
|
||||
r#"
|
||||
SELECT id, trace_id,
|
||||
1 - (embedding::vector <=> $1::vector) as similarity
|
||||
FROM {}
|
||||
WHERE file_uuid = $2
|
||||
AND embedding IS NOT NULL
|
||||
AND 1 - (embedding::vector <=> $1::vector) >= $3
|
||||
ORDER BY embedding::vector <=> $1::vector
|
||||
LIMIT $4
|
||||
"#,
|
||||
table
|
||||
))
|
||||
.bind(query_embedding)
|
||||
.bind(file_uuid)
|
||||
.bind(threshold)
|
||||
.bind(limit)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|(id, trace_id, similarity)| SimilarFaceResult {
|
||||
id,
|
||||
trace_id,
|
||||
similarity,
|
||||
bbox: String::new(),
|
||||
})
|
||||
.collect())
|
||||
tracing::warn!(
|
||||
"[search_similar_faces] Disabled - embedding column removed for {}",
|
||||
file_uuid
|
||||
);
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
// ==========================================
|
||||
|
||||
@@ -768,45 +768,6 @@ impl QdrantDb {
|
||||
Ok(result.result.points_count)
|
||||
}
|
||||
|
||||
/// Store face embedding with trace_id + frame_number payload
|
||||
pub async fn upsert_face_embedding(
|
||||
&self,
|
||||
point_id: u64,
|
||||
vector: &[f32],
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
frame_number: i64,
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
let mut payload_map = std::collections::HashMap::new();
|
||||
payload_map.insert("file_uuid".to_string(), serde_json::json!(file_uuid));
|
||||
payload_map.insert("trace_id".to_string(), serde_json::json!(trace_id));
|
||||
payload_map.insert("frame_number".to_string(), serde_json::json!(frame_number));
|
||||
payload_map.insert("type".to_string(), serde_json::json!("face_embedding"));
|
||||
|
||||
let point = serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
"payload": payload_map
|
||||
}]
|
||||
});
|
||||
let resp = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&point)
|
||||
.send()
|
||||
.await?;
|
||||
if !resp.status().is_success() {
|
||||
anyhow::bail!("Qdrant upsert face failed: {}", resp.status());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Store chunk embedding with parent-child metadata
|
||||
pub async fn upsert_chunk_embedding(
|
||||
&self,
|
||||
@@ -883,113 +844,3 @@ impl VectorStore for QdrantDb {
|
||||
self.search(query_vector, limit).await
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
let collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
qdrant.ensure_collection(&collection, 512).await?;
|
||||
|
||||
// Read all face_detections with embeddings, grouped by trace_id in Rust
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(&pool)
|
||||
.await?;
|
||||
|
||||
let mut trace_faces: std::collections::HashMap<i32, Vec<Vec<f32>>> =
|
||||
std::collections::HashMap::new();
|
||||
let mut trace_stats: std::collections::HashMap<i32, (i64, i64, i64)> =
|
||||
std::collections::HashMap::new(); // (count, min_frame, max_frame)
|
||||
|
||||
for row in &rows {
|
||||
let tid: Option<i32> = row.get(0);
|
||||
let emb: Option<Vec<f32>> = row.get(1);
|
||||
if let (Some(tid), Some(emb)) = (tid, emb) {
|
||||
trace_faces.entry(tid).or_default().push(emb);
|
||||
let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN));
|
||||
entry.0 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute average embedding per trace
|
||||
struct AvgTrace {
|
||||
tid: i32,
|
||||
avg_emb: Vec<f32>,
|
||||
frame_count: i64,
|
||||
}
|
||||
|
||||
let mut trace_avgs: Vec<AvgTrace> = Vec::new();
|
||||
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let dim = faces[0].len();
|
||||
let mut avg = vec![0.0f32; dim];
|
||||
for face in faces {
|
||||
for (i, &v) in face.iter().enumerate() {
|
||||
avg[i] += v;
|
||||
}
|
||||
}
|
||||
let n = faces.len() as f32;
|
||||
for v in &mut avg {
|
||||
*v /= n;
|
||||
}
|
||||
|
||||
let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0));
|
||||
trace_avgs.push(AvgTrace {
|
||||
tid,
|
||||
avg_emb: avg,
|
||||
frame_count: stats.0,
|
||||
});
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
// Point ID: hash(file_uuid + trace_id) for global uniqueness
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(file_uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(t.tid.to_string().as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
u64::from_be_bytes(hash[0..8].try_into().unwrap())
|
||||
};
|
||||
(
|
||||
point_id,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
"file_uuid": file_uuid,
|
||||
"frame_count": t.frame_count,
|
||||
"source": "trace",
|
||||
})),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
qdrant.upsert_vectors_batch(&collection, &batch).await?;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Synced {} trace embeddings to Qdrant for {}",
|
||||
trace_faces.len(),
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -187,34 +187,13 @@ impl QdrantWorkspace {
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn upsert_face_embedding(
|
||||
&self,
|
||||
point_id: u64,
|
||||
vector: &[f32],
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
frame_number: i64,
|
||||
) -> Result<()> {
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": file_uuid,
|
||||
"trace_id": trace_id,
|
||||
"frame_number": frame_number,
|
||||
"type": "face_embedding",
|
||||
});
|
||||
self.upsert_vector(&self.traces_collection(), point_id, vector, Some(payload))
|
||||
.await
|
||||
}
|
||||
|
||||
/// Scroll all points for a file from all workspace collections.
|
||||
/// Used during checkin to read vectors before moving to production.
|
||||
pub async fn scroll_by_file_uuid(&self, file_uuid: &str) -> Result<WorkspaceScrollResult> {
|
||||
let chunks = self
|
||||
.scroll_collection(&self.chunks_collection(), file_uuid)
|
||||
.await?;
|
||||
let traces = self
|
||||
.scroll_collection(&self.traces_collection(), file_uuid)
|
||||
.await?;
|
||||
Ok(WorkspaceScrollResult { chunks, traces })
|
||||
Ok(WorkspaceScrollResult { chunks, traces: Vec::new() })
|
||||
}
|
||||
|
||||
async fn scroll_collection(
|
||||
|
||||
+5
-32
@@ -1,7 +1,7 @@
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, error, warn};
|
||||
use tracing::{debug, error};
|
||||
|
||||
use crate::core::config;
|
||||
use crate::core::llm::function_calling::LLM_CLIENT;
|
||||
@@ -31,44 +31,17 @@ struct Choice {
|
||||
message: ChatMessage,
|
||||
}
|
||||
|
||||
/// Generates a 5W1H+ summary for a given scene context.
|
||||
/// Context should include the combined text of all sentences in the scene.
|
||||
pub async fn generate_5w1h_summary(scene_text: &str) -> Result<String> {
|
||||
if !*config::llm::SUMMARY_ENABLED {
|
||||
warn!("LLM Summary is disabled via config");
|
||||
return Ok("LLM Disabled".to_string());
|
||||
}
|
||||
|
||||
let prompt = format!(
|
||||
r#"Analyze the following video scene transcript and provide a concise 5W1H+ summary in JSON format.
|
||||
Focus on: Who, What, Where, When, Why, How, and Key Objects/Actions.
|
||||
|
||||
Transcript:
|
||||
"{}"
|
||||
|
||||
Output format:
|
||||
{{
|
||||
"who": "...",
|
||||
"what": "...",
|
||||
"where": "...",
|
||||
"when": "...",
|
||||
"why": "...",
|
||||
"how": "...",
|
||||
"summary": "..."
|
||||
}}"#,
|
||||
scene_text
|
||||
);
|
||||
|
||||
pub async fn ask_llm(prompt: &str, system_prompt: &str) -> Result<String> {
|
||||
let req = ChatRequest {
|
||||
model: (*config::llm::SUMMARY_MODEL).clone(),
|
||||
messages: vec![
|
||||
ChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: "You are an expert video analyst assistant.".to_string(),
|
||||
content: system_prompt.to_string(),
|
||||
},
|
||||
ChatMessage {
|
||||
role: "user".to_string(),
|
||||
content: prompt,
|
||||
content: prompt.to_string(),
|
||||
},
|
||||
],
|
||||
temperature: 0.1,
|
||||
@@ -76,7 +49,7 @@ pub async fn generate_5w1h_summary(scene_text: &str) -> Result<String> {
|
||||
stream: false,
|
||||
};
|
||||
|
||||
debug!("Calling LLM for summary: {}", *config::llm::SUMMARY_URL);
|
||||
debug!("Calling LLM: {}", *config::llm::SUMMARY_URL);
|
||||
|
||||
let res = LLM_CLIENT
|
||||
.post(&*config::llm::SUMMARY_URL)
|
||||
|
||||
@@ -71,6 +71,7 @@ pub struct BindIdentityRequest {
|
||||
pub file_uuid: String,
|
||||
pub face_id: Option<String>,
|
||||
pub id: Option<i64>,
|
||||
pub expand_to_trace: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
|
||||
@@ -103,6 +103,7 @@ mod tests {
|
||||
confidence: 0.95,
|
||||
embedding: Some(vec![0.1, 0.2, 0.3]),
|
||||
landmarks: Some(serde_json::json!([[10.0, 20.0], [30.0, 40.0]])),
|
||||
pose_angle: None,
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(30),
|
||||
gender: Some("male".to_string()),
|
||||
@@ -174,6 +175,7 @@ mod tests {
|
||||
confidence: 0.5,
|
||||
embedding: None,
|
||||
landmarks: None,
|
||||
pose_angle: None,
|
||||
attributes: None,
|
||||
};
|
||||
assert!(face.confidence >= 0.0 && face.confidence <= 1.0);
|
||||
@@ -190,6 +192,7 @@ mod tests {
|
||||
confidence: 0.95,
|
||||
embedding: Some(vec![0.1; 512]),
|
||||
landmarks: None,
|
||||
pose_angle: None,
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(35),
|
||||
gender: Some("male".to_string()),
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
|
||||
/// Time limit (two hours) handed to the Python executor for the MediaPipe script.
const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(2 * 60 * 60);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeResult {
|
||||
pub frame_count: u64,
|
||||
pub fps: f64,
|
||||
pub frames: Vec<MediaPipeFrame>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeFrame {
|
||||
pub frame: u64,
|
||||
pub timestamp: f64,
|
||||
pub persons: Vec<MediaPipePerson>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipePerson {
|
||||
pub person_id: u64,
|
||||
pub pose: Option<MediaPipePose>,
|
||||
pub left_hand: Option<MediaPipeHand>,
|
||||
pub right_hand: Option<MediaPipeHand>,
|
||||
pub face_mesh: Option<MediaPipeFaceMesh>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipePose {
|
||||
pub landmarks: Vec<Vec<f64>>,
|
||||
pub keypoints_33: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeHand {
|
||||
pub landmarks: Vec<Vec<f64>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeFaceMesh {
|
||||
pub landmarks: Vec<Vec<f64>>,
|
||||
}
|
||||
|
||||
pub async fn process_mediapipe(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<MediaPipeResult> {
|
||||
// If mediapipe.json already exists (written by face_processor), skip
|
||||
if std::path::Path::new(output_path).exists() {
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?;
|
||||
let result: MediaPipeResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?;
|
||||
tracing::info!("[MEDIAPIPE] Skipping (already exists): {} frames", result.frames.len());
|
||||
return Ok(result);
|
||||
}
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_name = "mediapipe_processor_v1.11.py";
|
||||
let script_path = executor.script_path(script_name);
|
||||
|
||||
tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[MEDIAPIPE] Script not found, returning empty result");
|
||||
return Ok(MediaPipeResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
frames: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
executor
|
||||
.run(
|
||||
script_name,
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"MEDIAPIPE",
|
||||
Some(MEDIAPIPE_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?;
|
||||
|
||||
let result: MediaPipeResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?;
|
||||
|
||||
tracing::info!("[MEDIAPIPE] Result: {} frames", result.frames.len());
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
@@ -1,203 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use tokio::process::Command;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
|
||||
/// Time limit (two hours) for one run of the MediaPipe holistic processor.
const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(2 * 60 * 60);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeResult {
|
||||
pub metadata: MediaPipeMetadata,
|
||||
pub frames: HashMap<String, MediaPipeDictEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeMetadata {
|
||||
pub fps: f64,
|
||||
pub total_frames: i64,
|
||||
pub processed_frames: i64,
|
||||
pub sample_interval: i64,
|
||||
pub width: i64,
|
||||
pub height: i64,
|
||||
pub processor: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeDictEntry {
|
||||
pub frame_number: i64,
|
||||
pub timestamp: f64,
|
||||
pub persons: Vec<MediaPipePerson>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipePerson {
|
||||
pub person_id: i64,
|
||||
#[serde(default)]
|
||||
pub bbox: Option<MediaPipeBBox>,
|
||||
pub face_mesh: Option<serde_json::Value>,
|
||||
pub pose: Option<serde_json::Value>,
|
||||
pub hands: MediaPipeHands,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeBBox {
|
||||
pub x: i64,
|
||||
pub y: i64,
|
||||
pub width: i64,
|
||||
pub height: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct MediaPipeHands {
|
||||
pub left: Option<serde_json::Value>,
|
||||
pub right: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub async fn process_mediapipe_v2(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
frames: Option<&[i64]>,
|
||||
) -> Result<MediaPipeResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("mediapipe_holistic_processor.py");
|
||||
|
||||
tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path);
|
||||
|
||||
if !script_path.exists() {
|
||||
anyhow::bail!("mediapipe_holistic_processor.py not found");
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(executor.python_path());
|
||||
cmd.arg(&script_path).arg(video_path).arg(output_path);
|
||||
|
||||
// Use explicit frame list if provided, otherwise calculate sample_interval for ~8Hz
|
||||
if let Some(frames) = frames {
|
||||
let frames_str = frames
|
||||
.iter()
|
||||
.map(|f| f.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",");
|
||||
cmd.arg("--frames").arg(&frames_str);
|
||||
tracing::info!("[MEDIAPIPE] 8Hz sampling: {} frames", frames.len());
|
||||
} else {
|
||||
let sample_interval = calculate_sample_interval(video_path).await;
|
||||
cmd.arg("--sample-interval")
|
||||
.arg(sample_interval.to_string());
|
||||
}
|
||||
|
||||
if let Some(u) = uuid {
|
||||
cmd.arg("--uuid").arg(u);
|
||||
}
|
||||
|
||||
cmd.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped());
|
||||
|
||||
let child = cmd.spawn().context("Failed to run MEDIAPIPE processor")?;
|
||||
|
||||
let output = match timeout(MEDIAPIPE_TIMEOUT, child.wait_with_output()).await {
|
||||
Ok(Ok(output)) => output,
|
||||
Ok(Err(e)) => return Err(e).context("Failed to run MEDIAPIPE processor"),
|
||||
Err(_) => anyhow::bail!(
|
||||
"MEDIAPIPE processing timed out after {:?}",
|
||||
MEDIAPIPE_TIMEOUT
|
||||
),
|
||||
};
|
||||
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
for line in stderr.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with("MEDIAPIPE_START") {
|
||||
tracing::info!("[MEDIAPIPE] Loading model...");
|
||||
} else if trimmed.starts_with("MEDIAPIPE_FRAME:") {
|
||||
let count = trimmed.trim_start_matches("MEDIAPIPE_FRAME:");
|
||||
tracing::info!("[MEDIAPIPE] Processed {} frames...", count);
|
||||
} else if trimmed.starts_with("MEDIAPIPE_COMPLETE:") {
|
||||
let count = trimmed.trim_start_matches("MEDIAPIPE_COMPLETE:");
|
||||
tracing::info!("[MEDIAPIPE] Completed! Total: {} frames", count);
|
||||
} else if trimmed.starts_with("MEDIAPIPE_INFO:") {
|
||||
let info = trimmed.trim_start_matches("MEDIAPIPE_INFO:");
|
||||
tracing::info!("[MEDIAPIPE] {}", info);
|
||||
} else if trimmed.starts_with("MEDIAPIPE_ERROR:") {
|
||||
let err = trimmed.trim_start_matches("MEDIAPIPE_ERROR:");
|
||||
tracing::error!("[MEDIAPIPE] {}", err);
|
||||
}
|
||||
}
|
||||
tracing::info!("[MEDIAPIPE] stderr output:\n{}", stderr);
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("MEDIAPIPE failed: {}", stderr);
|
||||
}
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?;
|
||||
|
||||
let result: MediaPipeResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?;
|
||||
|
||||
tracing::info!("[MEDIAPIPE] Result: {} frames", result.frames.len());
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn calculate_sample_interval(video_path: &str) -> i64 {
|
||||
// Try ffprobe to get FPS, calculate sample_interval for ~8Hz
|
||||
let probe_cmd = Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_streams",
|
||||
video_path,
|
||||
])
|
||||
.output()
|
||||
.await;
|
||||
|
||||
if let Ok(output) = probe_cmd {
|
||||
if output.status.success() {
|
||||
if let Ok(json_str) = String::from_utf8(output.stdout) {
|
||||
if let Ok(probe_data) = serde_json::from_str::<serde_json::Value>(&json_str) {
|
||||
if let Some(streams) = probe_data["streams"].as_array() {
|
||||
for stream in streams {
|
||||
if stream["codec_type"] == "video" {
|
||||
if let Some(fps_str) = stream["r_frame_rate"].as_str() {
|
||||
// Parse "30000/1001" style fps
|
||||
if let Some(fps) = parse_fractional_fps(fps_str) {
|
||||
let interval = (fps / 8.0).round() as i64;
|
||||
return interval.max(1);
|
||||
}
|
||||
}
|
||||
if let Some(fps_val) = stream["avg_frame_rate"].as_str() {
|
||||
if let Some(fps) = parse_fractional_fps(fps_val) {
|
||||
let interval = (fps / 8.0).round() as i64;
|
||||
return interval.max(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
4 // Default: assume 30fps / 8 = ~4
|
||||
}
|
||||
|
||||
/// Parses a frame-rate string as reported by ffprobe.
///
/// Accepts either a fraction (`"30000/1001"`) or a plain number (`"25"`); surrounding
/// whitespace is ignored.
///
/// # Returns
/// `Some(fps)` only for a usable rate, i.e. a finite value greater than zero.
/// `None` is returned for unparsable input, a non-positive denominator (such as the
/// `"0/0"` placeholder), and zero, negative, or non-finite results, so callers can
/// fall back to another source instead of working with a meaningless rate.
fn parse_fractional_fps(s: &str) -> Option<f64> {
    let s = s.trim();

    let fps = match s.split_once('/') {
        Some((num, den)) => {
            let num: f64 = num.trim().parse().ok()?;
            let den: f64 = den.trim().parse().ok()?;
            // Written as a negated `>` so a NaN denominator is rejected too.
            if !(den > 0.0) {
                return None;
            }
            num / den
        }
        None => s.parse::<f64>().ok()?,
    };

    if fps.is_finite() && fps > 0.0 {
        Some(fps)
    } else {
        None
    }
}
|
||||
@@ -11,11 +11,9 @@ pub mod face_clustering;
|
||||
pub mod face_recognition;
|
||||
pub mod hand;
|
||||
pub mod heuristic_scene;
|
||||
pub mod mediapipe_v2;
|
||||
pub mod ocr;
|
||||
pub mod pose;
|
||||
pub mod scene_classification;
|
||||
pub mod story;
|
||||
pub mod tkg;
|
||||
pub mod yolo;
|
||||
|
||||
@@ -48,17 +46,12 @@ pub use heuristic_scene::{
|
||||
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
|
||||
SceneSegmentMeta,
|
||||
};
|
||||
pub use mediapipe_v2::{
|
||||
process_mediapipe_v2, MediaPipeBBox, MediaPipeDictEntry, MediaPipeHands, MediaPipeMetadata,
|
||||
MediaPipePerson, MediaPipeResult,
|
||||
};
|
||||
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
|
||||
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
pub use scene_classification::{
|
||||
load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction,
|
||||
SceneSegment,
|
||||
};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
pub use tkg::{
|
||||
build_tkg, query_auto_representative_frame, FrameTraceInfo, MainIdentityInfo,
|
||||
RepresentativeFrameResult, TkgResult,
|
||||
|
||||
@@ -1,690 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
|
||||
const STORY_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
// ── Input data structs (from JSON files) ──────────────────────────
|
||||
|
||||
/// Top-level shape of the `<basename>.asr.json` input that `try_native_story`
/// deserializes.
#[derive(Debug, Deserialize)]
|
||||
struct AsrData {
|
||||
/// Speech segments; each one becomes one ASR child chunk.
segments: Vec<AsrSegmentInput>,
|
||||
}
|
||||
|
||||
/// One speech segment of the ASR input file.
///
/// Every field is optional in the JSON and falls back to its type's default
/// (`0.0` / empty string) when absent.
#[derive(Debug, Deserialize)]
|
||||
struct AsrSegmentInput {
|
||||
/// Segment start; also accepted under the JSON key `start`.
/// NOTE(review): presumably seconds, since it is later formatted with an `s` suffix — confirm.
#[serde(default, alias = "start")]
|
||||
start_time: f64,
|
||||
/// Segment end; also accepted under the JSON key `end`.
#[serde(default, alias = "end")]
|
||||
end_time: f64,
|
||||
/// Recognized text of the segment.
#[serde(default)]
|
||||
text: String,
|
||||
/// Recognizer confidence, copied unchanged into the child chunk's `content`.
#[serde(default)]
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
/// Top-level shape of the `<basename>.cut.json` input that `try_native_story`
/// deserializes.
#[derive(Debug, Deserialize)]
|
||||
struct CutData {
|
||||
/// Detected scenes; each one becomes one CUT child chunk.
scenes: Vec<CutSceneInput>,
|
||||
}
|
||||
|
||||
/// One scene entry of the CUT input file.
///
/// All fields are optional; `try_native_story` substitutes `0` / `0.0` for
/// the ones it reads when they are missing.
#[derive(Debug, Deserialize)]
|
||||
struct CutSceneInput {
|
||||
/// Scene number, used to build the child chunk ID `cut_<n>`.
scene_number: Option<i64>,
|
||||
/// Parsed from the file but not read by this module.
#[allow(dead_code)]
|
||||
start_frame: Option<i64>,
|
||||
/// Parsed from the file but not read by this module.
#[allow(dead_code)]
|
||||
end_frame: Option<i64>,
|
||||
/// Scene start time.
start_time: Option<f64>,
|
||||
/// Scene end time.
end_time: Option<f64>,
|
||||
}
|
||||
|
||||
// ── Output data structs ───────────────────────────────────────────
|
||||
|
||||
/// Complete output of the story processor, as written to and read from the
/// story JSON file.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct StoryResult {
|
||||
/// Leaf chunks (one per ASR segment or CUT scene in the native path).
pub child_chunks: Vec<StoryChildChunk>,
|
||||
/// Narrative chunks that each group a batch of child chunks.
pub parent_chunks: Vec<StoryParentChunk>,
|
||||
/// Chunk counters for this result.
pub stats: StoryStats,
|
||||
/// Free-form metadata; defaults to JSON `null` when absent from the input.
#[serde(default)]
|
||||
pub metadata: serde_json::Value,
|
||||
/// Number of children grouped under one parent; defaults to `0` when
/// absent from the input (the native path writes `5`).
#[serde(default)]
|
||||
pub parent_chunk_size: usize,
|
||||
}
|
||||
|
||||
/// Chunk counters attached to a `StoryResult`.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct StoryStats {
|
||||
/// Total child chunks (ASR plus CUT in the native path).
pub total_child_chunks: usize,
|
||||
/// Total parent chunks.
pub total_parent_chunks: usize,
|
||||
/// Child chunks that came from ASR segments.
pub asr_children: usize,
|
||||
/// Child chunks that came from CUT scenes.
pub cut_children: usize,
|
||||
}
|
||||
|
||||
/// A leaf chunk of the story: one ASR segment or one CUT scene.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct StoryChildChunk {
|
||||
/// Chunk identifier; the native path generates `asr_<start>_<end>` or `cut_<n>`.
pub chunk_id: String,
|
||||
/// Chunk kind label (the native path writes `"asr"` or `"cut"`).
pub chunk_type: String,
|
||||
/// Originating processor (the native path writes `"asr"` or `"cut"`).
pub source: String,
|
||||
/// Start of the chunk on the video timeline.
pub start_time: f64,
|
||||
/// End of the chunk on the video timeline.
pub end_time: f64,
|
||||
/// Human-readable text; omitted from the serialized JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub text_content: Option<String>,
|
||||
/// Source-specific payload (text/confidence for ASR, scene data for CUT).
pub content: serde_json::Value,
|
||||
/// Nested child IDs; the native path always leaves this empty for child chunks.
#[serde(default)]
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
/// ID of the parent chunk this child was grouped under, if any.
pub parent_chunk_id: Option<String>,
|
||||
}
|
||||
|
||||
/// A narrative chunk that groups a batch of child chunks.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct StoryParentChunk {
|
||||
/// Chunk identifier; the native path generates `story_asr_NNNN` or `story_cut_NNNN`.
pub chunk_id: String,
|
||||
/// Chunk kind label (the native path writes `"story"`).
pub chunk_type: String,
|
||||
/// Which child stream was grouped (the native path writes `"story_asr"` or `"story_cut"`).
pub source: String,
|
||||
/// Start time of the first child in the batch.
pub start_time: f64,
|
||||
/// End time of the last child in the batch.
pub end_time: f64,
|
||||
/// Generated narrative text for the batch.
pub text_content: String,
|
||||
/// Structured payload; the native path stores the description and child count here.
pub content: serde_json::Value,
|
||||
/// IDs of the child chunks grouped under this parent.
#[serde(default)]
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
/// Parent of this chunk; the native path always writes `None`.
pub parent_chunk_id: Option<String>,
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────
|
||||
|
||||
pub async fn process_story(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<StoryResult> {
|
||||
// Try native Rust implementation first
|
||||
let result = try_native_story(video_path, output_path, uuid);
|
||||
if let Ok(r) = result {
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
// Fallback: Python script
|
||||
tracing::warn!(
|
||||
"[STORY] Native impl failed, falling back to Python: {:?}",
|
||||
result.err()
|
||||
);
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("story_processor.py");
|
||||
|
||||
if !script_path.exists() {
|
||||
return Ok(StoryResult {
|
||||
child_chunks: vec![],
|
||||
parent_chunks: vec![],
|
||||
stats: StoryStats {
|
||||
total_child_chunks: 0,
|
||||
total_parent_chunks: 0,
|
||||
asr_children: 0,
|
||||
cut_children: 0,
|
||||
},
|
||||
metadata: serde_json::json!({}),
|
||||
parent_chunk_size: 5,
|
||||
});
|
||||
}
|
||||
|
||||
executor
|
||||
.run(
|
||||
"story_processor.py",
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"STORY",
|
||||
Some(STORY_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read STORY output")?;
|
||||
let result: StoryResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse STORY output")?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Native implementation ─────────────────────────────────────────
|
||||
|
||||
fn try_native_story(
|
||||
_video_path: &str,
|
||||
output_path: &str,
|
||||
_uuid: Option<&str>,
|
||||
) -> Result<StoryResult> {
|
||||
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
|
||||
let basename = Path::new(output_path)
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.and_then(|s| s.split('.').next())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
let asr_path = output_dir.join(format!("{}.asr.json", basename));
|
||||
let cut_path = output_dir.join(format!("{}.cut.json", basename));
|
||||
|
||||
// ASR data is required; CUT is optional
|
||||
let asr_data: AsrData = if asr_path.exists() {
|
||||
let content = std::fs::read_to_string(&asr_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asr_path))?;
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
} else {
|
||||
AsrData { segments: vec![] }
|
||||
};
|
||||
|
||||
let cut_data: CutData = if cut_path.exists() {
|
||||
let content = std::fs::read_to_string(&cut_path)
|
||||
.with_context(|| format!("Failed to read {:?}", cut_path))?;
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
} else {
|
||||
CutData { scenes: vec![] }
|
||||
};
|
||||
|
||||
let parent_chunk_size: usize = 5;
|
||||
|
||||
// ── Build child chunks ────────────────────────────────────────
|
||||
let mut child_chunks: Vec<StoryChildChunk> = Vec::new();
|
||||
|
||||
// ASR child chunks
|
||||
for seg in &asr_data.segments {
|
||||
let chunk_id = format!("asr_{:.1}_{:.1}", seg.start_time, seg.end_time);
|
||||
child_chunks.push(StoryChildChunk {
|
||||
chunk_id,
|
||||
chunk_type: "asr".to_string(),
|
||||
source: "asr".to_string(),
|
||||
start_time: seg.start_time,
|
||||
end_time: seg.end_time,
|
||||
text_content: Some(seg.text.clone()),
|
||||
content: serde_json::json!({
|
||||
"text": seg.text,
|
||||
"confidence": seg.confidence,
|
||||
}),
|
||||
child_chunk_ids: vec![],
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
// CUT child chunks
|
||||
for scene in &cut_data.scenes {
|
||||
let scene_num = scene.scene_number.unwrap_or(0);
|
||||
let start_time = scene.start_time.unwrap_or(0.0);
|
||||
let end_time = scene.end_time.unwrap_or(0.0);
|
||||
let chunk_id = format!("cut_{}", scene_num);
|
||||
child_chunks.push(StoryChildChunk {
|
||||
chunk_id,
|
||||
chunk_type: "cut".to_string(),
|
||||
source: "cut".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: Some(format!("Scene {}", scene_num)),
|
||||
content: serde_json::json!({
|
||||
"scene_number": scene_num,
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
}),
|
||||
child_chunk_ids: vec![],
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
let asr_child_ids: Vec<String> = child_chunks
|
||||
.iter()
|
||||
.filter(|c| c.source == "asr")
|
||||
.map(|c| c.chunk_id.clone())
|
||||
.collect();
|
||||
|
||||
let cut_child_ids: Vec<String> = child_chunks
|
||||
.iter()
|
||||
.filter(|c| c.source == "cut")
|
||||
.map(|c| c.chunk_id.clone())
|
||||
.collect();
|
||||
|
||||
// ── Build parent chunks from ASR ──────────────────────────────
|
||||
let mut parent_chunks: Vec<StoryParentChunk> = Vec::new();
|
||||
|
||||
for (i, batch) in asr_child_ids.chunks(parent_chunk_size).enumerate() {
|
||||
if batch.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut texts: Vec<String> = Vec::new();
|
||||
let mut times: Vec<(f64, f64)> = Vec::new();
|
||||
|
||||
for child_id in batch {
|
||||
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
|
||||
if let Some(ref t) = child.text_content {
|
||||
texts.push(t.clone());
|
||||
}
|
||||
times.push((child.start_time, child.end_time));
|
||||
}
|
||||
}
|
||||
|
||||
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
|
||||
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
|
||||
|
||||
let narrative = generate_narrative(&texts, &[], start_time, end_time);
|
||||
|
||||
let chunk_id = format!("story_asr_{:04}", i);
|
||||
parent_chunks.push(StoryParentChunk {
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "story".to_string(),
|
||||
source: "story_asr".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: narrative.clone(),
|
||||
content: serde_json::json!({
|
||||
"description": narrative,
|
||||
"child_count": batch.len(),
|
||||
"speech_preview": texts.iter().take(3).cloned().collect::<Vec<_>>().join(" "),
|
||||
}),
|
||||
child_chunk_ids: batch.to_vec(),
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
|
||||
// Link children to parent
|
||||
for child in &mut child_chunks {
|
||||
if batch.contains(&child.chunk_id) {
|
||||
child.parent_chunk_id = Some(chunk_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Build parent chunks from CUT ──────────────────────────────
|
||||
for (i, batch) in cut_child_ids.chunks(parent_chunk_size).enumerate() {
|
||||
if batch.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut times: Vec<(f64, f64)> = Vec::new();
|
||||
for child_id in batch {
|
||||
if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) {
|
||||
times.push((child.start_time, child.end_time));
|
||||
}
|
||||
}
|
||||
|
||||
let start_time = times.first().map(|t| t.0).unwrap_or(0.0);
|
||||
let end_time = times.last().map(|t| t.1).unwrap_or(0.0);
|
||||
|
||||
let narrative = generate_scene_narrative(&[], start_time, end_time, batch.len());
|
||||
|
||||
let chunk_id = format!("story_cut_{:04}", i);
|
||||
parent_chunks.push(StoryParentChunk {
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "story".to_string(),
|
||||
source: "story_cut".to_string(),
|
||||
start_time,
|
||||
end_time,
|
||||
text_content: narrative.clone(),
|
||||
content: serde_json::json!({
|
||||
"description": narrative,
|
||||
"child_count": batch.len(),
|
||||
"scenes": batch,
|
||||
}),
|
||||
child_chunk_ids: batch.to_vec(),
|
||||
parent_chunk_id: None,
|
||||
});
|
||||
|
||||
for child in &mut child_chunks {
|
||||
if batch.contains(&child.chunk_id) {
|
||||
child.parent_chunk_id = Some(chunk_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Build result ──────────────────────────────────────────────
|
||||
let total_child = asr_child_ids.len() + cut_child_ids.len();
|
||||
let total_parent = parent_chunks.len();
|
||||
let asr_count = asr_child_ids.len();
|
||||
let cut_count = cut_child_ids.len();
|
||||
|
||||
let result = StoryResult {
|
||||
child_chunks,
|
||||
parent_chunks,
|
||||
stats: StoryStats {
|
||||
total_child_chunks: total_child,
|
||||
total_parent_chunks: total_parent,
|
||||
asr_children: asr_count,
|
||||
cut_children: cut_count,
|
||||
},
|
||||
metadata: serde_json::json!({}),
|
||||
parent_chunk_size,
|
||||
};
|
||||
|
||||
// Write output (for compatibility with Python path)
|
||||
let json_str = serde_json::to_string_pretty(&result)?;
|
||||
std::fs::write(output_path, &json_str)
|
||||
.with_context(|| format!("Failed to write {:?}", output_path))?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Narrative generation (matching Python logic) ──────────────────
|
||||
|
||||
/// Builds the one-line narrative for a batch of speech texts and detected
/// object labels covering `start..end`.
///
/// * With neither texts nor objects: `"Video segment from {start}s to {end}s"`
///   (one decimal place).
/// * Otherwise: `"[{start}s-{end}s] Speech: … | Visuals: …"`, where each part
///   appears only if its input is non-empty.
///
/// Speech is the texts joined by spaces. If that exceeds 150 bytes it is cut
/// at the last character boundary at or before byte 150 and suffixed with
/// `"..."`, so multi-byte UTF-8 text is never split mid-character. Visuals
/// are the first five labels after sorting and de-duplicating.
fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String {
    const MAX_SPEECH_BYTES: usize = 150;

    if texts.is_empty() && objects.is_empty() {
        return format!("Video segment from {:.1}s to {:.1}s", start, end);
    }

    let mut parts: Vec<String> = Vec::new();

    if !texts.is_empty() {
        let mut speech = texts.join(" ");
        if speech.len() > MAX_SPEECH_BYTES {
            // Slicing at a fixed byte offset panics inside a multi-byte
            // character; back up to the nearest boundary (0 always is one).
            let mut cut = MAX_SPEECH_BYTES;
            while !speech.is_char_boundary(cut) {
                cut -= 1;
            }
            speech.truncate(cut);
            speech.push_str("...");
        }
        parts.push(format!("Speech: {}", speech));
    }

    if !objects.is_empty() {
        let mut unique: Vec<&str> = objects.iter().map(String::as_str).collect();
        unique.sort_unstable();
        unique.dedup();
        unique.truncate(5);
        parts.push(format!("Visuals: {}", unique.join(", ")));
    }

    format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
}
|
||||
|
||||
/// Builds the one-line narrative for a batch of `scene_count` cut scenes
/// covering `start..end`.
///
/// When `objects` is non-empty, the first five labels (after sorting and
/// de-duplicating) are listed:
/// `"[{start}s-{end}s] {n} scenes. Visuals: a, b."`. Otherwise the result is
/// `"[{start}s-{end}s] {n} video scenes."`. Times are printed without decimals.
fn generate_scene_narrative(
    objects: &[String],
    start: f64,
    end: f64,
    scene_count: usize,
) -> String {
    let mut labels: Vec<&str> = objects.iter().map(String::as_str).collect();
    labels.sort();
    labels.dedup();
    labels.truncate(5);

    if labels.is_empty() {
        return format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count);
    }

    let visuals = labels.join(", ");
    format!(
        "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
        start, end, scene_count, visuals
    )
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an ASR "sentence" child chunk already linked to `story_asr_0000`.
    fn asr_child(id: &str, start: f64, end: f64, text: &str) -> StoryChildChunk {
        StoryChildChunk {
            chunk_id: id.to_string(),
            chunk_type: "sentence".to_string(),
            source: "asr".to_string(),
            start_time: start,
            end_time: end,
            text_content: Some(text.to_string()),
            content: serde_json::json!({}),
            child_chunk_ids: vec![],
            parent_chunk_id: Some("story_asr_0000".to_string()),
        }
    }

    /// Creates a scratch directory whose name includes the process ID, so
    /// concurrent test runs on one machine cannot delete each other's
    /// fixtures. Fails loudly if the directory cannot be created.
    fn scratch_dir(label: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("{}_{}", label, std::process::id()));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn test_generate_narrative_with_text() {
        let text = generate_narrative(
            &["Hello world".to_string()],
            &["person".to_string()],
            0.0,
            5.0,
        );
        assert!(text.contains("[0s-5s]"));
        assert!(text.contains("Speech:"));
        assert!(text.contains("Visuals:"));
    }

    #[test]
    fn test_generate_narrative_empty() {
        let text = generate_narrative(&[], &[], 10.0, 20.0);
        assert!(text.contains("10.0s to 20.0s"));
    }

    #[test]
    fn test_generate_scene_narrative() {
        let text = generate_scene_narrative(&["person".to_string()], 0.0, 10.0, 3);
        assert!(text.contains("3 scenes"));
        assert!(text.contains("person"));
    }

    #[test]
    fn test_generate_scene_narrative_empty() {
        let text = generate_scene_narrative(&[], 0.0, 10.0, 1);
        assert!(text.contains("1 video scenes"));
    }

    #[test]
    fn test_narrative_truncation() {
        let long_text = "a".repeat(200);
        let text = generate_narrative(&[long_text], &[], 0.0, 5.0);
        assert!(text.len() < 200 + 50); // truncated with "..."
        assert!(text.ends_with("..."));
    }

    #[test]
    fn test_story_result_serialization() {
        let result = StoryResult {
            child_chunks: vec![asr_child("asr_0001", 0.0, 5.0, "Hello world")],
            parent_chunks: vec![StoryParentChunk {
                chunk_id: "story_asr_0000".to_string(),
                chunk_type: "story".to_string(),
                source: "story_asr".to_string(),
                start_time: 0.0,
                end_time: 25.0,
                text_content: "[0s-25s] Hello world...".to_string(),
                content: serde_json::json!({
                    "description": "[0s-25s] Hello world...",
                    "child_count": 5
                }),
                child_chunk_ids: vec!["asr_0001".to_string()],
                parent_chunk_id: None,
            }],
            stats: StoryStats {
                total_child_chunks: 10,
                total_parent_chunks: 2,
                asr_children: 10,
                cut_children: 0,
            },
            metadata: serde_json::json!({}),
            parent_chunk_size: 5,
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("asr_0001"));
        assert!(json.contains("story_asr_0000"));
        assert!(json.contains("Hello world"));
    }

    #[test]
    fn test_story_result_deserialization() {
        let json = r#"{
            "child_chunks": [{
                "chunk_id": "asr_0001",
                "chunk_type": "sentence",
                "source": "asr",
                "start_time": 0.0,
                "end_time": 5.0,
                "text_content": "Hello",
                "content": {},
                "child_chunk_ids": [],
                "parent_chunk_id": null
            }],
            "parent_chunks": [{
                "chunk_id": "story_asr_0000",
                "chunk_type": "story",
                "source": "story_asr",
                "start_time": 0.0,
                "end_time": 5.0,
                "text_content": "Hello segment",
                "content": {"description": "Hello segment"},
                "child_chunk_ids": ["asr_0001"],
                "parent_chunk_id": null
            }],
            "stats": {
                "total_child_chunks": 1,
                "total_parent_chunks": 1,
                "asr_children": 1,
                "cut_children": 0
            },
            "metadata": {},
            "parent_chunk_size": 5
        }"#;

        let result: StoryResult = serde_json::from_str(json).unwrap();
        assert_eq!(result.child_chunks.len(), 1);
        assert_eq!(result.parent_chunks.len(), 1);
        assert_eq!(result.stats.total_child_chunks, 1);
    }

    #[test]
    fn test_parent_child_relationship() {
        let result = StoryResult {
            child_chunks: vec![
                asr_child("asr_0001", 0.0, 5.0, "First"),
                asr_child("asr_0002", 5.0, 10.0, "Second"),
            ],
            parent_chunks: vec![StoryParentChunk {
                chunk_id: "story_asr_0000".to_string(),
                chunk_type: "story".to_string(),
                source: "story_asr".to_string(),
                start_time: 0.0,
                end_time: 10.0,
                text_content: "Combined narrative".to_string(),
                content: serde_json::json!({}),
                child_chunk_ids: vec!["asr_0001".to_string(), "asr_0002".to_string()],
                parent_chunk_id: None,
            }],
            stats: StoryStats {
                total_child_chunks: 2,
                total_parent_chunks: 1,
                asr_children: 2,
                cut_children: 0,
            },
            metadata: serde_json::json!({}),
            parent_chunk_size: 5,
        };

        assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
        assert!(result
            .child_chunks
            .iter()
            .all(|c| c.parent_chunk_id.is_some()));
        assert!(result.parent_chunks[0].parent_chunk_id.is_none());
    }

    #[test]
    fn test_native_story_empty_data() {
        // Write empty ASR and CUT files, then test try_native_story
        let dir = scratch_dir("story_test_empty");

        let basename = "test_video";
        let asr_path = dir.join(format!("{}.asr.json", basename));
        let cut_path = dir.join(format!("{}.cut.json", basename));
        let out_path = dir.join(format!("{}.story.json", basename));

        std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
        std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();

        let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();

        assert_eq!(result.stats.total_child_chunks, 0);
        assert_eq!(result.stats.total_parent_chunks, 0);

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_native_story_with_data() {
        let dir = scratch_dir("story_test_data");

        let basename = "test_video";
        let asr_path = dir.join(format!("{}.asr.json", basename));
        let cut_path = dir.join(format!("{}.cut.json", basename));
        let out_path = dir.join(format!("{}.story.json", basename));

        std::fs::write(
            &asr_path,
            r#"{
            "segments": [
                {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
                {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
                {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
            ]
        }"#,
        )
        .unwrap();

        std::fs::write(&cut_path, r#"{
            "scenes": [
                {"scene_number": 1, "start_frame": 0, "end_frame": 150, "start_time": 0.0, "end_time": 5.0},
                {"scene_number": 2, "start_frame": 150, "end_frame": 300, "start_time": 5.0, "end_time": 10.0}
            ]
        }"#).unwrap();

        let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();

        assert_eq!(result.stats.asr_children, 3);
        assert_eq!(result.stats.cut_children, 2);
        assert_eq!(result.stats.total_child_chunks, 5);

        // 3 ASR segments, parent_chunk_size=5 → 1 parent
        // 2 CUT scenes, parent_chunk_size=5 → 1 parent
        assert_eq!(result.stats.total_parent_chunks, 2);

        // Verify child-parent linking
        for child in &result.child_chunks {
            if child.source == "asr" {
                assert!(child.parent_chunk_id.is_some());
                assert!(child
                    .parent_chunk_id
                    .as_ref()
                    .unwrap()
                    .starts_with("story_asr_"));
            }
        }

        // Verify output file was written
        assert!(out_path.exists());
        let content = std::fs::read_to_string(&out_path).unwrap();
        assert!(content.contains("Hello"));
        assert!(content.contains("World"));

        let _ = std::fs::remove_dir_all(&dir);
    }
}
|
||||
+5
-1113
File diff suppressed because it is too large
Load Diff
-10
@@ -49,9 +49,6 @@ async fn main() -> Result<()> {
|
||||
Commands::StoreAsrx { uuid } => {
|
||||
handle_store_asrx(&uuid).await?;
|
||||
}
|
||||
Commands::Story { uuid } => {
|
||||
handle_story(&uuid).await?;
|
||||
}
|
||||
Commands::Vectorize { uuid } => {
|
||||
handle_vectorize(&uuid).await?;
|
||||
}
|
||||
@@ -169,13 +166,6 @@ async fn handle_chunk(uuid: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
/// Handle story command
///
/// Currently a stub: it prints a progress line naming `uuid` and returns
/// `Ok(())` without generating anything.
|
||||
async fn handle_story(uuid: &str) -> Result<()> {
|
||||
println!("Generating story for: {}", uuid);
|
||||
|
||||
// TODO: Implement story logic
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle vectorize command
|
||||
async fn handle_vectorize(uuid: &str) -> Result<()> {
|
||||
println!("Vectorizing chunks for: {}", uuid);
|
||||
|
||||
@@ -633,44 +633,6 @@ async fn process_appearance_module(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_story_module(
|
||||
story_path: &Path,
|
||||
video_path: &str,
|
||||
uuid: &str,
|
||||
progress_state: &Arc<Mutex<ProgressState>>,
|
||||
ui: &Arc<Mutex<Option<ProgressUi>>>,
|
||||
) -> anyhow::Result<()> {
|
||||
{
|
||||
let mut state = progress_state.lock().unwrap();
|
||||
state.get_processor(ProcessorType::Story).start(1);
|
||||
}
|
||||
let story_result = momentry_core::core::processor::process_story(
|
||||
video_path,
|
||||
story_path.to_str().unwrap(),
|
||||
Some(uuid),
|
||||
)
|
||||
.await?;
|
||||
let story_json = serde_json::to_string_pretty(&story_result)?;
|
||||
std::fs::write(story_path, &story_json)?;
|
||||
let output_dir = OutputDir::new();
|
||||
let _ = output_dir.backup_file(uuid, "story.json");
|
||||
println!(
|
||||
" ✓ Story saved: {} parent chunks, {} child chunks",
|
||||
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
|
||||
);
|
||||
{
|
||||
let mut state = progress_state.lock().unwrap();
|
||||
state.get_processor(ProcessorType::Story).complete(&format!(
|
||||
"{} parents, {} children",
|
||||
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
|
||||
));
|
||||
}
|
||||
if let Some(ref mut ui) = *ui.lock().unwrap() {
|
||||
let _ = ui.render();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_caption_module(
|
||||
caption_path: &Path,
|
||||
video_path: &str,
|
||||
@@ -745,11 +707,6 @@ enum Commands {
|
||||
/// UUID
|
||||
uuid: String,
|
||||
},
|
||||
/// Generate story for cut scenes
|
||||
Story {
|
||||
/// UUID
|
||||
uuid: String,
|
||||
},
|
||||
/// Vectorize chunks
|
||||
Vectorize {
|
||||
/// UUID (or 'all' for all)
|
||||
@@ -2382,150 +2339,6 @@ Ok(())
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Commands::Story { uuid } => {
|
||||
println!("Generating story for: {}", uuid);
|
||||
|
||||
let db = PostgresDb::init().await?;
|
||||
let video = db
|
||||
.get_video_by_uuid(&uuid)
|
||||
.await?
|
||||
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?;
|
||||
|
||||
let file_id = video.id;
|
||||
let _fps = video.fps;
|
||||
let duration = video.duration;
|
||||
|
||||
// Get all chunks
|
||||
let all_chunks = db.get_chunks_by_uuid(&uuid).await?;
|
||||
|
||||
// Try cut chunks first, fall back to sentence chunks
|
||||
let mut story_chunks: Vec<&Chunk> = all_chunks
|
||||
.iter()
|
||||
.filter(|c| c.chunk_type == ChunkType::Cut)
|
||||
.collect();
|
||||
|
||||
let story_type = if story_chunks.is_empty() {
|
||||
story_chunks = all_chunks
|
||||
.iter()
|
||||
.filter(|c| c.chunk_type == ChunkType::Sentence && c.text_content.is_some())
|
||||
.collect();
|
||||
"sentence"
|
||||
} else {
|
||||
"cut"
|
||||
};
|
||||
|
||||
if story_chunks.is_empty() {
|
||||
println!("No story chunks found. Run 'chunk' command first.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Found {} {} scenes", story_chunks.len(), story_type);
|
||||
|
||||
for (i, story_chunk) in story_chunks.iter().enumerate() {
|
||||
println!("\n=== Scene {} ===", i + 1);
|
||||
println!(
|
||||
"Time: {:.2}s - {:.2}s",
|
||||
story_chunk.start_time().seconds(),
|
||||
story_chunk.end_time().seconds()
|
||||
);
|
||||
|
||||
let context_start = (story_chunk.start_time().seconds() - 5.0).max(0.0);
|
||||
let context_end = (story_chunk.end_time().seconds() + 5.0).min(duration);
|
||||
|
||||
let context_chunks = db
|
||||
.get_chunks_by_time_range(&uuid, context_start, context_end)
|
||||
.await?;
|
||||
|
||||
let context_frames = db
|
||||
.get_frames_by_time_range(&uuid, context_start, context_end)
|
||||
.await?;
|
||||
|
||||
let mut story = String::new();
|
||||
story.push_str(&format!(
|
||||
"Scene {} ({:.1}s - {:.1}s)\n\n",
|
||||
i + 1,
|
||||
story_chunk.start_time().seconds(),
|
||||
story_chunk.end_time().seconds()
|
||||
));
|
||||
|
||||
let sentence_chunks: Vec<&serde_json::Value> = context_chunks
|
||||
.iter()
|
||||
.filter(|c| c["chunk_type"] == "sentence")
|
||||
.collect();
|
||||
|
||||
if !sentence_chunks.is_empty() {
|
||||
story.push_str("【Speech】\n");
|
||||
for sc in &sentence_chunks {
|
||||
if let Some(text) = sc["text_content"].as_str() {
|
||||
story.push_str(&format!(" - {}\n", text));
|
||||
}
|
||||
}
|
||||
story.push('\n');
|
||||
}
|
||||
|
||||
let mut all_objects: std::collections::HashMap<String, u32> =
|
||||
std::collections::HashMap::new();
|
||||
for frame in &context_frames {
|
||||
if let Some(objects) = frame["yolo_objects"].as_array() {
|
||||
for obj in objects {
|
||||
if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str())
|
||||
{
|
||||
*all_objects.entry(class_name.to_string()).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !all_objects.is_empty() {
|
||||
story.push_str("【Objects】\n");
|
||||
let mut sorted_objects: Vec<_> = all_objects.iter().collect();
|
||||
sorted_objects.sort_by(|a, b| b.1.cmp(a.1));
|
||||
for (obj, count) in sorted_objects.iter().take(10) {
|
||||
story.push_str(&format!(" - {} ({} frames)\n", obj, count));
|
||||
}
|
||||
story.push('\n');
|
||||
}
|
||||
|
||||
let mut all_texts: Vec<String> = Vec::new();
|
||||
for frame in &context_frames {
|
||||
if let Some(texts) = frame["ocr_results"].as_array() {
|
||||
for txt in texts {
|
||||
if let Some(text) = txt.get("text").and_then(|v| v.as_str()) {
|
||||
if !text.is_empty() && text.len() > 2 {
|
||||
all_texts.push(text.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !all_texts.is_empty() {
|
||||
story.push_str("【Text in video】\n");
|
||||
for txt in all_texts.iter().take(10) {
|
||||
story.push_str(&format!(" - {}\n", txt));
|
||||
}
|
||||
story.push('\n');
|
||||
}
|
||||
|
||||
let mut face_count = 0;
|
||||
for frame in &context_frames {
|
||||
if let Some(faces) = frame["face_results"].as_array() {
|
||||
face_count += faces.len();
|
||||
}
|
||||
}
|
||||
|
||||
if face_count > 0 {
|
||||
story.push_str(&format!(
|
||||
"【Faces】\n - {} face(s) detected\n\n",
|
||||
face_count
|
||||
));
|
||||
}
|
||||
|
||||
println!("{}", story);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Commands::Vectorize { uuid } => {
|
||||
println!("Vectorizing: {}", uuid);
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ pub mod cut;
|
||||
pub mod face;
|
||||
pub mod ocr;
|
||||
pub mod pose;
|
||||
pub mod story;
|
||||
pub mod yolo;
|
||||
|
||||
pub use appearance::*;
|
||||
@@ -19,5 +18,4 @@ pub use cut::*;
|
||||
pub use face::*;
|
||||
pub use ocr::*;
|
||||
pub use pose::*;
|
||||
pub use story::*;
|
||||
pub use yolo::*;
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
//! Story generation processing module
|
||||
|
||||
use anyhow::Result;
|
||||
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
|
||||
use momentry_core::OutputDir;
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
/// Process Story module
|
||||
pub async fn process_story_module(
|
||||
story_path: &Path,
|
||||
video_path: &str,
|
||||
uuid: &str,
|
||||
progress_state: &Arc<Mutex<ProgressState>>,
|
||||
ui: &Arc<Mutex<Option<ProgressUi>>>,
|
||||
) -> Result<()> {
|
||||
{
|
||||
let mut state = progress_state.lock().unwrap();
|
||||
state.get_processor(ProcessorType::Story).start(1);
|
||||
}
|
||||
|
||||
let story_result = momentry_core::core::processor::process_story(
|
||||
video_path,
|
||||
story_path.to_str().unwrap(),
|
||||
Some(uuid),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let story_json = serde_json::to_string_pretty(&story_result)?;
|
||||
std::fs::write(story_path, &story_json)?;
|
||||
|
||||
let output_dir = OutputDir::new();
|
||||
let _ = output_dir.backup_file(uuid, "story.json");
|
||||
|
||||
println!(
|
||||
" ✓ Story saved: {} parent chunks, {} child chunks",
|
||||
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
|
||||
);
|
||||
|
||||
{
|
||||
let mut state = progress_state.lock().unwrap();
|
||||
state.get_processor(ProcessorType::Story).complete(&format!(
|
||||
"{} parents, {} children",
|
||||
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(ref mut ui) = *ui.lock().unwrap() {
|
||||
let _ = ui.render();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -21,7 +21,6 @@ pub enum ProcessorType {
|
||||
Face,
|
||||
Pose,
|
||||
Hand,
|
||||
Story,
|
||||
Caption,
|
||||
}
|
||||
|
||||
@@ -37,7 +36,6 @@ impl ProcessorType {
|
||||
ProcessorType::Face => "Face",
|
||||
ProcessorType::Pose => "Pose",
|
||||
ProcessorType::Hand => "Hand",
|
||||
ProcessorType::Story => "Story",
|
||||
ProcessorType::Caption => "Caption",
|
||||
}
|
||||
}
|
||||
@@ -145,7 +143,6 @@ impl ProgressState {
|
||||
ProcessorProgress::new(ProcessorType::Face),
|
||||
ProcessorProgress::new(ProcessorType::Pose),
|
||||
ProcessorProgress::new(ProcessorType::Hand),
|
||||
ProcessorProgress::new(ProcessorType::Story),
|
||||
ProcessorProgress::new(ProcessorType::Caption),
|
||||
],
|
||||
video_name: video_name.to_string(),
|
||||
@@ -201,7 +198,6 @@ impl ProgressState {
|
||||
"OCR" => ProcessorType::Ocr,
|
||||
"FACE" => ProcessorType::Face,
|
||||
"POSE" => ProcessorType::Pose,
|
||||
"STORY" => ProcessorType::Story,
|
||||
"CAPTION" => ProcessorType::Caption,
|
||||
_ => return,
|
||||
};
|
||||
|
||||
@@ -209,48 +209,6 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
|
||||
],
|
||||
min_data_threshold: 1,
|
||||
},
|
||||
ProcessorJsonSchema {
|
||||
processor: ProcessorType::Story,
|
||||
required_fields: &[
|
||||
RequiredField {
|
||||
path: "child_chunks",
|
||||
field_type: FieldType::Array,
|
||||
allow_empty: true,
|
||||
},
|
||||
RequiredField {
|
||||
path: "parent_chunks",
|
||||
field_type: FieldType::Array,
|
||||
allow_empty: true,
|
||||
},
|
||||
RequiredField {
|
||||
path: "stats",
|
||||
field_type: FieldType::Object,
|
||||
allow_empty: false,
|
||||
},
|
||||
],
|
||||
min_data_threshold: 0,
|
||||
},
|
||||
ProcessorJsonSchema {
|
||||
processor: ProcessorType::MediaPipe,
|
||||
required_fields: &[
|
||||
RequiredField {
|
||||
path: "frame_count",
|
||||
field_type: FieldType::PositiveNumber,
|
||||
allow_empty: false,
|
||||
},
|
||||
RequiredField {
|
||||
path: "fps",
|
||||
field_type: FieldType::PositiveNumber,
|
||||
allow_empty: false,
|
||||
},
|
||||
RequiredField {
|
||||
path: "frames",
|
||||
field_type: FieldType::Array,
|
||||
allow_empty: true,
|
||||
},
|
||||
],
|
||||
min_data_threshold: 0,
|
||||
},
|
||||
];
|
||||
|
||||
/// Get schema for a processor
|
||||
|
||||
@@ -161,24 +161,6 @@ fn count_data_items(processor: &ProcessorType, value: &serde_json::Value) -> usi
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|a| a.len())
|
||||
.unwrap_or(0),
|
||||
ProcessorType::Story => {
|
||||
let child = value
|
||||
.get("child_chunks")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|a| a.len())
|
||||
.unwrap_or(0);
|
||||
let parent = value
|
||||
.get("parent_chunks")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|a| a.len())
|
||||
.unwrap_or(0);
|
||||
child + parent
|
||||
}
|
||||
ProcessorType::MediaPipe => value
|
||||
.get("frames")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|a| a.len())
|
||||
.unwrap_or(0),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
@@ -318,23 +300,6 @@ fn check_reasonableness(
|
||||
}
|
||||
}
|
||||
|
||||
// Story-specific: check chunk count vs cut scene count
|
||||
if *processor == ProcessorType::Story {
|
||||
if let Some(cut_value) = all_values.get("cut") {
|
||||
let story_chunks = count_data_items(processor, value);
|
||||
let cut_scenes = count_data_items(&ProcessorType::Cut, cut_value);
|
||||
if story_chunks > 0 && cut_scenes > 0 {
|
||||
// Story chunks should be >= cut scenes (one chunk per scene minimum)
|
||||
if story_chunks < cut_scenes / 2 {
|
||||
issues.push(format!(
|
||||
"story chunk count ({}) much less than cut scene count ({})",
|
||||
story_chunks, cut_scenes
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ASR-specific: check segments vs cut scenes
|
||||
if *processor == ProcessorType::Asr {
|
||||
if let Some(cut_value) = all_values.get("cut") {
|
||||
@@ -499,11 +464,6 @@ fn build_data_summary(processor: &ProcessorType, value: &serde_json::Value) -> s
|
||||
summary["speaker_count"] = serde_json::json!(speakers.len());
|
||||
}
|
||||
}
|
||||
ProcessorType::Story => {
|
||||
if let Some(stats) = value.get("stats") {
|
||||
summary["stats"] = stats.clone();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -538,10 +498,7 @@ pub fn verify_file(file_uuid: &str) -> FileVerificationReport {
|
||||
let mut all_values: HashMap<String, serde_json::Value> = HashMap::new();
|
||||
for processor in &processors {
|
||||
let proc_name = processor.as_str();
|
||||
let filename = match processor {
|
||||
ProcessorType::Story => format!("{}.story_story.json", full_uuid),
|
||||
_ => format!("{}.{}.json", full_uuid, proc_name),
|
||||
};
|
||||
let filename = format!("{}.{}.json", full_uuid, proc_name);
|
||||
let path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename);
|
||||
|
||||
if let Ok(content) = std::fs::read_to_string(&path) {
|
||||
@@ -639,10 +596,7 @@ pub fn verify_file(file_uuid: &str) -> FileVerificationReport {
|
||||
/// Legacy verification function (backward compatible)
|
||||
pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> VerificationResult {
|
||||
let proc_name = processor.as_str();
|
||||
let filename = match processor {
|
||||
ProcessorType::Story => format!("{}.story_story.json", file_uuid),
|
||||
_ => format!("{}.{}.json", file_uuid, proc_name),
|
||||
};
|
||||
let filename = format!("{}.{}.json", file_uuid, proc_name);
|
||||
let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename);
|
||||
|
||||
if !output_path.exists() {
|
||||
|
||||
+16
-115
@@ -14,9 +14,7 @@ struct ProcessorCleanupGuard {
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
frame_count: Arc<RwLock<usize>>,
|
||||
time_count: Arc<RwLock<usize>>,
|
||||
best_effort_count: Arc<RwLock<usize>>,
|
||||
pipeline: PipelineType,
|
||||
is_best_effort: bool,
|
||||
}
|
||||
|
||||
impl Drop for ProcessorCleanupGuard {
|
||||
@@ -32,30 +30,22 @@ impl Drop for ProcessorCleanupGuard {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
if self.is_best_effort {
|
||||
if let Ok(mut guard) = self.best_effort_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
}
|
||||
PipelineType::Cross => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -106,8 +96,6 @@ pub struct ProcessorTask {
|
||||
const FRAME_SLOT_MAX: usize = 2;
|
||||
/// Time pipeline max concurrent processors (audio is heavy, run 1 at a time).
|
||||
const TIME_SLOT_MAX: usize = 1;
|
||||
/// Best-effort slot (used by low-priority processors like MediaPipe).
|
||||
const BEST_EFFORT_SLOT_MAX: usize = 1;
|
||||
|
||||
pub struct ProcessorPool {
|
||||
db: Arc<PostgresDb>,
|
||||
@@ -117,7 +105,6 @@ pub struct ProcessorPool {
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
running_frame_count: Arc<RwLock<usize>>,
|
||||
running_time_count: Arc<RwLock<usize>>,
|
||||
running_best_effort_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
|
||||
impl ProcessorPool {
|
||||
@@ -130,7 +117,6 @@ impl ProcessorPool {
|
||||
running_count: Arc::new(RwLock::new(0)),
|
||||
running_frame_count: Arc::new(RwLock::new(0)),
|
||||
running_time_count: Arc::new(RwLock::new(0)),
|
||||
running_best_effort_count: Arc::new(RwLock::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,22 +226,16 @@ impl ProcessorPool {
|
||||
*count += 1;
|
||||
}
|
||||
// 遞增產線專屬 slot
|
||||
let is_best_effort = processor_type == ProcessorType::MediaPipe;
|
||||
if is_best_effort {
|
||||
*self.running_best_effort_count.write().await += 1;
|
||||
} else {
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.write().await += 1,
|
||||
PipelineType::Time => *self.running_time_count.write().await += 1,
|
||||
PipelineType::Cross => {} // cross pipeline uses global slot
|
||||
}
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.write().await += 1,
|
||||
PipelineType::Time => *self.running_time_count.write().await += 1,
|
||||
PipelineType::Cross => {} // cross pipeline uses global slot
|
||||
}
|
||||
|
||||
let running = self.running.clone();
|
||||
let running_count = self.running_count.clone();
|
||||
let running_frame_count = self.running_frame_count.clone();
|
||||
let running_time_count = self.running_time_count.clone();
|
||||
let running_best_effort_count = self.running_best_effort_count.clone();
|
||||
let child_pid: Arc<RwLock<Option<i32>>> = Arc::new(RwLock::new(None));
|
||||
running.write().await.insert(
|
||||
job_id,
|
||||
@@ -287,9 +267,7 @@ impl ProcessorPool {
|
||||
running_count: running_count.clone(),
|
||||
frame_count: running_frame_count.clone(),
|
||||
time_count: running_time_count.clone(),
|
||||
best_effort_count: running_best_effort_count.clone(),
|
||||
pipeline,
|
||||
is_best_effort,
|
||||
};
|
||||
|
||||
info!("Starting processor {} for job {}", processor_name, job.uuid);
|
||||
@@ -528,10 +506,7 @@ impl ProcessorPool {
|
||||
|
||||
// Generate output path
|
||||
let output_dir = PathBuf::from(OUTPUT_DIR.as_str());
|
||||
let suffix = match processor_type {
|
||||
ProcessorType::Story => format!("{}.story_story", job.uuid),
|
||||
_ => format!("{}.{}", job.uuid, processor_type.as_str()),
|
||||
};
|
||||
let suffix = format!("{}.{}", job.uuid, processor_type.as_str());
|
||||
let output_path = output_dir.join(format!("{}.json", suffix));
|
||||
|
||||
// Ensure output directory exists
|
||||
@@ -1052,80 +1027,6 @@ impl ProcessorPool {
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Story => {
|
||||
let executor = crate::core::processor::PythonExecutor::new()?;
|
||||
let _ = executor
|
||||
.run(
|
||||
"parent_chunk_5w1h.py",
|
||||
&["--file-uuid", &job.uuid, "--embed"],
|
||||
uuid,
|
||||
"STORY",
|
||||
Some(std::time::Duration::from_secs(300)),
|
||||
)
|
||||
.await;
|
||||
let narratives_path = output_dir.join(format!("{}.narratives.json", job.uuid));
|
||||
let chunks_produced = if narratives_path.exists() {
|
||||
let content = std::fs::read_to_string(&narratives_path).unwrap_or_default();
|
||||
let count: i32 = serde_json::from_str::<Vec<String>>(&content)
|
||||
.map(|v| v.len() as i32)
|
||||
.unwrap_or(0);
|
||||
tracing::info!("Story generated {} narratives for {}", count, job.uuid);
|
||||
count
|
||||
} else {
|
||||
0
|
||||
};
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::Value::Null,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::FiveW1H => {
|
||||
let executor = crate::core::processor::PythonExecutor::new()?;
|
||||
let _ = executor
|
||||
.run(
|
||||
"parent_chunk_5w1h.py",
|
||||
&["--file-uuid", &job.uuid, "--embed", "--mode", "llm"],
|
||||
uuid,
|
||||
"5W1H",
|
||||
Some(std::time::Duration::from_secs(300)),
|
||||
)
|
||||
.await;
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::Value::Null,
|
||||
chunks_produced: 0,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::MediaPipe => {
|
||||
let result = processor::process_mediapipe_v2(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"MEDIAPIPE completed, {} frames for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user