feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
@@ -12,7 +12,7 @@ use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::core::db::{
|
||||
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
|
||||
VideoStatus,
|
||||
VideoStatus, WorkspaceDb,
|
||||
};
|
||||
use crate::core::embedding::Embedder;
|
||||
use crate::core::processor::heuristic_scene::generate_scene_meta;
|
||||
@@ -376,15 +376,109 @@ impl JobWorker {
|
||||
error!("Failed to create completed processor result: {}", e);
|
||||
}
|
||||
// Load output file and store to pre_chunks
|
||||
// Also dual-write to workspace if available
|
||||
let workspace = WorkspaceDb::open(&job.uuid).await.ok();
|
||||
if let Ok(json_str) = std::fs::read_to_string(&output_path) {
|
||||
let store_result = match processor_type {
|
||||
let store_result: Result<()> = match processor_type {
|
||||
crate::core::db::ProcessorType::Asr => {
|
||||
if let Ok(result) =
|
||||
serde_json::from_str::<crate::core::processor::AsrResult>(&json_str)
|
||||
{
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_asr_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store ASR chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for segment in &result.segments {
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
"timestamp": segment.start_time,
|
||||
});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"asr",
|
||||
"raw",
|
||||
segment.start_frame,
|
||||
segment.end_frame,
|
||||
Some(segment.start_time),
|
||||
Some(segment.end_time),
|
||||
Some(&data.to_string()),
|
||||
Some(&segment.text),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse ASR JSON for {}: {}",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
crate::core::db::ProcessorType::Asrx => {
|
||||
if let Ok(result) = serde_json::from_str::<
|
||||
crate::core::processor::AsrxResult,
|
||||
>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_asrx_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_asrx_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store ASRX chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for segment in &result.segments {
|
||||
let data = serde_json::json!({"text": segment.text, "speaker_id": segment.speaker_id, "end_time": segment.end_time});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"asrx",
|
||||
"raw",
|
||||
None,
|
||||
None,
|
||||
Some(segment.start_time),
|
||||
Some(segment.end_time),
|
||||
Some(&data.to_string()),
|
||||
Some(&segment.text),
|
||||
)
|
||||
.await;
|
||||
// Also store asr pre_chunks (needed by Rule 1 after checkin)
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"asr",
|
||||
"raw",
|
||||
None,
|
||||
None,
|
||||
Some(segment.start_time),
|
||||
Some(segment.end_time),
|
||||
Some(&data.to_string()),
|
||||
Some(&segment.text),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let spk_dets: Vec<crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem> = result.segments.iter().map(|s| {
|
||||
crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem {
|
||||
speaker_id: s.speaker_id.clone().unwrap_or_default(),
|
||||
start_time: s.start_time,
|
||||
end_time: s.end_time,
|
||||
text: s.text.clone(),
|
||||
chunk_id: None,
|
||||
confidence: 0.0,
|
||||
}
|
||||
}).collect();
|
||||
let _ = ws.store_speaker_detections_batch(&spk_dets).await;
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse ASRX JSON for {}: {}",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -392,8 +486,35 @@ impl JobWorker {
|
||||
if let Ok(result) =
|
||||
serde_json::from_str::<crate::core::processor::CutResult>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_cut_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_cut_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store CUT chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for scene in &result.scenes {
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"cut",
|
||||
"cut",
|
||||
Some(scene.start_frame as i64),
|
||||
Some(scene.end_frame as i64),
|
||||
Some(scene.start_time),
|
||||
Some(scene.end_time),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse CUT JSON for {}: {} bytes",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -402,8 +523,36 @@ impl JobWorker {
|
||||
crate::core::processor::YoloResult,
|
||||
>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_yolo_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_yolo_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store YOLO chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"objects": frame.objects});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"yolo",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse YOLO JSON for {}: {} bytes",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -411,8 +560,36 @@ impl JobWorker {
|
||||
if let Ok(result) =
|
||||
serde_json::from_str::<crate::core::processor::OcrResult>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_ocr_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_ocr_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store OCR chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"texts": frame.texts});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"ocr",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse OCR JSON for {}: {} bytes",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -421,8 +598,51 @@ impl JobWorker {
|
||||
crate::core::processor::FaceResult,
|
||||
>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_face_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_face_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store FACE chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
let dets: Vec<crate::core::db::workspace_sqlite::FaceDetectionBatchItem> = result.frames.iter().flat_map(|frame| {
|
||||
frame.faces.iter().map(|face| crate::core::db::workspace_sqlite::FaceDetectionBatchItem {
|
||||
face_id: face.face_id.clone(),
|
||||
frame: frame.frame as i64,
|
||||
ts: frame.timestamp,
|
||||
x: face.x,
|
||||
y: face.y,
|
||||
w: face.width,
|
||||
h: face.height,
|
||||
confidence: face.confidence,
|
||||
})
|
||||
}).collect();
|
||||
if !dets.is_empty() {
|
||||
let _ = ws.store_face_detections_batch(&dets).await;
|
||||
}
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"faces": frame.faces});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"face",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse FACE JSON for {}: {} bytes",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -431,11 +651,40 @@ impl JobWorker {
|
||||
crate::core::processor::PoseResult,
|
||||
>(&json_str)
|
||||
{
|
||||
ProcessorPool::store_pose_chunks(&self.db, &job.uuid, &result).await
|
||||
if let Err(e) =
|
||||
ProcessorPool::store_pose_chunks(&self.db, &job.uuid, &result)
|
||||
.await
|
||||
{
|
||||
error!("Failed to store POSE chunks: {}", e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"persons": frame.persons});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"pose",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
error!(
|
||||
"Failed to parse POSE JSON for {}: {} bytes",
|
||||
job.uuid,
|
||||
json_str.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
crate::core::db::ProcessorType::Appearance => Ok(()),
|
||||
_ => Ok(()),
|
||||
};
|
||||
if let Err(e) = store_result {
|
||||
@@ -741,7 +990,7 @@ impl JobWorker {
|
||||
|
||||
macro_rules! check {
|
||||
($sql:expr) => {
|
||||
sqlx::query_scalar::<_, i64>($sql)
|
||||
sqlx::query_scalar::<_, i32>($sql)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap_or(0)
|
||||
@@ -797,7 +1046,7 @@ impl JobWorker {
|
||||
// 例如:Rule 1 只需 ASR+ASRX 完成即可觸發,不須等 face/pose/story 完成
|
||||
|
||||
// 定義必要 processor(必須完成的才算 job 成功)
|
||||
let essential_processors = ["cut", "asrx", "yolo"];
|
||||
let essential_processors = ["cut", "asr", "asrx", "yolo"];
|
||||
|
||||
let essential_completed = essential_processors.iter().all(|ep| {
|
||||
results.iter().any(|r| {
|
||||
@@ -864,7 +1113,7 @@ impl JobWorker {
|
||||
if has_asrx {
|
||||
// Guard: only spawn Rule 1 if sentence chunks don't exist yet
|
||||
let chunk_t = schema::table_name("chunk");
|
||||
let already_spawned: bool = sqlx::query_scalar::<_, i64>(&format!(
|
||||
let already_spawned: bool = sqlx::query_scalar::<_, i32>(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
|
||||
))
|
||||
.bind(uuid)
|
||||
@@ -1256,6 +1505,84 @@ impl JobWorker {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Vectorize relationship chunks (from Rule 2) and store in PG + Qdrant
|
||||
async fn vectorize_relationship_chunks(db: &PostgresDb, uuid: &str) -> anyhow::Result<()> {
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
let qdrant = QdrantDb::new();
|
||||
let pool = db.pool();
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64)>(
|
||||
&format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, start_time, end_time \
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'relationship' \
|
||||
AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') \
|
||||
ORDER BY id",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
info!("[Vectorize-R2] No relationship chunks to vectorize for {}", uuid);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let total = rows.len();
|
||||
info!(
|
||||
"[Vectorize-R2] Starting vectorize of {} relationship chunks for {}",
|
||||
total, uuid
|
||||
);
|
||||
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, text, start_frame, end_frame, start_time, end_time) in &rows {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
match embedder.embed_document(&text).await {
|
||||
Ok(vector) => {
|
||||
if let Err(e) = db.store_vector(&chunk_id, &vector, uuid).await {
|
||||
error!("[Vectorize-R2] PG store failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
file_uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "relationship".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
};
|
||||
if let Err(e) = qdrant.upsert_vector(&chunk_id, &vector, payload).await {
|
||||
error!("[Vectorize-R2] Qdrant upsert failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
stored += 1;
|
||||
if stored % 10 == 0 {
|
||||
info!(
|
||||
"[Vectorize-R2] {}/{} vectors stored for {}",
|
||||
stored, total, uuid
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("[Vectorize-R2] Embedding failed for {}: {}", chunk_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"[Vectorize-R2] Completed: {}/{} relationship vectors stored for {}",
|
||||
stored, total, uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -14,7 +14,9 @@ struct ProcessorCleanupGuard {
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
frame_count: Arc<RwLock<usize>>,
|
||||
time_count: Arc<RwLock<usize>>,
|
||||
best_effort_count: Arc<RwLock<usize>>,
|
||||
pipeline: PipelineType,
|
||||
is_best_effort: bool,
|
||||
}
|
||||
|
||||
impl Drop for ProcessorCleanupGuard {
|
||||
@@ -30,22 +32,30 @@ impl Drop for ProcessorCleanupGuard {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
if self.is_best_effort {
|
||||
if let Ok(mut guard) = self.best_effort_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
} else {
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -61,6 +71,7 @@ struct ProcessorHandle {
|
||||
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
|
||||
use crate::core::db::{
|
||||
MonitorJob, PipelineType, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
|
||||
WorkspaceDb,
|
||||
};
|
||||
use crate::core::processor;
|
||||
use crate::core::processor::asr::AsrResult;
|
||||
@@ -95,6 +106,8 @@ pub struct ProcessorTask {
|
||||
const FRAME_SLOT_MAX: usize = 2;
|
||||
/// Time pipeline max concurrent processors (audio is heavy, run 1 at a time).
|
||||
const TIME_SLOT_MAX: usize = 1;
|
||||
/// Best-effort slot (used by low-priority processors like MediaPipe).
|
||||
const BEST_EFFORT_SLOT_MAX: usize = 1;
|
||||
|
||||
pub struct ProcessorPool {
|
||||
db: Arc<PostgresDb>,
|
||||
@@ -104,6 +117,7 @@ pub struct ProcessorPool {
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
running_frame_count: Arc<RwLock<usize>>,
|
||||
running_time_count: Arc<RwLock<usize>>,
|
||||
running_best_effort_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
|
||||
impl ProcessorPool {
|
||||
@@ -116,6 +130,7 @@ impl ProcessorPool {
|
||||
running_count: Arc::new(RwLock::new(0)),
|
||||
running_frame_count: Arc::new(RwLock::new(0)),
|
||||
running_time_count: Arc::new(RwLock::new(0)),
|
||||
running_best_effort_count: Arc::new(RwLock::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,16 +240,22 @@ impl ProcessorPool {
|
||||
*count += 1;
|
||||
}
|
||||
// 遞增產線專屬 slot
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.write().await += 1,
|
||||
PipelineType::Time => *self.running_time_count.write().await += 1,
|
||||
PipelineType::Cross => {} // cross pipeline uses global slot
|
||||
let is_best_effort = processor_type == ProcessorType::MediaPipe;
|
||||
if is_best_effort {
|
||||
*self.running_best_effort_count.write().await += 1;
|
||||
} else {
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.write().await += 1,
|
||||
PipelineType::Time => *self.running_time_count.write().await += 1,
|
||||
PipelineType::Cross => {} // cross pipeline uses global slot
|
||||
}
|
||||
}
|
||||
|
||||
let running = self.running.clone();
|
||||
let running_count = self.running_count.clone();
|
||||
let running_frame_count = self.running_frame_count.clone();
|
||||
let running_time_count = self.running_time_count.clone();
|
||||
let running_best_effort_count = self.running_best_effort_count.clone();
|
||||
let child_pid: Arc<RwLock<Option<i32>>> = Arc::new(RwLock::new(None));
|
||||
running.write().await.insert(
|
||||
job_id,
|
||||
@@ -266,7 +287,9 @@ impl ProcessorPool {
|
||||
running_count: running_count.clone(),
|
||||
frame_count: running_frame_count.clone(),
|
||||
time_count: running_time_count.clone(),
|
||||
best_effort_count: running_best_effort_count.clone(),
|
||||
pipeline,
|
||||
is_best_effort,
|
||||
};
|
||||
|
||||
info!("Starting processor {} for job {}", processor_name, job.uuid);
|
||||
@@ -519,6 +542,14 @@ impl ProcessorPool {
|
||||
let uuid = Some(job.uuid.as_str());
|
||||
let video = db.get_video_by_uuid(&job.uuid).await?;
|
||||
let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);
|
||||
let fps = video.as_ref().map(|v| v.fps).unwrap_or(29.97);
|
||||
|
||||
// Compute 8Hz sample frames for frame-based processors
|
||||
let sample_frames =
|
||||
crate::core::processor::PythonExecutor::compute_8hz_frames(total_frames as i64, fps);
|
||||
|
||||
// Open workspace for dual-write (best-effort)
|
||||
let workspace = WorkspaceDb::open(&job.uuid).await.ok();
|
||||
|
||||
match processor_type {
|
||||
ProcessorType::Cut => {
|
||||
@@ -540,6 +571,22 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_cut_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store CUT chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for scene in &result.scenes {
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"cut",
|
||||
"cut",
|
||||
Some(scene.start_frame as i64),
|
||||
Some(scene.end_frame as i64),
|
||||
Some(scene.start_time),
|
||||
Some(scene.end_time),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -550,9 +597,13 @@ impl ProcessorPool {
|
||||
})
|
||||
}
|
||||
ProcessorType::Yolo => {
|
||||
let result =
|
||||
processor::process_yolo(video_path, output_path.to_str().unwrap(), uuid)
|
||||
.await?;
|
||||
let result = processor::process_yolo(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"YOLO completed, storing {} frames for {}",
|
||||
@@ -562,6 +613,23 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_yolo_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store YOLO chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"objects": frame.objects});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"yolo",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -572,8 +640,13 @@ impl ProcessorPool {
|
||||
})
|
||||
}
|
||||
ProcessorType::Ocr => {
|
||||
let result =
|
||||
processor::process_ocr(video_path, output_path.to_str().unwrap(), uuid).await?;
|
||||
let result = processor::process_ocr(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"OCR completed, storing {} frames for {}",
|
||||
@@ -583,6 +656,23 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_ocr_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store OCR chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"texts": frame.texts});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"ocr",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -593,9 +683,13 @@ impl ProcessorPool {
|
||||
})
|
||||
}
|
||||
ProcessorType::Face => {
|
||||
let result =
|
||||
processor::process_face(video_path, output_path.to_str().unwrap(), uuid)
|
||||
.await?;
|
||||
let result = processor::process_face(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"FACE completed, storing {} frames for {}",
|
||||
@@ -605,6 +699,45 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
let dets: Vec<crate::core::db::workspace_sqlite::FaceDetectionBatchItem> =
|
||||
result
|
||||
.frames
|
||||
.iter()
|
||||
.flat_map(|frame| {
|
||||
frame.faces.iter().map(|face| {
|
||||
crate::core::db::workspace_sqlite::FaceDetectionBatchItem {
|
||||
face_id: face.face_id.clone(),
|
||||
frame: frame.frame as i64,
|
||||
ts: frame.timestamp,
|
||||
x: face.x,
|
||||
y: face.y,
|
||||
w: face.width,
|
||||
h: face.height,
|
||||
confidence: face.confidence,
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if !dets.is_empty() {
|
||||
let _ = ws.store_face_detections_batch(&dets).await;
|
||||
}
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"faces": frame.faces});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"face",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -615,9 +748,13 @@ impl ProcessorPool {
|
||||
})
|
||||
}
|
||||
ProcessorType::Pose => {
|
||||
let result =
|
||||
processor::process_pose(video_path, output_path.to_str().unwrap(), uuid)
|
||||
.await?;
|
||||
let result = processor::process_pose(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"POSE completed, storing {} frames for {}",
|
||||
@@ -627,6 +764,91 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_pose_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store POSE chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for frame in &result.frames {
|
||||
let data = serde_json::json!({"persons": frame.persons});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"pose",
|
||||
"raw",
|
||||
Some(frame.frame as i64),
|
||||
None,
|
||||
Some(frame.timestamp),
|
||||
None,
|
||||
Some(&data.to_string()),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Appearance => {
|
||||
let pose_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.pose.json", job.uuid));
|
||||
let pose_path_str = pose_path.to_str().unwrap_or("");
|
||||
let result = processor::process_appearance(
|
||||
video_path,
|
||||
pose_path_str,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frame_count as i32;
|
||||
tracing::info!(
|
||||
"APPEARANCE completed, {} frames for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Asr => {
|
||||
let result =
|
||||
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
|
||||
let chunks_produced = result.segments.len() as i32;
|
||||
tracing::info!(
|
||||
"ASR completed, storing {} segments for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for segment in &result.segments {
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
"timestamp": segment.start_time,
|
||||
});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"asr",
|
||||
"raw",
|
||||
segment.start_frame,
|
||||
segment.end_frame,
|
||||
Some(segment.start_time),
|
||||
Some(segment.end_time),
|
||||
Some(&data.to_string()),
|
||||
Some(&segment.text),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -653,6 +875,47 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_voice_embeddings_to_qdrant(&job.uuid, &result).await {
|
||||
tracing::error!("Failed to store voice embeddings to Qdrant: {}", e);
|
||||
}
|
||||
// 寫入 workspace
|
||||
if let Some(ref ws) = workspace {
|
||||
for segment in &result.segments {
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"timestamp": segment.start_time,
|
||||
});
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"asrx",
|
||||
"raw",
|
||||
None,
|
||||
None,
|
||||
Some(segment.start_time),
|
||||
Some(segment.end_time),
|
||||
Some(&data.to_string()),
|
||||
Some(&segment.text),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let spk_dets: Vec<
|
||||
crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem,
|
||||
> = result
|
||||
.segments
|
||||
.iter()
|
||||
.map(
|
||||
|s| crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem {
|
||||
speaker_id: s.speaker_id.clone().unwrap_or_default(),
|
||||
start_time: s.start_time,
|
||||
end_time: s.end_time,
|
||||
text: s.text.clone(),
|
||||
chunk_id: None,
|
||||
confidence: 0.0,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
if !spk_dets.is_empty() {
|
||||
let _ = ws.store_speaker_detections_batch(&spk_dets).await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -703,6 +966,22 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_scene_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store Scene chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
if let Some(ref ws) = workspace {
|
||||
for scene in &result.scenes {
|
||||
let _ = ws
|
||||
.store_pre_chunk(
|
||||
"scene",
|
||||
"scene",
|
||||
None,
|
||||
None,
|
||||
Some(scene.start_time),
|
||||
Some(scene.end_time),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -763,6 +1042,29 @@ impl ProcessorPool {
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::MediaPipe => {
|
||||
let result = processor::process_mediapipe_v2(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
Some(&sample_frames),
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.frames.len() as i32;
|
||||
tracing::info!(
|
||||
"MEDIAPIPE completed, {} frames for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -944,6 +1246,7 @@ impl ProcessorPool {
|
||||
detections_to_store.push((
|
||||
frame.frame as i64,
|
||||
frame.timestamp,
|
||||
face.face_id.clone(),
|
||||
face.x,
|
||||
face.y,
|
||||
face.width,
|
||||
@@ -960,23 +1263,20 @@ impl ProcessorPool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
|
||||
/// 將 voice embeddings 寫入 Qdrant {file_uuid}_voice collection (per-file)
|
||||
pub async fn store_voice_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
asrx_result: &AsrxResult,
|
||||
) -> Result<()> {
|
||||
let qdrant = QdrantDb::new();
|
||||
let collection = format!(
|
||||
"{}{}",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':'),
|
||||
"_voice"
|
||||
);
|
||||
let collection = format!("{}_voice", uuid);
|
||||
|
||||
// 確保 collection 存在(dim=192 for ASRX voice)
|
||||
if let Err(e) = qdrant.ensure_collection(&collection, 192).await {
|
||||
tracing::error!("Failed to ensure Qdrant voice collection: {}", e);
|
||||
tracing::error!(
|
||||
"Failed to ensure Qdrant voice collection {}: {}",
|
||||
collection,
|
||||
e
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -991,12 +1291,10 @@ impl ProcessorPool {
|
||||
if emb.len() != 192 {
|
||||
continue;
|
||||
}
|
||||
// Point ID: hash(file_uuid + speaker_id + index) for global uniqueness
|
||||
// Point ID: hash(speaker_id + index) — file_uuid redundant in per-file collection
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(segment.speaker_id.clone().unwrap_or_default().as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(i.to_string().as_bytes());
|
||||
@@ -1012,6 +1310,7 @@ impl ProcessorPool {
|
||||
"end_frame": segment.end_frame,
|
||||
"start_time": segment.start_time,
|
||||
"end_time": segment.end_time,
|
||||
"event_type": "speaker",
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
@@ -1026,7 +1325,12 @@ impl ProcessorPool {
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
tracing::info!("Stored {} voice embeddings to Qdrant for {}", count, uuid);
|
||||
tracing::info!(
|
||||
"Stored {} voice embeddings to Qdrant per-file collection {} for {}",
|
||||
count,
|
||||
collection,
|
||||
uuid
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1079,6 +1383,7 @@ impl ProcessorPool {
|
||||
"text": segment.text,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"timestamp": segment.start_time,
|
||||
"end_time": segment.end_time,
|
||||
});
|
||||
|
||||
// ASRX is time-based, so we use segment index or start time as coordinate.
|
||||
|
||||
Reference in New Issue
Block a user