feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
@@ -7,8 +7,6 @@ use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
|
||||
|
||||
/// Guard that ensures processor pool cleanup runs even if the task panics.
|
||||
struct ProcessorCleanupGuard {
|
||||
job_id: i32,
|
||||
@@ -28,17 +26,23 @@ impl Drop for ProcessorCleanupGuard {
|
||||
warn!("[ProcessorCleanupGuard] running lock contended");
|
||||
}
|
||||
if let Ok(mut guard) = self.running_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
@@ -66,7 +70,6 @@ use crate::core::processor::face::FaceResult;
|
||||
use crate::core::processor::ocr::OcrResult;
|
||||
use crate::core::processor::pose::PoseResult;
|
||||
use crate::core::processor::scene_classification::SceneClassificationResult;
|
||||
use crate::core::processor::visual_chunk::VisualChunkResult;
|
||||
use crate::core::processor::yolo::YoloResult;
|
||||
use crate::worker::resources::SystemResources;
|
||||
|
||||
@@ -518,32 +521,10 @@ impl ProcessorPool {
|
||||
let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);
|
||||
|
||||
match processor_type {
|
||||
ProcessorType::Asr => {
|
||||
let result =
|
||||
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
|
||||
let chunks_produced = result.segments.len() as i32;
|
||||
tracing::info!(
|
||||
"ASR completed, storing {} segments for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Cut => {
|
||||
let cut_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid));
|
||||
let result = if cut_path.exists() {
|
||||
// CUT 在 register 階段已完成,直接載入
|
||||
let content =
|
||||
std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?;
|
||||
serde_json::from_str(&content).context("Failed to parse cut.json")?
|
||||
@@ -624,10 +605,6 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
// 將 face embedding 寫入 Qdrant
|
||||
if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await {
|
||||
tracing::error!("Failed to store face embeddings to Qdrant: {}", e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -685,31 +662,6 @@ impl ProcessorPool {
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::VisualChunk => {
|
||||
let result = processor::process_visual_chunk_advanced(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.chunk_count as i32;
|
||||
tracing::info!(
|
||||
"VisualChunk completed, storing {} chunks for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Scene => {
|
||||
let scene_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid));
|
||||
@@ -717,7 +669,6 @@ impl ProcessorPool {
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid));
|
||||
let scene_tmp =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid));
|
||||
// 優先順序:.err(跳過)→ .json(載入)→ .tmp(等待或重新執行)
|
||||
let result = if scene_err.exists() {
|
||||
tracing::warn!("Scene previously failed for {}, skipping", job.uuid);
|
||||
return Ok(ProcessorOutput {
|
||||
@@ -1009,72 +960,6 @@ impl ProcessorPool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 face embeddings 寫入 Qdrant momentry_dev_face collection
|
||||
pub async fn store_face_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
face_result: &FaceResult,
|
||||
) -> Result<()> {
|
||||
let qdrant = QdrantDb::new();
|
||||
let collection = format!(
|
||||
"{}{}",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':'),
|
||||
"_face"
|
||||
);
|
||||
|
||||
// 確保 collection 存在(dim=512 for FaceNet)
|
||||
if let Err(e) = qdrant.ensure_collection(&collection, 512).await {
|
||||
tracing::error!("Failed to ensure Qdrant face collection: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut count = 0;
|
||||
for frame in &face_result.frames {
|
||||
for face in &frame.faces {
|
||||
if let Some(embedding) = &face.embedding {
|
||||
if embedding.len() != 512 {
|
||||
continue;
|
||||
}
|
||||
// 使用 hash 作為 Qdrant point ID(需要 unsigned integer)
|
||||
// 使用 frame number 作為 Qdrant point ID(u64)
|
||||
let point_id = frame.frame as u64;
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"face_id": face.face_id,
|
||||
"frame": frame.frame,
|
||||
"timestamp": frame.timestamp,
|
||||
"x": face.x,
|
||||
"y": face.y,
|
||||
"width": face.width,
|
||||
"height": face.height,
|
||||
"confidence": face.confidence,
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&collection,
|
||||
point_id,
|
||||
embedding,
|
||||
Some(payload),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert face vector {}: {}", point_id, e);
|
||||
} else {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
|
||||
pub async fn store_voice_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
@@ -1106,9 +991,22 @@ impl ProcessorPool {
|
||||
if emb.len() != 192 {
|
||||
continue;
|
||||
}
|
||||
// Point ID: hash(file_uuid + speaker_id + index) for global uniqueness
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(segment.speaker_id.clone().unwrap_or_default().as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(i.to_string().as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
u64::from_be_bytes(hash[0..8].try_into().unwrap())
|
||||
};
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"speaker_id": segment.speaker_id.clone().unwrap_or_default(),
|
||||
"segment_index": i,
|
||||
"start_frame": segment.start_frame,
|
||||
"end_frame": segment.end_frame,
|
||||
@@ -1117,7 +1015,7 @@ impl ProcessorPool {
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(&collection, i as u64, emb, Some(payload))
|
||||
.upsert_vector_to_collection(&collection, point_id, emb, Some(payload))
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert voice vector {}: {}", i, e);
|
||||
@@ -1174,6 +1072,7 @@ impl ProcessorPool {
|
||||
);
|
||||
|
||||
let mut pre_chunks_to_store = Vec::new();
|
||||
let mut speaker_detections = Vec::new();
|
||||
|
||||
for (i, segment) in asrx_result.segments.iter().enumerate() {
|
||||
let data = serde_json::json!({
|
||||
@@ -1184,28 +1083,23 @@ impl ProcessorPool {
|
||||
|
||||
// ASRX is time-based, so we use segment index or start time as coordinate.
|
||||
pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None));
|
||||
|
||||
speaker_detections.push((
|
||||
segment.speaker_id.clone().unwrap_or_default(),
|
||||
segment.start_time,
|
||||
segment.end_time,
|
||||
segment.text.clone(),
|
||||
None::<String>, // chunk_id: unknown yet, filled later
|
||||
0.0, // confidence: updated after binding
|
||||
));
|
||||
}
|
||||
|
||||
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn store_visual_chunk_chunks(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
visual_chunk_result: &VisualChunkResult,
|
||||
) -> Result<()> {
|
||||
for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() {
|
||||
match db.store_chunk(chunk).await {
|
||||
Ok(_) => {
|
||||
tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
db.store_raw_pre_chunks_batch(uuid, "asr", &pre_chunks_to_store)
|
||||
.await?;
|
||||
db.store_speaker_detections_batch(uuid, &speaker_detections)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1256,7 +1150,7 @@ impl ProcessorPool {
|
||||
});
|
||||
let chunk_table = crate::core::db::schema::table_name("chunk");
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
"UPDATE {} SET metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $1::jsonb) WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&meta)
|
||||
|
||||
Reference in New Issue
Block a user