feat: frame/time pipeline split + output validation
- Add PipelineType enum + pipeline() to ProcessorType
- Split ProcessorPool into frame_slots (max 2) and time_slots (max 1)
- Add can_start_for() for pipeline-aware scheduling
- Add validate_output_file() — checks JSON validity before marking complete
- Add 3 unit tests for validate_output_file()
- Create DESIGN/FRAME_TIME_PIPELINE_V1.0.md (492 lines)
This commit is contained in:
+73
-14
@@ -7,35 +7,56 @@ use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
|
||||
|
||||
/// Guard that ensures processor pool cleanup runs even if the task panics.
|
||||
struct ProcessorCleanupGuard {
|
||||
job_id: i32,
|
||||
running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
frame_count: Arc<RwLock<usize>>,
|
||||
time_count: Arc<RwLock<usize>>,
|
||||
pipeline: PipelineType,
|
||||
}
|
||||
|
||||
impl Drop for ProcessorCleanupGuard {
|
||||
fn drop(&mut self) {
|
||||
use tokio::sync::TryLockError;
|
||||
// 嘗試同步清理;若 lock 被佔用則跳過(避免 deadlock)
|
||||
if let Ok(mut guard) = self.running.try_write() {
|
||||
guard.remove(&self.job_id);
|
||||
} else {
|
||||
warn!("[ProcessorCleanupGuard] running lock contended, skipping cleanup");
|
||||
warn!("[ProcessorCleanupGuard] running lock contended");
|
||||
}
|
||||
if let Ok(mut guard) = self.running_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
}
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("[ProcessorCleanupGuard] running_count lock contended, skipping cleanup");
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
}
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ProcessorHandle {
|
||||
#[allow(dead_code)]
|
||||
processor_type: ProcessorType,
|
||||
cancel_tx: mpsc::Sender<()>,
|
||||
child_pid: Arc<RwLock<Option<i32>>>,
|
||||
}
|
||||
|
||||
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
|
||||
use crate::core::db::{
|
||||
MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
|
||||
MonitorJob, PipelineType, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
|
||||
};
|
||||
use crate::core::processor;
|
||||
use crate::core::processor::asr::AsrResult;
|
||||
@@ -67,19 +88,19 @@ pub struct ProcessorTask {
|
||||
pub frame_dir: Option<String>,
|
||||
}
|
||||
|
||||
/// Frame pipeline max concurrent processors (hard limit).
|
||||
const FRAME_SLOT_MAX: usize = 2;
|
||||
/// Time pipeline max concurrent processors (audio is heavy, run 1 at a time).
|
||||
const TIME_SLOT_MAX: usize = 1;
|
||||
|
||||
pub struct ProcessorPool {
|
||||
db: Arc<PostgresDb>,
|
||||
redis: Arc<RedisClient>,
|
||||
config_max: usize,
|
||||
running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
|
||||
struct ProcessorHandle {
|
||||
#[allow(dead_code)]
|
||||
processor_type: ProcessorType,
|
||||
cancel_tx: mpsc::Sender<()>,
|
||||
child_pid: Arc<RwLock<Option<i32>>>,
|
||||
running_frame_count: Arc<RwLock<usize>>,
|
||||
running_time_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
|
||||
impl ProcessorPool {
|
||||
@@ -90,6 +111,8 @@ impl ProcessorPool {
|
||||
config_max: max_concurrent,
|
||||
running: Arc::new(RwLock::new(HashMap::new())),
|
||||
running_count: Arc::new(RwLock::new(0)),
|
||||
running_frame_count: Arc::new(RwLock::new(0)),
|
||||
running_time_count: Arc::new(RwLock::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,10 +128,27 @@ impl ProcessorPool {
|
||||
count < max
|
||||
}
|
||||
|
||||
/// 檢查特定產線是否可啟動新的 processor。
|
||||
/// Frame pipeline 最多 FRAME_SLOT_MAX 個,Time pipeline 最多 TIME_SLOT_MAX 個。
|
||||
pub async fn can_start_for(&self, pipeline: PipelineType) -> bool {
|
||||
let count = *self.running_count.read().await;
|
||||
let max = self.current_max().await;
|
||||
if count >= max {
|
||||
return false;
|
||||
}
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.read().await < FRAME_SLOT_MAX,
|
||||
PipelineType::Time => *self.running_time_count.read().await < TIME_SLOT_MAX,
|
||||
PipelineType::Cross => false, // cross pipeline = wait until frame+time done
|
||||
}
|
||||
}
|
||||
|
||||
/// 清理 stale running state:若系統中實際運行的 processor 比記錄少,修正 count
|
||||
pub async fn sweep_stale(&self) {
|
||||
let handle_count = self.running.read().await.len();
|
||||
let count = *self.running_count.read().await;
|
||||
let frame_count = *self.running_frame_count.read().await;
|
||||
let time_count = *self.running_time_count.read().await;
|
||||
if handle_count != count {
|
||||
warn!(
|
||||
"[ProcessorPool] Stale count detected: handles={}, count={}, fixing",
|
||||
@@ -117,6 +157,13 @@ impl ProcessorPool {
|
||||
let mut c = self.running_count.write().await;
|
||||
*c = handle_count;
|
||||
}
|
||||
// 若 frame 或 time slot 超出 handle_count,降回合理值
|
||||
if frame_count + time_count > handle_count {
|
||||
let mut fc = self.running_frame_count.write().await;
|
||||
let mut tc = self.running_time_count.write().await;
|
||||
*fc = (*fc).min(handle_count);
|
||||
*tc = (*tc).min(handle_count.saturating_sub(*fc));
|
||||
}
|
||||
|
||||
if handle_count == 0 && count == 0 {
|
||||
if let Err(e) = self
|
||||
@@ -162,6 +209,7 @@ impl ProcessorPool {
|
||||
let job_id = task.job.id;
|
||||
let processor_type = task.processor_type;
|
||||
|
||||
let pipeline = task.processor_type.pipeline();
|
||||
let current_limit = self.current_max().await;
|
||||
{
|
||||
let mut count = self.running_count.write().await;
|
||||
@@ -173,9 +221,17 @@ impl ProcessorPool {
|
||||
}
|
||||
*count += 1;
|
||||
}
|
||||
// 遞增產線專屬 slot
|
||||
match pipeline {
|
||||
PipelineType::Frame => *self.running_frame_count.write().await += 1,
|
||||
PipelineType::Time => *self.running_time_count.write().await += 1,
|
||||
PipelineType::Cross => {} // cross pipeline uses global slot
|
||||
}
|
||||
|
||||
let running = self.running.clone();
|
||||
let running_count = self.running_count.clone();
|
||||
let running_frame_count = self.running_frame_count.clone();
|
||||
let running_time_count = self.running_time_count.clone();
|
||||
let child_pid: Arc<RwLock<Option<i32>>> = Arc::new(RwLock::new(None));
|
||||
running.write().await.insert(
|
||||
job_id,
|
||||
@@ -205,6 +261,9 @@ impl ProcessorPool {
|
||||
job_id,
|
||||
running: running.clone(),
|
||||
running_count: running_count.clone(),
|
||||
frame_count: running_frame_count.clone(),
|
||||
time_count: running_time_count.clone(),
|
||||
pipeline,
|
||||
};
|
||||
|
||||
info!("Starting processor {} for job {}", processor_name, job.uuid);
|
||||
|
||||
Reference in New Issue
Block a user