feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)

- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, scripts/stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
This commit is contained in:
Accusys
2026-06-13 16:25:52 +08:00
parent 834b0d4865
commit 17e4e15860
37 changed files with 2185 additions and 294 deletions
+76 -62
View File
@@ -471,12 +471,19 @@ impl JobWorker {
);
continue;
}
debug!("Output file not found, checking result_map for {}", processor_type.as_str());
debug!(
"Output file not found, checking result_map for {}",
processor_type.as_str()
);
// Check if processor already in terminal state
if let Some(result) = result_map.get(processor_type) {
debug!("Found existing result for {}: status={:?}", processor_type.as_str(), result.status);
debug!(
"Found existing result for {}: status={:?}",
processor_type.as_str(),
result.status
);
match result.status {
ProcessorJobStatus::Completed => {
info!(
@@ -606,7 +613,10 @@ impl JobWorker {
}
}
debug!("Checking capacity before starting {}", processor_type.as_str());
debug!(
"Checking capacity before starting {}",
processor_type.as_str()
);
// Check capacity before starting processor
if !self.processor_pool.can_start().await {
info!(
@@ -679,7 +689,11 @@ impl JobWorker {
.upsert_processor_result(job.id, *processor_type, &job.uuid, "pending")
.await?;
info!("Upserted processor_result for {}: id={}", processor_type.as_str(), processor_result_id);
info!(
"Upserted processor_result for {}: id={}",
processor_type.as_str(),
processor_result_id
);
self.redis
.update_worker_processor_status(
@@ -737,12 +751,10 @@ impl JobWorker {
let fu = uuid;
// Only check conditions relevant to the job's processors
let has_asr_or_asrx = job_processors.is_empty()
|| job_processors.iter().any(|p| p == "asrx" || p == "asr");
let has_cut = job_processors.is_empty()
|| job_processors.iter().any(|p| p == "cut");
let has_face = job_processors.is_empty()
|| job_processors.iter().any(|p| p == "face");
let has_asr_or_asrx =
job_processors.is_empty() || job_processors.iter().any(|p| p == "asrx" || p == "asr");
let has_cut = job_processors.is_empty() || job_processors.iter().any(|p| p == "cut");
let has_face = job_processors.is_empty() || job_processors.iter().any(|p| p == "face");
let rule1 = !has_asr_or_asrx
|| check!(&format!(
@@ -852,11 +864,9 @@ impl JobWorker {
if has_asrx {
// Guard: only spawn Rule 1 if sentence chunks don't exist yet
let chunk_t = schema::table_name("chunk");
let already_spawned: bool = sqlx::query_scalar::<_, i64>(
&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
),
)
let already_spawned: bool = sqlx::query_scalar::<_, i64>(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
))
.bind(uuid)
.fetch_optional(self.db.pool())
.await?
@@ -864,66 +874,70 @@ impl JobWorker {
> 0;
if already_spawned {
info!(
"✅ Rule 1 already completed for {}, skipping spawn",
uuid
);
info!("✅ Rule 1 already completed for {}, skipping spawn", uuid);
} else {
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
let db_clone = self.db.clone();
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
let fps = video.fps;
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
Ok(count) => {
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
if count > 0 {
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
let fps = video.fps;
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
Ok(count) => {
info!(
"📝 Starting automatic vectorize for {} chunks...",
"✅ Rule 1 Ingestion completed: {} chunks inserted.",
count
);
if let Err(e) =
Self::vectorize_chunks(&db_clone, &uuid_clone).await
{
error!(
"❌ Auto-vectorize failed for {}: {}",
uuid_clone, e
if count > 0 {
info!(
"📝 Starting automatic vectorize for {} chunks...",
count
);
if let Err(e) =
Self::vectorize_chunks(&db_clone, &uuid_clone).await
{
error!(
"❌ Auto-vectorize failed for {}: {}",
uuid_clone, e
);
}
}
info!("📦 Phase 1 release packaging...");
let executor =
match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => {
error!(
"Failed PythonExecutor for release pack: {}",
e
);
return;
}
};
match executor
.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None,
"RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => {
info!("✅ Phase 1 release packaged for {}", uuid_clone)
}
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
}
}
info!("📦 Phase 1 release packaging...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => {
error!("Failed PythonExecutor for release pack: {}", e);
return;
}
};
match executor
.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None,
"RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => {
info!("✅ Phase 1 release packaged for {}", uuid_clone)
}
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
}
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
}
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
}
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
Err(e) => error!("Failed to get video info for chunking: {}", e),
}
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
Err(e) => error!("Failed to get video info for chunking: {}", e),
}
});
});
}
}
+2 -2
View File
@@ -1089,8 +1089,8 @@ impl ProcessorPool {
segment.start_time,
segment.end_time,
segment.text.clone(),
None::<String>, // chunk_id: unknown yet, filled later
0.0, // confidence: updated after binding
None::<String>, // chunk_id: unknown yet, filled later
0.0, // confidence: updated after binding
));
}