feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)
- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
This commit is contained in:
194
src/api/clip_api.rs
Normal file
194
src/api/clip_api.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
use axum::{
|
||||
extract::{Query, State},
|
||||
http::StatusCode,
|
||||
response::{IntoResponse, Response},
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::core::processor::{classify_image, classify_images, detect_objects, ClipPrediction};
|
||||
use crate::api::types::AppState;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ClassifyRequest {
|
||||
image_path: String,
|
||||
labels: String,
|
||||
#[serde(default = "default_top_k")]
|
||||
top_k: usize,
|
||||
#[serde(default)]
|
||||
model: Option<String>,
|
||||
}
|
||||
|
||||
/// Serde fallback used for a request's `top_k` field when the client omits it.
fn default_top_k() -> usize {
    const DEFAULT_TOP_K: usize = 5;
    DEFAULT_TOP_K
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct DetectRequest {
|
||||
image_path: String,
|
||||
objects: String,
|
||||
#[serde(default = "default_threshold")]
|
||||
threshold: f32,
|
||||
#[serde(default)]
|
||||
model: Option<String>,
|
||||
}
|
||||
|
||||
/// Serde fallback used for a request's `threshold` field when the client omits it.
fn default_threshold() -> f32 {
    const DEFAULT_THRESHOLD: f32 = 0.15;
    DEFAULT_THRESHOLD
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct BatchClassifyRequest {
|
||||
image_paths: String,
|
||||
labels: String,
|
||||
#[serde(default = "default_top_k")]
|
||||
top_k: usize,
|
||||
#[serde(default)]
|
||||
model: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ClassifyResponse {
|
||||
success: bool,
|
||||
predictions: Vec<ClipPrediction>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct DetectResponse {
|
||||
success: bool,
|
||||
detected: Vec<ClipPrediction>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct BatchClassifyResponse {
|
||||
success: bool,
|
||||
results: HashMap<String, Vec<ClipPrediction>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ErrorResponse {
|
||||
success: bool,
|
||||
error: String,
|
||||
}
|
||||
|
||||
pub fn clip_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/clip/classify", post(classify_image_endpoint))
|
||||
.route("/api/v1/clip/detect", post(detect_objects_endpoint))
|
||||
.route("/api/v1/clip/batch", post(batch_classify_endpoint))
|
||||
}
|
||||
|
||||
async fn classify_image_endpoint(
|
||||
State(_state): State<AppState>,
|
||||
Json(req): Json<ClassifyRequest>,
|
||||
) -> Response {
|
||||
let labels: Vec<&str> = req.labels.split(',').map(|s| s.trim()).collect();
|
||||
|
||||
let result = classify_image(
|
||||
&req.image_path,
|
||||
&labels,
|
||||
Some(req.top_k),
|
||||
req.model.as_deref(),
|
||||
).await;
|
||||
|
||||
match result {
|
||||
Ok(predictions) => {
|
||||
tracing::info!(
|
||||
"[CLIP_API] Classified {} -> top: {} ({:.3})",
|
||||
req.image_path,
|
||||
predictions.first().map(|p| p.label.as_str()).unwrap_or("none"),
|
||||
predictions.first().map(|p| p.confidence).unwrap_or(0.0)
|
||||
);
|
||||
Json(ClassifyResponse {
|
||||
success: true,
|
||||
predictions,
|
||||
}).into_response()
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("[CLIP_API] Classification failed: {}", e);
|
||||
Json(ErrorResponse {
|
||||
success: false,
|
||||
error: e.to_string(),
|
||||
}).into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn detect_objects_endpoint(
|
||||
State(_state): State<AppState>,
|
||||
Json(req): Json<DetectRequest>,
|
||||
) -> Response {
|
||||
let objects: Vec<&str> = req.objects.split(',').map(|s| s.trim()).collect();
|
||||
|
||||
let result = detect_objects(
|
||||
&req.image_path,
|
||||
&objects,
|
||||
Some(req.threshold),
|
||||
req.model.as_deref(),
|
||||
).await;
|
||||
|
||||
match result {
|
||||
Ok(detected) => {
|
||||
if !detected.is_empty() {
|
||||
tracing::info!(
|
||||
"[CLIP_API] Detected {} objects in {}: {}",
|
||||
detected.len(),
|
||||
req.image_path,
|
||||
detected.iter().map(|p| p.label.as_str()).collect::<Vec<_>>().join(", ")
|
||||
);
|
||||
} else {
|
||||
tracing::info!("[CLIP_API] No objects detected in {} (threshold: {:.2})", req.image_path, req.threshold);
|
||||
}
|
||||
Json(DetectResponse {
|
||||
success: true,
|
||||
detected,
|
||||
}).into_response()
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("[CLIP_API] Detection failed: {}", e);
|
||||
Json(ErrorResponse {
|
||||
success: false,
|
||||
error: e.to_string(),
|
||||
}).into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn batch_classify_endpoint(
|
||||
State(_state): State<AppState>,
|
||||
Json(req): Json<BatchClassifyRequest>,
|
||||
) -> Response {
|
||||
let image_paths: Vec<&str> = req.image_paths.split(',').map(|s| s.trim()).collect();
|
||||
let labels: Vec<&str> = req.labels.split(',').map(|s| s.trim()).collect();
|
||||
|
||||
let result = classify_images(
|
||||
&image_paths,
|
||||
&labels,
|
||||
Some(req.top_k),
|
||||
req.model.as_deref(),
|
||||
).await;
|
||||
|
||||
match result {
|
||||
Ok(results_vec) => {
|
||||
let results: HashMap<String, Vec<ClipPrediction>> = results_vec
|
||||
.into_iter()
|
||||
.map(|r| (r.image_path, r.predictions))
|
||||
.collect();
|
||||
|
||||
tracing::info!("[CLIP_API] Batch classified {} images", results.len());
|
||||
Json(BatchClassifyResponse {
|
||||
success: true,
|
||||
results,
|
||||
}).into_response()
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("[CLIP_API] Batch classification failed: {}", e);
|
||||
Json(ErrorResponse {
|
||||
success: false,
|
||||
error: e.to_string(),
|
||||
}).into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user