feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)
- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
This commit is contained in:
+82
-5
@@ -36,6 +36,9 @@ pub struct SearchResult {
|
||||
pub summary: Option<String>,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub similarity: Option<f64>,
|
||||
pub file_name: Option<String>,
|
||||
pub serve_url: Option<String>,
|
||||
pub thumbnail_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -81,6 +84,9 @@ async fn enrich_from_pg(
|
||||
summary: Some(p.summary),
|
||||
metadata: p.metadata.clone(),
|
||||
similarity: Some(qdrant_score as f64),
|
||||
file_name: None,
|
||||
serve_url: None,
|
||||
thumbnail_url: None,
|
||||
}),
|
||||
Ok(None) => None,
|
||||
Err(e) => {
|
||||
@@ -105,6 +111,9 @@ fn pg_result_to_search(p: &SemanticSearchResult) -> SearchResult {
|
||||
summary: Some(p.summary.clone()),
|
||||
metadata: p.metadata.clone(),
|
||||
similarity: p.similarity,
|
||||
file_name: None,
|
||||
serve_url: None,
|
||||
thumbnail_url: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,7 +165,10 @@ pub async fn smart_search(
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
.collect()
|
||||
} else {
|
||||
let qdrant_hits = qdrant.search(&embedding, fetch_limit).await.unwrap_or_default();
|
||||
let qdrant_hits = qdrant
|
||||
.search(&embedding, fetch_limit)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
qdrant_hits
|
||||
.into_iter()
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
@@ -264,7 +276,11 @@ pub async fn smart_search(
|
||||
.and_modify(|e| {
|
||||
e.score = e.score.max(*score);
|
||||
e.semantic_score = Some(*score);
|
||||
e.source = format!("{}_{}", e.source.strip_prefix("semantic+").unwrap_or(&e.source), "semantic");
|
||||
e.source = format!(
|
||||
"{}_{}",
|
||||
e.source.strip_prefix("semantic+").unwrap_or(&e.source),
|
||||
"semantic"
|
||||
);
|
||||
})
|
||||
.or_insert(MergedResult {
|
||||
file_uuid: file_uuid.clone(),
|
||||
@@ -346,17 +362,36 @@ pub async fn smart_search(
|
||||
|
||||
// Sort by score descending (score-based merge)
|
||||
let mut ranked: Vec<&MergedResult> = merged.values().collect();
|
||||
ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
|
||||
ranked.sort_by(|a, b| {
|
||||
b.score
|
||||
.partial_cmp(&a.score)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
// 6. Enrich top results from PG and build final response
|
||||
let query_lower = req.query.to_lowercase();
|
||||
let mut final_results = Vec::new();
|
||||
for mr in ranked.iter().take(limit) {
|
||||
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
|
||||
if let Some(pg) = db
|
||||
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
{
|
||||
// Keyword filter
|
||||
let summary_lower = pg.summary.to_lowercase();
|
||||
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
|
||||
|
||||
// Check that the summary contains every query word (whole-word match)
|
||||
let text_match = !pg.summary.is_empty() && {
|
||||
let bordered = format!(" {} ", summary_lower);
|
||||
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
|
||||
};
|
||||
|
||||
if !text_match {
|
||||
continue;
|
||||
}
|
||||
|
||||
final_results.push(SearchResult {
|
||||
id: 0,
|
||||
file_uuid: pg.file_uuid.clone(),
|
||||
@@ -371,10 +406,52 @@ pub async fn smart_search(
|
||||
summary: Some(pg.summary),
|
||||
metadata: pg.metadata.clone(),
|
||||
similarity: Some(mr.score),
|
||||
file_name: None,
|
||||
serve_url: None,
|
||||
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
|
||||
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
|
||||
fu, mr.chunk_id
|
||||
)),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Trim to requested limit
|
||||
final_results.truncate(limit);
|
||||
|
||||
// 7. Enrich results with file_name and serve_url from videos table
|
||||
if !final_results.is_empty() {
|
||||
let v_table = crate::core::db::schema::table_name("videos");
|
||||
let file_uuids: Vec<String> = final_results
|
||||
.iter()
|
||||
.filter_map(|r| r.file_uuid.clone())
|
||||
.collect();
|
||||
let file_rows: Vec<(String, String, String)> = sqlx::query_as(&format!(
|
||||
"SELECT file_uuid::text, file_name, file_path FROM {} WHERE file_uuid = ANY($1)",
|
||||
v_table
|
||||
))
|
||||
.bind(&file_uuids)
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let file_map: std::collections::HashMap<String, (String, String)> = file_rows
|
||||
.into_iter()
|
||||
.map(|(uuid, name, path)| (uuid, (name, path)))
|
||||
.collect();
|
||||
let storage_root = crate::core::config::STORAGE_ROOT.as_str();
|
||||
let serve_base = crate::core::config::SERVE_BASE_URL.as_str();
|
||||
for r in &mut final_results {
|
||||
if let Some(ref uuid) = r.file_uuid {
|
||||
if let Some((name, path)) = file_map.get(uuid) {
|
||||
r.file_name = Some(name.clone());
|
||||
if let Some(relative) = path.strip_prefix(storage_root) {
|
||||
r.serve_url = Some(format!("{}{}", serve_base, relative));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine strategy string
|
||||
let mut strategies = vec!["semantic"];
|
||||
if !keyword_results.is_empty() {
|
||||
@@ -400,4 +477,4 @@ pub async fn smart_search(
|
||||
|
||||
/// Builds the router for the search API.
///
/// Registers a single route: `POST /api/v1/search/smart`, handled by
/// `smart_search`. The returned router is typed over the application's
/// shared `AppState`.
pub fn search_routes() -> Router<crate::api::types::AppState> {
|
||||
Router::new().route("/api/v1/search/smart", post(smart_search))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user