feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration - build_co_occurrence_edges_from_qdrant() - Qdrant embeddings → frame grouping → YOLO objects - Result: 6679 edges (vs 6701 PostgreSQL) Phase 2.6.2: face_face_edges migration - build_face_face_edges_from_qdrant() - Qdrant embeddings → frame grouping → face pairs - mutual_gaze detection preserved - Result: 6 edges (exact match) Phase 2.6.3: speaker_face_edges migration - build_speaker_face_edges_from_qdrant() - Qdrant embeddings → trace_id frame ranges - SPEAKS_AS edge creation Architecture: - All edges use Qdrant payload (no face_detections queries) - PostgreSQL fallback for empty Qdrant - Estimated 3.6x performance improvement Testing: - Playground (3003): ✓ All Phase 2.6 logs verified - Edge counts: ✓ Close match with PostgreSQL - Fallback: ✓ Working Docs: - docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md - docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
+44
-15
@@ -160,6 +160,11 @@ pub async fn smart_search(
|
||||
.search_in_uuid(&embedding, file_uuid, fetch_limit)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
tracing::info!(
|
||||
"Smart search: Qdrant search_in_uuid for {} returned {} hits",
|
||||
file_uuid,
|
||||
qdrant_hits.len()
|
||||
);
|
||||
qdrant_hits
|
||||
.into_iter()
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
@@ -169,6 +174,10 @@ pub async fn smart_search(
|
||||
.search(&embedding, fetch_limit)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
tracing::info!(
|
||||
"Smart search: Qdrant search (no uuid filter) returned {} hits",
|
||||
qdrant_hits.len()
|
||||
);
|
||||
qdrant_hits
|
||||
.into_iter()
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
@@ -371,27 +380,45 @@ pub async fn smart_search(
|
||||
// 6. Enrich top results from PG and build final response
|
||||
let query_lower = req.query.to_lowercase();
|
||||
let mut final_results = Vec::new();
|
||||
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
|
||||
for mr in ranked.iter().take(limit * 3) {
|
||||
// 取更多結果以便過濾
|
||||
if let Some(pg) = db
|
||||
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
{
|
||||
// 關鍵字過濾
|
||||
// 關鍵字過濾: CJK 用子字串匹配,英文用單詞邊界匹配
|
||||
let summary_lower = pg.summary.to_lowercase();
|
||||
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
|
||||
|
||||
// 檢查是否包含所有查詢詞(完整單詞)
|
||||
let query_words: Vec<String> = query_lower
|
||||
.split_whitespace()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
|
||||
let text_match = !pg.summary.is_empty() && {
|
||||
let bordered = format!(" {} ", summary_lower);
|
||||
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
|
||||
let has_cjk = |s: &str| -> bool {
|
||||
s.chars().any(|c| {
|
||||
('\u{4E00}'..='\u{9FFF}').contains(&c)
|
||||
|| ('\u{3040}'..='\u{309F}').contains(&c)
|
||||
|| ('\u{30A0}'..='\u{30FF}').contains(&c)
|
||||
|| ('\u{AC00}'..='\u{D7AF}').contains(&c)
|
||||
})
|
||||
};
|
||||
|
||||
if has_cjk(&query_lower) || has_cjk(&summary_lower) {
|
||||
query_words.iter().all(|w| summary_lower.contains(w))
|
||||
} else {
|
||||
let bordered = format!(" {} ", summary_lower);
|
||||
query_words
|
||||
.iter()
|
||||
.all(|w| bordered.contains(&format!(" {} ", w)))
|
||||
}
|
||||
};
|
||||
|
||||
if !text_match {
|
||||
|
||||
if !text_match && mr.semantic_score.is_none() {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
final_results.push(SearchResult {
|
||||
id: 0,
|
||||
file_uuid: pg.file_uuid.clone(),
|
||||
@@ -408,17 +435,19 @@ pub async fn smart_search(
|
||||
similarity: Some(mr.score),
|
||||
file_name: None,
|
||||
serve_url: None,
|
||||
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
|
||||
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
|
||||
fu, mr.chunk_id
|
||||
)),
|
||||
thumbnail_url: pg.file_uuid.as_ref().map(|fu| {
|
||||
format!(
|
||||
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
|
||||
fu, mr.chunk_id
|
||||
)
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Trim to requested limit
|
||||
final_results.truncate(limit);
|
||||
|
||||
|
||||
// 7. Enrich results with file_name and serve_url from videos table
|
||||
if !final_results.is_empty() {
|
||||
let v_table = crate::core::db::schema::table_name("videos");
|
||||
|
||||
Reference in New Issue
Block a user