feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)

Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs. 6701 from the PostgreSQL path)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- Falls back to PostgreSQL when Qdrant returns no data
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
Accusys
2026-06-21 04:47:49 +08:00
parent 0afc70fc5b
commit 2cfcfdd1af
2926 changed files with 8311054 additions and 1390 deletions
+44 -15
View File
@@ -160,6 +160,11 @@ pub async fn smart_search(
.search_in_uuid(&embedding, file_uuid, fetch_limit)
.await
.unwrap_or_default();
tracing::info!(
"Smart search: Qdrant search_in_uuid for {} returned {} hits",
file_uuid,
qdrant_hits.len()
);
qdrant_hits
.into_iter()
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
@@ -169,6 +174,10 @@ pub async fn smart_search(
.search(&embedding, fetch_limit)
.await
.unwrap_or_default();
tracing::info!(
"Smart search: Qdrant search (no uuid filter) returned {} hits",
qdrant_hits.len()
);
qdrant_hits
.into_iter()
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
@@ -371,27 +380,45 @@ pub async fn smart_search(
// 6. Enrich top results from PG and build final response
let query_lower = req.query.to_lowercase();
let mut final_results = Vec::new();
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
for mr in ranked.iter().take(limit * 3) {
// 取更多結果以便過濾
if let Some(pg) = db
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
.await
.ok()
.flatten()
{
// 關鍵字過濾
// 關鍵字過濾: CJK 用子字串匹配,英文用單詞邊界匹配
let summary_lower = pg.summary.to_lowercase();
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
// 檢查是否包含所有查詢詞(完整單詞)
let query_words: Vec<String> = query_lower
.split_whitespace()
.map(|s| s.to_string())
.collect();
let text_match = !pg.summary.is_empty() && {
let bordered = format!(" {} ", summary_lower);
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
let has_cjk = |s: &str| -> bool {
s.chars().any(|c| {
('\u{4E00}'..='\u{9FFF}').contains(&c)
|| ('\u{3040}'..='\u{309F}').contains(&c)
|| ('\u{30A0}'..='\u{30FF}').contains(&c)
|| ('\u{AC00}'..='\u{D7AF}').contains(&c)
})
};
if has_cjk(&query_lower) || has_cjk(&summary_lower) {
query_words.iter().all(|w| summary_lower.contains(w))
} else {
let bordered = format!(" {} ", summary_lower);
query_words
.iter()
.all(|w| bordered.contains(&format!(" {} ", w)))
}
};
if !text_match {
if !text_match && mr.semantic_score.is_none() {
continue;
}
final_results.push(SearchResult {
id: 0,
file_uuid: pg.file_uuid.clone(),
@@ -408,17 +435,19 @@ pub async fn smart_search(
similarity: Some(mr.score),
file_name: None,
serve_url: None,
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
fu, mr.chunk_id
)),
thumbnail_url: pg.file_uuid.as_ref().map(|fu| {
format!(
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
fu, mr.chunk_id
)
}),
});
}
}
// Trim to requested limit
final_results.truncate(limit);
// 7. Enrich results with file_name and serve_url from videos table
if !final_results.is_empty() {
let v_table = crate::core::db::schema::table_name("videos");