use anyhow::{Context, Result}; use momentry_core::core::db::{Database, PostgresDb, QdrantDb}; use momentry_core::Embedder; use std::env; use std::time::Instant; #[derive(Debug)] struct SearchComparison { query: String, bm25_results: usize, qdrant_results: usize, bm25_top_score: f32, qdrant_top_score: f32, bm25_time_ms: u128, qdrant_time_ms: u128, overlap_count: usize, } fn print_results( query: &str, bm25_results: &[momentry_core::core::db::postgres_db::Bm25Result], qdrant_results: &[momentry_core::core::db::SearchResult], limit: usize, ) { println!("\n=== 查詢: '{}' ===", query); println!( "BM25 結果 (共 {} 筆,顯示前 {} 筆):", bm25_results.len(), limit.min(bm25_results.len()) ); for (i, r) in bm25_results.iter().take(limit).enumerate() { println!( " {}. {} (uuid: {}, chunk_id: {})", i + 1, r.text.chars().take(60).collect::(), r.uuid, r.chunk_id ); println!( " 分數: {:.4}, 時間: {:.1}-{:.1}s, 類型: {}", r.bm25_score, r.start_time, r.end_time, r.chunk_type ); } println!( "\nQdrant 向量搜尋結果 (共 {} 筆,顯示前 {} 筆):", qdrant_results.len(), limit.min(qdrant_results.len()) ); for (i, r) in qdrant_results.iter().take(limit).enumerate() { println!(" {}. uuid: {}, chunk_id: {}", i + 1, r.uuid, r.chunk_id); println!(" 分數: {:.4}", r.score); } // 計算重疊 let bm25_ids: Vec = bm25_results .iter() .map(|r| format!("{}-{}", r.uuid, r.chunk_id)) .collect(); let qdrant_ids: Vec = qdrant_results .iter() .map(|r| format!("{}-{}", r.uuid, r.chunk_id)) .collect(); let overlap: Vec<&String> = bm25_ids .iter() .filter(|id| qdrant_ids.contains(id)) .collect(); println!( "\n結果重疊: {}/{} (BM25 與 Qdrant 共同返回)", overlap.len(), bm25_results.len().max(qdrant_results.len()) ); if !overlap.is_empty() { println!("重疊的 chunk IDs: {:?}", overlap); } } #[tokio::main] async fn main() -> Result<()> { // 設定環境變數 env::set_var("RUST_LOG", "info"); env::set_var("QDRANT_URL", "http://localhost:6333"); env::set_var("QDRANT_API_KEY", "Test3200Test3200Test3200"); env::set_var("QDRANT_COLLECTION", "momentry_rule1"); println!("=== BM25 與 Qdrant 搜尋比較測試 ===\n"); // 初始化元件 println!("初始化元件..."); let embedder = Embedder::new("nomic-embed-text-v2-moe:latest".to_string()); let pg = PostgresDb::init() .await .context("Failed to initialize PostgreSQL database")?; let qdrant = QdrantDb::new(); // 測試查詢清單 let test_queries = vec![ // 英文查詢 ("telephone", Some("384b0ff44aaaa1f1")), // Charade 電影 ("money", Some("384b0ff44aaaa1f1")), ("gold", Some("384b0ff44aaaa1f1")), // 中文查詢 ("工作", Some("9760d0820f0cf9a7")), // ExaSAN 影片 ("加快速度", Some("9760d0820f0cf9a7")), ("聲音", Some("9760d0820f0cf9a7")), // 全域查詢(無 uuid 限制) ("computer", None), ("technology", None), ]; let limit = 10; let mut comparisons = Vec::new(); for (query_str, uuid_opt) in test_queries { let query = query_str.to_string(); let uuid = uuid_opt.map(|s| s.to_string()); println!( "\n🔍 測試查詢: '{}' {}", query, uuid_opt .map(|u| format!("(uuid: {})", u)) .unwrap_or_default() ); // BM25 搜尋 let bm25_start = Instant::now(); let bm25_results = pg.search_bm25(&query, uuid_opt, limit).await?; let bm25_time = bm25_start.elapsed(); // Qdrant 向量搜尋 let qdrant_start = Instant::now(); let query_vector = embedder.embed_query(&query).await?; let qdrant_results = if let Some(ref uuid) = uuid { qdrant.search_in_uuid(&query_vector, uuid, limit).await? } else { qdrant.search(&query_vector, limit).await? }; let qdrant_time = qdrant_start.elapsed(); // 計算重疊 let bm25_ids: Vec = bm25_results .iter() .map(|r| format!("{}-{}", r.uuid, r.chunk_id)) .collect(); let qdrant_ids: Vec = qdrant_results .iter() .map(|r| format!("{}-{}", r.uuid, r.chunk_id)) .collect(); let overlap_count = bm25_ids.iter().filter(|id| qdrant_ids.contains(id)).count(); // 儲存比較結果 let comparison = SearchComparison { query: query.clone(), bm25_results: bm25_results.len(), qdrant_results: qdrant_results.len(), bm25_top_score: bm25_results.first().map(|r| r.bm25_score).unwrap_or(0.0), qdrant_top_score: qdrant_results.first().map(|r| r.score).unwrap_or(0.0), bm25_time_ms: bm25_time.as_millis(), qdrant_time_ms: qdrant_time.as_millis(), overlap_count, }; comparisons.push(comparison); // 顯示詳細結果 print_results(&query, &bm25_results, &qdrant_results, 5); // 顯示效能比較 println!("\n⏱️ 效能比較:"); println!(" BM25 搜尋時間: {}ms", bm25_time.as_millis()); println!( " Qdrant 搜尋時間: {}ms (含向量嵌入時間)", qdrant_time.as_millis() ); } // 顯示總結比較表 println!("\n📊 搜尋比較總結"); println!( "{:<15} {:<6} {:<6} {:<8} {:<8} {:<6} {:<6} {:<6}", "查詢", "BM25數", "QD數", "BM25分", "QD分", "BM25ms", "QDms", "重疊" ); println!("{}", "-".repeat(80)); for comp in &comparisons { println!( "{:<15} {:<6} {:<6} {:<8.4} {:<8.4} {:<6} {:<6} {:<6}/{}", &comp.query[..15.min(comp.query.len())], comp.bm25_results, comp.qdrant_results, comp.bm25_top_score, comp.qdrant_top_score, comp.bm25_time_ms, comp.qdrant_time_ms, comp.overlap_count, comp.bm25_results.max(comp.qdrant_results) ); } // 分析統計 let total_queries = comparisons.len(); let bm25_faster = comparisons .iter() .filter(|c| c.bm25_time_ms < c.qdrant_time_ms) .count(); let avg_overlap = comparisons .iter() .map(|c| c.overlap_count as f32 / c.bm25_results.max(c.qdrant_results).max(1) as f32) .sum::() / total_queries as f32 * 100.0; println!("\n📈 統計分析:"); println!(" • 總測試查詢數: {}", total_queries); println!( " • BM25 較快的查詢: {}/{} ({:.1}%)", bm25_faster, total_queries, bm25_faster as f32 / total_queries as f32 * 100.0 ); println!(" • 平均結果重疊率: {:.1}%", avg_overlap); Ok(()) }