- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
228 lines
7.1 KiB
Rust
228 lines
7.1 KiB
Rust
use anyhow::{Context, Result};
|
|
use momentry_core::core::db::{Database, PostgresDb, QdrantDb};
|
|
use momentry_core::Embedder;
|
|
use std::env;
|
|
use std::time::Instant;
|
|
|
|
/// Per-query summary of one BM25 vs. Qdrant comparison run.
///
/// One record is collected for every test query and later rendered as a row
/// of the summary table and folded into the aggregate statistics.
#[derive(Debug)]
struct SearchComparison {
    /// The query text that was searched for.
    query: String,
    /// Number of hits returned by the BM25 search.
    bm25_results: usize,
    /// Number of hits returned by the Qdrant vector search.
    qdrant_results: usize,
    /// Score of the first BM25 hit, or `0.0` when there were no hits.
    bm25_top_score: f32,
    /// Score of the first Qdrant hit, or `0.0` when there were no hits.
    qdrant_top_score: f32,
    /// Wall-clock duration of the BM25 search, in milliseconds.
    bm25_time_ms: u128,
    /// Wall-clock duration of the Qdrant search, in milliseconds.
    qdrant_time_ms: u128,
    /// Number of BM25 hits whose `uuid-chunk_id` key also appears in the Qdrant hits.
    overlap_count: usize,
}
|
|
|
|
fn print_results(
|
|
query: &str,
|
|
bm25_results: &[momentry_core::core::db::postgres_db::Bm25Result],
|
|
qdrant_results: &[momentry_core::core::db::SearchResult],
|
|
limit: usize,
|
|
) {
|
|
println!("\n=== 查詢: '{}' ===", query);
|
|
println!(
|
|
"BM25 結果 (共 {} 筆,顯示前 {} 筆):",
|
|
bm25_results.len(),
|
|
limit.min(bm25_results.len())
|
|
);
|
|
for (i, r) in bm25_results.iter().take(limit).enumerate() {
|
|
println!(
|
|
" {}. {} (uuid: {}, chunk_id: {})",
|
|
i + 1,
|
|
r.text.chars().take(60).collect::<String>(),
|
|
r.uuid,
|
|
r.chunk_id
|
|
);
|
|
println!(
|
|
" 分數: {:.4}, 時間: {:.1}-{:.1}s, 類型: {}",
|
|
r.bm25_score, r.start_time, r.end_time, r.chunk_type
|
|
);
|
|
}
|
|
|
|
println!(
|
|
"\nQdrant 向量搜尋結果 (共 {} 筆,顯示前 {} 筆):",
|
|
qdrant_results.len(),
|
|
limit.min(qdrant_results.len())
|
|
);
|
|
for (i, r) in qdrant_results.iter().take(limit).enumerate() {
|
|
println!(" {}. uuid: {}, chunk_id: {}", i + 1, r.uuid, r.chunk_id);
|
|
println!(" 分數: {:.4}", r.score);
|
|
}
|
|
|
|
// 計算重疊
|
|
let bm25_ids: Vec<String> = bm25_results
|
|
.iter()
|
|
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
|
.collect();
|
|
let qdrant_ids: Vec<String> = qdrant_results
|
|
.iter()
|
|
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
|
.collect();
|
|
|
|
let overlap: Vec<&String> = bm25_ids
|
|
.iter()
|
|
.filter(|id| qdrant_ids.contains(id))
|
|
.collect();
|
|
|
|
println!(
|
|
"\n結果重疊: {}/{} (BM25 與 Qdrant 共同返回)",
|
|
overlap.len(),
|
|
bm25_results.len().max(qdrant_results.len())
|
|
);
|
|
if !overlap.is_empty() {
|
|
println!("重疊的 chunk IDs: {:?}", overlap);
|
|
}
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
// 設定環境變數
|
|
env::set_var("RUST_LOG", "info");
|
|
env::set_var("QDRANT_URL", "http://localhost:6333");
|
|
env::set_var("QDRANT_API_KEY", "Test3200Test3200Test3200");
|
|
env::set_var("QDRANT_COLLECTION", "momentry_rule1");
|
|
|
|
println!("=== BM25 與 Qdrant 搜尋比較測試 ===\n");
|
|
|
|
// 初始化元件
|
|
println!("初始化元件...");
|
|
let embedder = Embedder::new("nomic-embed-text-v2-moe:latest".to_string());
|
|
let pg = PostgresDb::init()
|
|
.await
|
|
.context("Failed to initialize PostgreSQL database")?;
|
|
let qdrant = QdrantDb::new();
|
|
|
|
// 測試查詢清單
|
|
let test_queries = vec![
|
|
// 英文查詢
|
|
("telephone", Some("384b0ff44aaaa1f1")), // Charade 電影
|
|
("money", Some("384b0ff44aaaa1f1")),
|
|
("gold", Some("384b0ff44aaaa1f1")),
|
|
// 中文查詢
|
|
("工作", Some("9760d0820f0cf9a7")), // ExaSAN 影片
|
|
("加快速度", Some("9760d0820f0cf9a7")),
|
|
("聲音", Some("9760d0820f0cf9a7")),
|
|
// 全域查詢(無 uuid 限制)
|
|
("computer", None),
|
|
("technology", None),
|
|
];
|
|
|
|
let limit = 10;
|
|
let mut comparisons = Vec::new();
|
|
|
|
for (query_str, uuid_opt) in test_queries {
|
|
let query = query_str.to_string();
|
|
let uuid = uuid_opt.map(|s| s.to_string());
|
|
|
|
println!(
|
|
"\n🔍 測試查詢: '{}' {}",
|
|
query,
|
|
uuid_opt
|
|
.map(|u| format!("(uuid: {})", u))
|
|
.unwrap_or_default()
|
|
);
|
|
|
|
// BM25 搜尋
|
|
let bm25_start = Instant::now();
|
|
let bm25_results = pg.search_bm25(&query, uuid_opt, limit).await?;
|
|
let bm25_time = bm25_start.elapsed();
|
|
|
|
// Qdrant 向量搜尋
|
|
let qdrant_start = Instant::now();
|
|
let query_vector = embedder.embed_query(&query).await?;
|
|
let qdrant_results = if let Some(ref uuid) = uuid {
|
|
qdrant.search_in_uuid(&query_vector, uuid, limit).await?
|
|
} else {
|
|
qdrant.search(&query_vector, limit).await?
|
|
};
|
|
let qdrant_time = qdrant_start.elapsed();
|
|
|
|
// 計算重疊
|
|
let bm25_ids: Vec<String> = bm25_results
|
|
.iter()
|
|
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
|
.collect();
|
|
let qdrant_ids: Vec<String> = qdrant_results
|
|
.iter()
|
|
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
|
.collect();
|
|
|
|
let overlap_count = bm25_ids.iter().filter(|id| qdrant_ids.contains(id)).count();
|
|
|
|
// 儲存比較結果
|
|
let comparison = SearchComparison {
|
|
query: query.clone(),
|
|
bm25_results: bm25_results.len(),
|
|
qdrant_results: qdrant_results.len(),
|
|
bm25_top_score: bm25_results.first().map(|r| r.bm25_score).unwrap_or(0.0),
|
|
qdrant_top_score: qdrant_results.first().map(|r| r.score).unwrap_or(0.0),
|
|
bm25_time_ms: bm25_time.as_millis(),
|
|
qdrant_time_ms: qdrant_time.as_millis(),
|
|
overlap_count,
|
|
};
|
|
comparisons.push(comparison);
|
|
|
|
// 顯示詳細結果
|
|
print_results(&query, &bm25_results, &qdrant_results, 5);
|
|
|
|
// 顯示效能比較
|
|
println!("\n⏱️ 效能比較:");
|
|
println!(" BM25 搜尋時間: {}ms", bm25_time.as_millis());
|
|
println!(
|
|
" Qdrant 搜尋時間: {}ms (含向量嵌入時間)",
|
|
qdrant_time.as_millis()
|
|
);
|
|
}
|
|
|
|
// 顯示總結比較表
|
|
println!("\n📊 搜尋比較總結");
|
|
println!(
|
|
"{:<15} {:<6} {:<6} {:<8} {:<8} {:<6} {:<6} {:<6}",
|
|
"查詢", "BM25數", "QD數", "BM25分", "QD分", "BM25ms", "QDms", "重疊"
|
|
);
|
|
println!("{}", "-".repeat(80));
|
|
|
|
for comp in &comparisons {
|
|
println!(
|
|
"{:<15} {:<6} {:<6} {:<8.4} {:<8.4} {:<6} {:<6} {:<6}/{}",
|
|
&comp.query[..15.min(comp.query.len())],
|
|
comp.bm25_results,
|
|
comp.qdrant_results,
|
|
comp.bm25_top_score,
|
|
comp.qdrant_top_score,
|
|
comp.bm25_time_ms,
|
|
comp.qdrant_time_ms,
|
|
comp.overlap_count,
|
|
comp.bm25_results.max(comp.qdrant_results)
|
|
);
|
|
}
|
|
|
|
// 分析統計
|
|
let total_queries = comparisons.len();
|
|
let bm25_faster = comparisons
|
|
.iter()
|
|
.filter(|c| c.bm25_time_ms < c.qdrant_time_ms)
|
|
.count();
|
|
let avg_overlap = comparisons
|
|
.iter()
|
|
.map(|c| c.overlap_count as f32 / c.bm25_results.max(c.qdrant_results).max(1) as f32)
|
|
.sum::<f32>()
|
|
/ total_queries as f32
|
|
* 100.0;
|
|
|
|
println!("\n📈 統計分析:");
|
|
println!(" • 總測試查詢數: {}", total_queries);
|
|
println!(
|
|
" • BM25 較快的查詢: {}/{} ({:.1}%)",
|
|
bm25_faster,
|
|
total_queries,
|
|
bm25_faster as f32 / total_queries as f32 * 100.0
|
|
);
|
|
println!(" • 平均結果重疊率: {:.1}%", avg_overlap);
|
|
|
|
Ok(())
|
|
}
|