diff --git a/Cargo.toml b/Cargo.toml index 48edaef..dd6b25c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,3 +110,6 @@ path = "src/bin/migrate_chinese_text.rs" [[bin]] name = "test_bm25_simple" path = "src/bin/test_bm25_simple.rs" + +[build-dependencies] +chrono = "0.4" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..83882e8 --- /dev/null +++ b/build.rs @@ -0,0 +1,19 @@ +use chrono::Local; +use std::env; + +fn main() { + let now = Local::now(); + let build_time = now.format("%Y-%m-%d %H:%M:%S").to_string(); + + // Get version from Cargo.toml + let version = env!("CARGO_PKG_VERSION"); + let full_version = format!("{} (build: {})", version, build_time); + + // Set build-time environment variables + println!("cargo:rustc-env=BUILD_VERSION={}", full_version); + println!("cargo:rustc-env=BUILD_TIME={}", build_time); + println!("cargo:rustc-env=VERSION={}", version); + + // Also print for debugging + println!("cargo:warning=Building version: {}", full_version); +} diff --git a/src/api/server.rs b/src/api/server.rs index 258066a..6b33936 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -12,8 +12,10 @@ use std::time::Instant; use crate::core::cache::{keys, MongoCache, RedisCache}; use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient, VideoRecord, VideoStatus}; +use crate::core::text::tokenizer::tokenize_chinese_text; use crate::{Embedder, FileManager}; +use super::face_recognition; use super::middleware::api_key_validation; #[derive(Debug, Serialize)] @@ -56,7 +58,7 @@ fn get_uptime_ms() -> u64 { } #[derive(Clone)] -struct AppState { +pub struct AppState { embedder: std::sync::Arc, #[allow(dead_code)] embedder_model: String, @@ -238,13 +240,14 @@ struct HybridSearchResponse { } fn extract_text_from_content(content: &serde_json::Value) -> String { - content + let raw_text = content .get("data") .and_then(|data| data.get("text")) .and_then(|v| v.as_str()) .or_else(|| content.get("text").and_then(|v| v.as_str())) - .unwrap_or("") - .to_string() + .unwrap_or(""); + + tokenize_chinese_text(raw_text) } fn extract_title_from_content(content: &serde_json::Value) -> String { @@ -296,7 +299,7 @@ async fn health(State(state): State) -> Json { if let Ok(Some(status)) = state.redis_cache.get_health().await { return Json(HealthResponse { status, - version: env!("CARGO_PKG_VERSION").to_string(), + version: env!("BUILD_VERSION").to_string(), uptime_ms: get_uptime_ms(), }); } @@ -306,7 +309,7 @@ async fn health(State(state): State) -> Json { Json(HealthResponse { status, - version: env!("CARGO_PKG_VERSION").to_string(), + version: env!("BUILD_VERSION").to_string(), uptime_ms: get_uptime_ms(), }) } @@ -326,7 +329,7 @@ async fn health_detailed(State(state): State) -> Json, + Json(req): Json, +) -> Result, StatusCode> { + let limit = req.limit.unwrap_or(10); + let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); + let cache_key = keys::bm25_search(&query_hash); + let ttl = state.mongo_cache.ttl_search(); + + let response = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_SEARCH, || async { + let pg = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + let bm25_results = pg + .search_bm25(&req.query, req.uuid.as_deref(), limit) + .await?; + + let results: Vec = bm25_results + .into_iter() + .map(|r| SearchResult { + uuid: r.uuid, + chunk_id: r.chunk_id, + chunk_type: r.chunk_type, + start_time: r.start_time, + end_time: r.end_time, + text: r.text, + score: r.bm25_score, + }) + .collect(); + + Ok::(SearchResponse { + results, + query: req.query.clone(), + }) + }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(response)) +} + +async fn n8n_search_bm25( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let limit = req.limit.unwrap_or(10); + let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); + let cache_key = keys::n8n_bm25_search(&query_hash); + let ttl = state.mongo_cache.ttl_search(); + + let response = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_N8N_SEARCH, || async { + let pg = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + let bm25_results = pg + .search_bm25(&req.query, req.uuid.as_deref(), limit) + .await?; + + let mut hits = Vec::new(); + + for r in bm25_results { + if let Some(chunk) = pg + .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) + .await + .ok() + .flatten() + { + let text = r.text; // Use text from BM25 result + let title = extract_title_from_content(&chunk.content); + + let file_path = if chunk.uuid.is_empty() { + None + } else { + let video = pg.get_video_by_uuid(&chunk.uuid).await.ok().flatten(); + video.map(|v| v.file_path) + }; + + hits.push(N8nSearchHit { + id: chunk.chunk_id.clone(), + vid: chunk.uuid.clone(), + start: chunk.start_time().seconds(), + end: chunk.end_time().seconds(), + title: if title.is_empty() { + format!("Chunk {}", chunk.chunk_id) + } else { + title + }, + text, + score: r.bm25_score, + file_path, + }); + } + } + + Ok::(N8nSearchResponse { + query: req.query.clone(), + count: hits.len(), + hits, + }) + }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(response)) +} + async fn hybrid_search( State(state): State, Json(req): Json, @@ -1430,15 +1545,18 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { .route("/api/v1/register", post(register)) .route("/api/v1/unregister", post(unregister)) .route("/api/v1/probe", post(probe)) + .route("/api/v1/search/hybrid", post(hybrid_search)) .route("/api/v1/search", post(search)) .route("/api/v1/n8n/search", post(n8n_search)) - .route("/api/v1/search/hybrid", post(hybrid_search)) + .route("/api/v1/search/bm25", post(search_bm25)) + .route("/api/v1/n8n/search/bm25", post(n8n_search_bm25)) .route("/api/v1/lookup", get(lookup)) .route("/api/v1/videos", get(list_videos)) .route("/api/v1/progress/:uuid", get(get_progress)) .route("/api/v1/jobs", get(list_jobs)) .route("/api/v1/jobs/:uuid", get(get_job)) .route("/api/v1/config/cache", post(cache_toggle)) + .merge(face_recognition::face_recognition_routes()) .layer(axum::middleware::from_fn_with_state( state.api_state.clone(), api_key_validation, diff --git a/src/main.rs b/src/main.rs index 1b3c2a6..d1fb061 100644 --- a/src/main.rs +++ b/src/main.rs @@ -625,6 +625,7 @@ async fn process_caption_module( #[derive(Parser)] #[command(name = "momentry")] #[command(about = "Digital asset management system with video analysis and RAG")] +#[command(version = env!("BUILD_VERSION"))] struct Cli { #[command(subcommand)] command: Commands, diff --git a/src/playground.rs b/src/playground.rs index 285f2ef..d0a810a 100644 --- a/src/playground.rs +++ b/src/playground.rs @@ -622,8 +622,9 @@ async fn process_caption_module( } #[derive(Parser)] -#[command(name = "momentry")] -#[command(about = "Digital asset management system with video analysis and RAG")] +#[command(name = "momentry_playground")] +#[command(about = "Momentry Development Server")] +#[command(version = env!("BUILD_VERSION"))] struct Cli { #[command(subcommand)] command: Commands,