Core search changes: - Replace RRF with score-based merge (max of semantic/keyword/identity) - Add video title ILIKE search for brand/name queries (score 0.9) - Add /api/v1/search/llm-smart endpoint with Gemma 4 re-ranking - Fix LLM JSON parsing (markdown fences, empty responses) Infrastructure: - Rebuild Qdrant collection (clear 347K contaminated points) - Add dotenv loading to main.rs for config parity - Implement store_pre_chunk in postgres_db.rs Pipeline module (WordPress): - store-asrx, rule1, vectorize, phase1, complete endpoints - CLI commands for pipeline operations Docs: - SEARCH_SCORE_IMPROVEMENT.md (score-based merge proposal)
152 lines
5.0 KiB
Rust
152 lines
5.0 KiB
Rust
use std::time::Duration;
|
|
|
|
use axum::Router;
|
|
use tokio::time::timeout;
|
|
use tower_http::cors::{Any, CorsLayer};
|
|
|
|
use crate::core::cache::{MongoCache, RedisCache};
|
|
use crate::core::db::{Database, PostgresDb, QdrantDb};
|
|
use crate::Embedder;
|
|
|
|
use super::agent_api;
|
|
use super::agent_search;
|
|
use super::auth;
|
|
use super::docs;
|
|
use super::files;
|
|
use super::five_w1h_agent_api;
|
|
use super::health;
|
|
use super::identities;
|
|
use super::identity_agent_api;
|
|
use super::identity_api;
|
|
use super::identity_binding;
|
|
use super::llm_search;
|
|
use super::pipeline;
|
|
use super::media_api;
|
|
use super::middleware::unified_auth;
|
|
use super::processing;
|
|
use super::scan;
|
|
use super::search::search_routes;
|
|
use super::tmdb_api;
|
|
use super::trace_agent_api;
|
|
use super::types::AppState;
|
|
use super::universal_search::universal_search_routes;
|
|
|
|
pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
|
health::init_server_state(host, port);
|
|
|
|
let embedder = std::sync::Arc::new(Embedder::new("embeddinggemma-300m".to_string()));
|
|
|
|
// ── ⚠️ WARNING: DO NOT move MongoCache::init() back to critical path ──
|
|
//
|
|
// MongoDB is ONLY a cache layer — if unavailable, the server MUST still
|
|
// start with Redis cache alone. This keeps 3002 bootable on machines
|
|
// without MongoDB installed. If you add a new dependency here, ask:
|
|
// "Can this service degrade gracefully if the dependant is missing?"
|
|
//
|
|
// See also: MongoCache::disabled() in mongo_cache.rs
|
|
let mongo_cache = match timeout(Duration::from_secs(5), MongoCache::init()).await {
|
|
Ok(Ok(cache)) => cache,
|
|
Ok(Err(e)) => {
|
|
tracing::warn!("MongoDB cache unavailable (continuing without): {e}");
|
|
MongoCache::disabled().await
|
|
}
|
|
Err(_) => {
|
|
tracing::warn!("MongoDB init timed out (continuing without cache)");
|
|
MongoCache::disabled().await
|
|
}
|
|
};
|
|
let redis_cache = RedisCache::new()?;
|
|
let db = PostgresDb::init().await?;
|
|
|
|
// Run migrations (create identity_history table if not exists)
|
|
PostgresDb::run_migrations(db.pool()).await?;
|
|
|
|
let schema_health = health::check_schema_migrations(db.pool()).await;
|
|
if schema_health.ok {
|
|
tracing::info!(
|
|
"[SCHEMA] All {}/{} required migrations applied ✓",
|
|
schema_health.required.len(),
|
|
schema_health.required.len()
|
|
);
|
|
} else if !schema_health.table_exists {
|
|
tracing::warn!("[SCHEMA] schema_migrations table not found!");
|
|
} else {
|
|
let missing: Vec<&str> = schema_health
|
|
.required
|
|
.iter()
|
|
.filter(|req| {
|
|
!schema_health
|
|
.applied
|
|
.iter()
|
|
.any(|app| app.filename == req.filename && app.checksum == req.checksum)
|
|
})
|
|
.map(|m| m.filename.as_str())
|
|
.collect();
|
|
tracing::warn!(
|
|
"[SCHEMA] {}/{} migrations match. Missing: {}",
|
|
schema_health.applied.len(),
|
|
schema_health.required.len(),
|
|
missing.join(", ")
|
|
);
|
|
}
|
|
|
|
let db = std::sync::Arc::new(db);
|
|
let api_state = super::middleware::ApiState { db: db.clone() };
|
|
|
|
let qdrant = std::sync::Arc::new(QdrantDb::new());
|
|
let state = AppState {
|
|
db,
|
|
qdrant,
|
|
embedder,
|
|
embedder_model: "nomic-embed-text-v2-moe:latest".to_string(),
|
|
mongo_cache,
|
|
redis_cache,
|
|
api_state,
|
|
};
|
|
|
|
let protected_routes = Router::new()
|
|
.merge(files::file_routes())
|
|
.merge(scan::scan_routes())
|
|
.merge(identity_binding::identity_binding_routes())
|
|
.merge(identities::identity_routes())
|
|
.merge(tmdb_api::tmdb_routes())
|
|
.merge(identity_api::identity_routes())
|
|
.merge(agent_api::agent_routes())
|
|
.merge(agent_search::agent_search_routes())
|
|
.merge(processing::processing_routes())
|
|
.merge(identity_agent_api::identity_agent_routes())
|
|
.merge(five_w1h_agent_api::five_w1h_agent_routes())
|
|
.merge(media_api::bbox_routes())
|
|
.merge(trace_agent_api::trace_agent_routes())
|
|
.merge(search_routes())
|
|
.merge(llm_search::llm_smart_routes())
|
|
.merge(universal_search_routes())
|
|
.merge(pipeline::pipeline_routes())
|
|
.layer(axum::middleware::from_fn_with_state(
|
|
state.api_state.clone(),
|
|
unified_auth,
|
|
))
|
|
.with_state(state.clone());
|
|
|
|
let cors = CorsLayer::new()
|
|
.allow_origin(Any)
|
|
.allow_methods(Any)
|
|
.allow_headers(Any);
|
|
|
|
let app = Router::new()
|
|
.merge(auth::auth_routes())
|
|
.merge(health::health_routes())
|
|
.merge(docs::doc_routes())
|
|
.merge(protected_routes)
|
|
.layer(cors)
|
|
.with_state(state);
|
|
|
|
let addr: std::net::SocketAddr = format!("{}:{}", host, port).parse().unwrap();
|
|
tracing::info!("Starting API server at http://{}", addr);
|
|
|
|
let listener = tokio::net::TcpListener::bind(addr).await?;
|
|
axum::serve(listener, app).await?;
|
|
|
|
Ok(())
|
|
}
|