Files
momentry_core/src/api/universal_search.rs
T

825 lines
26 KiB
Rust

//! Universal Search API
//! Unified search across chunks, frames, and persons.
use axum::{
extract::{Query, State},
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use crate::core::db::{schema, Database, PostgresDb};
/// JSON request body accepted by the universal search endpoint.
#[derive(Debug, Deserialize)]
pub struct UniversalSearchRequest {
/// Free-text search term. An empty string disables text matching in every sub-search.
pub query: String,
/// When set, restricts all sub-searches to rows belonging to this file.
pub file_uuid: Option<String>,
/// Result kinds to search. When empty (the serde default) the handler searches
/// "chunk", "frame" and "person".
#[serde(default)]
pub types: Vec<String>, // chunk, frame, person
/// `[start, end]` window. Chunk search requires `start_time >= start` and
/// `end_time <= end`; frame search requires the frame timestamp to lie inside
/// the window. The person sub-search does not use it.
pub time_range: Option<[f64; 2]>,
/// Optional structured filters; see [`SearchFilters`].
pub filters: Option<SearchFilters>,
/// Page number. Missing values default to 1 and values below 1 are treated as 1.
pub page: Option<usize>,
/// Results per page. Defaults to 20; the handler clamps it to 1..=200 when slicing.
pub page_size: Option<usize>,
/// Upper bound applied to the end index of the returned slice.
pub limit: Option<usize>,
/// Legacy start index. Only honored when `page` is absent.
pub offset: Option<usize>,
}
/// Optional structured filters carried by [`UniversalSearchRequest`].
///
/// Each field is applied only by the sub-searches noted on it; unset fields add no condition.
#[derive(Debug, Deserialize, Clone)]
pub struct SearchFilters {
/// Chunk search: substring match against the chunk `content` rendered as text.
/// Frame search: substring match against `face_results` rendered as text.
pub person_id: Option<String>,
/// Frame search: every listed class must appear (case-insensitively) in
/// `yolo_objects` rendered as text.
pub object_class: Option<Vec<String>>,
/// Frame search: case-insensitive substring match against `ocr_results` rendered as text.
pub ocr_text: Option<String>,
/// Frame search: `Some(true)` keeps only frames with a non-empty `face_results`
/// array. `Some(false)` and `None` add no condition.
pub has_face: Option<bool>,
/// Chunk search: exact match on `content->>'speaker_id'`.
pub speaker_id: Option<String>,
/// Restrict to one chunk_type, e.g. "sentence", "cut", "trace", "visual".
pub chunk_type: Option<String>,
/// Find trace chunks that overlap in time with the given trace_id
/// (matched against the `co_appearances` array of the chunk's `metadata`).
pub co_appears_with_trace_id: Option<i32>,
// Visual chunk filters
/// Chunk search: lower bound on `content->'metadata'->>'avg_confidence'`.
pub min_confidence: Option<f32>,
/// Chunk search: lower bound on the length of `content->'metadata'->'unique_classes'`.
pub min_unique_classes: Option<u32>,
/// Chunk search: lower bound on `content->'metadata'->>'spatial_density'`.
pub min_spatial_density: Option<f32>,
/// Chunk search: upper bound on `content->'metadata'->>'spatial_density'`.
pub max_spatial_density: Option<f32>,
/// Chunk search: keeps chunks whose `content->'keyframe_objects'` contains an
/// object with one of these `class_name` values (the per-class conditions are OR-ed).
pub required_object_classes: Option<Vec<String>>,
}
/// JSON response body returned by the universal search endpoint.
#[derive(Debug, Serialize)]
pub struct UniversalSearchResponse {
/// The query string echoed back from the request.
pub query: String,
/// The requested slice of merged, de-duplicated, score-ordered results.
pub results: Vec<SearchResult>,
/// Number of merged results after de-duplication and before slicing.
pub total: usize,
/// Effective page number (at least 1).
pub page: usize,
/// Effective page size after clamping.
pub page_size: usize,
/// Wall-clock time spent handling the request, in milliseconds.
pub took_ms: u64,
}
/// One hit returned by the universal search.
///
/// Serialized as an internally tagged object: the `type` field is "chunk",
/// "frame" or "person" and the variant's fields sit next to it.
#[derive(Debug, Serialize, Clone)]
#[serde(tag = "type")]
pub enum SearchResult {
/// A row from the chunk table.
#[serde(rename = "chunk")]
Chunk {
file_uuid: String,
chunk_id: String,
chunk_type: String,
// Frame indices are computed in SQL as `(time * fps)::bigint`.
start_frame: i64,
end_frame: i64,
fps: f64,
start_time: f64,
end_time: f64,
// 0.9 when the resolved text contains the query (case-insensitive), otherwise 0.5.
score: f64,
// `text_content` column, falling back to the "text" key of `content`.
text: Option<String>,
// "speaker_id" key of `content`, when present and a string.
speaker_id: Option<String>,
// The chunk's full `content` JSON.
metadata: Option<serde_json::Value>,
},
/// A row from the frames table joined to its video.
#[serde(rename = "frame")]
Frame {
file_uuid: String,
frame_number: i64,
timestamp: f64,
// Frame hits produced in this file carry a fixed score of 0.7.
score: f64,
// "objects" array of the frame's `yolo_objects` JSON.
objects: Option<Vec<serde_json::Value>>,
// "text" values collected from the "texts" array of `ocr_results`.
ocr_texts: Option<Vec<String>>,
// "faces" array of the frame's `face_results` JSON.
faces: Option<Vec<serde_json::Value>>,
// Not populated by the searches in this file (always `None`).
pose_persons: Option<Vec<serde_json::Value>>,
},
/// An identity aggregated over its face detections, grouped per file.
#[serde(rename = "person")]
Person {
file_uuid: Option<String>,
identity_id: i32,
identity_uuid: String,
name: Option<String>,
// Number of face detections counted for this identity within the file.
appearance_count: i32,
// 0.95 when the name contains the query (case-insensitive), otherwise 0.5.
score: f64,
// Minimum / maximum `timestamp_secs` over the counted detections.
first_appearance_time: Option<f64>,
last_appearance_time: Option<f64>,
},
}
/// Builds the router for the search endpoints defined in this module.
///
/// Registers `POST /api/v1/search/universal` and `POST /api/v1/search/frames`.
pub fn universal_search_routes() -> Router<crate::api::types::AppState> {
    let router = Router::new();
    let router = router.route("/api/v1/search/universal", post(universal_search));
    router.route("/api/v1/search/frames", post(search_frames))
}
/// Unified search across all data types
pub async fn universal_search(
State(_state): State<crate::api::types::AppState>,
Json(req): Json<UniversalSearchRequest>,
) -> Result<Json<UniversalSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let start_time = std::time::Instant::now();
let db = PostgresDb::init().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": format!("DB error: {}", e) })),
)
})?;
let page = req.page.unwrap_or(1).max(1);
let page_size = req.page_size.unwrap_or(20).max(1).min(200);
// Backward compat: if old `offset` is used without `page`, derive from offset
let offset = if req.page.is_none() && req.offset.is_some() {
req.offset.unwrap()
} else {
(page - 1) * page_size
};
let types = if req.types.is_empty() {
vec![
"chunk".to_string(),
"frame".to_string(),
"person".to_string(),
]
} else {
req.types.clone()
};
let mut results = Vec::new();
// Search chunks
if types.contains(&"chunk".to_string()) {
let chunk_results = search_chunks(&db, &req).await.map_err(|e| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({ "error": e.to_string() })),
)
})?;
results.extend(chunk_results);
}
// Search frames
if types.contains(&"frame".to_string()) {
let frame_results = search_frames_internal(&db, &req).await.unwrap_or_default();
results.extend(frame_results);
}
// Search persons
if types.contains(&"person".to_string()) {
let person_results = search_persons_internal(&db, &req).await.unwrap_or_default();
results.extend(person_results);
}
// Deduplicate by chunk_id / frame_number / person_id
{
let mut seen_chunks = std::collections::HashSet::new();
let mut seen_frames = std::collections::HashSet::new();
let mut seen_persons = std::collections::HashSet::new();
results.retain(|r| match r {
SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()),
SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number),
SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id),
});
}
// Sort by score descending
results.sort_by(|a, b| {
let score_a = match a {
SearchResult::Chunk { score, .. } => *score,
SearchResult::Frame { score, .. } => *score,
SearchResult::Person { score, .. } => *score,
};
let score_b = match b {
SearchResult::Chunk { score, .. } => *score,
SearchResult::Frame { score, .. } => *score,
SearchResult::Person { score, .. } => *score,
};
score_b
.partial_cmp(&score_a)
.unwrap_or(std::cmp::Ordering::Equal)
});
let total = results.len();
let effective_limit = req.limit.unwrap_or(usize::MAX);
let end = std::cmp::min(offset + page_size, results.len()).min(effective_limit);
let paginated = if offset < results.len() {
results[offset..end].to_vec()
} else {
vec![]
};
let took = start_time.elapsed().as_millis() as u64;
Ok(Json(UniversalSearchResponse {
query: req.query,
results: paginated,
total,
page,
page_size,
took_ms: took,
}))
}
/// Search frames by YOLO objects, OCR text, or face IDs
pub async fn search_frames(
State(_state): State<crate::api::types::AppState>,
Json(req): Json<FrameSearchRequest>,
) -> Result<Json<FrameSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let db = PostgresDb::init().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": format!("DB error: {}", e) })),
)
})?;
let frames = search_frames_internal_v2(&db, &req).await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": format!("Search error: {}", e) })),
)
})?;
let frames_count = frames.len();
Ok(Json(FrameSearchResponse {
frames,
total: frames_count,
}))
}
/// Search persons by name or speaker_id
pub async fn search_persons(
State(_state): State<crate::api::types::AppState>,
Query(query): Query<PersonSearchQuery>,
) -> Result<Json<PersonSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let db = PostgresDb::init().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": format!("DB error: {}", e) })),
)
})?;
let limit = query.limit.unwrap_or(20);
let persons = search_persons_by_query(
&db,
&query.file_uuid,
&query.query,
query.min_appearances,
limit,
)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": format!("Search error: {}", e) })),
)
})?;
let persons_count = persons.len();
Ok(Json(PersonSearchResponse {
persons,
total: persons_count,
}))
}
// --- Endpoint request/response types, followed by the internal search functions ---
/// JSON request body accepted by the dedicated frame search endpoint.
///
/// Every field is optional; unset fields add no condition to the query.
#[derive(Debug, Deserialize)]
pub struct FrameSearchRequest {
/// Restricts the search to frames of this file.
pub file_uuid: Option<String>,
/// Case-insensitive substring matched against `yolo_objects` rendered as text.
pub object_class: Option<String>,
/// Case-insensitive substring matched against `ocr_results` rendered as text.
pub ocr_text: Option<String>,
/// Substring matched against `face_results` rendered as text.
pub face_id: Option<String>,
/// Inclusive `[start, end]` window on the frame timestamp.
pub time_range: Option<[f64; 2]>,
/// Maximum number of frames to return; defaults to 50.
pub limit: Option<usize>,
}
/// JSON response body returned by the dedicated frame search endpoint.
#[derive(Debug, Serialize)]
pub struct FrameSearchResponse {
/// Matching frames, ordered by ascending timestamp.
pub frames: Vec<FrameResult>,
/// Number of entries in `frames`.
pub total: usize,
}
/// One frame returned by the dedicated frame search endpoint.
#[derive(Debug, Serialize)]
pub struct FrameResult {
pub frame_number: i64,
pub timestamp: f64,
/// UUID of the video the frame belongs to.
pub file_uuid: String,
/// "objects" array of the frame's `yolo_objects` JSON, when that key exists.
pub objects: Option<Vec<serde_json::Value>>,
/// "text" values collected from the "texts" array of the frame's `ocr_results` JSON.
pub ocr_texts: Option<Vec<String>>,
/// "faces" array of the frame's `face_results` JSON, when that key exists.
pub faces: Option<Vec<serde_json::Value>>,
/// Not populated by the search in this file (always `None`).
pub pose_persons: Option<Vec<serde_json::Value>>,
}
/// URL query parameters accepted by the person search handler.
#[derive(Debug, Deserialize)]
pub struct PersonSearchQuery {
/// File whose face detections are searched (required).
pub file_uuid: String,
/// Case-insensitive substring matched against the identity name.
pub query: Option<String>,
/// Keeps only identities with at least this many face detections in the file.
pub min_appearances: Option<i32>,
/// Maximum number of persons to return; defaults to 20.
pub limit: Option<usize>,
}
/// JSON response body returned by the person search handler.
#[derive(Debug, Serialize)]
pub struct PersonSearchResponse {
/// Matching persons, ordered by descending appearance count.
pub persons: Vec<PersonResult>,
/// Number of entries in `persons`.
pub total: usize,
}
/// One identity returned by the person search handler.
#[derive(Debug, Serialize)]
pub struct PersonResult {
pub identity_id: i32,
/// The identity's UUID rendered as text.
pub identity_uuid: String,
pub name: Option<String>,
/// Number of face detections counted for this identity in the searched file.
pub appearance_count: i32,
/// Smallest `timestamp_secs` among the counted detections.
pub first_appearance_time: Option<f64>,
/// Largest `timestamp_secs` among the counted detections.
pub last_appearance_time: Option<f64>,
}
async fn search_chunks(
db: &PostgresDb,
req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
let chunk_table = schema::table_name("chunk");
let mut sql = format!(
"SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, (start_time * fps)::bigint as start_frame, (end_time * fps)::bigint as end_frame, fps, text_content, content FROM {} WHERE 1=1",
chunk_table
);
if let Some(uuid) = &req.file_uuid {
sql.push_str(&format!(" AND file_uuid = '{}'", uuid.replace('\'', "''")));
}
if let Some(tr) = &req.time_range {
sql.push_str(&format!(
" AND start_time >= {} AND end_time <= {}",
tr[0], tr[1]
));
}
if !req.query.is_empty() {
let q = req.query.replace('\'', "''");
sql.push_str(&format!(
" AND (text_content ILIKE '%{}%' OR content::text ILIKE '%{}%')",
q, q
));
}
if let Some(ref filters) = req.filters {
if let Some(ref speaker_id) = filters.speaker_id {
sql.push_str(&format!(
" AND content->>'speaker_id' = '{}'",
speaker_id.replace('\'', "''")
));
}
if let Some(ref person_id) = filters.person_id {
sql.push_str(&format!(
" AND content::text LIKE '%{}%'",
person_id.replace('\'', "''")
));
}
// Visual chunk filters
if let Some(min_confidence) = filters.min_confidence {
sql.push_str(&format!(
" AND (content->'metadata'->>'avg_confidence')::float >= {}",
min_confidence
));
}
if let Some(min_unique_classes) = filters.min_unique_classes {
sql.push_str(&format!(
" AND jsonb_array_length(content->'metadata'->'unique_classes') >= {}",
min_unique_classes
));
}
if let Some(min_density) = filters.min_spatial_density {
sql.push_str(&format!(
" AND (content->'metadata'->>'spatial_density')::float >= {}",
min_density
));
}
if let Some(max_density) = filters.max_spatial_density {
sql.push_str(&format!(
" AND (content->'metadata'->>'spatial_density')::float <= {}",
max_density
));
}
if let Some(ref required_classes) = filters.required_object_classes {
if !required_classes.is_empty() {
let class_conditions: Vec<String> = required_classes
.iter()
.map(|class| {
format!(
"content->'keyframe_objects' @> '[{{ \"class_name\": \"{}\"}}]'",
class.replace('\'', "''")
)
})
.collect();
sql.push_str(&format!(" AND ({})", class_conditions.join(" OR ")));
}
}
if let Some(ref chunk_type) = filters.chunk_type {
sql.push_str(&format!(
" AND chunk_type = '{}'",
chunk_type.replace('\'', "''")
));
}
if let Some(trace_id) = filters.co_appears_with_trace_id {
sql.push_str(&format!(
" AND metadata->'co_appearances' @> '[{{ \"trace_id\": {} }}]'",
trace_id
));
}
}
sql.push_str(" ORDER BY start_time ASC");
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<(
String,
String,
String,
f64,
f64,
i64,
i64,
f64,
Option<String>,
Option<serde_json::Value>,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<SearchResult> = rows
.into_iter()
.map(
|(
file_uuid,
chunk_id,
chunk_type,
start_time,
end_time,
start_frame,
end_frame,
fps,
text_content,
content,
)| {
let text = text_content.or_else(|| {
content
.as_ref()
.and_then(|c| c.get("text").and_then(|v| v.as_str()).map(String::from))
});
let speaker_id = content.as_ref().and_then(|c| {
c.get("speaker_id")
.and_then(|v| v.as_str())
.map(String::from)
});
let score = if !req.query.is_empty()
&& text.as_ref().map_or(false, |t| {
t.to_lowercase().contains(&req.query.to_lowercase())
}) {
0.9
} else {
0.5
};
SearchResult::Chunk {
file_uuid,
chunk_id,
chunk_type,
start_time,
end_time,
start_frame,
end_frame,
fps,
score,
text,
speaker_id,
metadata: content,
}
},
)
.collect();
Ok(results)
}
/// Frame sub-search of the universal search.
///
/// Queries `frames` joined to `videos`, applying the request's `file_uuid`,
/// `time_range`, frame-related filters and free-text `query`, and maps each
/// row to a [`SearchResult::Frame`] with a fixed score of 0.7. Rows are
/// ordered by ascending timestamp and limited to one page (`page_size`,
/// default 20, clamped to 1..=200 like the handler does).
///
/// All caller-supplied text is sent as bound parameters; it was previously
/// interpolated into the SQL unescaped, which allowed SQL injection. Substring
/// filters escape LIKE metacharacters so they match literally.
///
/// NOTE(review): the table names are hard-coded here while the chunk and
/// person searches resolve theirs through `schema::table_name` — confirm.
///
/// # Errors
/// Returns the underlying error when the query fails.
async fn search_frames_internal(
    db: &PostgresDb,
    req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
    /// Wraps `term` in `%…%` and backslash-escapes `\`, `%` and `_` so the
    /// resulting LIKE/ILIKE pattern matches `term` as a literal substring.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    /// Returns the array stored under `key`; a present non-array value yields
    /// an empty vector, a missing document or key yields `None`.
    fn json_array(doc: Option<&serde_json::Value>, key: &str) -> Option<Vec<serde_json::Value>> {
        doc?.get(key)
            .map(|value| value.as_array().cloned().unwrap_or_default())
    }

    /// Collects the string "text" entries of the document's "texts" array.
    fn ocr_strings(doc: Option<&serde_json::Value>) -> Option<Vec<String>> {
        let items = doc?.get("texts")?.as_array()?;
        Some(
            items
                .iter()
                .filter_map(|item| item.get("text").and_then(|t| t.as_str()).map(String::from))
                .collect(),
        )
    }

    type FrameRow = (
        i64,
        f64,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        String,
    );

    let table = "frames";
    let video_table = "videos";
    let page_size = req.page_size.unwrap_or(20).clamp(1, 200);

    let mut qb: sqlx::QueryBuilder<sqlx::Postgres> = sqlx::QueryBuilder::new(format!(
        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid \
         FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
        table, video_table
    ));

    if let Some(uuid) = &req.file_uuid {
        qb.push(" AND v.file_uuid = ").push_bind(uuid.as_str());
    }
    if let Some(tr) = &req.time_range {
        qb.push(format!(
            " AND f.timestamp >= {} AND f.timestamp <= {}",
            tr[0], tr[1]
        ));
    }

    if let Some(filters) = &req.filters {
        if let Some(classes) = &filters.object_class {
            // Every listed class must be present.
            for class in classes {
                qb.push(" AND f.yolo_objects::text ILIKE ")
                    .push_bind(contains_pattern(class));
            }
        }
        if let Some(ocr) = &filters.ocr_text {
            qb.push(" AND f.ocr_results::text ILIKE ")
                .push_bind(contains_pattern(ocr));
        }
        // Only an explicit `true` filters; `false` and `None` add no condition.
        if let Some(true) = filters.has_face {
            qb.push(" AND f.face_results IS NOT NULL AND jsonb_array_length(f.face_results) > 0");
        }
        if let Some(person_id) = &filters.person_id {
            qb.push(" AND f.face_results::text LIKE ")
                .push_bind(contains_pattern(person_id));
        }
    }

    if !req.query.is_empty() {
        // Search across all frame data
        let pattern = contains_pattern(&req.query);
        qb.push(" AND (f.yolo_objects::text ILIKE ")
            .push_bind(pattern.clone())
            .push(" OR f.ocr_results::text ILIKE ")
            .push_bind(pattern.clone())
            .push(" OR f.face_results::text ILIKE ")
            .push_bind(pattern)
            .push(")");
    }

    qb.push(" ORDER BY f.timestamp ASC");
    qb.push(format!(" LIMIT {}", page_size));

    let rows: Vec<FrameRow> = qb
        .build_query_as::<FrameRow>()
        .fetch_all(db.pool())
        .await?;

    let results: Vec<SearchResult> = rows
        .into_iter()
        .map(
            |(frame_number, timestamp, yolo, ocr, face, file_uuid)| SearchResult::Frame {
                file_uuid,
                frame_number,
                timestamp,
                score: 0.7,
                objects: json_array(yolo.as_ref(), "objects"),
                ocr_texts: ocr_strings(ocr.as_ref()),
                faces: json_array(face.as_ref(), "faces"),
                pose_persons: None,
            },
        )
        .collect();
    Ok(results)
}
/// Person sub-search of the universal search.
///
/// Aggregates face detections per identity and file, optionally restricted to
/// the request's `file_uuid` and to identities whose name contains `query`,
/// and maps each group to a [`SearchResult::Person`]. Groups are ordered by
/// descending appearance count and limited to one page (`page_size`, default
/// 20, clamped to 1..=200 like the handler does).
///
/// Caller-supplied text is sent as bound parameters and the name filter
/// escapes LIKE metacharacters so it matches literally.
///
/// A person scores 0.95 when the name contains the query (case-insensitive)
/// and 0.5 otherwise.
///
/// # Errors
/// Returns the underlying error when the query fails.
async fn search_persons_internal(
    db: &PostgresDb,
    req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
    /// Wraps `term` in `%…%` and backslash-escapes `\`, `%` and `_` so the
    /// resulting LIKE/ILIKE pattern matches `term` as a literal substring.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    type PersonRow = (
        i32,
        String,
        Option<String>,
        i64,
        Option<f64>,
        Option<f64>,
        String,
    );

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let page_size = req.page_size.unwrap_or(20).clamp(1, 200);

    let mut qb: sqlx::QueryBuilder<sqlx::Postgres> = sqlx::QueryBuilder::new(format!(
        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time, \
         fd.file_uuid \
         FROM {} i JOIN {} fd ON fd.identity_id = i.id WHERE 1=1",
        id_table, fd_table
    ));

    if let Some(uuid) = &req.file_uuid {
        qb.push(" AND fd.file_uuid = ").push_bind(uuid.as_str());
    }
    if !req.query.is_empty() {
        qb.push(" AND i.name ILIKE ")
            .push_bind(contains_pattern(&req.query));
    }
    qb.push(" GROUP BY i.id, i.uuid, i.name, fd.file_uuid");
    qb.push(" ORDER BY appearance_count DESC");
    qb.push(format!(" LIMIT {}", page_size));

    let rows: Vec<PersonRow> = qb
        .build_query_as::<PersonRow>()
        .fetch_all(db.pool())
        .await?;

    // Lowercase the query once instead of once per row.
    let has_query = !req.query.is_empty();
    let needle = req.query.to_lowercase();

    let results: Vec<SearchResult> = rows
        .into_iter()
        .map(
            |(
                identity_id,
                identity_uuid,
                name,
                appearance_count,
                first_time,
                last_time,
                file_uuid,
            )| {
                let name_matches = has_query
                    && name
                        .as_deref()
                        .map_or(false, |n| n.to_lowercase().contains(&needle));
                let score = if name_matches { 0.95 } else { 0.5 };
                SearchResult::Person {
                    file_uuid: Some(file_uuid),
                    identity_id,
                    identity_uuid,
                    name,
                    // COUNT(*) is a bigint; saturate instead of wrapping on the
                    // narrowing cast.
                    appearance_count: appearance_count.min(i64::from(i32::MAX)) as i32,
                    score,
                    first_appearance_time: first_time,
                    last_appearance_time: last_time,
                }
            },
        )
        .collect();
    Ok(results)
}
/// Query behind the dedicated frame search endpoint.
///
/// Queries `frames` joined to `videos`, applying whichever of `file_uuid`,
/// `time_range`, `object_class`, `ocr_text` and `face_id` are set, and returns
/// the rows as [`FrameResult`]s ordered by ascending timestamp. `limit`
/// defaults to 50.
///
/// All caller-supplied text is sent as bound parameters; it was previously
/// interpolated into the SQL unescaped, which allowed SQL injection. Substring
/// filters escape LIKE metacharacters so they match literally.
///
/// NOTE(review): `limit` is taken from the request without an upper bound, and
/// the table names are hard-coded rather than resolved through
/// `schema::table_name` — confirm both are intended.
///
/// # Errors
/// Returns the underlying error when the query fails.
async fn search_frames_internal_v2(
    db: &PostgresDb,
    req: &FrameSearchRequest,
) -> Result<Vec<FrameResult>, anyhow::Error> {
    /// Wraps `term` in `%…%` and backslash-escapes `\`, `%` and `_` so the
    /// resulting LIKE/ILIKE pattern matches `term` as a literal substring.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    /// Returns the array stored under `key`; a present non-array value yields
    /// an empty vector, a missing document or key yields `None`.
    fn json_array(doc: Option<&serde_json::Value>, key: &str) -> Option<Vec<serde_json::Value>> {
        doc?.get(key)
            .map(|value| value.as_array().cloned().unwrap_or_default())
    }

    /// Collects the string "text" entries of the document's "texts" array.
    fn ocr_strings(doc: Option<&serde_json::Value>) -> Option<Vec<String>> {
        let items = doc?.get("texts")?.as_array()?;
        Some(
            items
                .iter()
                .filter_map(|item| item.get("text").and_then(|t| t.as_str()).map(String::from))
                .collect(),
        )
    }

    type FrameRow = (
        i64,
        f64,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        String,
    );

    let table = "frames";
    let video_table = "videos";

    let mut qb: sqlx::QueryBuilder<sqlx::Postgres> = sqlx::QueryBuilder::new(format!(
        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid \
         FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
        table, video_table
    ));

    if let Some(uuid) = &req.file_uuid {
        qb.push(" AND v.file_uuid = ").push_bind(uuid.as_str());
    }
    if let Some(tr) = &req.time_range {
        qb.push(format!(
            " AND f.timestamp >= {} AND f.timestamp <= {}",
            tr[0], tr[1]
        ));
    }
    if let Some(class) = &req.object_class {
        qb.push(" AND f.yolo_objects::text ILIKE ")
            .push_bind(contains_pattern(class));
    }
    if let Some(ocr) = &req.ocr_text {
        qb.push(" AND f.ocr_results::text ILIKE ")
            .push_bind(contains_pattern(ocr));
    }
    if let Some(face_id) = &req.face_id {
        qb.push(" AND f.face_results::text LIKE ")
            .push_bind(contains_pattern(face_id));
    }

    qb.push(" ORDER BY f.timestamp ASC");
    qb.push(format!(" LIMIT {}", req.limit.unwrap_or(50)));

    let rows: Vec<FrameRow> = qb
        .build_query_as::<FrameRow>()
        .fetch_all(db.pool())
        .await?;

    let results: Vec<FrameResult> = rows
        .into_iter()
        .map(
            |(frame_number, timestamp, yolo, ocr, face, uuid)| FrameResult {
                frame_number,
                timestamp,
                file_uuid: uuid,
                objects: json_array(yolo.as_ref(), "objects"),
                ocr_texts: ocr_strings(ocr.as_ref()),
                faces: json_array(face.as_ref(), "faces"),
                pose_persons: None,
            },
        )
        .collect();
    Ok(results)
}
/// Query behind the person search handler.
///
/// Aggregates the face detections of `file_uuid` per identity and returns one
/// [`PersonResult`] per identity, ordered by descending appearance count.
///
/// * `query` — when set, keeps identities whose name contains it
///   (case-insensitive, matched literally).
/// * `min_appearances` — when set, keeps identities with at least that many
///   detections in the file.
/// * `limit` — maximum number of rows returned.
///
/// Caller-supplied text is sent as bound parameters rather than spliced into
/// the SQL.
///
/// # Errors
/// Returns the underlying error when the query fails.
async fn search_persons_by_query(
    db: &PostgresDb,
    file_uuid: &str,
    query: &Option<String>,
    min_appearances: Option<i32>,
    limit: usize,
) -> Result<Vec<PersonResult>, anyhow::Error> {
    /// Wraps `term` in `%…%` and backslash-escapes `\`, `%` and `_` so the
    /// resulting LIKE/ILIKE pattern matches `term` as a literal substring.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    type PersonRow = (i32, String, Option<String>, i64, Option<f64>, Option<f64>);

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");

    let mut qb: sqlx::QueryBuilder<sqlx::Postgres> = sqlx::QueryBuilder::new(format!(
        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
         FROM {} i JOIN {} fd ON fd.identity_id = i.id \
         WHERE fd.file_uuid = ",
        id_table, fd_table
    ));
    qb.push_bind(file_uuid);

    if let Some(q) = query {
        qb.push(" AND i.name ILIKE ").push_bind(contains_pattern(q));
    }
    qb.push(" GROUP BY i.id, i.uuid, i.name");
    if let Some(min) = min_appearances {
        qb.push(format!(" HAVING COUNT(fd.id) >= {}", min));
    }
    qb.push(" ORDER BY appearance_count DESC");
    qb.push(format!(" LIMIT {}", limit));

    let rows: Vec<PersonRow> = qb
        .build_query_as::<PersonRow>()
        .fetch_all(db.pool())
        .await?;

    let results: Vec<PersonResult> = rows
        .into_iter()
        .map(
            |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
                PersonResult {
                    identity_id,
                    identity_uuid,
                    name,
                    // COUNT(*) is a bigint; saturate instead of wrapping on the
                    // narrowing cast.
                    appearance_count: appearance_count.min(i64::from(i32::MAX)) as i32,
                    first_appearance_time: first_time,
                    last_appearance_time: last_time,
                }
            },
        )
        .collect();
    Ok(results)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SearchFilters` with only the visual-chunk fields populated and
    /// checks that the confidence threshold is stored as given.
    #[test]
    fn test_search_filters_with_visual() {
        let wanted_classes = vec![String::from("person")];
        let visual_only = SearchFilters {
            // Visual chunk thresholds under test.
            min_confidence: Some(0.8),
            min_unique_classes: Some(3),
            min_spatial_density: Some(0.5),
            max_spatial_density: Some(0.9),
            required_object_classes: Some(wanted_classes),
            // Everything else stays unset.
            person_id: None,
            object_class: None,
            ocr_text: None,
            has_face: None,
            speaker_id: None,
            chunk_type: None,
            co_appears_with_trace_id: None,
        };
        assert_eq!(visual_only.min_confidence, Some(0.8));
    }
}