feat: deploy hybrid search (semantic + keyword + identity) with Reciprocal Rank Fusion (RRF)

- Replace smart_search with hybrid RRF implementation
- Add speaker_detections table for identity-agent binding
- Fix identity queries: use direct SQL to avoid type mismatches
- Add debug logs to job_worker for processor debugging
- Deployed to production (3002) successfully

Key changes:
- search.rs: Complete rewrite with 3 strategies + RRF
- postgres_db.rs: speaker_detections table + identity query fixes
- job_worker.rs: Debug logs for output file checks

Tested:
- Hybrid search works with semantic + keyword + identity
- Identity search: 'identity:Charade' returns correct results
- Chinese keyword search: '調光' matches Charade summaries

Bugs found:
- Case mismatch: 'ASRX' vs 'asrx' in processors field
- Missing CUT dependency for ASRX processor
This commit is contained in:
Accusys
2026-06-01 15:15:17 +08:00
parent 0d58a738a1
commit 874d688987
4 changed files with 549 additions and 74 deletions
+15 -5
View File
@@ -4,7 +4,7 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::time::sleep;
-use tracing::{error, info, warn};
+use tracing::{debug, error, info, warn};
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::chunk::{rule1_ingest, rule3_ingest};
@@ -333,6 +333,7 @@ impl JobWorker {
job.uuid,
processor_type.as_str()
));
debug!("Checking output file: {:?}", output_path);
if output_path.exists() {
info!(
"Processor {} output file exists, marking completed and skipping",
@@ -464,9 +465,12 @@ impl JobWorker {
);
continue;
}
debug!("Output file not found, checking result_map for {}", processor_type.as_str());
// Check if processor already in terminal state
if let Some(result) = result_map.get(processor_type) {
debug!("Found existing result for {}: status={:?}", processor_type.as_str(), result.status);
match result.status {
ProcessorJobStatus::Completed => {
info!(
@@ -572,10 +576,12 @@ impl JobWorker {
);
let missing_deps: Vec<String> = deps
.iter()
-.filter(|d| !matches!(
-result_map.get(d).map(|r| &r.status),
-Some(ProcessorJobStatus::Completed)
-))
+.filter(|d| {
+!matches!(
+result_map.get(d).map(|r| &r.status),
+Some(ProcessorJobStatus::Completed)
+)
+})
.map(|d| d.as_str().to_string())
.collect();
if let Err(e) = self
@@ -594,6 +600,7 @@ impl JobWorker {
}
}
debug!("Checking capacity before starting {}", processor_type.as_str());
// Check capacity before starting processor
if !self.processor_pool.can_start().await {
info!(
@@ -666,6 +673,8 @@ impl JobWorker {
.upsert_processor_result(job.id, *processor_type, &job.uuid, "pending")
.await?;
info!("Upserted processor_result for {}: id={}", processor_type.as_str(), processor_result_id);
self.redis
.update_worker_processor_status(
&job.uuid,
@@ -687,6 +696,7 @@ impl JobWorker {
frame_dir: None,
};
info!("Calling start_processor for {}", processor_type.as_str());
self.processor_pool.start_processor(task).await?;
started_count += 1;
}