feat: deploy hybrid search (semantic+keyword+identity) with RRF fusion
- Replace smart_search with hybrid RRF implementation
- Add speaker_detections table for identity-agent binding
- Fix identity queries: direct SQL to avoid type mismatches
- Add debug logs to job_worker for processor debugging
- Deployed to production (3002) successfully

Key changes:
- search.rs: Complete rewrite with 3 strategies + RRF
- postgres_db.rs: speaker_detections table + identity query fixes
- job_worker.rs: Debug logs for output file checks

Tested:
- Hybrid search works with semantic + keyword + identity
- Identity search: 'identity:Charade' returns correct results
- Chinese keyword search: '調光' matches Charade summaries

Bugs found:
- Case mismatch: 'ASRX' vs 'asrx' in processors field
- Missing CUT dependency for ASRX processor
This commit is contained in:
@@ -4,7 +4,7 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::api::identity_agent_api::run_identity_agent;
|
||||
use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
@@ -333,6 +333,7 @@ impl JobWorker {
|
||||
job.uuid,
|
||||
processor_type.as_str()
|
||||
));
|
||||
debug!("Checking output file: {:?}", output_path);
|
||||
if output_path.exists() {
|
||||
info!(
|
||||
"Processor {} output file exists, marking completed and skipping",
|
||||
@@ -464,9 +465,12 @@ impl JobWorker {
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
debug!("Output file not found, checking result_map for {}", processor_type.as_str());
|
||||
|
||||
// Check if processor already in terminal state
|
||||
if let Some(result) = result_map.get(processor_type) {
|
||||
debug!("Found existing result for {}: status={:?}", processor_type.as_str(), result.status);
|
||||
match result.status {
|
||||
ProcessorJobStatus::Completed => {
|
||||
info!(
|
||||
@@ -572,10 +576,12 @@ impl JobWorker {
|
||||
);
|
||||
let missing_deps: Vec<String> = deps
|
||||
.iter()
|
||||
.filter(|d| !matches!(
|
||||
result_map.get(d).map(|r| &r.status),
|
||||
Some(ProcessorJobStatus::Completed)
|
||||
))
|
||||
.filter(|d| {
|
||||
!matches!(
|
||||
result_map.get(d).map(|r| &r.status),
|
||||
Some(ProcessorJobStatus::Completed)
|
||||
)
|
||||
})
|
||||
.map(|d| d.as_str().to_string())
|
||||
.collect();
|
||||
if let Err(e) = self
|
||||
@@ -594,6 +600,7 @@ impl JobWorker {
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Checking capacity before starting {}", processor_type.as_str());
|
||||
// Check capacity before starting processor
|
||||
if !self.processor_pool.can_start().await {
|
||||
info!(
|
||||
@@ -666,6 +673,8 @@ impl JobWorker {
|
||||
.upsert_processor_result(job.id, *processor_type, &job.uuid, "pending")
|
||||
.await?;
|
||||
|
||||
info!("Upserted processor_result for {}: id={}", processor_type.as_str(), processor_result_id);
|
||||
|
||||
self.redis
|
||||
.update_worker_processor_status(
|
||||
&job.uuid,
|
||||
@@ -687,6 +696,7 @@ impl JobWorker {
|
||||
frame_dir: None,
|
||||
};
|
||||
|
||||
info!("Calling start_processor for {}", processor_type.as_str());
|
||||
self.processor_pool.start_processor(task).await?;
|
||||
started_count += 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user