- Add helper functions to extract text from nested content structure
- Update SearchResult to include uuid field
- Add PostgreSQL function get_chunk_by_chunk_id_and_uuid to handle duplicate chunk_ids
- Update Qdrant search functions to extract uuid from payload
- Change embedding model to nomic-embed-text-v2-moe:latest
- Update Qdrant collection name to momentry_rule1
- Fix MongoDB authentication and disable cache for development
- Improve error handling in processor.rs
- Update documentation with new embedding model
334 lines
12 KiB
Rust
334 lines
12 KiB
Rust
use anyhow::Result;
|
|
use ratatui::{
|
|
backend::CrosstermBackend,
|
|
layout::{Constraint, Direction, Layout},
|
|
style::{Color, Style},
|
|
text::{Line, Span},
|
|
widgets::{Block, Borders, List, ListItem, Paragraph},
|
|
Frame, Terminal,
|
|
};
|
|
use std::io;
|
|
use std::process::Command as StdCommand;
|
|
|
|
/// Base URL of the Qdrant HTTP API; `ChunkSelector::search` appends
/// `/collections/<name>/points/search` to it.
#[allow(dead_code)]
|
|
const QDRANT_URL: &str = "http://localhost:6333";
|
|
/// Value sent in the `api-key` header of every Qdrant request.
// NOTE(review): this credential is hard-coded in source; consider loading it
// from the environment or a config file instead.
#[allow(dead_code)]
|
|
const QDRANT_API_KEY: &str = "Test3200Test3200Test3200";
|
|
/// Base URL of the Ollama HTTP API; `ChunkSelector::search` appends
/// `/api/embeddings` to it.
#[allow(dead_code)]
|
|
const OLLAMA_URL: &str = "http://localhost:11434";
|
|
/// Name of the embedding model passed in the `model` field of the Ollama request.
#[allow(dead_code)]
|
|
const MODEL: &str = "nomic-embed-text-v2-moe:latest";
|
|
|
|
/// A chunk of text with a time range and a relevance score.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ChunkEntry {
    /// Identifier of the chunk.
    pub chunk_id: String,
    /// Start of the chunk, in seconds.
    pub start_time: f64,
    /// End of the chunk, in seconds.
    pub end_time: f64,
    /// Text content of the chunk.
    pub text: String,
    /// Relevance score attached to the chunk.
    pub score: f64,
}

#[allow(dead_code)]
impl ChunkEntry {
    /// Formats the chunk's time range as `MM:SS - MM:SS`.
    ///
    /// Fractions of a second are dropped. The `as u32` casts saturate, so
    /// negative or NaN inputs render as `00`.
    pub fn format_time_range(&self) -> String {
        let (start_mins, start_secs) = Self::split_minutes_seconds(self.start_time);
        let (end_mins, end_secs) = Self::split_minutes_seconds(self.end_time);
        format!(
            "{:02}:{:02} - {:02}:{:02}",
            start_mins, start_secs, end_mins, end_secs
        )
    }

    /// Returns the text shortened to at most `max_len` characters, followed by
    /// `...` when anything was cut off; shorter text is returned unchanged.
    ///
    /// The cut is made on a character boundary. Slicing by raw byte offset
    /// (`&text[..max_len]`) panics when the offset falls inside a multi-byte
    /// UTF-8 character. For ASCII text the result is identical to byte-based
    /// truncation.
    pub fn truncate_text(&self, max_len: usize) -> String {
        // `nth(max_len)` yields the byte offset of the first character that
        // does not fit, or `None` when the whole text fits.
        match self.text.char_indices().nth(max_len) {
            Some((cut, _)) => format!("{}...", &self.text[..cut]),
            None => self.text.clone(),
        }
    }

    /// Splits a duration in seconds into whole minutes and leftover whole seconds.
    fn split_minutes_seconds(seconds: f64) -> (u32, u32) {
        ((seconds / 60.0) as u32, (seconds % 60.0) as u32)
    }
}
|
|
|
|
/// Interactive selector state: the current query, the search results and the
/// highlighted row.
#[allow(dead_code)]
|
|
pub struct ChunkSelector {
|
|
/// Results of the most recent `search` call; cleared at the start of each search.
chunks: Vec<ChunkEntry>,
|
|
/// Index into `chunks` of the highlighted row; moved by Up/Down in `run`.
selected_index: usize,
|
|
/// Current query text; set by `search`, edited by typed characters in `run`.
query: String,
|
|
/// Video identifier stored at construction.
// NOTE(review): no method visible in this file reads this field — results are
// not filtered by it.
video_uuid: String,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
impl ChunkSelector {
|
|
pub fn new(video_uuid: &str) -> Self {
|
|
Self {
|
|
chunks: Vec::new(),
|
|
selected_index: 0,
|
|
query: String::new(),
|
|
video_uuid: video_uuid.to_string(),
|
|
}
|
|
}
|
|
|
|
pub fn search(&mut self, query: &str) -> Result<Vec<ChunkEntry>> {
|
|
self.query = query.to_string();
|
|
self.chunks = Vec::new();
|
|
self.selected_index = 0;
|
|
|
|
if query.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
// Get embedding from Ollama
|
|
let embed_output = StdCommand::new("curl")
|
|
.args([
|
|
"-s",
|
|
&format!("{}/api/embeddings", OLLAMA_URL),
|
|
"-X",
|
|
"POST",
|
|
"-H",
|
|
"Content-Type: application/json",
|
|
"-d",
|
|
&format!(
|
|
r#"{{"model":"{}","prompt":"search_query: {}"}}"#,
|
|
MODEL, query
|
|
),
|
|
])
|
|
.output()?;
|
|
|
|
let embed_text = String::from_utf8_lossy(&embed_output.stdout);
|
|
|
|
// Parse embedding from response
|
|
let embedding: Vec<f64> = serde_json::from_str(&embed_text)
|
|
.ok()
|
|
.and_then(|v: serde_json::Value| {
|
|
v.get("embedding")
|
|
.and_then(|e| serde_json::from_value(e.clone()).ok())
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
if embedding.is_empty() {
|
|
println!("Failed to get embedding for query: {}", query);
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
// Search Qdrant - use momentry_rule1 collection (Rule1 specification)
|
|
let collections = ["momentry_rule1"];
|
|
|
|
for collection in collections {
|
|
let vector_str = serde_json::to_string(&embedding)
|
|
.unwrap_or_default()
|
|
.replace(['[', ']'], "");
|
|
|
|
let qdrant_output = StdCommand::new("curl")
|
|
.args([
|
|
"-s",
|
|
&format!("{}/collections/{}/points/search", QDRANT_URL, collection),
|
|
"-X",
|
|
"POST",
|
|
"-H",
|
|
&format!("api-key: {}", QDRANT_API_KEY),
|
|
"-H",
|
|
"Content-Type: application/json",
|
|
"-d",
|
|
&format!(
|
|
r#"{{"vector":[{}],"limit":20,"with_payload":true}}"#,
|
|
vector_str
|
|
),
|
|
])
|
|
.output()?;
|
|
|
|
let qdrant_text = String::from_utf8_lossy(&qdrant_output.stdout);
|
|
|
|
if let Ok(response) = serde_json::from_str::<serde_json::Value>(&qdrant_text) {
|
|
if let Some(results) = response.get("result").and_then(|r| r.as_array()) {
|
|
for r in results {
|
|
let payload = r.get("payload");
|
|
|
|
// Try to match UUID - either exact match or partial match
|
|
let _uuid = payload
|
|
.and_then(|p| p.get("uuid"))
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("");
|
|
|
|
// Accept all chunks (remove UUID filter for now since we want to find any content)
|
|
// The user can select which chunk to play
|
|
let uuid_match = true; // Accept all
|
|
|
|
if !uuid_match {
|
|
continue;
|
|
}
|
|
|
|
let chunk_id = payload
|
|
.and_then(|p| p.get("chunk_id"))
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
let start_time = payload
|
|
.and_then(|p| p.get("start_time"))
|
|
.and_then(|v| v.as_f64())
|
|
.unwrap_or(0.0);
|
|
let end_time = payload
|
|
.and_then(|p| p.get("end_time"))
|
|
.and_then(|v| v.as_f64())
|
|
.unwrap_or(0.0);
|
|
let text = payload
|
|
.and_then(|p| p.get("text"))
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
let score = r.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
|
|
|
if !text.is_empty() {
|
|
self.chunks.push(ChunkEntry {
|
|
chunk_id,
|
|
start_time,
|
|
end_time,
|
|
text,
|
|
score,
|
|
});
|
|
}
|
|
}
|
|
|
|
if !self.chunks.is_empty() {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(self.chunks.clone())
|
|
}
|
|
|
|
pub fn run(&mut self) -> Result<Option<ChunkEntry>> {
|
|
let stdout = io::stdout();
|
|
let backend = CrosstermBackend::new(stdout);
|
|
let mut terminal = Terminal::new(backend)?;
|
|
|
|
loop {
|
|
terminal.draw(|f| self.render(f))?;
|
|
|
|
match crossterm::event::read() {
|
|
Ok(crossterm::event::Event::Key(key)) => match key.code {
|
|
crossterm::event::KeyCode::Up => {
|
|
if self.selected_index > 0 {
|
|
self.selected_index -= 1;
|
|
}
|
|
}
|
|
crossterm::event::KeyCode::Down => {
|
|
if self.selected_index < self.chunks.len().saturating_sub(1) {
|
|
self.selected_index += 1;
|
|
}
|
|
}
|
|
crossterm::event::KeyCode::Enter => {
|
|
let selected = self.chunks.get(self.selected_index).cloned();
|
|
terminal.show_cursor()?;
|
|
return Ok(selected);
|
|
}
|
|
crossterm::event::KeyCode::Char(c) => {
|
|
if c == 'q' {
|
|
terminal.show_cursor()?;
|
|
return Ok(None);
|
|
}
|
|
self.query.push(c);
|
|
}
|
|
crossterm::event::KeyCode::Backspace => {
|
|
self.query.pop();
|
|
}
|
|
crossterm::event::KeyCode::Esc => {
|
|
terminal.show_cursor()?;
|
|
return Ok(None);
|
|
}
|
|
_ => {}
|
|
},
|
|
Ok(crossterm::event::Event::Resize(_, _)) => {}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn render(&self, f: &mut Frame) {
|
|
let chunks = Layout::default()
|
|
.direction(Direction::Vertical)
|
|
.constraints([
|
|
Constraint::Length(3),
|
|
Constraint::Length(3),
|
|
Constraint::Min(0),
|
|
Constraint::Length(3),
|
|
])
|
|
.split(f.area());
|
|
|
|
// Title
|
|
let title = Paragraph::new("🔍 Chunk Search - Natural Language Query")
|
|
.style(Style::default().fg(Color::Cyan))
|
|
.block(Block::default().borders(Borders::ALL).title(" Search "));
|
|
f.render_widget(title, chunks[0]);
|
|
|
|
// Query input
|
|
let query_text = if self.query.is_empty() {
|
|
"Type to search...".to_string()
|
|
} else {
|
|
self.query.clone()
|
|
};
|
|
let query_style = if self.query.is_empty() {
|
|
Style::default().fg(Color::DarkGray)
|
|
} else {
|
|
Style::default().fg(Color::White)
|
|
};
|
|
let query = Paragraph::new(query_text)
|
|
.style(query_style)
|
|
.block(Block::default().borders(Borders::ALL).title(" Query "));
|
|
f.render_widget(query, chunks[1]);
|
|
|
|
// Results
|
|
if self.chunks.is_empty() {
|
|
let no_results = Paragraph::new("No results found. Type to search...")
|
|
.style(Style::default().fg(Color::DarkGray))
|
|
.block(Block::default().borders(Borders::ALL).title(" Results "));
|
|
f.render_widget(no_results, chunks[2]);
|
|
} else {
|
|
let items: Vec<ListItem> = self
|
|
.chunks
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, chunk)| {
|
|
let style = if i == self.selected_index {
|
|
Style::default().fg(Color::Yellow).bg(Color::DarkGray)
|
|
} else {
|
|
Style::default()
|
|
};
|
|
|
|
let content = Line::from(vec![
|
|
Span::raw(format!(
|
|
"{} ",
|
|
if i == self.selected_index { "▶" } else { " " }
|
|
)),
|
|
Span::styled(chunk.format_time_range(), Style::default().fg(Color::Green)),
|
|
Span::raw(" "),
|
|
Span::raw(chunk.truncate_text(50)),
|
|
Span::styled(
|
|
format!(" [{:.2}]", chunk.score),
|
|
Style::default().fg(Color::Blue),
|
|
),
|
|
]);
|
|
|
|
ListItem::new(content).style(style)
|
|
})
|
|
.collect();
|
|
|
|
let list = List::new(items)
|
|
.block(Block::default().borders(Borders::ALL).title(" Results "))
|
|
.highlight_style(Style::default().fg(Color::Yellow));
|
|
|
|
f.render_widget(list, chunks[2]);
|
|
}
|
|
|
|
// Help text
|
|
let help =
|
|
Paragraph::new(" [↑/↓] Navigate [Enter] Play from here [Type] Search [q] Quit ")
|
|
.style(Style::default().fg(Color::DarkGray))
|
|
.block(Block::default().borders(Borders::ALL));
|
|
f.render_widget(help, chunks[3]);
|
|
}
|
|
}
|