Files
momentry_core/src/player/chunk_selector.rs
Warren 2393d81a3f feat: fix Chinese text search and duplicate chunk_id bug
- Add helper functions to extract text from nested content structure
- Update SearchResult to include uuid field
- Add PostgreSQL function get_chunk_by_chunk_id_and_uuid to handle duplicate chunk_ids
- Update Qdrant search functions to extract uuid from payload
- Change embedding model to nomic-embed-text-v2-moe:latest
- Update Qdrant collection name to momentry_rule1
- Fix MongoDB authentication and disable cache for development
- Improve error handling in processor.rs
- Update documentation with new embedding model
2026-03-29 04:44:28 +08:00

334 lines
12 KiB
Rust

use anyhow::Result;
use ratatui::{
backend::CrosstermBackend,
layout::{Constraint, Direction, Layout},
style::{Color, Style},
text::{Line, Span},
widgets::{Block, Borders, List, ListItem, Paragraph},
Frame, Terminal,
};
use std::io;
use std::process::Command as StdCommand;
/// Base URL of the Qdrant HTTP API that `ChunkSelector::search` posts vector queries to.
#[allow(dead_code)]
const QDRANT_URL: &str = "http://localhost:6333";
/// Value sent in the `api-key` header of every Qdrant request.
// NOTE(review): this credential is hard-coded in source control; consider
// loading it from the environment or a config file instead.
#[allow(dead_code)]
const QDRANT_API_KEY: &str = "Test3200Test3200Test3200";
/// Base URL of the Ollama HTTP API used to embed the search query.
#[allow(dead_code)]
const OLLAMA_URL: &str = "http://localhost:11434";
/// Name of the embedding model requested from Ollama.
#[allow(dead_code)]
const MODEL: &str = "nomic-embed-text-v2-moe:latest";
/// One search hit: a chunk of text with its time span and relevance score.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ChunkEntry {
    /// Identifier of the chunk as reported by the search backend.
    pub chunk_id: String,
    /// Start of the chunk, in seconds.
    pub start_time: f64,
    /// End of the chunk, in seconds.
    pub end_time: f64,
    /// Text content of the chunk.
    pub text: String,
    /// Similarity score reported by the search backend.
    pub score: f64,
}

#[allow(dead_code)]
impl ChunkEntry {
    /// Formats the chunk's time span as `"MM:SS - MM:SS"`.
    ///
    /// Fractional seconds are truncated. Minutes are not wrapped into hours,
    /// so spans past one hour print as e.g. `75:00`.
    pub fn format_time_range(&self) -> String {
        // Split a number of seconds into whole minutes and leftover seconds.
        let split = |seconds: f64| ((seconds / 60.0) as u32, (seconds % 60.0) as u32);
        let (start_mins, start_secs) = split(self.start_time);
        let (end_mins, end_secs) = split(self.end_time);
        format!(
            "{:02}:{:02} - {:02}:{:02}",
            start_mins, start_secs, end_mins, end_secs
        )
    }

    /// Returns the text shortened to at most `max_len` bytes, followed by
    /// `"..."` when anything was cut off.
    ///
    /// `max_len` is a byte budget, not a character count. If it falls in the
    /// middle of a multi-byte UTF-8 character (common with CJK text), the cut
    /// is moved back to the previous character boundary rather than panicking.
    pub fn truncate_text(&self, max_len: usize) -> String {
        if self.text.len() <= max_len {
            return self.text.clone();
        }
        // `max_len < len` here, and offset 0 is always a boundary, so this
        // loop terminates without underflow.
        let mut cut = max_len;
        while !self.text.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &self.text[..cut])
    }
}
/// Interactive terminal picker over the results of a chunk search.
#[allow(dead_code)]
pub struct ChunkSelector {
// Hits from the most recent `search` call; replaced on every search.
chunks: Vec<ChunkEntry>,
// Index into `chunks` of the currently highlighted row.
selected_index: usize,
// Query text shown in the input box; set by `search` and edited by key presses in `run`.
query: String,
// UUID passed to `new`. NOTE(review): not read anywhere in the visible code —
// `search` currently accepts hits regardless of their UUID.
video_uuid: String,
}
#[allow(dead_code)]
impl ChunkSelector {
/// Creates a selector bound to `video_uuid` with no results loaded, an empty
/// query, and the selection index at zero.
pub fn new(video_uuid: &str) -> Self {
    let video_uuid = String::from(video_uuid);
    Self {
        video_uuid,
        query: String::new(),
        selected_index: 0,
        chunks: vec![],
    }
}
/// Runs a semantic search for `query` and replaces the selector's result list.
///
/// The query is embedded through Ollama's `/api/embeddings` endpoint and the
/// resulting vector is posted to Qdrant's points-search endpoint (limit 20,
/// with payload). Both requests are made by spawning the `curl` binary.
///
/// Request bodies are built with `serde_json::json!`, so quotes, backslashes
/// and control characters in `query` are escaped correctly.
///
/// Hits are intentionally *not* filtered by this selector's video UUID: any
/// matching content is listed and the user picks which chunk to play. Hits
/// whose payload has no non-empty `text` are skipped.
///
/// # Returns
/// A clone of the stored results. The list is empty when `query` is empty,
/// when no embedding could be obtained, or when nothing usable matched.
///
/// # Errors
/// Fails only if `curl` cannot be spawned. Unparseable or unexpected
/// responses are treated as "no results".
pub fn search(&mut self, query: &str) -> Result<Vec<ChunkEntry>> {
    // POSTs `body` as JSON to `url` through `curl` and returns the response
    // text (lossily decoded as UTF-8).
    fn post_json(
        url: &str,
        extra_headers: &[String],
        body: &serde_json::Value,
    ) -> Result<String> {
        let payload = body.to_string();
        let mut curl = StdCommand::new("curl");
        curl.args(["-s", url, "-X", "POST"]);
        for header in extra_headers {
            curl.args(["-H", header.as_str()]);
        }
        curl.args([
            "-H",
            "Content-Type: application/json",
            "-d",
            payload.as_str(),
        ]);
        let output = curl.output()?;
        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
    }

    self.query = query.to_string();
    self.chunks.clear();
    self.selected_index = 0;
    if query.is_empty() {
        return Ok(Vec::new());
    }

    // Get embedding from Ollama.
    let embed_request = serde_json::json!({
        "model": MODEL,
        "prompt": format!("search_query: {}", query),
    });
    let embed_text = post_json(
        &format!("{}/api/embeddings", OLLAMA_URL),
        &[],
        &embed_request,
    )?;
    let embedding: Vec<f64> = serde_json::from_str::<serde_json::Value>(&embed_text)
        .ok()
        .and_then(|v| v.get("embedding").cloned())
        .and_then(|e| serde_json::from_value(e).ok())
        .unwrap_or_default();
    if embedding.is_empty() {
        println!("Failed to get embedding for query: {}", query);
        return Ok(Vec::new());
    }

    // The search body does not depend on the collection, so build it once.
    let search_request = serde_json::json!({
        "vector": embedding,
        "limit": 20,
        "with_payload": true,
    });
    let auth_header = [format!("api-key: {}", QDRANT_API_KEY)];

    // Search Qdrant - use momentry_rule1 collection (Rule1 specification).
    let collections = ["momentry_rule1"];
    for collection in collections {
        let url = format!("{}/collections/{}/points/search", QDRANT_URL, collection);
        let response_text = post_json(&url, &auth_header, &search_request)?;

        let response: serde_json::Value = match serde_json::from_str(&response_text) {
            Ok(value) => value,
            Err(_) => continue,
        };
        let hits = match response.get("result").and_then(|r| r.as_array()) {
            Some(hits) => hits,
            None => continue,
        };

        for hit in hits {
            let payload = hit.get("payload");
            // Missing or mistyped payload fields fall back to "" / 0.0.
            let str_field = |key: &str| {
                payload
                    .and_then(|p| p.get(key))
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string()
            };
            let num_field = |key: &str| {
                payload
                    .and_then(|p| p.get(key))
                    .and_then(|v| v.as_f64())
                    .unwrap_or(0.0)
            };

            let text = str_field("text");
            if text.is_empty() {
                continue;
            }
            self.chunks.push(ChunkEntry {
                chunk_id: str_field("chunk_id"),
                start_time: num_field("start_time"),
                end_time: num_field("end_time"),
                text,
                score: hit.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0),
            });
        }

        // Stop at the first collection that produced any results.
        if !self.chunks.is_empty() {
            break;
        }
    }
    Ok(self.chunks.clone())
}
pub fn run(&mut self) -> Result<Option<ChunkEntry>> {
let stdout = io::stdout();
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend)?;
loop {
terminal.draw(|f| self.render(f))?;
match crossterm::event::read() {
Ok(crossterm::event::Event::Key(key)) => match key.code {
crossterm::event::KeyCode::Up => {
if self.selected_index > 0 {
self.selected_index -= 1;
}
}
crossterm::event::KeyCode::Down => {
if self.selected_index < self.chunks.len().saturating_sub(1) {
self.selected_index += 1;
}
}
crossterm::event::KeyCode::Enter => {
let selected = self.chunks.get(self.selected_index).cloned();
terminal.show_cursor()?;
return Ok(selected);
}
crossterm::event::KeyCode::Char(c) => {
if c == 'q' {
terminal.show_cursor()?;
return Ok(None);
}
self.query.push(c);
}
crossterm::event::KeyCode::Backspace => {
self.query.pop();
}
crossterm::event::KeyCode::Esc => {
terminal.show_cursor()?;
return Ok(None);
}
_ => {}
},
Ok(crossterm::event::Event::Resize(_, _)) => {}
_ => {}
}
}
}
fn render(&self, f: &mut Frame) {
let chunks = Layout::default()
.direction(Direction::Vertical)
.constraints([
Constraint::Length(3),
Constraint::Length(3),
Constraint::Min(0),
Constraint::Length(3),
])
.split(f.area());
// Title
let title = Paragraph::new("🔍 Chunk Search - Natural Language Query")
.style(Style::default().fg(Color::Cyan))
.block(Block::default().borders(Borders::ALL).title(" Search "));
f.render_widget(title, chunks[0]);
// Query input
let query_text = if self.query.is_empty() {
"Type to search...".to_string()
} else {
self.query.clone()
};
let query_style = if self.query.is_empty() {
Style::default().fg(Color::DarkGray)
} else {
Style::default().fg(Color::White)
};
let query = Paragraph::new(query_text)
.style(query_style)
.block(Block::default().borders(Borders::ALL).title(" Query "));
f.render_widget(query, chunks[1]);
// Results
if self.chunks.is_empty() {
let no_results = Paragraph::new("No results found. Type to search...")
.style(Style::default().fg(Color::DarkGray))
.block(Block::default().borders(Borders::ALL).title(" Results "));
f.render_widget(no_results, chunks[2]);
} else {
let items: Vec<ListItem> = self
.chunks
.iter()
.enumerate()
.map(|(i, chunk)| {
let style = if i == self.selected_index {
Style::default().fg(Color::Yellow).bg(Color::DarkGray)
} else {
Style::default()
};
let content = Line::from(vec![
Span::raw(format!(
"{} ",
if i == self.selected_index { "" } else { " " }
)),
Span::styled(chunk.format_time_range(), Style::default().fg(Color::Green)),
Span::raw(" "),
Span::raw(chunk.truncate_text(50)),
Span::styled(
format!(" [{:.2}]", chunk.score),
Style::default().fg(Color::Blue),
),
]);
ListItem::new(content).style(style)
})
.collect();
let list = List::new(items)
.block(Block::default().borders(Borders::ALL).title(" Results "))
.highlight_style(Style::default().fg(Color::Yellow));
f.render_widget(list, chunks[2]);
}
// Help text
let help =
Paragraph::new(" [↑/↓] Navigate [Enter] Play from here [Type] Search [q] Quit ")
.style(Style::default().fg(Color::DarkGray))
.block(Block::default().borders(Borders::ALL));
f.render_widget(help, chunks[3]);
}
}