feat: Initial v0.9 release with API Key authentication

## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- Fix column mapping in `get_processor_results_by_job`

Co-authored-by: OpenCode
This commit is contained in:
accusys
2026-03-25 14:52:51 +08:00
parent 47e86b696f
commit 383201cacd
193 changed files with 40268 additions and 422 deletions

View File

@@ -1,66 +1,80 @@
use anyhow::Result;
use anyhow::{Context, Result};
use reqwest::Client;
use serde::{Deserialize, Serialize};
/// Handle used to request text embeddings from an HTTP embeddings endpoint.
///
/// NOTE(review): this listing appears to interleave the removed and added
/// sides of a diff. `model_path` is only set by the older single-field
/// constructor, so it is presumably the removed field — confirm against the
/// real file before relying on it.
pub struct Embedder {
// Model file path; only referenced by the older `new(model_path)` variant.
model_path: String,
// Model name sent in the `model` field of every embedding request.
model: String,
// HTTP client used to send embedding requests.
client: Client,
// Base URL to which "/api/embeddings" is appended when building the request URL.
base_url: String,
}
/// JSON body posted to the embeddings endpoint.
#[derive(Serialize)]
struct EmbedRequest {
/// Name of the model asked to produce the embedding.
model: String,
/// Text to embed, already including any task prefix.
prompt: String,
}
/// JSON body parsed from a successful embeddings response.
///
/// Only the `embedding` field is declared here.
#[derive(Deserialize, Debug)]
struct EmbedResponse {
/// The embedding vector returned by the server.
embedding: Vec<f32>,
}
impl Embedder {
pub fn new(model_path: String) -> Self {
Self { model_path }
/// Creates an embedder that requests embeddings from the given `model`.
///
/// A new `reqwest::Client` is constructed, and the base URL is fixed to
/// `http://localhost:11434` (presumably a local Ollama server, judging by the
/// error messages used when sending requests — confirm).
pub fn new(model: String) -> Self {
Self {
model,
client: Client::new(),
base_url: "http://localhost:11434".to_string(),
}
}
/// Embeds `text` unchanged by delegating to `embed_with_prefix` with an empty prefix.
///
/// NOTE(review): the TODO block inside the body describes a placeholder
/// implementation (a generated 768-dimensional vector, candle/ort inference)
/// that the code below does not perform. It looks like leftover text from a
/// previous version and should probably be removed — confirm.
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
// TODO: Implement comic-embed-text model loading and inference
// This is a placeholder that generates a random 768-dimensional vector
//
// Implementation would use:
// - candle (Rust ML framework) or
// - ort (ONNX Runtime) to run the model
//
// Example with ort:
// let session = Session::builder()?
// .with_execution_providers([CPUExecutionProvider::default().build()])?
// .with_model_from_file(&self.model_path)?;
//
// // Preprocess text to tensor
// let input = preprocess_text(text);
//
// // Run inference
// let output = session.run(vec![input])?;
//
// // Extract embeddings
// let embedding = output[0].view()[..768].to_vec();
self.embed_with_prefix(text, "").await
}
let dim = 768;
let mut embedding = vec![0.0f32; dim];
/// Embeds `text` with the `"search_document: "` prefix prepended.
///
/// Errors are whatever `embed_with_prefix` returns.
pub async fn embed_document(&self, text: &str) -> Result<Vec<f32>> {
self.embed_with_prefix(text, "search_document: ").await
}
// Simple hash-based embedding for now
let hash = self.hash_text(text);
for i in 0..dim {
embedding[i] = ((hash >> i) & 1) as f32;
/// Embeds `text` with the `"search_query: "` prefix prepended.
///
/// Errors are whatever `embed_with_prefix` returns.
pub async fn embed_query(&self, text: &str) -> Result<Vec<f32>> {
self.embed_with_prefix(text, "search_query: ").await
}
/// Requests an embedding for `prefix` followed by `text`.
///
/// POSTs a JSON `EmbedRequest` (this embedder's model plus the prefixed
/// prompt) to `{base_url}/api/embeddings` and returns the `embedding` field
/// of the parsed `EmbedResponse`.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the response status is
/// not a success (the status and response body are included in the message),
/// or if the response body cannot be parsed as `EmbedResponse`.
///
/// NOTE(review): the "Normalize" section and the `Ok(embedding)` line below
/// use a local named `embedding` that is never defined in this function, and
/// there are two consecutive `Ok(...)` tail expressions. These look like
/// removed-side lines from a diff of the previous implementation — confirm
/// and drop them from the real file.
async fn embed_with_prefix(&self, text: &str, prefix: &str) -> Result<Vec<f32>> {
let url = format!("{}/api/embeddings", self.base_url);
let prompt = format!("{}{}", prefix, text);
let response = self
.client
.post(&url)
.json(&EmbedRequest {
model: self.model.clone(),
prompt,
})
.send()
.await
.context("Failed to send embedding request to Ollama")?;
// Non-success status: report it together with the response body. If the
// body itself cannot be read, an empty string is used instead.
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
anyhow::bail!("Ollama API error ({}): {}", status, body);
}
// NOTE(review): `embedding` is not defined in this function — see header note.
// Normalize
let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
if norm > 0.0 {
for v in &mut embedding {
*v /= norm;
}
}
let result: EmbedResponse = response
.json()
.await
.context("Failed to parse Ollama response")?;
Ok(embedding)
Ok(result.embedding)
}
/// Serializes `chunk.content` to a JSON string and embeds that string.
///
/// Returns an error if serialization fails or if the embedding call fails.
///
/// NOTE(review): both an `embed_text` call and an `embed_document` call
/// appear as tail expressions below, which is not valid Rust. One of them is
/// presumably the removed side of a diff (likely `embed_text`) — confirm
/// which call the real file uses.
pub async fn embed_chunk_content(&self, chunk: &crate::core::chunk::Chunk) -> Result<Vec<f32>> {
let text = serde_json::to_string(&chunk.content)?;
self.embed_text(&text).await
self.embed_document(&text).await
}
fn hash_text(&self, text: &str) -> u64 {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
text.hash(&mut hasher);
hasher.finish()
/// Returns the constant 768, presumably the expected embedding vector length.
///
/// NOTE(review): this value is hard-coded and is not checked against the
/// length of the vectors actually returned by the embeddings endpoint.
pub fn dimension(&self) -> usize {
768
}
}