feat: Initial v0.9 release with API Key authentication

## v0.9.20260325_144654

### Features

- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes

- `get_processor_results_by_job` column mapping

Co-authored-by: OpenCode
2026-03-25 14:52:51 +08:00
parent 47e86b696f
commit 383201cacd
193 changed files with 40268 additions and 422 deletions
--- a/src/core/embedding/comic_embed.rs
+++ b/src/core/embedding/comic_embed.rs
@@ -1,66 +1,80 @@
-use anyhow::Result;
+use anyhow::{Context, Result};
+use reqwest::Client;
+use serde::{Deserialize, Serialize};

 pub struct Embedder {
-    model_path: String,
+    model: String,
+    client: Client,
+    base_url: String,
+}
+
+#[derive(Serialize)]
+struct EmbedRequest {
+    model: String,
+    prompt: String,
+}
+
+#[derive(Deserialize, Debug)]
+struct EmbedResponse {
+    embedding: Vec<f32>,
 }

 impl Embedder {
-    pub fn new(model_path: String) -> Self {
-        Self { model_path }
+    pub fn new(model: String) -> Self {
+        Self {
+            model,
+            client: Client::new(),
+            base_url: "http://localhost:11434".to_string(),
+        }
    }

    pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
-        // TODO: Implement comic-embed-text model loading and inference
-        // This is a placeholder that generates a random 768-dimensional vector
-        //
-        // Implementation would use:
-        // - candle (Rust ML framework) or
-        // - ort (ONNX Runtime) to run the model
-        //
-        // Example with ort:
-        // let session = Session::builder()?
-        //     .with_execution_providers([CPUExecutionProvider::default().build()])?
-        //     .with_model_from_file(&self.model_path)?;
-        //
-        // // Preprocess text to tensor
-        // let input = preprocess_text(text);
-        //
-        // // Run inference
-        // let output = session.run(vec![input])?;
-        //
-        // // Extract embeddings
-        // let embedding = output[0].view()[..768].to_vec();
+        self.embed_with_prefix(text, "").await
+    }

-        let dim = 768;
-        let mut embedding = vec![0.0f32; dim];
+    pub async fn embed_document(&self, text: &str) -> Result<Vec<f32>> {
+        self.embed_with_prefix(text, "search_document: ").await
+    }

-        // Simple hash-based embedding for now
-        let hash = self.hash_text(text);
-        for i in 0..dim {
-            embedding[i] = ((hash >> i) & 1) as f32;
+    pub async fn embed_query(&self, text: &str) -> Result<Vec<f32>> {
+        self.embed_with_prefix(text, "search_query: ").await
+    }
+
+    async fn embed_with_prefix(&self, text: &str, prefix: &str) -> Result<Vec<f32>> {
+        let url = format!("{}/api/embeddings", self.base_url);
+        let prompt = format!("{}{}", prefix, text);
+
+        let response = self
+            .client
+            .post(&url)
+            .json(&EmbedRequest {
+                model: self.model.clone(),
+                prompt,
+            })
+            .send()
+            .await
+            .context("Failed to send embedding request to Ollama")?;
+
+        if !response.status().is_success() {
+            let status = response.status();
+            let body = response.text().await.unwrap_or_default();
+            anyhow::bail!("Ollama API error ({}): {}", status, body);
        }

-        // Normalize
-        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
-        if norm > 0.0 {
-            for v in &mut embedding {
-                *v /= norm;
-            }
-        }
+        let result: EmbedResponse = response
+            .json()
+            .await
+            .context("Failed to parse Ollama response")?;

-        Ok(embedding)
+        Ok(result.embedding)
    }

    pub async fn embed_chunk_content(&self, chunk: &crate::core::chunk::Chunk) -> Result<Vec<f32>> {
        let text = serde_json::to_string(&chunk.content)?;
-        self.embed_text(&text).await
+        self.embed_document(&text).await
    }

-    fn hash_text(&self, text: &str) -> u64 {
-        use std::collections::hash_map::DefaultHasher;
-        use std::hash::{Hash, Hasher};
-        let mut hasher = DefaultHasher::new();
-        text.hash(&mut hasher);
-        hasher.finish()
+    pub fn dimension(&self) -> usize {
+        768
    }
 }