update: pipeline, search, clip, embedding fixes

2026-05-17 19:46:35 +08:00
parent eec2eea880
commit 3164a65554
36 changed files with 4313 additions and 4061 deletions
@@ -41,22 +41,24 @@ async fn translate_text(
        req.target_language, req.text
    );

-    // Call Ollama API
+    // Call Gemma4 via llama.cpp (port 8082, OpenAI-compatible API)
    let client = Client::new();
-    let ollama_url = "http://localhost:11434/api/generate";
-
-    // Using qwen3:latest which is available locally
-    let model = "qwen3:latest".to_string();
+    let llm_url = "http://localhost:8082/v1/chat/completions";
+    let model = "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string();

    let body = serde_json::json!({
        "model": model,
-        "prompt": prompt,
-        "system": system_prompt,
-        "stream": false
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": prompt}
+        ],
+        "stream": false,
+        "max_tokens": 1024,
+        "temperature": 0.1
    });

    let response = client
-        .post(ollama_url)
+        .post(llm_url)
        .json(&body)
        .send()
        .await
@@ -67,15 +69,19 @@ async fn translate_text(
            )
        })?;

-    let ollama_resp: serde_json::Value = response.json().await.map_err(|e| {
+    let llm_resp: serde_json::Value = response.json().await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to parse LLM response: {}", e),
        )
    })?;

-    let translated_text = ollama_resp
-        .get("response")
+    let translated_text = llm_resp
+        .get("choices")
+        .and_then(|c| c.as_array())
+        .and_then(|c| c.first())
+        .and_then(|c| c.get("message"))
+        .and_then(|m| m.get("content"))
        .and_then(|v| v.as_str())
        .unwrap_or("Translation failed")
        .to_string();