Files
momentry_core/test_chinese_embed.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

57 lines
1.9 KiB
Python

import json
import requests
import sys
def get_embedding(text, prefix="search_query: ", timeout=60):
    """Request an embedding vector for ``text`` from the local embeddings endpoint.

    The prompt sent to the server is ``prefix`` immediately followed by
    ``text``; the model name is fixed to ``nomic-embed-text-v2-moe:latest``.

    Args:
        text: The text to embed.
        prefix: String prepended to ``text`` when building the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value of the ``"embedding"`` field of the JSON response, or
        ``None`` when the request cannot be completed, the server replies
        with a non-200 status, or the response carries no ``"embedding"``
        field. Failures are reported on stdout.
    """
    url = "http://localhost:11434/api/embeddings"
    payload = {"model": "nomic-embed-text-v2-moe:latest", "prompt": f"{prefix}{text}"}

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures (refused connection, timeout, ...) the
        # same way as HTTP errors: print and signal failure with None.
        print(f"Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    data = response.json()
    embedding = data.get("embedding")
    if embedding is None:
        print(f"Error: response has no 'embedding' field - {response.text}")
    return embedding
def search_qdrant(vector, limit=10, timeout=60):
    """Run a vector search against the ``momentry_rule1`` Qdrant collection.

    Args:
        vector: Query vector to search with.
        limit: Maximum number of points to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the search response, or ``None`` when the
        request cannot be completed or the server replies with a non-200
        status. Failures are reported on stdout.
    """
    url = "http://127.0.0.1:6333/collections/momentry_rule1/points/search"
    headers = {
        "Content-Type": "application/json",
        # NOTE(review): credential is hard-coded in the script; consider
        # loading it from configuration or the environment instead.
        "api-key": "Test3200Test3200Test3200",
    }
    payload = {"vector": vector, "limit": limit, "with_payload": True}

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors.
        print(f"Qdrant error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Qdrant error: {response.status_code} - {response.text}")
        return None
    return response.json()
if __name__ == "__main__":
    # Smoke test: embed a Chinese query string, then search Qdrant with it.
    query = "檔案傳輸"
    print(f"Testing embedding for: '{query}'")

    embedding = get_embedding(query)
    if not embedding:
        print("Failed to get embedding")
    else:
        print(f"Vector length: {len(embedding)}")
        print(f"First 5 values: {embedding[:5]}")

        # Look up the nearest stored points for the fresh embedding.
        print("\nSearching Qdrant...")
        response = search_qdrant(embedding, limit=5)
        if not response:
            print("No results")
        else:
            hits = response["result"]
            print(f"Found {len(hits)} results")
            for rank, hit in enumerate(hits, start=1):
                fields = hit.get("payload", {})
                snippet = fields.get("text", "No text")
                chunk_id = fields.get("chunk_id", "N/A")
                point_uuid = fields.get("uuid", "N/A")
                similarity = hit.get("score", 0)
                print(f"{rank}. Score: {similarity:.4f}, UUID: {point_uuid}, Chunk: {chunk_id}")
                print(f" Text: {snippet[:100]}...")