Files
momentry_core/scripts/compare_search.py
accusys 383201cacd feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
2026-03-25 14:53:41 +08:00

132 lines
3.4 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Search comparison script for PostgreSQL, MongoDB, and Qdrant
"""
import os
import time

import requests
# Test queries: each string is used verbatim as the ILIKE substring, as the
# embedding prompt, and as the key of the per-query result dictionaries.
# Keep them free of leading/trailing whitespace: a stray space becomes part of
# the ILIKE pattern and of the text that gets embedded.
TEST_QUERIES = [
    "Charade",
    "Paris",
    "Audrey Hepburn",
    "Cary Grant",
]
# PostgreSQL connection settings, passed as keyword arguments to
# psycopg2.connect(). Each value can be overridden through the standard libpq
# environment variable; the fallbacks are the original local-development
# values, so behaviour is unchanged when no variable is set.
# NOTE(review): the fallback password is a credential committed to source
# control -- prefer supplying PGPASSWORD from the environment.
POSTGRES_CONFIG = {
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", "5432")),
    "user": os.environ.get("PGUSER", "accusys"),
    "password": os.environ.get("PGPASSWORD", "Test3200"),
    "database": os.environ.get("PGDATABASE", "momentry"),
}
def test_postgres_text_search():
    """Time a case-insensitive substring search in PostgreSQL for each test query.

    For every entry in ``TEST_QUERIES`` this runs one ``ILIKE '%<query>%'``
    lookup (``LIMIT 10``) on ``content->>'text'`` of the ``chunks`` rows whose
    ``chunk_type`` is ``'sentence'``, and prints one line with the timing.

    Returns:
        dict: maps each query string to
        ``{"method": "PostgreSQL ILIKE", "ms": <float>, "rows": <int>}``.
        ``ms`` covers ``execute`` plus ``fetchall`` in milliseconds, rounded
        to two decimals; ``rows`` is the number of rows fetched.

    Raises:
        psycopg2.Error: if connecting or running a query fails. The
            connection is closed before the exception propagates.
    """
    # Function-scope import: the module can be loaded without psycopg2.
    import psycopg2

    results = {}
    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        # The cursor context manager closes the cursor on exit, including
        # when a query raises.
        with conn.cursor() as cur:
            for query in TEST_QUERIES:
                # perf_counter() is monotonic and high-resolution, unlike the
                # wall clock returned by time.time().
                start = time.perf_counter()
                cur.execute(
                    "SELECT chunk_id, content->>'text' FROM chunks WHERE chunk_type = 'sentence' AND content->>'text' ILIKE %s LIMIT 10",
                    # Passed as a bound parameter; note that % and _ inside
                    # the query text would still act as ILIKE wildcards.
                    (f"%{query}%",),
                )
                rows = cur.fetchall()
                elapsed = (time.perf_counter() - start) * 1000
                results[query] = {
                    "method": "PostgreSQL ILIKE",
                    "ms": round(elapsed, 2),
                    "rows": len(rows),
                }
                print(f"PostgreSQL text search '{query}': {elapsed:.2f}ms, {len(rows)} rows")
    finally:
        # Always release the connection, even if a query failed.
        conn.close()
    return results
def test_qdrant_vector_search():
    """Time a vector search in Qdrant for each test query.

    For every entry in ``TEST_QUERIES`` this first requests an embedding from
    the local Ollama endpoint (model ``nomic-embed-text``), then posts that
    vector to the ``AccusysDB`` collection's search endpoint with a limit of
    10. Only the Qdrant request is timed; embedding generation is excluded.

    Returns:
        dict: maps each query string to
        ``{"method": "Qdrant HNSW", "ms": <float>, "rows": <int>}``.
        ``ms`` is the duration of the search request in milliseconds, rounded
        to two decimals; ``rows`` is the length of the response's ``result``
        list (0 when it is missing or null).

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx response from either service.
        KeyError: if the embedding response has no ``"embedding"`` field.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive
    # service. The embedding call gets more headroom than the search call.
    embed_timeout = 60
    search_timeout = 30

    results = {}
    for query in TEST_QUERIES:
        # Get the query embedding from Ollama.
        embed_resp = requests.post(
            "http://localhost:11434/api/embeddings",
            json={"model": "nomic-embed-text", "prompt": query},
            timeout=embed_timeout,
        )
        # Surface HTTP errors here instead of as an opaque KeyError below.
        embed_resp.raise_for_status()
        embedding = embed_resp.json()["embedding"]

        # Search in Qdrant (using the AccusysDB collection).
        # NOTE(review): the API key is hard-coded; consider moving it out of
        # source control.
        start = time.perf_counter()
        resp = requests.post(
            "http://localhost:6333/collections/AccusysDB/points/search",
            headers={"api-key": "Test3200Test3200Test3200"},
            json={"vector": embedding, "limit": 10},
            timeout=search_timeout,
        )
        elapsed = (time.perf_counter() - start) * 1000
        # A failed search must not be reported as a fast query with 0 rows.
        resp.raise_for_status()
        data = resp.json()
        # "or []" also covers a response whose "result" is null.
        result_count = len(data.get("result") or [])
        results[query] = {
            "method": "Qdrant HNSW",
            "ms": round(elapsed, 2),
            "rows": result_count,
        }
        print(f"Qdrant vector search '{query}': {elapsed:.2f}ms, {result_count} rows")
    return results
def _count_sentence_chunks():
    """Return the number of ``chunks`` rows whose ``chunk_type`` is 'sentence'."""
    # Function-scope import: the module can be loaded without psycopg2.
    import psycopg2

    conn = psycopg2.connect(**POSTGRES_CONFIG)
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM chunks WHERE chunk_type = 'sentence'")
            return cur.fetchone()[0]
    finally:
        # Release the connection even if the count query fails.
        conn.close()


def main():
    """Run both search benchmarks and print a per-query comparison table.

    Prints the number of sentence chunks, runs the PostgreSQL text-search
    test followed by the Qdrant vector-search test, then prints one summary
    row per entry in ``TEST_QUERIES`` with each backend's latency and row
    count. A backend with no result for a query is shown as 0 ms / 0 rows.

    Returns:
        None
    """
    print("=" * 60)
    print("Search Performance Comparison Test")
    print("=" * 60)

    # Get chunk count
    count = _count_sentence_chunks()
    print(f"\nTotal sentence chunks: {count}")

    print("\n" + "=" * 60)
    print("A. Text Search Test (Priority a)")
    print("=" * 60)
    pg_results = test_postgres_text_search()

    print("\n" + "=" * 60)
    print("B. Vector Search Test (Priority b)")
    print("=" * 60)
    qdrant_results = test_qdrant_vector_search()

    print("\n" + "=" * 60)
    print("Summary")
    print("=" * 60)
    print(f"\n{'Query':<20} | {'PostgreSQL':<25} | {'Qdrant':<25}")
    print("-" * 70)
    for query in TEST_QUERIES:
        pg = pg_results.get(query, {})
        qd = qdrant_results.get(query, {})
        # Build each cell first so it can be padded to the same width as the
        # header columns; otherwise the table rows do not line up.
        pg_cell = f"{pg.get('ms', 0):.1f}ms ({pg.get('rows', 0)} rows)"
        qd_cell = f"{qd.get('ms', 0):.1f}ms ({qd.get('rows', 0)} rows)"
        print(f"{query:<20} | {pg_cell:<25} | {qd_cell:<25}")
# Run the comparison only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()