Files
momentry_core/migrations/009_data_preservation_indexes.sql
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

67 lines
2.7 KiB
SQL

-- Phase 1: Data Preservation
-- 1. Add pose_results column to frames table
-- 2. Add GIN indexes for JSONB search on frames table
-- 3. Add GIN indexes for search optimization on existing columns
-- ============================================================
-- 1. Add pose_results column to frames table
-- ============================================================
ALTER TABLE frames
ADD COLUMN IF NOT EXISTS pose_results JSONB;
-- ============================================================
-- 2. GIN indexes for frames JSONB columns (enable JSONB search)
-- ============================================================
-- YOLO objects search: frames.yolo_objects @> '[{"class": "person"}]'
CREATE INDEX IF NOT EXISTS idx_frames_yolo_gin
ON frames USING GIN(yolo_objects);
-- OCR text search: frames.ocr_results @> '{"texts": [...]}'
CREATE INDEX IF NOT EXISTS idx_frames_ocr_gin
ON frames USING GIN(ocr_results);
-- Face results search: frames.face_results @> '{"faces": [...]}'
CREATE INDEX IF NOT EXISTS idx_frames_face_gin
ON frames USING GIN(face_results);
-- Pose results search: frames.pose_results @> '{"persons": [...]}'
CREATE INDEX IF NOT EXISTS idx_frames_pose_gin
ON frames USING GIN(pose_results);
-- ============================================================
-- 3. GIN index on chunks.content (currently exists but verify)
-- ============================================================
-- Note: idx_chunks_content_gin should already exist from earlier migrations.
-- This ensures it's present for content-based searches.
CREATE INDEX IF NOT EXISTS idx_chunks_content_gin
ON chunks USING GIN(content);
-- ============================================================
-- 4. Add text_content to ASRX trace chunks (backfill)
-- ASRX chunks stored as trace_asrx_* have text in content
-- but NULL text_content, making them invisible to BM25.
-- ============================================================
UPDATE chunks
SET text_content = content->>'text'
WHERE chunk_type = 'trace'
AND chunk_id LIKE 'trace_asrx_%'
AND text_content IS NULL
AND content ? 'text';
-- ============================================================
-- 5. Add text_content to YOLO trace chunks (backfill)
-- Concatenate object class names for BM25 search.
-- ============================================================
-- This is handled in the worker code for new imports.
-- For existing data, we can extract object names:
-- (commented out as it requires JSON array iteration)
-- UPDATE chunks
-- SET text_content = (
-- SELECT string_agg(obj->>'class', ' ')
-- FROM jsonb_array_elements(content->'objects') AS obj
-- )
-- WHERE chunk_type = 'trace'
-- AND chunk_id LIKE 'trace_yolo_%'
-- AND text_content IS NULL;