Files
momentry_core/migrations/023_extend_identities_embeddings.sql
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

331 lines
11 KiB
SQL

-- Migration 023: Extend identities table for multi-dimensional embeddings
-- Date: 2026-04-28
-- Purpose: Add identity_type, source, status, face_embedding, voice_embedding, identity_embedding, reference_data
-- Reference: docs_v1.0/ARCHITECTURE/MOMENTRY_CORE_ARCHITECTURE_V2.md
-- Strategy: Add columns to existing table (preserve existing data)
-- ============================================
-- Part 0: Ensure uuid column exists (primary key alternative)
-- ============================================
-- public schema: add uuid if not exists
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS uuid UUID DEFAULT gen_random_uuid();
-- dev schema: uuid already exists
-- ============================================
-- Part 1: Rename embedding → face_embedding (if exists)
-- ============================================
-- public schema
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_schema = 'public'
AND table_name = 'identities'
AND column_name = 'embedding'
) THEN
-- Rename column
ALTER TABLE public.identities RENAME COLUMN embedding TO face_embedding;
-- Change dimension to 512 (if currently 768)
-- Note: We cannot easily change vector dimension, so we keep as is and will fix later
-- For now, just add comment
EXECUTE 'COMMENT ON COLUMN public.identities.face_embedding IS ''InsightFace 512-dim ArcFace embedding (or 768 legacy)''';
END IF;
END $$;
-- dev schema
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_schema = 'dev'
AND table_name = 'identities'
AND column_name = 'embedding'
) THEN
-- Rename column
ALTER TABLE dev.identities RENAME COLUMN embedding TO face_embedding;
-- Comment
EXECUTE 'COMMENT ON COLUMN dev.identities.face_embedding IS ''InsightFace 512-dim ArcFace embedding''';
END IF;
END $$;
-- ============================================
-- Part 2: Add identity_type VARCHAR(30)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS identity_type VARCHAR(30) DEFAULT 'people';
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS identity_type VARCHAR(30) DEFAULT 'people';
COMMENT ON COLUMN public.identities.identity_type IS
'Identity type: people, brand, object, concept, logo, symbol, scene, sound, animal, environmental';
COMMENT ON COLUMN dev.identities.identity_type IS
'Identity type: people, brand, object, concept, logo, symbol, scene, sound, animal, environmental';
-- ============================================
-- Part 3: Add source VARCHAR(20)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS source VARCHAR(20) DEFAULT 'manual';
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS source VARCHAR(20) DEFAULT 'manual';
COMMENT ON COLUMN public.identities.source IS
'Identity source: manual, tmdb, agent_suggested, ai_detection';
COMMENT ON COLUMN dev.identities.source IS
'Identity source: manual, tmdb, agent_suggested, ai_detection';
-- ============================================
-- Part 4: Add status VARCHAR(20)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'pending';
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'pending';
COMMENT ON COLUMN public.identities.status IS
'Identity status: pending, confirmed, skipped';
COMMENT ON COLUMN dev.identities.status IS
'Identity status: pending, confirmed, skipped';
-- ============================================
-- Part 5: Add voice_embedding VECTOR(192)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS voice_embedding VECTOR(192);
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS voice_embedding VECTOR(192);
COMMENT ON COLUMN public.identities.voice_embedding IS
'ECAPA-TDNN 192-dim voice embedding';
COMMENT ON COLUMN dev.identities.voice_embedding IS
'ECAPA-TDNN 192-dim voice embedding';
-- ============================================
-- Part 6: Add identity_embedding VECTOR(768)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS identity_embedding VECTOR(768);
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS identity_embedding VECTOR(768);
COMMENT ON COLUMN public.identities.identity_embedding IS
'CLIP ViT-L/14 768-dim embedding for logo/symbol/object identity';
COMMENT ON COLUMN dev.identities.identity_embedding IS
'CLIP ViT-L/14 768-dim embedding for logo/symbol/object identity';
-- ============================================
-- Part 7: Add reference_data JSONB (1-to-many embeddings)
-- ============================================
-- public schema
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS reference_data JSONB DEFAULT '{}';
-- dev schema
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS reference_data JSONB DEFAULT '{}';
COMMENT ON COLUMN public.identities.reference_data IS
'JSONB: {face_embeddings[], voice_embeddings[], identity_embeddings[], sound_embeddings[], image_urls[]}';
COMMENT ON COLUMN dev.identities.reference_data IS
'JSONB: {face_embeddings[], voice_embeddings[], identity_embeddings[], sound_embeddings[], image_urls[]}';
-- ============================================
-- Part 8: Add created_at and updated_at (if not exists)
-- ============================================
-- public schema: add created_at
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT NOW();
-- public schema: add updated_at
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
-- dev schema: add updated_at (created_at already exists)
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
-- ============================================
-- Part 9: Add TMDB integration fields
-- ============================================
-- TMDB specific fields
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS tmdb_id INTEGER;
ALTER TABLE public.identities
ADD COLUMN IF NOT EXISTS tmdb_profile TEXT;
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS tmdb_id INTEGER;
ALTER TABLE dev.identities
ADD COLUMN IF NOT EXISTS tmdb_profile TEXT;
COMMENT ON COLUMN public.identities.tmdb_id IS
'TMDB person ID';
COMMENT ON COLUMN dev.identities.tmdb_id IS
'TMDB person ID';
COMMENT ON COLUMN public.identities.tmdb_profile IS
'TMDB profile image URL';
COMMENT ON COLUMN dev.identities.tmdb_profile IS
'TMDB profile image URL';
-- ============================================
-- Part 10: Create vector indexes
-- ============================================
-- face_embedding index
CREATE INDEX IF NOT EXISTS idx_identities_face_embedding
ON public.identities USING ivfflat (face_embedding vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_dev_identities_face_embedding
ON dev.identities USING ivfflat (face_embedding vector_cosine_ops)
WITH (lists = 100);
-- voice_embedding index
CREATE INDEX IF NOT EXISTS idx_identities_voice_embedding
ON public.identities USING ivfflat (voice_embedding vector_cosine_ops)
WITH (lists = 50);
CREATE INDEX IF NOT EXISTS idx_dev_identities_voice_embedding
ON dev.identities USING ivfflat (voice_embedding vector_cosine_ops)
WITH (lists = 50);
-- identity_embedding index
CREATE INDEX IF NOT EXISTS idx_identities_identity_embedding
ON public.identities USING ivfflat (identity_embedding vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_dev_identities_identity_embedding
ON dev.identities USING ivfflat (identity_embedding vector_cosine_ops)
WITH (lists = 100);
-- reference_data JSONB index (GIN)
CREATE INDEX IF NOT EXISTS idx_identities_reference_data
ON public.identities USING GIN (reference_data);
CREATE INDEX IF NOT EXISTS idx_dev_identities_reference_data
ON dev.identities USING GIN (reference_data);
-- uuid index
CREATE INDEX IF NOT EXISTS idx_identities_uuid
ON public.identities (uuid);
CREATE INDEX IF NOT EXISTS idx_dev_identities_uuid
ON dev.identities (uuid);
-- ============================================
-- Part 11: Add identity_type check constraint
-- ============================================
-- Update identity_type constraint to include new types
ALTER TABLE public.identities
DROP CONSTRAINT IF EXISTS identities_identity_type_check;
ALTER TABLE public.identities
ADD CONSTRAINT identities_identity_type_check
CHECK (
identity_type IN (
'people', 'brand', 'object', 'concept', 'logo', 'symbol',
'scene', 'sound', 'animal', 'environmental'
)
);
ALTER TABLE dev.identities
DROP CONSTRAINT IF EXISTS identities_identity_type_check;
ALTER TABLE dev.identities
ADD CONSTRAINT identities_identity_type_check
CHECK (
identity_type IN (
'people', 'brand', 'object', 'concept', 'logo', 'symbol',
'scene', 'sound', 'animal', 'environmental'
)
);
-- ============================================
-- Part 12: Drop old embedding index (if exists)
-- ============================================
DROP INDEX IF EXISTS public.idx_identities_embedding;
DROP INDEX IF EXISTS dev.idx_identities_embedding;
-- ============================================
-- Verification
-- ============================================
-- Verify table structure
DO $$
DECLARE
col_count INTEGER;
BEGIN
SELECT COUNT(*) INTO col_count
FROM information_schema.columns
WHERE table_schema = 'public' AND table_name = 'identities'
AND column_name IN (
'uuid', 'identity_type', 'source', 'status',
'face_embedding', 'voice_embedding', 'identity_embedding',
'reference_data', 'tmdb_id', 'tmdb_profile',
'created_at', 'updated_at'
);
IF col_count < 12 THEN
RAISE NOTICE 'Migration 023: Some columns missing in public.identities (count=%, expected=12)', col_count;
ELSE
RAISE NOTICE 'Migration 023: All columns added successfully to public.identities';
END IF;
SELECT COUNT(*) INTO col_count
FROM information_schema.columns
WHERE table_schema = 'dev' AND table_name = 'identities'
AND column_name IN (
'uuid', 'identity_type', 'source', 'status',
'face_embedding', 'voice_embedding', 'identity_embedding',
'reference_data', 'tmdb_id', 'tmdb_profile',
'created_at', 'updated_at'
);
IF col_count < 12 THEN
RAISE NOTICE 'Migration 023: Some columns missing in dev.identities (count=%, expected=12)', col_count;
ELSE
RAISE NOTICE 'Migration 023: All columns added successfully to dev.identities';
END IF;
END $$;