Files
momentry_core/migrations/007_person_identity_tables.sql
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

328 lines
11 KiB
PL/PgSQL

-- ================================================================
-- Migration 007: Person Identity Integration Tables
-- Version: 007
-- Date: 2026-04-09
-- Description: Add tables for person identity integration
-- Links face recognition and speaker diarization
-- Enables person tracking across video chunks
-- ================================================================
-- 7.1: Create person_identities table
CREATE TABLE IF NOT EXISTS person_identities (
id SERIAL PRIMARY KEY,
person_id VARCHAR(255) NOT NULL UNIQUE,
-- Identity associations
face_identity_id INTEGER REFERENCES face_identities(id) ON DELETE SET NULL,
speaker_id VARCHAR(64), -- SPEAKER_00, SPEAKER_01, etc.
-- Association info
video_uuid VARCHAR(255) NOT NULL,
confidence DOUBLE PRECISION DEFAULT 0.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
-- Metadata
name VARCHAR(255), -- Person name (manually annotated)
metadata JSONB DEFAULT '{}'::jsonb,
-- Time tracking
first_appearance_time DOUBLE PRECISION,
last_appearance_time DOUBLE PRECISION,
total_appearance_duration DOUBLE PRECISION DEFAULT 0.0,
appearance_count INTEGER DEFAULT 0,
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
is_confirmed BOOLEAN DEFAULT FALSE, -- User-confirmed identity
-- Constraints
CONSTRAINT unique_person_identity UNIQUE (video_uuid, face_identity_id, speaker_id),
CONSTRAINT valid_time_range CHECK (
first_appearance_time IS NULL OR
last_appearance_time IS NULL OR
last_appearance_time >= first_appearance_time
)
);
-- 7.2: Create person_appearances table
CREATE TABLE IF NOT EXISTS person_appearances (
id SERIAL PRIMARY KEY,
person_id VARCHAR(255) NOT NULL REFERENCES person_identities(person_id) ON DELETE CASCADE,
-- Appearance info
video_uuid VARCHAR(255) NOT NULL,
start_time DOUBLE PRECISION NOT NULL CHECK (start_time >= 0),
end_time DOUBLE PRECISION NOT NULL CHECK (end_time >= 0),
duration DOUBLE PRECISION NOT NULL CHECK (duration > 0),
-- Source references
face_detection_id INTEGER REFERENCES face_detections(id) ON DELETE SET NULL,
asrx_segment_start DOUBLE PRECISION, -- Reference to ASRX segment
asrx_segment_end DOUBLE PRECISION,
-- Metadata
confidence DOUBLE PRECISION DEFAULT 0.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
metadata JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT valid_appearance_time CHECK (end_time > start_time),
CONSTRAINT valid_duration CHECK (end_time - start_time = duration)
);
-- 7.3: Create indexes for performance
-- Person identities indexes
CREATE INDEX IF NOT EXISTS idx_person_identities_video_uuid
ON person_identities(video_uuid);
CREATE INDEX IF NOT EXISTS idx_person_identities_face
ON person_identities(face_identity_id)
WHERE face_identity_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_person_identities_speaker
ON person_identities(speaker_id)
WHERE speaker_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_person_identities_name
ON person_identities(name)
WHERE name IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_person_identities_confirmed
ON person_identities(is_confirmed)
WHERE is_confirmed = TRUE;
-- Person appearances indexes
CREATE INDEX IF NOT EXISTS idx_person_appearances_person
ON person_appearances(person_id);
CREATE INDEX IF NOT EXISTS idx_person_appearances_video
ON person_appearances(video_uuid);
CREATE INDEX IF NOT EXISTS idx_person_appearances_time
ON person_appearances(video_uuid, start_time, end_time);
CREATE INDEX IF NOT EXISTS idx_person_appearances_face
ON person_appearances(face_detection_id)
WHERE face_detection_id IS NOT NULL;
-- 7.4: Create function to update person appearance statistics
CREATE OR REPLACE FUNCTION update_person_appearance_stats(p_person_id VARCHAR(255))
RETURNS VOID AS $$
BEGIN
UPDATE person_identities
SET
appearance_count = (
SELECT COUNT(*)
FROM person_appearances
WHERE person_id = p_person_id
),
total_appearance_duration = (
SELECT COALESCE(SUM(duration), 0.0)
FROM person_appearances
WHERE person_id = p_person_id
),
first_appearance_time = (
SELECT MIN(start_time)
FROM person_appearances
WHERE person_id = p_person_id
),
last_appearance_time = (
SELECT MAX(end_time)
FROM person_appearances
WHERE person_id = p_person_id
),
updated_at = CURRENT_TIMESTAMP
WHERE person_id = p_person_id;
END;
$$ LANGUAGE plpgsql;
-- 7.5: Create trigger to auto-update statistics
CREATE OR REPLACE FUNCTION trigger_update_person_stats()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
PERFORM update_person_appearance_stats(NEW.person_id);
ELSIF TG_OP = 'UPDATE' THEN
PERFORM update_person_appearance_stats(NEW.person_id);
IF NEW.person_id != OLD.person_id THEN
PERFORM update_person_appearance_stats(OLD.person_id);
END IF;
ELSIF TG_OP = 'DELETE' THEN
PERFORM update_person_appearance_stats(OLD.person_id);
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Create trigger only if it doesn't exist
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_trigger
WHERE tgname = 'trigger_update_person_appearance_stats'
AND tgrelid = 'person_appearances'::regclass
) THEN
CREATE TRIGGER trigger_update_person_appearance_stats
AFTER INSERT OR UPDATE OR DELETE ON person_appearances
FOR EACH ROW
EXECUTE FUNCTION trigger_update_person_stats();
END IF;
END $$;
-- 7.6: Create function to find person by time overlap
CREATE OR REPLACE FUNCTION find_persons_at_time(
p_video_uuid VARCHAR(255),
p_time DOUBLE PRECISION,
p_tolerance DOUBLE PRECISION DEFAULT 0.0
)
RETURNS TABLE (
person_id VARCHAR(255),
name VARCHAR(255),
confidence DOUBLE PRECISION,
appearance_id INTEGER
) AS $$
BEGIN
RETURN QUERY
SELECT
pi.person_id,
pi.name,
pa.confidence,
pa.id AS appearance_id
FROM person_appearances pa
JOIN person_identities pi ON pa.person_id = pi.person_id
WHERE pa.video_uuid = p_video_uuid
AND pa.start_time <= p_time + p_tolerance
AND pa.end_time >= p_time - p_tolerance
ORDER BY pa.confidence DESC;
END;
$$ LANGUAGE plpgsql;
-- 7.7: Create function to find persons in time range
CREATE OR REPLACE FUNCTION find_persons_in_range(
p_video_uuid VARCHAR(255),
p_start_time DOUBLE PRECISION,
p_end_time DOUBLE PRECISION
)
RETURNS TABLE (
person_id VARCHAR(255),
name VARCHAR(255),
overlap_duration DOUBLE PRECISION,
confidence DOUBLE PRECISION
) AS $$
BEGIN
RETURN QUERY
SELECT
pi.person_id,
pi.name,
LEAST(pa.end_time, p_end_time) - GREATEST(pa.start_time, p_start_time) AS overlap_duration,
AVG(pa.confidence) AS confidence
FROM person_appearances pa
JOIN person_identities pi ON pa.person_id = pi.person_id
WHERE pa.video_uuid = p_video_uuid
AND pa.start_time < p_end_time
AND pa.end_time > p_start_time
GROUP BY pi.person_id, pi.name, pa.end_time, pa.start_time
ORDER BY overlap_duration DESC;
END;
$$ LANGUAGE plpgsql;
-- 7.8: Create function to merge person identities
CREATE OR REPLACE FUNCTION merge_person_identities(
p_target_person_id VARCHAR(255),
p_source_person_ids VARCHAR(255)[]
)
RETURNS VOID AS $$
BEGIN
-- Update all appearances to point to target person
UPDATE person_appearances
SET person_id = p_target_person_id
WHERE person_id = ANY(p_source_person_ids);
-- Delete source person identities
DELETE FROM person_identities
WHERE person_id = ANY(p_source_person_ids)
AND person_id != p_target_person_id;
-- Update target person statistics
PERFORM update_person_appearance_stats(p_target_person_id);
END;
$$ LANGUAGE plpgsql;
-- 7.9: Create function to auto-match face with speaker
CREATE OR REPLACE FUNCTION auto_match_face_speaker(
p_video_uuid VARCHAR(255),
p_threshold DOUBLE PRECISION DEFAULT 0.5
)
RETURNS TABLE (
face_id VARCHAR(255),
speaker_id VARCHAR(255),
confidence DOUBLE PRECISION,
match_count BIGINT
) AS $$
BEGIN
RETURN QUERY
-- Find face detections that overlap with ASRX segments
SELECT
fd.face_id,
seg.speaker_id,
COUNT(*)::DOUBLE PRECISION / NULLIF(COUNT(DISTINCT seg.speaker_id), 0) AS confidence,
COUNT(*) AS match_count
FROM face_detections fd
CROSS JOIN LATERAL (
SELECT
seg_data->>'speaker_id' AS speaker_id,
(seg_data->>'start')::DOUBLE PRECISION AS seg_start,
(seg_data->>'end')::DOUBLE PRECISION AS seg_end
FROM face_recognition_results frr,
jsonb_array_elements(frr.result_data->'frames') AS frame_data,
jsonb_array_elements(frame_data->'faces') AS face_data,
jsonb_array_elements(frr.result_data->'segments') AS seg_data
WHERE frr.video_uuid = p_video_uuid
AND face_data->>'face_id' = fd.face_id
) seg
WHERE fd.video_uuid = p_video_uuid
AND fd.timestamp_secs >= seg.seg_start
AND fd.timestamp_secs <= seg.seg_end
AND fd.face_id IS NOT NULL
AND seg.speaker_id IS NOT NULL
GROUP BY fd.face_id, seg.speaker_id
HAVING COUNT(*)::DOUBLE PRECISION / NULLIF(COUNT(DISTINCT seg.speaker_id), 0) >= p_threshold
ORDER BY confidence DESC;
END;
$$ LANGUAGE plpgsql;
-- 7.10: Add comments
COMMENT ON TABLE person_identities IS 'Stores person identity associations linking face and speaker identities';
COMMENT ON TABLE person_appearances IS 'Stores individual person appearance records with time ranges';
COMMENT ON FUNCTION update_person_appearance_stats IS 'Updates person identity statistics from appearances';
COMMENT ON FUNCTION find_persons_at_time IS 'Finds persons appearing at a specific time in video';
COMMENT ON FUNCTION find_persons_in_range IS 'Finds persons appearing in a time range with overlap calculation';
COMMENT ON FUNCTION merge_person_identities IS 'Merges multiple person identities into one';
COMMENT ON FUNCTION auto_match_face_speaker IS 'Automatically matches face detections with speaker segments';
-- 7.11: Create trigger for updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create trigger only if it doesn't exist
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_trigger
WHERE tgname = 'update_person_identities_updated_at'
AND tgrelid = 'person_identities'::regclass
) THEN
CREATE TRIGGER update_person_identities_updated_at
BEFORE UPDATE ON person_identities
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
END IF;
END $$;