-- File: momentry_core/migrations/017_create_pre_chunks.sql

-- ============================================================================
-- Migration 017: Create pre_chunks table (Processor Output)
-- ============================================================================
-- Purpose:
-- 1. Move raw processor outputs (YOLO frames, Face detections, etc.)
-- from 'chunks' table to a dedicated 'pre_chunks' table.
-- 2. Support coordinate_type (frame for video, time for audio, page for PDF, etc.)
-- to allow future expansion for PDF/Audio files.
-- 3. Support Identity linking directly on pre_chunks (Face -> Identity).
-- ============================================================================
-- 0. Clean up existing conflicting table (if any)
-- WARNING: destructive. If pre_chunks already exists, the table and all of its
-- rows are removed; CASCADE additionally drops objects that depend on it
-- (e.g. views, or foreign-key constraints on other tables that reference it).
-- IF EXISTS turns the statement into a no-op when the table is absent.
DROP TABLE IF EXISTS pre_chunks CASCADE;
-- 1. Create pre_chunks table
-- One row per raw processor output at a single coordinate of a file.
--
-- Nullability is deliberate:
--   * coordinate_type is NOT NULL (defaulting to 'frame') because it names the
--     unit coordinate_index is expressed in; an index without a unit cannot be
--     interpreted.
--   * created_at is NOT NULL so every row carries its insertion time.
--   * timestamp, identity_id and confidence stay nullable (optional values).
--
-- DOUBLE PRECISION is what an unqualified FLOAT resolves to in PostgreSQL; it
-- is spelled out here so the stored precision is explicit.
CREATE TABLE pre_chunks (
    id               BIGSERIAL        PRIMARY KEY,
    file_uuid        UUID             NOT NULL,
    processor_type   VARCHAR(32)      NOT NULL,                  -- 'yolo', 'face', 'asr', 'ocr', 'pose'...

    -- Coordinate system (supports Video, Audio, Text...)
    coordinate_type  VARCHAR(20)      NOT NULL DEFAULT 'frame',  -- 'frame', 'time', 'page'
    coordinate_index BIGINT           NOT NULL,                  -- Frame number, or paragraph index
    timestamp        DOUBLE PRECISION,                           -- Time in seconds
    data             JSONB            NOT NULL,                  -- Raw processor output (objects, bboxes, etc.)

    -- Identity linkage (Face -> Identity, or Speaker -> Identity)
    -- If NULL, this Face/Speaker is a "Candidate"
    -- Note: FK removed temporarily due to schema migration in progress
    identity_id      UUID,
    confidence       DOUBLE PRECISION,                           -- Match confidence
    created_at       TIMESTAMPTZ      NOT NULL DEFAULT NOW()
);
-- 2. Indexes
-- Rows belonging to one file.
-- NOTE(review): file_uuid is also the leading column of idx_pre_chunks_coord,
-- so this index may be redundant; kept because it may be referenced by name
-- elsewhere.
CREATE INDEX idx_pre_chunks_file
    ON pre_chunks (file_uuid);

-- Rows produced by one processor type.
CREATE INDEX idx_pre_chunks_processor
    ON pre_chunks (processor_type);

-- Rows linked to a given identity.
CREATE INDEX idx_pre_chunks_identity
    ON pre_chunks (identity_id);

-- Rows of one file and processor, ordered by coordinate.
CREATE INDEX idx_pre_chunks_coord
    ON pre_chunks (file_uuid, processor_type, coordinate_index);
-- 3. Catalog comments (visible via \d+ and other schema introspection tools)
COMMENT ON TABLE pre_chunks
    IS 'Raw output from Processors (Frames, Segments). Candidates are rows where identity_id IS NULL.';

COMMENT ON COLUMN pre_chunks.coordinate_type
    IS 'Coordinate unit: frame (Video), time (Audio), page (PDF)...';