48 lines
2.2 KiB
SQL
48 lines
2.2 KiB
SQL
-- ============================================================================
-- Migration 017: Create pre_chunks table (Processor Output)
-- ============================================================================
-- Purpose:
-- 1. Move raw processor outputs (YOLO frames, Face detections, etc.)
--    from the 'chunks' table to a dedicated 'pre_chunks' table.
-- 2. Support coordinate_type (frame for video, time for audio, etc.)
--    to allow future expansion to PDF/Audio files.
-- 3. Support Identity linking directly on pre_chunks (Face -> Identity).
-- ============================================================================

-- 0. Clean up existing conflicting table (if any)
--
-- WARNING: destructive. Any rows already stored in pre_chunks are discarded.
--   * IF EXISTS  -> the statement is a no-op (with a notice) when the table
--                   is absent, so the migration can run on a fresh database.
--   * CASCADE    -> objects that depend on the table (e.g. views, foreign-key
--                   constraints in other tables) are dropped along with it.
DROP TABLE IF EXISTS pre_chunks CASCADE;

-- 1. Create pre_chunks table
--
-- Holds raw processor output. Each row records what one processor
-- (processor_type) produced for one file (file_uuid) at one coordinate
-- (coordinate_type + coordinate_index), with the payload stored in `data`.
--
-- Nullability is deliberate:
--   * timestamp, confidence and identity_id are optional.
--   * Every other column is NOT NULL. Columns that carry a DEFAULT are also
--     NOT NULL so an explicit NULL cannot bypass the default; writers that
--     omit those columns are unaffected.
CREATE TABLE pre_chunks (
    id               BIGSERIAL    PRIMARY KEY,
    file_uuid        UUID         NOT NULL,
    processor_type   VARCHAR(32)  NOT NULL,  -- 'yolo', 'face', 'asr', 'ocr', 'pose'...

    -- Coordinate system (supports Video, Audio, Text...)
    -- coordinate_index has no meaning without its unit, hence NOT NULL here.
    -- The set of units is intentionally left open (no CHECK constraint) so new
    -- media types can be added without a schema change.
    coordinate_type  VARCHAR(20)  NOT NULL DEFAULT 'frame',  -- 'frame', 'time', 'page'
    coordinate_index BIGINT       NOT NULL,  -- Frame number, or paragraph index

    -- NOTE: this column shares its name with the TIMESTAMP type. PostgreSQL
    -- accepts it unquoted as a column name; the name is kept unchanged so
    -- existing readers/writers keep working.
    timestamp        FLOAT,                  -- Time in seconds
    data             JSONB        NOT NULL,  -- Raw processor output (objects, bboxes, etc.)

    -- Identity linkage (Face -> Identity, or Speaker -> Identity)
    -- If NULL, this Face/Speaker is a "Candidate"
    -- Note: FK removed temporarily due to schema migration in progress
    identity_id      UUID,

    confidence       FLOAT,                  -- Match confidence

    created_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

-- 2. Indexes
--
-- All four are plain B-tree indexes (PostgreSQL's default access method,
-- spelled out here for clarity).

-- Lookup of every row belonging to one file.
-- NOTE(review): file_uuid is also the leading column of idx_pre_chunks_coord
-- below, so this index may be redundant - confirm before removing, since
-- other migrations might reference it by name.
CREATE INDEX idx_pre_chunks_file
    ON pre_chunks USING btree (file_uuid);

-- Lookup by processor kind across all files.
CREATE INDEX idx_pre_chunks_processor
    ON pre_chunks USING btree (processor_type);

-- Lookup of rows linked to a given identity.
CREATE INDEX idx_pre_chunks_identity
    ON pre_chunks USING btree (identity_id);

-- Positional lookup: one processor's output for a file, by coordinate.
CREATE INDEX idx_pre_chunks_coord
    ON pre_chunks USING btree (file_uuid, processor_type, coordinate_index);

-- 3. Comment
--
-- Catalog-level descriptions, stored with the table and column definitions
-- (shown by psql's \d+).
COMMENT ON TABLE pre_chunks
    IS 'Raw output from Processors (Frames, Segments). Candidates are rows where identity_id IS NULL.';

COMMENT ON COLUMN pre_chunks.coordinate_type
    IS 'Coordinate unit: frame (Video), time (Audio), page (PDF)...';