- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
79 lines
2.0 KiB
Python
79 lines
2.0 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Migrate ASR Segments to Child Chunks

Write the fine-grained ASR speech segments into the child_chunks table and
link them to parent_chunks.
|
|
"""
|
|
|
|
import json
|
|
import psycopg2
|
|
|
|
# Configuration
# Identifier of the item being migrated. It names the output directory and the
# ASR file, selects the parent_chunks rows, and is written to child_chunks.uuid.
UUID = "384b0ff44aaaa1f1"
# Path of the ASR result JSON, relative to the current working directory.
ASR_PATH = f"output/{UUID}/{UUID}.asr.json"
# PostgreSQL connection URL handed to psycopg2.connect().
DB_URL = "postgresql://accusys@localhost:5432/momentry"
|
|
|
|
|
|
def _find_parent_id(parents, start, end, margin=1.0):
    """Return the id of the first parent chunk whose range contains the segment.

    Args:
        parents: Iterable of ``(id, start_time, end_time)`` rows.
        start: Segment start time.
        end: Segment end time.
        margin: Slack allowed on both edges of the parent range, so a segment
            that slightly overshoots a boundary still matches.

    Returns:
        The matching parent id, or ``None`` when no parent contains the segment.
    """
    for parent_id, parent_start, parent_end in parents:
        if start >= parent_start - margin and end <= parent_end + margin:
            return parent_id
    return None


def _speaker_ids(seg):
    """Return the segment's speaker id wrapped in a list, or ``[]`` if absent."""
    speaker_id = seg.get("speaker_id")
    # Compare against None/"" explicitly: a plain truthiness test would also
    # discard a legitimate speaker id of 0.
    if speaker_id is None or speaker_id == "":
        return []
    return [speaker_id]


def migrate():
    """Copy the ASR segments of ``UUID`` into the ``child_chunks`` table.

    Reads the ``segments`` list from the JSON file at ``ASR_PATH``, fetches the
    parent chunks stored for ``UUID`` and inserts one ``child_chunks`` row per
    segment with non-empty text. Each row is linked to the first parent chunk
    whose time range (with a 1 s tolerance) contains the segment; segments
    without such a parent are inserted with a NULL ``parent_id`` and counted
    in a warning.

    All inserts are committed together at the end. If anything fails before
    the commit, nothing is committed and the connection is still closed.
    This function only issues INSERTs and does no de-duplication itself.

    Raises:
        OSError: If the ASR file cannot be opened.
        json.JSONDecodeError: If the ASR file is not valid JSON.
        psycopg2.Error: On any database failure.
    """
    print(f"🚀 Starting migration for {UUID}...")

    # 1. Load Data
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which would corrupt or reject non-ASCII transcripts.
    with open(ASR_PATH, "r", encoding="utf-8") as f:
        asr_data = json.load(f)
    segments = asr_data.get("segments", [])
    print(f"📂 Loaded {len(segments)} ASR segments.")

    conn = psycopg2.connect(DB_URL)
    try:
        with conn.cursor() as cur:
            # 2. Load Parent Chunks to map time ranges
            cur.execute(
                "SELECT id, start_time, end_time FROM parent_chunks WHERE uuid = %s",
                (UUID,),
            )
            parents = cur.fetchall()
            print(f"📂 Found {len(parents)} Parent Chunks.")

            # 3. Insert Child Chunks
            count = 0
            orphans = 0
            for seg in segments:
                # `or ""` also covers a "text" key that is present but null.
                text = (seg.get("text") or "").strip()
                if not text:
                    continue

                start = seg.get("start", 0)
                end = seg.get("end", 0)

                parent_id = _find_parent_id(parents, start, end)
                if parent_id is None:
                    orphans += 1

                # Note: raw_text_vector is null for now, we only do semantic
                # search on Parent
                cur.execute(
                    """
                    INSERT INTO child_chunks (parent_id, uuid, start_time, end_time, raw_text, speaker_ids)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    """,
                    (
                        parent_id,
                        UUID,
                        start,
                        end,
                        text,
                        _speaker_ids(seg),
                    ),
                )
                count += 1

        conn.commit()
        if orphans:
            print(
                f"⚠️ {orphans} segments had no matching Parent Chunk "
                "(parent_id left NULL)."
            )
        print(f"✅ Successfully migrated {count} Child Chunks.")
    finally:
        # Closing without a prior commit discards the pending transaction, so
        # a failure part-way through leaves child_chunks untouched.
        conn.close()
|
|
|
|
|
|
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    migrate()
|