refactor: remove face embedding architecture - single Qdrant _faces collection

- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need reimplementation with _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
2026-06-24 22:27:09 +08:00
parent 360cb991e1
commit 074cdcdbed
60 changed files with 657 additions and 9454 deletions
--- a/scripts/face_mediapipe_test.py
+++ b/scripts/face_mediapipe_test.py
@@ -1,200 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-POC: MediaPipe Face Detection vs Apple Vision Framework vs InsightFace
-
-Tests face detection on video frames and reports:
- Detection count
- Bounding box quality
- Landmarks (468 face mesh)
- Processing speed
-"""
-import sys
-import json
-import os
-import time
-import subprocess
-import argparse
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-
-def extract_frames(video_path, sample_interval=30, max_frames=50):
-    """Extract frames using ffmpeg"""
-    import tempfile
-    tmpdir = tempfile.mkdtemp(prefix="face_test_")
-    pattern = os.path.join(tmpdir, "frame_%05d.jpg")
-    cmd = ["ffmpeg", "-y", "-v", "quiet", "-i", video_path,
-           "-vf", f"select=not(mod(n\\,{sample_interval}))",
-           "-vsync", "vfr", "-q:v", "5", pattern]
-    subprocess.run(cmd, check=True)
-    files = sorted([f for f in os.listdir(tmpdir) if f.endswith(".jpg")])[:max_frames]
-    return tmpdir, [os.path.join(tmpdir, f) for f in files]
-
-
-def test_mediapipe(frame_paths, fps):
-    """MediaPipe Face Detection + Face Mesh"""
-    try:
-        from mediapipe.tasks import vision
-        from mediapipe.tasks.python.core.base_options import BaseOptions
-        from mediapipe.tasks.python.vision.face_detector import FaceDetector, FaceDetectorOptions
-        from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarker, FaceLandmarkerOptions
-    except ImportError:
-        print("[MediaPipe] Not available, skipping")
-        return None
-
-    model_dir = os.path.join(os.path.dirname(__file__), "models")
-    os.makedirs(model_dir, exist_ok=True)
-
-    # Check model files - MediaPipe downloads automatically via the API
-    base_opts_detect = BaseOptions(model_asset_path="")
-    detect_opts = FaceDetectorOptions(base_options=BaseOptions())
-
-    t0 = time.time()
-    total_faces = 0
-    frames_with_faces = 0
-    landmarks_total = 0
-
-    # MediaPipe Face Detector
-    try:
-        detector = vision.FaceDetector.create_from_options(
-            FaceDetectorOptions(
-                base_options=BaseOptions(model_asset_buffer=None),
-                running_mode=vision.RunningMode.IMAGE
-            )
-        )
-    except:
-        # Download model first
-        import urllib.request
-        model_url = "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/latest/face_detector.task"
-        model_path = os.path.join(model_dir, "face_detector.task")
-        if not os.path.exists(model_path):
-            print(f"[MediaPipe] Downloading model: {model_url}")
-            urllib.request.urlretrieve(model_url, model_path)
-        
-        detector = vision.FaceDetector.create_from_options(
-            FaceDetectorOptions(
-                base_options=BaseOptions(model_asset_path=model_path),
-                running_mode=vision.RunningMode.IMAGE
-            )
-        )
-
-    import cv2
-    for path in frame_paths:
-        img = cv2.imread(path)
-        if img is None:
-            continue
-        h, w = img.shape[:2]
-        
-        mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
-        result = detector.detect(mp_img)
-        
-        if result.detections:
-            frames_with_faces += 1
-            for det in result.detections:
-                total_faces += 1
-                bbox = det.bounding_box
-                # bbox is [x, y, width, height] in pixels
-
-    elapsed = time.time() - t0
-    print(f"[MediaPipe] Detection: {len(frame_paths)} frames, {frames_with_faces} with faces, {total_faces} faces, {elapsed:.2f}s")
-
-    # Face Landmarker (468 points)
-    landmark_path = os.path.join(model_dir, "face_landmarker.task")
-    if not os.path.exists(landmark_path):
-        model_url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
-        print(f"[MediaPipe] Downloading landmark model...")
-        import urllib.request
-        urllib.request.urlretrieve(model_url, landmark_path)
-
-    landmarker = vision.FaceLandmarker.create_from_options(
-        FaceLandmarkerOptions(
-            base_options=BaseOptions(model_asset_path=landmark_path),
-            running_mode=vision.RunningMode.IMAGE,
-            output_face_blendshapes=False,
-            output_facial_transformation_matrixes=False,
-        )
-    )
-
-    t1 = time.time()
-    for path in frame_paths[:10]:  # Only test 10 frames for landmarks
-        img = cv2.imread(path)
-        if img is None:
-            continue
-        mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=img)
-        result = landmarker.detect(mp_img)
-        if result.face_landmarks:
-            for face in result.face_landmarks:
-                landmarks_total += len(face)
-
-    elapsed2 = time.time() - t1
-    print(f"[MediaPipe] Face Mesh (10 frames): {landmarks_total} total landmarks (~{landmarks_total//max(len(result.face_landmarks),1)} per face)")
-
-    return {
-        "frames_processed": len(frame_paths),
-        "frames_with_faces": frames_with_faces,
-        "total_faces": total_faces,
-        "time_sec": elapsed,
-        "landmarks_per_face": 468,
-    }
-
-
-def test_vision_framework(frame_paths, fps):
-    """Apple Vision Framework face detection via swift binary"""
-    # Use the existing swift binary
-    swift_bin = os.path.join(os.path.dirname(__file__),
-                             "swift_processors/.build/debug/swift_ocr")
-    # swift_ocr doesn't do face detection, use the face_compare_test
-    swift_face = os.path.join(os.path.dirname(__file__),
-                              "swift_processors/.build/debug/face_compare_test")
-    
-    if not os.path.exists(swift_face):
-        print("[Vision] Binary not found, skipping")
-        return None
-    
-    print(f"[Vision] Running face compare test...")
-    t0 = time.time()
-    result = subprocess.run(
-        [swift_face, frame_paths[0].rsplit("/", 2)[0].replace("/frames", ""),  # This won't work for single files
-         "--sample-interval", "1", "--max-frames", str(len(frame_paths))],
-        capture_output=True, text=True, timeout=120
-    )
-    elapsed = time.time() - t0
-    print(result.stdout[-500:])
-    return {"time_sec": elapsed}
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("video_path")
-    parser.add_argument("--sample-interval", type=int, default=30)
-    parser.add_argument("--max-frames", type=int, default=50)
-    args = parser.parse_args()
-
-    print(f"Testing: {args.video_path}")
-    
-    # Extract frames
-    tmpdir, frames = extract_frames(args.video_path, args.sample_interval, args.max_frames)
-    print(f"Extracted {len(frames)} frames")
-
-    # MediaPipe
-    print("\n=== MediaPipe ===")
-    mp_result = test_mediapipe(frames, 24)
-    
-    # Vision Framework
-    print("\n=== Apple Vision Framework ===")
-    vf_result = test_vision_framework(frames, 24)
-
-    # Summary
-    print("\n=== Comparison ===")
-    if mp_result:
-        print(f"MediaPipe: {mp_result['total_faces']} faces in {mp_result['frames_with_faces']} frames, {mp_result['time_sec']:.2f}s")
-        print(f"  Landmarks: {mp_result['landmarks_per_face']} per face")
-    print(f"Vision Framework: (see above)")
-
-    # Cleanup
-    import shutil
-    shutil.rmtree(tmpdir, ignore_errors=True)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/face_mediapipe_test_v1.11.py
+++ b/scripts/face_mediapipe_test_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/face_mediapipe_test_v1.11.py
--- a/scripts/face_processor.py
+++ b/scripts/face_processor.py
@@ -225,8 +225,9 @@ class FaceProcessorVision:
                if face_img.size == 0:
                    continue

-                # CoreML embedding
-                emb = self.extract_face_embedding(face_img)
+                # CoreML embedding - TODO: push to Qdrant _faces collection instead
+                # emb = self.extract_face_embedding(face_img)
+                emb = None
                if emb is not None:
                    embed_count += 1

@@ -240,7 +241,6 @@ class FaceProcessorVision:
                faces.append({
                    "x": x, "y": y, "width": w, "height": h,
                    "confidence": face.get("confidence", 0.5),
-                    "embedding": emb,
                    "pose_angle": {
                        "angle": pose_angle,
                        "roll": pose_info.get("roll", 0),
@@ -262,20 +262,17 @@ class FaceProcessorVision:

            if len(face_data["frames"]) % 100 == 0:
                elapsed = time.time() - t0
-                print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s")
+                print(f"[FACE_V2] {len(face_data['frames'])} frames, {elapsed:.0f}s")
                if self.publisher:
                    pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1))
                    if pct > last_pct:
                        last_pct = pct
                        self.publisher.progress("face", len(face_data["frames"]), len(frames),
-                            f"{embed_count} faces", embed_count, "faces")
+                            "", 0, "faces")

        self.video.release()

-        # Finalize
        face_data["metadata"]["status"] = "completed"
-        face_data["metadata"]["total_embeddings"] = embed_count
-        face_data["metadata"]["embedder"] = "coreml_facenet"

        # Convert dict frames to list for Rust FaceResult format
        frames_list = []
--- a/scripts/generate_parent_chunks_gemma4.py
+++ b/scripts/generate_parent_chunks_gemma4.py
@@ -1,228 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Regenerate ALL parent chunks for 384b0ff44aaaa1f1 using gemma4
-Groups ASR chunks into ~17 logical scenes and generates summaries.
-"""
-
-import json
-import subprocess
-import psycopg2
-import psycopg2.extras
-
-DB_CONFIG = {"host": "localhost", "user": "accusys", "dbname": "momentry"}
-UUID = "384b0ff44aaaa1f1"
-OLLAMA_URL = "http://localhost:11434/api/generate"
-MODEL = "gemma4:latest"
-
-# Target ~17 scenes across 6865s = ~400s per scene
-# But use natural breaks (gaps in dialogue) to split
-SCENE_TARGET_COUNT = 17
-
-
-def get_chunks():
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
-    cur.execute(
-        """
-        SELECT id, chunk_id, start_time, end_time, start_frame, end_frame, 
-               text_content, fps
-        FROM chunks
-        WHERE uuid = %s AND chunk_type = 'sentence'
-        ORDER BY start_time
-    """,
-        (UUID,),
-    )
-    chunks = cur.fetchall()
-    cur.close()
-    conn.close()
-    return chunks
-
-
-def call_gemma4(prompt, max_tokens=300):
-    payload = {
-        "model": MODEL,
-        "prompt": prompt,
-        "stream": False,
-        "options": {"temperature": 0.3, "num_predict": max_tokens},
-    }
-    try:
-        resp = subprocess.run(
-            ["curl", "-s", OLLAMA_URL, "-d", json.dumps(payload)],
-            capture_output=True,
-            text=True,
-            timeout=180,
-        )
-        if resp.returncode == 0:
-            result = json.loads(resp.stdout)
-            return result.get("response", "").strip()
-    except Exception as e:
-        print(f"    ⚠️  Ollama error: {e}")
-    return ""
-
-
-def find_scene_boundaries(chunks, target_count=SCENE_TARGET_COUNT):
-    """Find optimal scene boundaries based on dialogue gaps"""
-    if not chunks:
-        return []
-
-    # Calculate gaps between consecutive chunks
-    gaps = []
-    for i in range(1, len(chunks)):
-        gap = chunks[i]["start_time"] - chunks[i - 1]["end_time"]
-        gaps.append((i, gap))
-
-    # Sort by gap size, take top (target_count - 1) gaps
-    gaps.sort(key=lambda x: x[1], reverse=True)
-    split_indices = sorted([g[0] for g in gaps[: target_count - 1]])
-
-    # Create scenes
-    scenes = []
-    start = 0
-    for split in split_indices:
-        scenes.append(chunks[start:split])
-        start = split
-    scenes.append(chunks[start:])
-
-    return scenes
-
-
-def generate_summary(scene_chunks, scene_num):
-    """Generate summary for a scene using gemma4"""
-    texts = [c["text_content"] for c in scene_chunks if c["text_content"]]
-    if not texts:
-        return f"Scene {scene_num}: No dialogue"
-
-    combined = " ".join(texts)[:3000]
-    duration = scene_chunks[-1]["end_time"] - scene_chunks[0]["start_time"]
-
-    prompt = f"""You are a professional film scene analyst. Given the following dialogue transcript from a movie scene, write a concise one-sentence English summary.
-
-Duration: {duration:.0f} seconds
-Dialogue:
-{combined}
-
-Provide ONLY the summary sentence, nothing else. Focus on plot events and character actions."""
-
-    summary = call_gemma4(prompt, max_tokens=250)
-    if not summary:
-        # Fallback: use first few words of dialogue
-        summary = f"Scene {scene_num}: {' '.join(texts[:3])[:80]}..."
-    return summary
-
-
-def insert_parent_chunks(scenes):
-    """Insert parent chunks and update child relationships"""
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor()
-
-    inserted = 0
-    for i, scene_chunks in enumerate(scenes):
-        start_time = scene_chunks[0]["start_time"]
-        end_time = scene_chunks[-1]["end_time"]
-        start_frame = int(scene_chunks[0]["start_frame"])
-        end_frame = int(scene_chunks[-1]["end_frame"])
-        fps = float(scene_chunks[0]["fps"]) if scene_chunks[0]["fps"] else 59.94
-        chunk_count = len(scene_chunks)
-
-        print(
-            f"  Scene {i}: {start_time:.0f}s-{end_time:.0f}s ({chunk_count} chunks, {end_time - start_time:.0f}s)"
-        )
-
-        # Generate summary
-        summary = generate_summary(scene_chunks, i)
-        print(f"    📝 {summary[:100]}...")
-
-        # Insert parent chunk
-        cur.execute(
-            """
-            INSERT INTO parent_chunks (
-                uuid, scene_order, start_time, end_time,
-                start_frame, end_frame, fps, summary_text,
-                metadata, rule_3_markers, created_at
-            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
-            RETURNING id
-        """,
-            (
-                UUID,
-                i,
-                start_time,
-                end_time,
-                start_frame,
-                end_frame,
-                fps,
-                summary,
-                json.dumps({"auto_generated_by": "gemma4", "chunk_count": chunk_count}),
-                json.dumps({}),
-            ),
-        )
-        parent_id = cur.fetchone()[0]
-
-        # Update chunks with parent_chunk_id
-        chunk_ids = [c["chunk_id"] for c in scene_chunks]
-        child_ids_array = chunk_ids  # Store all child chunk IDs
-
-        cur.execute(
-            """
-            UPDATE chunks 
-            SET parent_chunk_id = %s::varchar
-            WHERE uuid = %s AND chunk_id = ANY(%s)
-        """,
-            (str(parent_id), UUID, chunk_ids),
-        )
-
-        inserted += 1
-        if i % 5 == 4 or i == len(scenes) - 1:
-            conn.commit()
-            print(f"    ✅ Committed scenes 0-{i}")
-
-    conn.commit()
-    cur.close()
-    conn.close()
-    return inserted
-
-
-def main():
-    print(f"🎬 Regenerating parent chunks for {UUID}")
-    print(f"   Using model: {MODEL}")
-    print("=" * 70)
-
-    # Step 1: Get all chunks
-    print("\n📥 Fetching ASR chunks...")
-    chunks = get_chunks()
-    print(f"   Found {len(chunks)} sentence chunks")
-    if chunks:
-        print(f"   Time range: 0-{chunks[-1]['end_time']:.0f}s")
-
-    # Step 2: Find scene boundaries
-    print(f"\n🔍 Finding {SCENE_TARGET_COUNT} scene boundaries...")
-    scenes = find_scene_boundaries(chunks, SCENE_TARGET_COUNT)
-    print(f"   Created {len(scenes)} scenes")
-    for i, s in enumerate(scenes):
-        print(
-            f"     Scene {i}: {s[0]['start_time']:.0f}s-{s[-1]['end_time']:.0f}s ({len(s)} chunks)"
-        )
-
-    # Step 3: Generate summaries and insert
-    print("\n🤖 Generating summaries with gemma4...")
-    inserted = insert_parent_chunks(scenes)
-
-    print(f"\n{'=' * 70}")
-    print(f"✅ Created {inserted} parent chunks")
-
-    # Step 4: Verify
-    print("\n📊 Verification:")
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor()
-    cur.execute("SELECT COUNT(*) FROM parent_chunks WHERE uuid = %s", (UUID,))
-    print(f"   parent_chunks: {cur.fetchone()[0]}")
-    cur.execute(
-        "SELECT COUNT(*) FROM chunks WHERE uuid = %s AND parent_chunk_id IS NULL AND chunk_type = 'sentence'",
-        (UUID,),
-    )
-    print(f"   orphan chunks: {cur.fetchone()[0]}")
-    cur.close()
-    conn.close()
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/generate_parent_chunks_gemma4_v1.11.py
+++ b/scripts/generate_parent_chunks_gemma4_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/generate_parent_chunks_gemma4_v1.11.py
--- a/scripts/mediapipe_holistic_processor.py
+++ b/scripts/mediapipe_holistic_processor.py
@@ -1,711 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-MediaPipe Holistic Processor - Full body keypoint extraction
-
-Purpose:
-1. Extract Face Mesh (468 keypoints) → eye/mouth actions
-2. Extract Pose (33 keypoints) → arm/leg/feet actions
-3. Extract Hands (21 keypoints × 2) → hand gestures
-
-Output structure:
-{
-  "metadata": {...},
-  "frames": {
-    "frame_num": {
-      "persons": [
-        {
-          "person_id": 0,
-          "bbox": {...},
-          "face_mesh": {
-            "landmarks": [[x,y,z], ...],  # 468 points
-            "eye_features": {...},
-            "mouth_features": {...},
-          },
-          "pose": {
-            "landmarks": [[x,y,z,visibility], ...],  # 33 points
-            "arm_features": {...},
-            "leg_features": {...},
-          },
-          "hands": {
-            "left": {
-              "landmarks": [[x,y,z], ...],  # 21 points
-              "gesture": "...",
-            },
-            "right": {
-              "landmarks": [[x,y,z], ...],  # 21 points
-              "gesture": "...",
-            },
-          },
-        }
-      ]
-    }
-  }
-}
-"""
-
-import json
-import argparse
-import cv2
-import numpy as np
-import mediapipe as mp
-from typing import Dict
-
-
-class MediaPipeHolisticProcessor:
-    """
-    Process video with MediaPipe Holistic (Face + Pose + Hands)
-    """
-    
-    def __init__(
-        self,
-        model_complexity: int = 1,  # 0, 1, 2
-        refine_face_landmarks: bool = True,
-        enable_segmentation: bool = False,
-        min_detection_confidence: float = 0.5,
-        min_tracking_confidence: float = 0.5,
-    ):
-        """
-        Initialize MediaPipe Holistic
-        
-        Args:
-            model_complexity: 0 (lite), 1 (full), 2 (heavy)
-            refine_face_landmarks: Enable iris detection
-            enable_segmentation: Enable segmentation mask
-            min_detection_confidence: Detection confidence threshold
-            min_tracking_confidence: Tracking confidence threshold
-        """
-        self.mp_holistic = mp.solutions.holistic
-        self.mp_drawing = mp.solutions.drawing_utils
-        self.mp_drawing_styles = mp.solutions.drawing_styles
-        
-        self.holistic = self.mp_holistic.Holistic(
-            static_image_mode=False,  # Video mode
-            model_complexity=model_complexity,
-            smooth_landmarks=True,  # Smooth landmarks across frames
-            enable_segmentation=enable_segmentation,
-            smooth_segmentation=True,
-            refine_face_landmarks=refine_face_landmarks,
-            min_detection_confidence=min_detection_confidence,
-            min_tracking_confidence=min_tracking_confidence,
-        )
-        
-        # Eye landmark indices (Face Mesh)
-        self.LEFT_EYE_INDICES = [33, 133, 159, 145, 158, 144]  # 6 points
-        self.RIGHT_EYE_INDICES = [362, 263, 386, 374, 385, 373]
-        
-        # Iris indices
-        self.LEFT_IRIS_CENTER = 468
-        self.RIGHT_IRIS_CENTER = 473
-        
-        # Mouth indices
-        self.MOUTH_TOP = 13
-        self.MOUTH_BOTTOM = 14
-        self.MOUTH_LEFT = 61
-        self.MOUTH_RIGHT = 291
-        
-        # Pose key indices
-        self.POSE_KEYPOINTS = {
-            "nose": 0,
-            "left_shoulder": 11,
-            "right_shoulder": 12,
-            "left_elbow": 13,
-            "right_elbow": 14,
-            "left_wrist": 15,
-            "right_wrist": 16,
-            "left_hip": 23,
-            "right_hip": 24,
-            "left_knee": 25,
-            "right_knee": 26,
-            "left_ankle": 27,
-            "right_ankle": 28,
-        }
-        
-        # Hand key indices
-        self.HAND_KEYPOINTS = {
-            "wrist": 0,
-            "thumb_cmc": 1,
-            "thumb_mcp": 2,
-            "thumb_ip": 3,
-            "thumb_tip": 4,
-            "index_mcp": 5,
-            "index_pip": 6,
-            "index_dip": 7,
-            "index_tip": 8,
-            "middle_mcp": 9,
-            "middle_pip": 10,
-            "middle_dip": 11,
-            "middle_tip": 12,
-            "ring_mcp": 13,
-            "ring_pip": 14,
-            "ring_dip": 15,
-            "ring_tip": 16,
-            "pinky_mcp": 17,
-            "pinky_pip": 18,
-            "pinky_dip": 19,
-            "pinky_tip": 20,
-        }
-    
-    def process_frame(self, frame: np.ndarray) -> Dict:
-        """
-        Process single frame
-        
-        Args:
-            frame: BGR image
-        
-        Returns:
-            Dict with face_mesh, pose, hands data
-        """
-        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        
-        results = self.holistic.process(frame_rgb)
-        
-        person_data = {
-            "person_id": 0,
-            "bbox": None,
-            "face_mesh": None,
-            "pose": None,
-            "hands": {"left": None, "right": None},
-        }
-        
-# Extract face mesh
-        height, width = frame.shape[:2]
-        if results.face_landmarks:
-            person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks, width, height)
-
-        # Extract pose
-        if results.pose_landmarks:
-            person_data["pose"] = self._extract_pose(results.pose_landmarks, width, height)
-
-        # Extract hands
-        if results.left_hand_landmarks:
-            person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left", width, height)
-
-        if results.right_hand_landmarks:
-            person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right", width, height)
-        
-        # Calculate bbox from pose landmarks
-        if results.pose_landmarks:
-            landmarks = results.pose_landmarks.landmark
-            x_coords = [lm.x for lm in landmarks if lm.visibility > 0.5]
-            y_coords = [lm.y for lm in landmarks if lm.visibility > 0.5]
-
-            if x_coords and y_coords:
-                x_min, x_max = min(x_coords), max(x_coords)
-                y_min, y_max = min(y_coords), max(y_coords)
-
-                person_data["bbox"] = {
-                    "x": int(x_min * width),
-                    "y": int(y_min * height),
-                    "width": int((x_max - x_min) * width),
-                    "height": int((y_max - y_min) * height),
-                }
-
-        return person_data
-
-    def _extract_face_mesh(self, face_landmarks, width: int, height: int) -> Dict:
-        """
-        Extract face mesh landmarks and calculate features
-
-        Args:
-            face_landmarks: MediaPipe face landmarks
-            width: Frame width in pixels
-            height: Frame height in pixels
-
-        Returns:
-            Dict with landmarks (in pixels), eye_features, mouth_features
-        """
-        landmarks = []
-        for lm in face_landmarks.landmark:
-            landmarks.append([int(lm.x * width), int(lm.y * height), lm.z])
-        
-        # Eye Aspect Ratio (EAR)
-        def calculate_ear(eye_indices):
-            # Get eye points
-            p1 = face_landmarks.landmark[eye_indices[0]]
-            p2 = face_landmarks.landmark[eye_indices[1]]
-            p3 = face_landmarks.landmark[eye_indices[2]]
-            p4 = face_landmarks.landmark[eye_indices[3]]
-            p5 = face_landmarks.landmark[eye_indices[4]]
-            p6 = face_landmarks.landmark[eye_indices[5]]
-            
-            # Vertical distances
-            vertical_1 = np.linalg.norm([p3.x - p5.x, p3.y - p5.y])
-            vertical_2 = np.linalg.norm([p4.x - p6.x, p4.y - p6.y])
-            
-            # Horizontal distance
-            horizontal = np.linalg.norm([p1.x - p2.x, p1.y - p2.y])
-            
-            ear = (vertical_1 + vertical_2) / (2 * horizontal) if horizontal > 0 else 0
-            return ear
-        
-        left_ear = calculate_ear(self.LEFT_EYE_INDICES)
-        right_ear = calculate_ear(self.RIGHT_EYE_INDICES)
-        avg_ear = (left_ear + right_ear) / 2
-        
-        # Iris position (if refined landmarks enabled)
-        left_iris_x = None
-        right_iris_x = None
-        
-        if len(face_landmarks.landmark) > 477:
-            left_iris = face_landmarks.landmark[self.LEFT_IRIS_CENTER]
-            right_iris = face_landmarks.landmark[self.RIGHT_IRIS_CENTER]
-            
-            # Normalize iris position relative to eye
-            left_eye_center_x = (face_landmarks.landmark[33].x + face_landmarks.landmark[133].x) / 2
-            right_eye_center_x = (face_landmarks.landmark[362].x + face_landmarks.landmark[263].x) / 2
-            
-            left_eye_width = abs(face_landmarks.landmark[33].x - face_landmarks.landmark[133].x)
-            right_eye_width = abs(face_landmarks.landmark[362].x - face_landmarks.landmark[263].x)
-            
-            left_iris_x = (left_iris.x - left_eye_center_x) / left_eye_width if left_eye_width > 0 else 0
-            right_iris_x = (right_iris.x - right_eye_center_x) / right_eye_width if right_eye_width > 0 else 0
-        
-        # Eye action detection
-        eye_action = "unknown"
-        if avg_ear < 0.15:
-            eye_action = "closed"
-        elif avg_ear > 0.4:
-            eye_action = "wide_open"
-        elif 0.15 <= avg_ear < 0.25:
-            eye_action = "squint"
-        else:
-            eye_action = "normal"
-        
-        # Gaze direction
-        gaze_direction = "center"
-        if left_iris_x and right_iris_x:
-            avg_iris_x = (left_iris_x + right_iris_x) / 2
-            if avg_iris_x < -0.2:
-                gaze_direction = "left"
-            elif avg_iris_x > 0.2:
-                gaze_direction = "right"
-        
-        # Mouth Aspect Ratio (MAR)
-        mouth_top = face_landmarks.landmark[self.MOUTH_TOP]
-        mouth_bottom = face_landmarks.landmark[self.MOUTH_BOTTOM]
-        mouth_left = face_landmarks.landmark[self.MOUTH_LEFT]
-        mouth_right = face_landmarks.landmark[self.MOUTH_RIGHT]
-        
-        mouth_height = np.linalg.norm([mouth_top.x - mouth_bottom.x, mouth_top.y - mouth_bottom.y])
-        mouth_width = np.linalg.norm([mouth_left.x - mouth_right.x, mouth_left.y - mouth_right.y])
-        
-        mar = mouth_height / mouth_width if mouth_width > 0 else 0
-        
-        # Mouth corner distance (for smile detection)
-        mouth_center_y = (mouth_top.y + mouth_bottom.y) / 2
-        corner_lift = (mouth_center_y - mouth_left.y) + (mouth_center_y - mouth_right.y)
-        
-        # Mouth action detection
-        mouth_action = "unknown"
-        if mar > 0.7:
-            mouth_action = "yawn"
-        elif mar > 0.5:
-            mouth_action = "open"
-        elif mar < 0.2:
-            if corner_lift > 0.02:
-                mouth_action = "smile"
-            else:
-                mouth_action = "closed"
-        else:
-            mouth_action = "slightly_open"
-        
-        return {
-            "landmarks": landmarks,
-            "num_landmarks": len(landmarks),
-            "eye_features": {
-                "left_ear": round(left_ear, 4),
-                "right_ear": round(right_ear, 4),
-                "avg_ear": round(avg_ear, 4),
-                "left_iris_x": round(left_iris_x, 4) if left_iris_x else None,
-                "right_iris_x": round(right_iris_x, 4) if right_iris_x else None,
-                "eye_action": eye_action,
-                "gaze_direction": gaze_direction,
-            },
-            "mouth_features": {
-                "mar": round(mar, 4),
-                "mouth_height": round(mouth_height, 4),
-                "mouth_width": round(mouth_width, 4),
-                "corner_lift": round(corner_lift, 4),
-                "mouth_action": mouth_action,
-            },
-        }
-    
-    def _extract_pose(self, pose_landmarks, width: int, height: int) -> Dict:
-        """
-        Extract pose landmarks and calculate features
-
-        Args:
-            pose_landmarks: MediaPipe pose landmarks
-            width: Frame width in pixels
-            height: Frame height in pixels
-
-        Returns:
-            Dict with landmarks (in pixels), arm_features, leg_features
-        """
-        landmarks = []
-        for lm in pose_landmarks.landmark:
-            landmarks.append([int(lm.x * width), int(lm.y * height), lm.z, lm.visibility])
-        
-        # Helper function to calculate angle
-        def calculate_angle(p1_idx, p2_idx, p3_idx):
-            p1 = pose_landmarks.landmark[p1_idx]
-            p2 = pose_landmarks.landmark[p2_idx]
-            p3 = pose_landmarks.landmark[p3_idx]
-            
-            v1 = np.array([p1.x, p1.y]) - np.array([p2.x, p2.y])
-            v2 = np.array([p3.x, p3.y]) - np.array([p2.x, p2.y])
-            
-            angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
-            return np.degrees(angle)
-        
-        # Arm features
-        left_elbow_angle = calculate_angle(11, 13, 15)  # shoulder-elbow-wrist
-        right_elbow_angle = calculate_angle(12, 14, 16)
-        
-        # Check if arms raised
-        left_wrist = pose_landmarks.landmark[15]
-        left_elbow = pose_landmarks.landmark[13]
-        left_shoulder = pose_landmarks.landmark[11]
-        
-        right_wrist = pose_landmarks.landmark[16]
-        right_elbow = pose_landmarks.landmark[14]
-        right_shoulder = pose_landmarks.landmark[12]
-        
-        left_arm_raised = left_wrist.y < left_elbow.y < left_shoulder.y
-        right_arm_raised = right_wrist.y < right_elbow.y < right_shoulder.y
-        
-        # Arm action detection
-        left_arm_action = "unknown"
-        if left_arm_raised:
-            left_arm_action = "raise_left"
-        elif left_elbow_angle > 150:
-            left_arm_action = "extend_left"
-        elif left_elbow_angle < 90:
-            left_arm_action = "fold_left"
-        else:
-            left_arm_action = "neutral_left"
-        
-        right_arm_action = "unknown"
-        if right_arm_raised:
-            right_arm_action = "raise_right"
-        elif right_elbow_angle > 150:
-            right_arm_action = "extend_right"
-        elif right_elbow_angle < 90:
-            right_arm_action = "fold_right"
-        else:
-            right_arm_action = "neutral_right"
-        
-        # Cross arms detection
-        cross_arms = False
-        if left_wrist.x > right_wrist.x and right_wrist.x < left_shoulder.x:
-            cross_arms = True
-        
-        # Leg features
-        left_knee_angle = calculate_angle(23, 25, 27)  # hip-knee-ankle
-        right_knee_angle = calculate_angle(24, 26, 28)
-        
-        # Check standing/sitting
-        left_hip = pose_landmarks.landmark[23]
-        left_knee = pose_landmarks.landmark[25]
-        left_ankle = pose_landmarks.landmark[27]
-        
-        right_hip = pose_landmarks.landmark[24]
-        right_knee = pose_landmarks.landmark[26]
-        right_ankle = pose_landmarks.landmark[28]
-        
-        hip_avg_y = (left_hip.y + right_hip.y) / 2
-        knee_avg_y = (left_knee.y + right_knee.y) / 2
-        
-        # Standing: hip < knee < ankle (y increases downward)
-        standing = left_hip.y < left_knee.y < left_ankle.y and right_hip.y < right_knee.y < right_ankle.y
-        
-        # Sitting: hip ≈ knee height
-        sitting = abs(hip_avg_y - knee_avg_y) < 0.1
-        
-        # Leg action detection
-        leg_action = "unknown"
-        if sitting:
-            leg_action = "sit"
-        elif standing:
-            if left_knee_angle < 120 or right_knee_angle < 120:
-                leg_action = "knee_bend"
-            else:
-                leg_action = "stand"
-        
-        return {
-            "landmarks": landmarks,
-            "num_landmarks": len(landmarks),
-            "arm_features": {
-                "left_elbow_angle": round(left_elbow_angle, 2),
-                "right_elbow_angle": round(right_elbow_angle, 2),
-                "left_arm_raised": left_arm_raised,
-                "right_arm_raised": right_arm_raised,
-                "left_arm_action": left_arm_action,
-                "right_arm_action": right_arm_action,
-                "cross_arms": cross_arms,
-            },
-            "leg_features": {
-                "left_knee_angle": round(left_knee_angle, 2),
-                "right_knee_angle": round(right_knee_angle, 2),
-                "standing": standing,
-                "sitting": sitting,
-                "leg_action": leg_action,
-            },
-        }
-    
-    def _extract_hand(self, hand_landmarks, hand_type: str, width: int, height: int) -> Dict:
-        """
-        Extract hand landmarks and detect gesture
-
-        Args:
-            hand_landmarks: MediaPipe hand landmarks
-            hand_type: "left" or "right"
-            width: Frame width in pixels
-            height: Frame height in pixels
-
-        Returns:
-            Dict with landmarks (in pixels), gesture
-        """
-        landmarks = []
-        for lm in hand_landmarks.landmark:
-            landmarks.append([int(lm.x * width), int(lm.y * height), lm.z])
-        
-        # Check finger extensions
-        def is_finger_extended(tip_idx, pip_idx):
-            tip = hand_landmarks.landmark[tip_idx]
-            pip = hand_landmarks.landmark[pip_idx]
-            
-            # Finger is extended if tip is higher (lower y) than pip
-            return tip.y < pip.y
-        
-        thumb_extended = is_finger_extended(4, 3)
-        index_extended = is_finger_extended(8, 6)
-        middle_extended = is_finger_extended(12, 10)
-        ring_extended = is_finger_extended(16, 14)
-        pinky_extended = is_finger_extended(20, 18)
-        
-        extensions = {
-            "thumb": thumb_extended,
-            "index": index_extended,
-            "middle": middle_extended,
-            "ring": ring_extended,
-            "pinky": pinky_extended,
-        }
-        
-        # Gesture detection
-        gesture = "unknown"
-        
-        num_extended = sum(extensions.values())
-        
-        if num_extended == 5:
-            gesture = "open_hand"
-        elif num_extended == 0:
-            gesture = "fist"
-        elif thumb_extended and num_extended == 1:
-            gesture = "thumbs_up"
-        elif index_extended and middle_extended and num_extended == 2:
-            gesture = "peace_sign"
-        elif index_extended and num_extended == 1:
-            gesture = "pointing"
-        elif thumb_extended and index_extended and not any([middle_extended, ring_extended, pinky_extended]):
-            # Check thumb-index distance for OK gesture
-            thumb_tip = hand_landmarks.landmark[4]
-            index_tip = hand_landmarks.landmark[8]
-            
-            distance = np.linalg.norm([thumb_tip.x - index_tip.x, thumb_tip.y - index_tip.y])
-            
-            if distance < 0.05:
-                gesture = "ok_sign"
-            else:
-                gesture = "grab"
-        
-        return {
-            "landmarks": landmarks,
-            "num_landmarks": len(landmarks),
-            "finger_extensions": extensions,
-            "num_fingers_extended": num_extended,
-            "gesture": gesture,
-            "hand_type": hand_type,
-        }
-    
-    def process_video(
-        self,
-        video_path: str,
-        output_path: str,
-        sample_interval: int = 1,
-        uuid: str = "",
-    ) -> Dict:
-        """
-        Process entire video
-        
-        Args:
-            video_path: Path to video file
-            output_path: Path to output JSON
-            sample_interval: Process every N frames
-            uuid: UUID for progress reporting
-        
-        Returns:
-            Dict with all processed data
-        """
-        cap = cv2.VideoCapture(video_path)
-        
-        if not cap.isOpened():
-            print(f"MEDIAPIPE_ERROR:Cannot open video: {video_path}", file=sys.stderr)
-            return {}
-        
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        
-        print(f"MEDIAPIPE_START", file=sys.stderr)
-        print(f"MEDIAPIPE_INFO:FPS={fps},total={total_frames},interval={sample_interval}", file=sys.stderr)
-        
-        output_data = {
-            "metadata": {
-                "video_path": video_path,
-                "fps": fps,
-                "width": width,
-                "height": height,
-                "total_frames": total_frames,
-                "sample_interval": sample_interval,
-                "processor": "mediapipe_holistic",
-                "model_complexity": 1,
-                "refine_face_landmarks": True,
-            },
-            "frames": {},
-        }
-        
-        frame_count = 0
-        processed_count = 0
-        
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            
-            frame_count += 1
-            
-            if frame_count % sample_interval != 0:
-                continue
-            
-            # Process frame
-            person_data = self.process_frame(frame)
-            
-            # Only save if landmarks detected
-            if person_data["face_mesh"] or person_data["pose"] or person_data["hands"]["left"] or person_data["hands"]["right"]:
-                timestamp = frame_count / fps if fps > 0 else 0
-                
-                output_data["frames"][str(frame_count)] = {
-                    "frame_number": frame_count,
-                    "timestamp": round(timestamp, 3),
-                    "persons": [person_data],
-                }
-                
-                processed_count += 1
-                
-                if processed_count % 100 == 0:
-                    print(f"MEDIAPIPE_FRAME:{processed_count}", file=sys.stderr)
-        
-        cap.release()
-        
-        # Update metadata
-        output_data["metadata"]["processed_frames"] = processed_count
-        
-        # Save output
-        with open(output_path, "w") as f:
-            json.dump(output_data, f, indent=2)
-        
-        print(f"MEDIAPIPE_COMPLETE:{processed_count}", file=sys.stderr)
-        
-        return output_data
-    
-    def close(self):
-        """Close MediaPipe model"""
-        self.holistic.close()
-
-
-def main():
-    parser = argparse.ArgumentParser(description="MediaPipe Holistic Processor")
-    parser.add_argument("video_path", nargs="?", help="Path to video file (positional)")
-    parser.add_argument("output_path", nargs="?", help="Path to output JSON (positional)")
-    parser.add_argument("--video", help="Path to video file")
-    parser.add_argument("--output", help="Path to output JSON")
-    parser.add_argument("--sample-interval", type=int, default=1, help="Process every N frames")
-    parser.add_argument("--model-complexity", type=int, default=1, choices=[0, 1, 2], help="Model complexity")
-    parser.add_argument("--test-frame", type=int, help="Test single frame only")
-    parser.add_argument("--uuid", default="", help="UUID for progress reporting")
-    args = parser.parse_args()
-
-    # Resolve positional vs flagged args
-    video_path = args.video or args.video_path
-    output_path = args.output or args.output_path
-    if not video_path or not output_path:
-        parser.error("video_path and output_path are required")
-    
-    print("=" * 70)
-    print("MediaPipe Holistic Processor")
-    print("=" * 70)
-    
-    processor = MediaPipeHolisticProcessor(
-        model_complexity=args.model_complexity,
-        refine_face_landmarks=True,
-    )
-    
-    if args.test_frame:
-        # Test single frame
-        print(f"\nTesting frame {args.test_frame}...")
-        
-        cap = cv2.VideoCapture(video_path)
-        cap.set(cv2.CAP_PROP_POS_FRAMES, args.test_frame - 1)
-        
-        ret, frame = cap.read()
-        cap.release()
-        
-        if ret:
-            person_data = processor.process_frame(frame)
-            
-            print("\n=== Results ===")
-            
-            if person_data["face_mesh"]:
-                face = person_data["face_mesh"]
-                print(f"\nFace Mesh: {face['num_landmarks']} landmarks")
-                print(f"  Eye: {face['eye_features']['eye_action']} (EAR: {face['eye_features']['avg_ear']})")
-                print(f"  Gaze: {face['eye_features']['gaze_direction']}")
-                print(f"  Mouth: {face['mouth_features']['mouth_action']} (MAR: {face['mouth_features']['mar']})")
-            
-            if person_data["pose"]:
-                pose = person_data["pose"]
-                print(f"\nPose: {pose['num_landmarks']} keypoints")
-                print(f"  Left arm: {pose['arm_features']['left_arm_action']} (angle: {pose['arm_features']['left_elbow_angle']}°)")
-                print(f"  Right arm: {pose['arm_features']['right_arm_action']} (angle: {pose['arm_features']['right_elbow_angle']}°)")
-                print(f"  Cross arms: {pose['arm_features']['cross_arms']}")
-                print(f"  Leg: {pose['leg_features']['leg_action']}")
-            
-            if person_data["hands"]["left"]:
-                hand = person_data["hands"]["left"]
-                print(f"\nLeft hand: {hand['num_landmarks']} keypoints")
-                print(f"  Gesture: {hand['gesture']}")
-                print(f"  Fingers extended: {hand['num_fingers_extended']}")
-            
-            if person_data["hands"]["right"]:
-                hand = person_data["hands"]["right"]
-                print(f"\nRight hand: {hand['num_landmarks']} keypoints")
-                print(f"  Gesture: {hand['gesture']}")
-                print(f"  Fingers extended: {hand['num_fingers_extended']}")
-        else:
-            print("❌ Cannot read frame")
-    else:
-        # Process entire video
-        processor.process_video(
-            video_path,
-            output_path,
-            args.sample_interval,
-            uuid=args.uuid,
-        )
-    
-    processor.close()
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/mediapipe_holistic_processor_v1.11.py
+++ b/scripts/mediapipe_holistic_processor_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/mediapipe_holistic_processor_v1.11.py
--- a/scripts/mediapipe_processor_v1.11.py
+++ b/scripts/mediapipe_processor_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/mediapipe_processor_v1.11.py
--- a/scripts/parent_chunk_5w1h.py
+++ b/scripts/parent_chunk_5w1h.py
@@ -1,381 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Story Processor V2.0 — Dual Pipeline: Story-based + LLM-based Parent-Child Summarization
-
-Pipeline 1 (Story): Template-based, instant, no LLM cost
-  → Parent story summary + Child story summary
-  → Embedding (Ollama nomic-embed) → pgvector
-  → BM25 (PostgreSQL tsvector) → full-text search
-
-Pipeline 2 (LLM): LLM-based summarization (Gemma4/Qwen when resources allow)
-  → Parent LLM summary + Child LLM summary
-  → Embedding → pgvector + BM25
-
-Both pipelines store into chunks table with distinct chunk_types:
-  story_parent, story_child, llm_parent, llm_child
-
-Usage:
-  python parent_chunk_5w1h.py --file-uuid <uuid> --mode story [--embed]
-  python parent_chunk_5w1h.py --file-uuid <uuid> --mode llm   [--embed]
-"""
-
-import json, os, sys, argparse, time, requests, psycopg2
-from collections import defaultdict
-from typing import Dict, List, Optional
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
-SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
-OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
-EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings")
-
-def load_speaker_map(file_uuid: str) -> dict:
-    """Load speaker→identity mapping from DB (generalized, not hardcoded)"""
-    try:
-        conn = psycopg2.connect(DB_URL)
-        cur = conn.cursor()
-        cur.execute("SET search_path TO %s, public", (SCHEMA,))
-        cur.execute(
-            "SELECT metadata->>'speaker_id', name FROM identities "
-            "WHERE metadata->>'speaker_id' IS NOT NULL"
-        )
-        spk_map = {}
-        for spk_id, name in cur.fetchall():
-            spk_map[spk_id] = (name, 0.85)  # default confidence from MAR
-        cur.close(); conn.close()
-        return spk_map if spk_map else DEFAULT_SPEAKER_MAP
-    except Exception:
-        return DEFAULT_SPEAKER_MAP
-
-# Default fallback (used when DB has no speaker mapping)
-DEFAULT_SPEAKER_MAP = {}
-
-CURRENT_VERSIONS = {
-    "asr": "faster-whisper/small/v1",
-    "asrx": "speechbrain/ecapa-tdnn/v1",
-    "cut": "pyscenedetect/default",
-    "yolo": "yolov5-coreml/v2",
-    "face_detection": "apple-vision/v2",
-    "face_embedding": "coreml-facenet/v2",
-    "speaker_binding": "mar-lip/v1",
-    "identity_clustering": "cosine-threshold/v1",
-    "story_agent": "template/v2.0",
-    "embedding_agent": "nomic-embed-768d/v1",
-}
-
-LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions"))
-LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4")
-
-
-def load_data(file_uuid: str) -> dict:
-    data = {}
-    for name in ["asr", "asrx", "cut"]:
-        path = os.path.join(OUTPUT_DIR, f"{file_uuid}.{name}.json")
-        data[name] = json.load(open(path)) if os.path.exists(path) else None
-    return data
-
-
-def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
-    """Group ASR sentences by CUT scene boundaries → parent/child structure."""
-    asr_segs = data["asr"].get("segments", []) if data["asr"] else []
-    asrx_segs = data["asrx"].get("segments", []) if data["asrx"] else []
-    cut_scenes = data["cut"].get("scenes", []) if data["cut"] else []
-
-    # Dynamically load speaker→identity mapping from DB
-    speaker_map = load_speaker_map(file_uuid)
-
-    if not cut_scenes:
-        max_t = max(
-            (asr_segs[-1].get("end", 0) if asr_segs else 0),
-            (asrx_segs[-1].get("end_time", 0) if asrx_segs else 0),
-        )
-        cut_scenes = [{"start_time": t, "end_time": min(t + 60, max_t)} for t in range(0, int(max_t) + 60, 60)]
-
-    scenes = []
-    for cs in cut_scenes:
-        s, e = cs["start_time"], cs["end_time"]
-
-        children = []
-        for seg_idx, seg in enumerate(asr_segs):
-            st, en = seg.get("start", 0), seg.get("end", 0)
-            text = seg.get("text", "").strip()
-            if st < s or en > e or not text: continue
-
-            spk_id = "unknown"
-            for ax in asrx_segs:
-                if ax["start_time"] <= st and ax["end_time"] >= en:
-                    spk_id = ax.get("speaker_id", "unknown"); break
-
-            spk_info = speaker_map.get(spk_id)
-            if spk_info:
-                character, spk_conf = spk_info
-            else:
-                character, spk_conf = spk_id, 0.0
-
-            children.append({
-                "start": st, "end": en, "text": text,
-                "speaker_id": spk_id, "speaker_name": character,
-                "speaker_confidence": spk_conf,
-                "chunk_id": f"{file_uuid}_{seg_idx}",
-            })
-
-        # Boundary overlap: even empty scenes get partial children
-        for seg_idx, seg in enumerate(asr_segs):
-            st, en = seg.get("start", 0), seg.get("end", 0)
-            text = seg.get("text", "").strip()
-            if not text: continue
-            if st >= s and en <= e: continue
-            if not (st < e and en > s): continue
-            
-            spk_id = "unknown"
-            for ax in asrx_segs:
-                if ax["start_time"] <= st and ax["end_time"] >= en:
-                    spk_id = ax.get("speaker_id", "unknown"); break
-            spk_info = speaker_map.get(spk_id)
-            if spk_info:
-                character, spk_conf = spk_info
-            else:
-                character, spk_conf = spk_id, 0.0
-            children.append({
-                "start": st, "end": en, "text": text,
-                "speaker_id": spk_id, "speaker_name": character,
-                "speaker_confidence": spk_conf,
-                "chunk_id": f"{file_uuid}_{seg_idx}",
-                "overlap_type": "partial",
-            })
-
-        if children:
-            scenes.append({
-                "start_time": s, "end_time": e, "duration": e - s,
-                "children": children, "child_count": len(children),
-            })
-    return scenes
-
-
-# ===== Pipeline 1: Story (Template) Summaries =====
-
-def generate_story_parent_summary(scene: dict) -> str:
-    children = scene["children"]
-    characters = sorted(set(c["speaker_name"] for c in children))
-    total_words = sum(len(c["text"].split()) for c in children)
-    by_speaker = defaultdict(list)
-    for c in children: by_speaker[c["speaker_name"]].append(c["text"])
-    speakers = []
-    for char, texts in sorted(by_speaker.items()):
-        speakers.append(f"{char} ({len(texts)} lines)")
-
-    return (
-        f"[{scene['start_time']:.0f}s-{scene['end_time']:.0f}s, {scene['duration']:.0f}s] "
-        f"Cast: {', '.join(characters)}. Total: {len(children)} lines, {total_words} words. "
-        f"Speakers: {' | '.join(speakers[:3])}"
-    )
-
-
-def generate_story_child_summary(child: dict, parent_summary: str) -> str:
-    return (
-        f"[{child['start']:.0f}s-{child['end']:.0f}s] "
-        f"{child['speaker_name']}: \"{child['text']}\""
-    )
-
-
-# ===== Pipeline 2: LLM Summaries (requires LLM server) =====
-
-def generate_llm_parent_summary(scene: dict, max_scenes_processed: int) -> Optional[str]:
-    """LLM-based parent summary"""
-    if not LLM_URL: return None
-    children = scene["children"]
-    dialogue = "\n".join(
-        f"[{c['start']:.0f}s] {c['speaker_name']}: {c['text'][:150]}"
-        for c in children[:15]
-    )
-    prompt = (
-        "You are a film analyst. Summarize this scene in one flowing paragraph (60-100 words). "
-        "Include: who is present, what they discuss, tone/mood.\n\n"
-        f"Scene: {scene['start_time']:.0f}s - {scene['end_time']:.0f}s\n"
-        f"Dialogue:\n{dialogue}\n\nSummary:"
-    )
-    try:
-        resp = requests.post(LLM_URL, json={
-            "model": LLM_MODEL,
-            "messages": [{"role": "user", "content": prompt}],
-            "max_tokens": 200, "temperature": 0.3,
-        }, timeout=60)
-        return resp.json()["choices"][0]["message"]["content"].strip()
-    except Exception as e:
-        print(f"  ⚠️ LLM parent summary failed: {e}")
-        return None
-
-
-def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str]:
-    """LLM-based child (sentence) summary"""
-    return f"[{child['start']:.0f}s-{child['end']:.0f}s] {child['speaker_name']}: \"{child['text']}\""
-
-
-# ===== Embedding (Ollama nomic-embed) =====
-
-def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]:
-    """Get embedding via EmbeddingGemma server"""
-    for attempt in range(max_retries):
-        try:
-            resp = requests.post(EMBEDDING_URL, json={
-                "input": [text],
-            }, timeout=30)
-            if resp.status_code == 200:
-                data = resp.json()
-                items = data.get("data", [])
-                if items:
-                    return items[0]["embedding"]
-        except Exception as e:
-            if attempt == max_retries - 1:
-                print(f"  ⚠️ Embedding failed: {e}")
-                return None
-            time.sleep(1)
-    return None
-
-
-# ===== DB Store (chunks table with embedding + BM25) =====
-
-def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool, conn):
-    """Store parent + child summaries into chunks table."""
-    cur = conn.cursor()
-    parent_type = f"{mode}_parent"
-    child_type = f"{mode}_child"
-
-    parent_count = 0
-    child_count = 0
-
-    # Get base chunk_index
-    cur.execute(
-        f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s",
-        (file_uuid,),
-    )
-    next_index = (cur.fetchone()[0] or 0) + 1
-
-    for scene in scenes:
-        parent_text = generate_story_parent_summary(scene) if mode == "story" else generate_llm_parent_summary(scene, parent_count)
-        if not parent_text: continue
-
-        parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}"
-
-        parent_embedding = embed_text(parent_text) if do_embed else None
-        if do_embed and parent_embedding:
-            cur.execute(
-                f"""
-                INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
-                                             start_time, end_time, content, text_content, parent_chunk_id, embedding)
-                VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
-                ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
-                    SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
-                        embedding = EXCLUDED.embedding
-                """,
-                (parent_id, parent_id, file_uuid, parent_type, next_index,
-                 scene["start_time"], scene["end_time"],
-                 json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
-                             "source_versions": CURRENT_VERSIONS}),
-                 parent_text, None, parent_embedding),
-            )
-        else:
-            cur.execute(
-                f"""
-                INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
-                                             start_time, end_time, content, text_content, parent_chunk_id)
-                VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
-                ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
-                    SET content = EXCLUDED.content, text_content = EXCLUDED.text_content
-                """,
-                (parent_id, parent_id, file_uuid, parent_type, next_index,
-                 scene["start_time"], scene["end_time"],
-                 json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
-                             "source_versions": CURRENT_VERSIONS}),
-                 parent_text, None),
-            )
-        next_index += 1
-        parent_count += 1
-
-        for child in scene["children"]:
-            child_id = child["chunk_id"]
-            child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text)
-
-            child_embedding = embed_text(child_text) if do_embed else None
-            if do_embed and child_embedding:
-                cur.execute(
-                    f"""
-                    INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
-                                                 start_time, end_time, content, text_content, parent_chunk_id, embedding)
-                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
-                    ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
-                        SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
-                            parent_chunk_id = EXCLUDED.parent_chunk_id,
-                            embedding = EXCLUDED.embedding
-                    """,
-                    (child_id, child_id, file_uuid, child_type, next_index,
-                     child["start"], child["end"],
-                     json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
-                                 "speaker_confidence": child.get("speaker_confidence", 0),
-                                 "source_versions": CURRENT_VERSIONS}),
-                     child_text, parent_id, child_embedding),
-                )
-            else:
-                cur.execute(
-                    f"""
-                    INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
-                                                 start_time, end_time, content, text_content, parent_chunk_id)
-                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
-                    ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
-                        SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
-                            parent_chunk_id = EXCLUDED.parent_chunk_id
-                    """,
-                    (child_id, child_id, file_uuid, child_type, next_index,
-                     child["start"], child["end"],
-                     json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
-                                 "speaker_confidence": child.get("speaker_confidence", 0),
-                                 "source_versions": CURRENT_VERSIONS}),
-                     child_text, parent_id),
-                )
-            next_index += 1
-            child_count += 1
-
-    conn.commit()
-    cur.close()
-    return parent_count, child_count
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Story Processor V2.0")
-    parser.add_argument("--file-uuid", required=True)
-    parser.add_argument("--mode", choices=["story", "llm"], default="story")
-    parser.add_argument("--max-scenes", type=int, default=99999)
-    parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)")
-    parser.add_argument("--no-db", action="store_true", help="Skip DB storage")
-    args = parser.parse_args()
-
-    file_uuid = args.file_uuid
-    print(f"[STORY] Mode: {args.mode}, Embed: {args.embed}")
-
-    data = load_data(file_uuid)
-    if not data["asr"]:
-        print("[STORY] ❌ No ASR data"); return
-
-    scenes = build_child_chunks(data, file_uuid)[:args.max_scenes]
-    total_children = sum(s["child_count"] for s in scenes)
-    print(f"[STORY] {len(scenes)} scenes, {total_children} child chunks")
-
-    if not args.no_db:
-        conn = psycopg2.connect(DB_URL)
-        try:
-            pc, cc = store_chunks(file_uuid, scenes, args.mode, args.embed, conn)
-            print(f"[STORY] DB: {pc} parent, {cc} child chunks ({args.mode})")
-        finally:
-            conn.close()
-
-    # Save JSON output
-    out_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.story_{args.mode}.json")
-    out_data = {"file_uuid": file_uuid, "mode": args.mode, "scenes": scenes}
-    with open(out_path, "w") as f:
-        json.dump(out_data, f, indent=2, ensure_ascii=False, default=str)
-    print(f"[STORY] ✅ {out_path}")
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/parent_chunk_5w1h_v1.11.py
+++ b/scripts/parent_chunk_5w1h_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/parent_chunk_5w1h_v1.11.py
--- a/scripts/rebuild_story_content.py
+++ b/scripts/rebuild_story_content.py
@@ -1,320 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Rebuild story chunk text_content and regenerates summaries using new ASRX speaker assignments.
-Then updates Qdrant momentry_dev_stories and sentence_story/sentence_summary collections.
-"""
-
-import json, sys, time, urllib.request
-from urllib.request import Request, urlopen
-import psycopg2
-
-UUID = "aeed71342a899fe4b4c57b7d41bcb692"
-DB_URL = "postgresql://accusys@localhost:5432/momentry?host=/tmp"
-QDRANT_URL = "http://localhost:6333"
-LLM_URL = "http://localhost:8082/v1/chat/completions"
-EMBED_URL = "http://localhost:11436/v1/embeddings"
-
-def call_llm(dialogue_text):
-    prompt = f"Dialogue:\n{dialogue_text}\n\n50-word summary:"
-    body = json.dumps({"model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf",
-        "messages": [{"role": "user", "content": prompt}],
-        "temperature": 0.1, "max_tokens": 100}).encode()
-    req = Request(LLM_URL, data=body, headers={"Content-Type": "application/json"})
-    try:
-        resp = urlopen(req, timeout=120)
-        return json.loads(resp.read())["choices"][0]["message"]["content"].strip()
-    except Exception as e:
-        print(f"    LLM error: {e}")
-        return ""
-
-def call_embed(text):
-    body = json.dumps({"input": text}).encode()
-    req = Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"})
-    try:
-        resp = urlopen(req, timeout=30)
-        return json.loads(resp.read())["data"][0]["embedding"]
-    except Exception as e:
-        print(f"    Embed error: {e}")
-        return [0.0] * 768
-
-print("=== Step 1: Load sentence chunks with new speaker info ===")
-conn = psycopg2.connect(DB_URL)
-cur = conn.cursor()
-
-cur.execute("""
-    SELECT chunk_index, text_content, metadata->>'new_speaker_name',
-           metadata->>'speaker_name', content
-    FROM dev.chunks
-    WHERE file_uuid = %s AND chunk_type = 'sentence'
-    ORDER BY chunk_index
-""", (UUID,))
-sentence_rows = cur.fetchall()
-print(f"Loaded {len(sentence_rows)} sentence chunks")
-
-# Build lookup
-sentences = {}
-for r in sentence_rows:
-    idx, old_text, new_name, old_name, content = r
-    sentences[idx] = {
-        "old_text": old_text or "",
-        "new_name": new_name or old_name or "Unknown",
-        "old_name": old_name or "Unknown",
-        "content": content or {},
-    }
-
-# Rebuild sentence text_content with new speaker names
-print("\n=== Step 2: Rebuild sentence text_content ===")
-updated_sentences = 0
-for r in sentence_rows:
-    idx, old_text, new_name, old_name, content = r
-    new_name = new_name or old_name or "Unknown"
-    
-    # Extract the text part (remove old speaker prefix if exists)
-    raw_text = ""
-    if content and isinstance(content, dict):
-        raw_text = content.get("data", {}).get("text", "")
-    if not raw_text and old_text:
-        # Parse old format: [Speaker] text
-        import re
-        m = re.search(r'\]\s*(.*)', old_text)
-        if m:
-            raw_text = m.group(1)
-        else:
-            raw_text = old_text
-    
-    new_text = f"[{new_name}] {raw_text}"
-    
-    cur.execute("""
-        UPDATE dev.chunks
-        SET text_content = %s, updated_at = NOW()
-        WHERE file_uuid = %s AND chunk_type = 'sentence' AND chunk_index = %s
-    """, (new_text, UUID, idx))
-    updated_sentences += 1
-
-conn.commit()
-print(f"Updated {updated_sentences} sentence chunks text_content")
-
-print("\n=== Step 3: Rebuild story chunk text_content ===")
-cur.execute("""
-    SELECT id, chunk_id, chunk_index, child_chunk_ids, start_time, end_time,
-           text_content, summary_text
-    FROM dev.chunks
-    WHERE file_uuid = %s AND chunk_type = 'story'
-    ORDER BY chunk_index
-""", (UUID,))
-story_rows = cur.fetchall()
-print(f"Loaded {len(story_rows)} story chunks")
-
-# Build child text per story chunk
-story_dialogue_texts = []
-for r in story_rows:
-    db_id, cid, idx, child_ids, st, et, old_text, old_summary = r
-    
-    dialogue_parts = []
-    for child_cid in (child_ids or []):
-        parts = child_cid.split("_")
-        child_idx = int(parts[-1])
-        if child_idx in sentences:
-            s = sentences[child_idx]
-            raw = ""
-            if s["content"] and isinstance(s["content"], dict):
-                raw = s["content"].get("data", {}).get("text", "")
-            if not raw:
-                import re
-                m = re.search(r'\]\s*(.*)', s["old_text"])
-                if m:
-                    raw = m.group(1)
-                else:
-                    raw = s["old_text"]
-            if raw:
-                dialogue_parts.append(f'({s["new_name"]}) {raw}')
-    
-    dialogue_text = " ".join(dialogue_parts)
-    story_dialogue_texts.append((db_id, cid, idx, st, et, dialogue_text, old_summary))
-
-print(f"Built {len(story_dialogue_texts)} story dialogue texts")
-
-# Update DB with new text_content (dialogue only, not summary yet)
-for item in story_dialogue_texts:
-    db_id, cid, idx, st, et, dialogue_text, old_summary = item
-    cur.execute("""
-        UPDATE dev.chunks
-        SET text_content = %s, updated_at = NOW()
-        WHERE id = %s
-    """, (dialogue_text, db_id))
-
-conn.commit()
-print("Updated story chunk dialogue texts")
-
-print("\n=== Step 4: Generate LLM summaries (all 228 stories) ===")
-summaries = []
-for i, item in enumerate(story_dialogue_texts):
-    db_id, cid, idx, st, et, dialogue_text, old_summary = item
-    
-    if len(dialogue_text) < 10:
-        summary = "[no dialogue]"
-        embedding = [0.0] * 768
-    else:
-        print(f"  [{i+1}/{len(story_dialogue_texts)}] {cid}: {len(dialogue_text)} chars", end="")
-        try:
-            summary = call_llm(dialogue_text[:3000])
-            print(f" -> {len(summary)} chars")
-            time.sleep(0.3)
-            embedding = call_embed(summary)
-        except Exception as e:
-            print(f" ERROR: {e}")
-            summary = "[error]"
-            embedding = [0.0] * 768
-    
-    # Update DB
-    s_esc = summary.replace("'", "''")
-    cur.execute(f"""
-        UPDATE dev.chunks
-        SET summary_text = '{s_esc}', updated_at = NOW()
-        WHERE id = {db_id}
-    """)
-    
-    summaries.append({
-        "db_id": db_id,
-        "chunk_id": cid,
-        "chunk_index": idx,
-        "start_time": st,
-        "end_time": et,
-        "dialogue": dialogue_text,
-        "summary": summary,
-        "embedding": embedding,
-    })
-
-conn.commit()
-print(f"\nGenerated {len(summaries)} summaries")
-
-print("\n=== Step 5: Rebuild Qdrant momentry_dev_stories ===")
-# Delete existing
-req = Request(f"{QDRANT_URL}/collections/momentry_dev_stories", method="DELETE")
-try:
-    urlopen(req)
-    time.sleep(0.3)
-except:
-    pass
-
-# Recreate
-req = Request(f"{QDRANT_URL}/collections/momentry_dev_stories",
-    data=json.dumps({"vectors": {"size": 768, "distance": "Cosine"}}).encode(),
-    headers={"Content-Type": "application/json"}, method="PUT")
-urlopen(req)
-time.sleep(0.3)
-
-# Upload dialogue points (0..227) and summary points (228..455)
-dialogue_points = []
-summary_points = []
-for s in summaries:
-    idx = s["chunk_index"]
-    dialogue_points.append({
-        "id": idx + 1,
-        "vector": [0.0] * 768,
-        "payload": {
-            "chunk_id": s["chunk_id"],
-            "file_uuid": UUID,
-            "start_time": s["start_time"],
-            "end_time": s["end_time"],
-            "type": "story_dialogue",
-            "text": s["dialogue"][:500],
-        }
-    })
-    summary_points.append({
-        "id": idx + 1 + 228,
-        "vector": s["embedding"],
-        "payload": {
-            "chunk_id": s["chunk_id"],
-            "file_uuid": UUID,
-            "start_time": s["start_time"],
-            "end_time": s["end_time"],
-            "type": "story_summary",
-            "summary": s["summary"],
-        }
-    })
-
-all_story_points = dialogue_points + summary_points
-
-batch_size = 100
-for start in range(0, len(all_story_points), batch_size):
-    batch = all_story_points[start:start+batch_size]
-    req = Request(f"{QDRANT_URL}/collections/momentry_dev_stories/points?wait=true",
-        data=json.dumps({"points": batch}).encode(),
-        headers={"Content-Type": "application/json"}, method="PUT")
-    try:
-        urlopen(req)
-    except Exception as e:
-        print(f"  Batch {start}: {e}")
-    if (start // batch_size) % 3 == 0:
-        print(f"  Uploaded {start + len(batch)}/{len(all_story_points)}")
-
-print(f"Uploaded {len(all_story_points)} points to momentry_dev_stories")
-
-print("\n=== Step 6: Populate sentence_story and sentence_summary ===")
-# These are the per-sentence template + summary collections
-# sentence_story: 3417 points, 768D, template payloads
-# sentence_summary: 3417 points, 768D, LLM summary payloads
-
-for col_name in ["sentence_story", "sentence_summary"]:
-    req = Request(f"{QDRANT_URL}/collections/{col_name}", method="DELETE")
-    try:
-        urlopen(req)
-        time.sleep(0.2)
-    except:
-        pass
-    
-    req = Request(f"{QDRANT_URL}/collections/{col_name}",
-        data=json.dumps({"vectors": {"size": 768, "distance": "Cosine"}}).encode(),
-        headers={"Content-Type": "application/json"}, method="PUT")
-    urlopen(req)
-    time.sleep(0.2)
-
-# Build points for sentence_story and sentence_summary
-story_sentence_points = []
-summary_sentence_points = []
-for idx in sorted(sentences.keys()):
-    s = sentences[idx]
-    raw_text = ""
-    if s["content"] and isinstance(s["content"], dict):
-        raw_text = s["content"].get("data", {}).get("text", "")
-    
-    dialog_line = f'({s["new_name"]}) {raw_text}'
-    
-    story_sentence_points.append({
-        "id": idx + 1,
-        "vector": [0.0] * 768,
-        "payload": {
-            "chunk_id": f"{UUID}_{idx}",
-            "file_uuid": UUID,
-            "start_time": 0,
-            "end_time": 0,
-            "text": dialog_line,
-            "speaker_name": s["new_name"],
-            "chunk_type": "sentence",
-        }
-    })
-
-# Upload sentence_story (dialogue template)
-batch_size = 200
-for start in range(0, len(story_sentence_points), batch_size):
-    batch = story_sentence_points[start:start+batch_size]
-    req = Request(f"{QDRANT_URL}/collections/sentence_story/points?wait=true",
-        data=json.dumps({"points": batch}).encode(),
-        headers={"Content-Type": "application/json"}, method="PUT")
-    try:
-        urlopen(req)
-    except Exception as e:
-        print(f"  sentence_story batch {start}: {e}")
-    if (start // batch_size) % 5 == 0:
-        print(f"  Uploaded {start + len(batch)}/3417 sentence_story")
-
-print("Uploaded sentence_story points")
-
-# sentence_summary will be populated when we generate per-sentence summaries
-# For now, mark as TODO
-print("sentence_summary: SKIPPED (needs per-sentence LLM summaries)")
-
-cur.close()
-conn.close()
-print("\n=== Done ===")
--- a/scripts/rebuild_story_content_v1.11.py
+++ b/scripts/rebuild_story_content_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/rebuild_story_content_v1.11.py
--- a/scripts/regenerate_parent_5w1h.py
+++ b/scripts/regenerate_parent_5w1h.py
@@ -1,197 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Regenerate parent chunk summaries using 5W1H multi-dimensional structure via gemma4.
-
-5W1H Structure:
- Who: Main characters/people involved
- What: Key actions/events
- When: Temporal context (sequence in story)
- Where: Location/setting
- Why: Motivation/conflict driving the scene
- How: Emotional tone/manner of events
-"""
-
-import json
-import requests
-import psycopg2
-import psycopg2.extras
-
-DB_CONFIG = {"host": "localhost", "user": "accusys", "dbname": "momentry"}
-UUID = "384b0ff44aaaa1f1"
-LLAMA_URL = "http://127.0.0.1:8081/v1/chat/completions"
-
-
-def get_parent_with_children():
-    """Get all parent chunks with their child chunk texts"""
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
-
-    cur.execute(
-        """
-        SELECT pc.id, pc.scene_order, pc.start_time, pc.end_time,
-               pc.start_frame, pc.end_frame, pc.fps, pc.summary_text as old_summary,
-               pc.metadata,
-               ARRAY_AGG(c.text_content ORDER BY c.start_time) as child_texts
-        FROM parent_chunks pc
-        LEFT JOIN chunks c ON c.parent_chunk_id = pc.id::varchar
-        WHERE pc.uuid = %s
-        GROUP BY pc.id, pc.scene_order, pc.start_time, pc.end_time,
-                 pc.start_frame, pc.end_frame, pc.fps, pc.summary_text, pc.metadata
-        ORDER BY pc.scene_order
-    """,
-        (UUID,),
-    )
-
-    parents = cur.fetchall()
-    cur.close()
-    conn.close()
-    return parents
-
-
-def call_gemma4(prompt, max_tokens=1500):
-    """Call Gemma4 via llama-server OpenAI-compatible API"""
-    payload = {
-        "messages": [{"role": "user", "content": prompt}],
-        "max_tokens": max_tokens,
-        "temperature": 0.3,
-        "min_p": 0.1,
-    }
-    try:
-        resp = requests.post(LLAMA_URL, json=payload, timeout=180)
-        if resp.status_code == 200:
-            result = resp.json()
-            content = (
-                result.get("choices", [{}])[0]
-                .get("message", {})
-                .get("content", "")
-                .strip()
-            )
-            return content
-    except Exception as e:
-        print(f"    ⚠️  llama-server error: {e}")
-    return ""
-
-
-def generate_5w1h_summary(parent, scene_num):
-    """Generate 5W1H structured summary using gemma4"""
-    texts = [t for t in (parent["child_texts"] or []) if t]
-    if not texts:
-        return None
-
-    # Use only first 3 and last 3 dialogue lines for context (much faster)
-    sample_texts = texts[:3] + ["..."] + texts[-3:] if len(texts) > 6 else texts
-    combined = "\n".join(sample_texts)[:1500]
-    duration = parent["end_time"] - parent["start_time"]
-
-    prompt = f"""You are a film scene analyst. Analyze this scene and provide 5W1H analysis.
-
-Scene {scene_num}/17 | {duration:.0f}s | {len(texts)} dialogue lines
-
-Key dialogue:
-{combined}
-
-Respond with ONLY this JSON:
-{{"summary_5lines":"...","who":"...","what":"...","when":"...","where":"...","why":"...","how":"...","characters":[],"tone":[],"key_events":[]}}
-IMPORTANT: "summary_5lines" must be EXACTLY 5 lines describing the scene. Each line should be a complete sentence separated by \\n."""
-
-    response = call_gemma4(prompt, max_tokens=2000)
-
-    if not response:
-        return None
-
-    # Simple JSON extraction: find first { and last }
-    try:
-        start = response.find("{")
-        end = response.rfind("}") + 1
-        if start >= 0 and end > start:
-            return json.loads(response[start:end])
-    except Exception:
-        pass
-
-    return None
-
-
-def update_parent_chunk(parent, analysis):
-    """Update parent chunk with 5W1H structured data"""
-    if not analysis:
-        return False
-
-    conn = psycopg2.connect(**DB_CONFIG)
-    cur = conn.cursor()
-
-    # Create structured summary text (5 lines)
-    structured_text = f"{analysis.get('summary_5lines', '')}"
-
-    # Update metadata with full 5W1H structure
-    metadata = parent["metadata"] if parent["metadata"] else {}
-    metadata["auto_generated_by"] = "gemma4"
-    metadata["chunk_count"] = len(parent["child_texts"] or [])
-    metadata["structured_summary"] = {
-        "summary_5lines": analysis.get("summary_5lines", ""),
-        "who": analysis.get("who", ""),
-        "what": analysis.get("what", ""),
-        "when": analysis.get("when", ""),
-        "where": analysis.get("where", ""),
-        "why": analysis.get("why", ""),
-        "how": analysis.get("how", ""),
-        "characters": analysis.get("characters", []),
-        "tone": analysis.get("tone", []),
-        "key_events": analysis.get("key_events", []),
-    }
-
-    cur.execute(
-        """
-        UPDATE parent_chunks
-        SET summary_text = %s,
-            metadata = %s::jsonb
-        WHERE id = %s
-    """,
-        (structured_text, json.dumps(metadata, ensure_ascii=False), parent["id"]),
-    )
-
-    conn.commit()
-    cur.close()
-    conn.close()
-    return True
-
-
-def main():
-    print(f"🎬 Regenerating 5W1H summaries for {UUID}")
-    print(f"   Using llama.cpp server at {LLAMA_URL}")
-    print("=" * 70)
-
-    parents = get_parent_with_children()
-    print(f"📥 Found {len(parents)} parent chunks")
-
-    success_count = 0
-    for i, parent in enumerate(parents):
-        duration = parent["end_time"] - parent["start_time"]
-        text_count = len(parent["child_texts"] or [])
-        print(
-            f"\n🎬 Scene {parent['scene_order']}: {parent['start_time']:.0f}s-{parent['end_time']:.0f}s ({duration:.0f}s, {text_count} chunks)"
-        )
-        if parent["old_summary"]:
-            print(f"   Old: {parent['old_summary'][:80]}...")
-
-        analysis = generate_5w1h_summary(parent, parent["scene_order"])
-
-        if analysis:
-            summary = analysis.get("summary_5lines", "N/A")
-            print(f"   ✅ Summary: {summary[:100]}...")
-            print(f"   👤 Who: {analysis.get('who', 'N/A')[:60]}")
-            print(f"   📍 Where: {analysis.get('where', 'N/A')[:60]}")
-            print(f"   💡 Why: {analysis.get('why', 'N/A')[:60]}")
-
-            if update_parent_chunk(parent, analysis):
-                success_count += 1
-        else:
-            print("   ❌ Failed to generate analysis")
-
-    print(f"\n{'=' * 70}")
-    print(
-        f"✅ Updated {success_count}/{len(parents)} parent chunks with 5W1H summaries"
-    )
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/regenerate_parent_5w1h_v1.11.py
+++ b/scripts/regenerate_parent_5w1h_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/regenerate_parent_5w1h_v1.11.py
--- a/scripts/store_traced_faces.py
+++ b/scripts/store_traced_faces.py
@@ -39,140 +39,8 @@ def get_conn():


 def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
-    """Merge traces within the same cut if they have similar embeddings (same person re-appeared)."""
-    frames = face_data.get("frames", {})
-    if not frames:
-        return face_data
-
-    # Map each frame to its scene/cut number
-    frame_to_scene = {}
-    for s in cut_scenes:
-        for f in range(s["start_frame"], s["end_frame"] + 1):
-            frame_to_scene[f] = s["scene_number"]
-
-    # Collect per-trace data: scene numbers, embeddings, face positions
-    trace_frames = defaultdict(list)
-    trace_embeddings = defaultdict(list)
-    trace_poses = {}
-
-    for fnum_str, frm_data in frames.items():
-        fnum = int(fnum_str)
-        for face in frm_data.get("faces", []):
-            tid = face.get("trace_id")
-            if tid is None:
-                continue
-            trace_frames[tid].append(fnum)
-            emb = face.get("embedding")
-            if emb is not None:
-                trace_embeddings[tid].append(emb)
-            if tid not in trace_poses:
-                trace_poses[tid] = (
-                    face.get("x", 0),
-                    face.get("y", 0),
-                    face.get("width", 0),
-                    face.get("height", 0),
-                )
-
-    if len(trace_embeddings) < 2:
-        return face_data
-
-    # Compute centroid per trace
-    trace_centroids = {}
-    for tid, embs in trace_embeddings.items():
-        centroid = np.mean(embs, axis=0)
-        norm = np.linalg.norm(centroid)
-        trace_centroids[tid] = centroid / norm if norm > 0 else centroid
-
-    # Determine which scene each trace belongs to (majority of frames)
-    trace_scene = {}
-    for tid, fns in trace_frames.items():
-        scene_votes = defaultdict(int)
-        for fn in fns:
-            scene = frame_to_scene.get(fn, -1)
-            scene_votes[scene] += 1
-        trace_scene[tid] = max(scene_votes, key=scene_votes.get) if scene_votes else -1
-
-    # Within each scene, merge traces with similar centroids
-    scene_traces = defaultdict(list)
-    for tid, scene in trace_scene.items():
-        if scene >= 0 and tid in trace_centroids:
-            scene_traces[scene].append(tid)
-
-    merged = 0
-    next_new_id = max(trace_frames.keys()) + 1 if trace_frames else 0
-    SIMILARITY_THRESHOLD = 0.75
-
-    for scene, tids in scene_traces.items():
-        if len(tids) < 2:
-            continue
-        used = set()
-        for i in range(len(tids)):
-            if tids[i] in used:
-                continue
-            keep_tid = tids[i]
-            for j in range(i + 1, len(tids)):
-                if tids[j] in used:
-                    continue
-                sim = float(np.dot(trace_centroids[tids[i]], trace_centroids[tids[j]]))
-                if sim >= SIMILARITY_THRESHOLD:
-                    # Merge tids[j] into keep_tid
-                    for fnum_str, frm_data in frames.items():
-                        for face in frm_data.get("faces", []):
-                            if face.get("trace_id") == tids[j]:
-                                face["trace_id"] = keep_tid
-                    used.add(tids[j])
-                    merged += 1
-
-    # If any merges happened, rebuild trace metadata
-    if merged > 0:
-        # Rebuild traces dict
-        new_traces = {}
-        new_trace_frames = defaultdict(list)
-        for fnum_str, frm_data in frames.items():
-            fnum = int(fnum_str)
-            for face in frm_data.get("faces", []):
-                tid = face.get("trace_id")
-                if tid is not None:
-                    new_trace_frames[tid].append(
-                        {
-                            "frame": fnum,
-                            "face_index": 0,
-                            "bbox": {
-                                "x": face.get("x", 0),
-                                "y": face.get("y", 0),
-                                "width": face.get("width", 0),
-                                "height": face.get("height", 0),
-                            },
-                            "confidence": face.get("confidence", 0.0),
-                        }
-                    )
-
-        for tid, path in new_trace_frames.items():
-            if len(path) >= 1:
-                frames_sorted = sorted(set(p["frame"] for p in path))
-                new_traces[str(tid)] = {
-                    "trace_id": tid,
-                    "start_frame": frames_sorted[0],
-                    "end_frame": frames_sorted[-1],
-                    "duration_frames": frames_sorted[-1] - frames_sorted[0] + 1,
-                    "duration_seconds": (frames_sorted[-1] - frames_sorted[0])
-                    / face_data.get("metadata", {}).get("fps", 25.0),
-                    "total_appearances": len(path),
-                    "path": path,
-                }
-
-        face_data["traces"] = new_traces
-        face_data["metadata"]["trace_stats"] = {
-            "total_traces": len(new_traces),
-            "active_traces": len(new_traces),
-            "long_traces": len(
-                [t for t in new_traces.values() if t["duration_frames"] >= 2]
-            ),
-        }
-        print(
-            f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces"
-        )
-
+    """Disabled no-op (merging needed embeddings): returns face_data unchanged."""
+    # TODO: Reimplement with Qdrant _faces collection
    return face_data


@@ -235,57 +103,12 @@ def run_face_tracker(

    print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")

-    # Load embeddings from DB for the face tracker
+    # Embeddings no longer loaded from DB - use IoU-only tracking
    file_uuid = (
        face_json_path.split("/")[-1]
        .replace(".face.json", "")
        .replace("_traced.json", "")
    )
-    try:
-        conn = get_conn()
-        cur = conn.cursor()
-        cur.execute(
-            f"""
-            SELECT frame_number, x, y, width, height, embedding
-            FROM {SCHEMA}.face_detections
-            WHERE file_uuid = %s AND embedding IS NOT NULL
-        """,
-            (file_uuid,),
-        )
-        emb_rows = cur.fetchall()
-        conn.close()
-        # Build lookup: frame_number → list of (bbox, embedding)
-        emb_map = {}
-        for fn, x, y, w, h, emb in emb_rows:
-            emb_map.setdefault(fn, []).append(((x, y, w, h), emb))
-        print(f"[TRACE] Loaded {len(emb_rows)} embeddings from DB")
-
-        # Attach embeddings to face data
-        attached = 0
-        for fnum_str, frm_data in face_data.get("frames", {}).items():
-            fnum = int(fnum_str)
-            for face in frm_data.get("faces", []):
-                x, y, w, h = (
-                    face.get("x", 0),
-                    face.get("y", 0),
-                    face.get("width", 0),
-                    face.get("height", 0),
-                )
-                candidates = emb_map.get(fnum, [])
-                # Find matching embedding by bbox proximity
-                for (ex, ey, ew, eh), emb in candidates:
-                    if (
-                        abs(x - ex) < 10
-                        and abs(y - ey) < 10
-                        and abs(w - ew) < 10
-                        and abs(h - eh) < 10
-                    ):
-                        face["embedding"] = emb
-                        attached += 1
-                        break
-        print(f"[TRACE] Attached {attached} embeddings to faces")
-    except Exception as e:
-        print(f"[TRACE] WARNING: Could not load embeddings: {e}")

    # Load cut boundaries from cut.json (same directory as face.json)
    cut_boundaries = None
@@ -301,7 +124,7 @@ def run_face_tracker(
        print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries")

    face_data = track_faces(
-        face_data, use_embedding=True, cut_boundaries=cut_boundaries
+        face_data, use_embedding=False, cut_boundaries=cut_boundaries
    )

    # Merge traces within same cut (same person re-appearing after occlusion/pose change)
@@ -309,7 +132,7 @@ def run_face_tracker(
        face_data = merge_traces_within_cuts(face_data, cut_scenes)

    metadata = face_data.get("metadata", {})
-    metadata["tracking_method"] = "iou_embedding"
+    metadata["tracking_method"] = "iou_only"
    metadata["tracked_at"] = datetime.now().isoformat()
    face_data["metadata"] = metadata

@@ -350,22 +173,19 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
            if face_id is None:
                face_id = f"face_{trace_id}"
            attributes = face.get("attributes")
-            embedding = face.get("embedding")

            bbox = json.dumps({"x": x, "y": y, "width": w, "height": h})
-            embed_vec = embedding if embedding and len(embedding) > 0 else None

            try:
                cur.execute(
                    f"""
                    UPDATE {schema}.face_detections
-                    SET trace_id = %s, embedding = %s, face_id = %s
+                    SET trace_id = %s, face_id = %s
                    WHERE file_uuid = %s AND frame_number = %s
                      AND x = %s AND y = %s AND width = %s AND height = %s
                    """,
                    (
                        trace_id,
-                        embed_vec,
                        face_id,
                        file_uuid,
                        frame_num,
--- a/scripts/story_embed.py
+++ b/scripts/story_embed.py
@@ -1,87 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Story Embedding Pipeline:
-1. Read story chunks → LLM summary (Gemma4)
-2. Embed summary (EmbeddingGemma)
-3. Store in chunks table + Qdrant
-"""
-
-import json, urllib.request, subprocess, sys, time, os
-
-UUID = "aeed71342a899fe4b4c57b7d41bcb692"
-PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"]
-LLM_URL = "http://localhost:8082/v1/chat/completions"
-EMBED_URL = "http://localhost:11436/v1/embeddings"
-QDRANT_URL = "http://localhost:6333"
-QDRANT_COL = "momentry_dev_stories"
-
-def psql(sql):
-    r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30)
-    return r.stdout.strip()
-
-def call_llm(dialogue):
-    prompt = f"Dialogue: {dialogue}\n\n50-word summary:"
-    body = json.dumps({"model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf",
-        "messages": [{"role": "user", "content": prompt}],
-        "temperature": 0.1, "max_tokens": 100}).encode()
-    req = urllib.request.Request(LLM_URL, data=body, headers={"Content-Type": "application/json"})
-    resp = urllib.request.urlopen(req, timeout=120)
-    return json.loads(resp.read())["choices"][0]["message"]["content"].strip()
-
-def call_embed(text):
-    body = json.dumps({"input": text}).encode()
-    req = urllib.request.Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"})
-    resp = urllib.request.urlopen(req, timeout=30)
-    return json.loads(resp.read())["data"][0]["embedding"]
-
-# Step 0: Ensure Qdrant collection exists (768 dims)
-subprocess.run(["curl", "-s", "-X", "PUT", f"{QDRANT_URL}/collections/{QDRANT_COL}",
-    "-H", "Content-Type: application/json",
-    "-d", '{"vectors":{"size":768,"distance":"Cosine"}}'], capture_output=True)
-
-# Step 1: Get all story chunks that need summaries
-lines = [l for l in psql(f"SELECT chunk_id, chunk_index, start_time, end_time, text_content FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story' AND (summary_text IS NULL OR summary_text = '') ORDER BY chunk_index").split('\n') if l.strip() and '|' in l]
-
-print(f"Chunks to process: {len(lines)}")
-total = len(lines)
-errors = 0
-
-for i, line in enumerate(lines):
-    parts = line.split('|', 4)
-    cid, idx, st, et, dialogue = parts[0].strip(), int(parts[1]), float(parts[2]), float(parts[3]), parts[4] if len(parts) > 4 else ""
-    
-    if len(dialogue) < 10:
-        summary = "[no dialogue]"
-        embedding = [0.0] * 768
-    else:
-        try:
-            summary = call_llm(dialogue)
-            time.sleep(0.3)
-            embedding = call_embed(summary)
-        except Exception as e:
-            print(f"[{i+1}/{total}] Error: {cid} - {e}")
-            errors += 1
-            summary = "[error]"
-            embedding = [0.0] * 768
-    
-    # Update DB
-    s_esc = summary.replace("'", "''")
-    psql(f"UPDATE dev.chunks SET summary_text='{s_esc}', updated_at=CURRENT_TIMESTAMP WHERE chunk_id='{cid}'")
-    
-    # Store in Qdrant
-    point = json.dumps({"points": [{"id": idx + 1, "vector": embedding,
-        "payload": {"chunk_id": cid, "file_uuid": UUID, "start_time": st, "end_time": et,
-                     "summary": summary, "type": "story_summary"}
-    }]}).encode()
-    req = urllib.request.Request(f"{QDRANT_URL}/collections/{QDRANT_COL}/points?wait=true",
-        data=point, headers={"Content-Type": "application/json"}, method="PUT")
-    try:
-        urllib.request.urlopen(req, timeout=10)
-    except:
-        pass
-    
-    if (i+1) % 20 == 0:
-        print(f"[{i+1}/{total}] {errors} errors so far")
-
-print(f"\nDone. Processed: {total}, Errors: {errors}")
-print(f"Qdrant: {QDRANT_COL}")
--- a/scripts/story_embed_v1.11.py
+++ b/scripts/story_embed_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/story_embed_v1.11.py
--- a/scripts/story_pipeline_full.py
+++ b/scripts/story_pipeline_full.py
@@ -1,230 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Story Pipeline Full — Speaker + Story + Summary
-Step 1: Update sentence chunks with speaker name
-Step 2: Rebuild story chunks + re-embed
-Step 3: LLM summary × 228 + embed
-"""
-
-import json, urllib.request, subprocess, sys, time, os
-
-UUID = "aeed71342a899fe4b4c57b7d41bcb692"
-DIR = "/Users/accusys/momentry/output_dev"
-PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"]
-LLM_URL = "http://localhost:8082/v1/chat/completions"
-EMBED_URL = "http://localhost:11436/v1/embeddings"
-QDRANT_URL = "http://localhost:6333/collections/momentry_dev_stories/points"
-
-def psql(sql):
-    r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30)
-    return r.stdout.strip()
-
-def psql_file(path):
-    r = subprocess.run(PSQL + ["-f", path], capture_output=True, text=True, timeout=60)
-    if r.stderr and "ERROR" in r.stderr:
-        print(f"SQL Error: {r.stderr[:200]}")
-    return r.returncode
-
-def embed_text(text):
-    body = json.dumps({"input": text[:1024]}).encode()
-    req = urllib.request.Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"})
-    return json.loads(urllib.request.urlopen(req, timeout=30).read())["data"][0]["embedding"]
-
-def llm_summary(dialogue):
-    body = json.dumps({
-        "model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf",
-        "messages": [{"role": "user", "content": f"Summarize concisely:\n{dialogue}\n\n50-word summary:"}],
-        "temperature": 0.1, "max_tokens": 100,
-    }).encode()
-    req = urllib.request.Request(LLM_URL, data=body, headers={"Content-Type": "application/json"})
-    return json.loads(urllib.request.urlopen(req, timeout=120).read())["choices"][0]["message"]["content"].strip()
-
-fps = 25.0
-FILE_ID = 242
-
-# ═══════════════════════════════════════════════════
-# Step 0: Load ASR + ASRX + speaker map
-# ═══════════════════════════════════════════════════
-print("=" * 60)
-print("Step 0: Loading data...")
-asr = json.load(open(f"{DIR}/{UUID}.asr.json"))
-segs = asr["segments"]
-asrx = json.load(open(f"{DIR}/{UUID}.asrx.json"))
-asrx_segs = asrx["segments"]
-
-# Speaker map from identity_bindings
-r = psql("SELECT ib.identity_value, i.name FROM dev.identity_bindings ib JOIN dev.identities i ON i.id=ib.identity_id WHERE ib.identity_type='speaker'")
-speaker_map = {}
-for line in r.strip().split('\n'):
-    if line.strip() and '|' in line:
-        p = line.split('|')
-        speaker_map[p[0].strip()] = p[1].strip()
-speaker_map["SPEAKER_0"] = "Speaker_0"  # Fallback for unbounded
-
-# ═══════════════════════════════════════════════════
-# Step 1: Update sentence chunks with speaker
-# ═══════════════════════════════════════════════════
-print("\n" + "=" * 60)
-print("Step 1: Updating sentence chunks with speaker...")
-
-sql = ["BEGIN;"]
-chunk_meta = {}  # idx → {speaker_id, speaker_name}
-
-for idx, seg in enumerate(segs):
-    st, et = seg["start"], seg["end"]
-    text = seg["text"].strip()
-    if not text:
-        continue
-    
-    # Find overlapping ASRX segment → speaker_id
-    spk_id = "SPEAKER_0"
-    for ax in asrx_segs:
-        if ax.get("start_time", 0) <= st and ax.get("end_time", 0) >= et:
-            spk_id = ax.get("speaker_id", "SPEAKER_0")
-            break
-    
-    spk_name = speaker_map.get(spk_id, spk_id)
-    new_text = f"[{spk_name}] {text}"
-    meta = json.dumps({"speaker_id": spk_id, "speaker_name": spk_name})
-    esc = new_text.replace("'", "''")
-    
-    sql.append(f"UPDATE dev.chunks SET text_content='{esc}', metadata='{meta}'::jsonb WHERE file_uuid='{UUID}' AND chunk_id='{UUID}_{idx}';")
-    chunk_meta[idx] = {"speaker_id": spk_id, "speaker_name": spk_name}
-
-sql.append("COMMIT;")
-with open("/tmp/s1_speaker.sql", "w") as f:
-    f.write("\n".join(sql))
-
-psql_file("/tmp/s1_speaker.sql")
-print(f"  Updated {len(chunk_meta)} sentence chunks with speaker")
-
-# ═══════════════════════════════════════════════════
-# Step 2: Rebuild story chunks + re-embed
-# ═══════════════════════════════════════════════════
-print("\n" + "=" * 60)
-print("Step 2: Rebuilding story chunks...")
-
-# Delete old story chunks
-psql(f"DELETE FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story';")
-
-# Recreate
-CHUNK_SIZE = 15
-sql2 = ["BEGIN;"]
-story_meta = []
-
-for i in range(0, len(segs), CHUNK_SIZE):
-    group = segs[i:i+CHUNK_SIZE]
-    st, et = group[0]["start"], group[-1]["end"]
-    idx = i // CHUNK_SIZE
-    chunk_id = f"{UUID}_story_{idx}"
-    
-    # Build speaker text from individual sentences
-    texts = []
-    speakers_used = {}
-    for j, seg in enumerate(group):
-        seg_idx = i + j
-        if seg_idx in chunk_meta:
-            cm = chunk_meta[seg_idx]
-            text = seg["text"].strip()
-            if text:
-                texts.append(f"[{cm['speaker_name']}] {text}")
-                speakers_used[cm['speaker_name']] = speakers_used.get(cm['speaker_name'], 0) + 1
-    
-    dialogue = " ".join(texts)
-    child_ids = ", ".join([f"'{UUID}_{j}'" for j in range(i, min(i+CHUNK_SIZE, len(segs)))])
-    words = sum(len(t.split()) for t in texts)
-    
-    meta = json.dumps({"method": "fixed_15", "seg_count": len(group), "words": words, "speakers": speakers_used})
-    esc = dialogue.replace("'", "''")
-    
-    sql2.append(f"""INSERT INTO dev.chunks (file_id,file_uuid,chunk_id,old_chunk_id,chunk_index,chunk_type,start_time,end_time,fps,start_frame,end_frame,text_content,content,metadata,frame_count,child_chunk_ids)
-    VALUES ({FILE_ID},'{UUID}','{chunk_id}','{chunk_id}',{idx},'story',{st},{et},{fps},{int(st*fps)},{int(et*fps)},'{esc}','{{"type":"story_parent"}}'::jsonb,'{meta}'::jsonb,{int((et-st)*fps)},ARRAY[{child_ids}]);""")
-    
-    story_meta.append({"idx": idx, "st": st, "et": et, "dialogue": dialogue, "words": words, "speakers": speakers_used})
-
-sql2.append("COMMIT;")
-with open("/tmp/s2_story.sql", "w") as f:
-    f.write("\n".join(sql2))
-psql_file("/tmp/s2_story.sql")
-print(f"  Created {len(story_meta)} story chunks")
-
-# Embed + upsert to Qdrant
-print("\n  Embedding story chunks...")
-points_dialogue = []
-for sm in story_meta:
-    if len(sm["dialogue"]) < 10:
-        continue
-    vec = embed_text(sm["dialogue"])
-    points_dialogue.append({"id": sm["idx"] + 1, "vector": vec, "payload": {
-        "chunk_id": f"{UUID}_story_{sm['idx']}", "file_uuid": UUID,
-        "start_time": sm["st"], "end_time": sm["et"], "type": "story_dialogue"
-    }})
-
-for i in range(0, len(points_dialogue), 100):
-    batch = points_dialogue[i:i+100]
-    data = json.dumps({"points": batch, "wait": True}).encode()
-    req = urllib.request.Request(f"{QDRANT_URL}?wait=true", data=data, headers={"Content-Type": "application/json"}, method="PUT")
-    urllib.request.urlopen(req, timeout=30)
-print(f"  Qdrant: {len(points_dialogue)} dialogue vectors")
-
-# ═══════════════════════════════════════════════════
-# Step 3: LLM summaries + embed
-# ═══════════════════════════════════════════════════
-print("\n" + "=" * 60)
-print("Step 3: LLM summaries...")
-
-points_summary = []
-summary_sql = ["BEGIN;"]
-
-for i, sm in enumerate(story_meta):
-    if len(sm["dialogue"]) < 10:
-        continue
-    
-    try:
-        summary = llm_summary(sm["dialogue"])
-        time.sleep(0.3)
-        vec = embed_text(summary)
-        time.sleep(0.1)
-    except Exception as e:
-        print(f"  Error on story {sm['idx']}: {e}")
-        summary = "[error]"
-        vec = [0.0] * 768
-    
-    s_esc = summary.replace("'", "''")
-    summary_sql.append(f"UPDATE dev.chunks SET summary_text='{s_esc}', updated_at=CURRENT_TIMESTAMP WHERE file_uuid='{UUID}' AND chunk_id='{UUID}_story_{sm['idx']}';")
-    
-    points_summary.append({"id": 100000 + sm["idx"] + 1, "vector": vec, "payload": {
-        "chunk_id": f"{UUID}_story_{sm['idx']}", "file_uuid": UUID,
-        "start_time": sm["st"], "end_time": sm["et"],
-        "summary": summary, "type": "story_summary"
-    }})
-    
-    if (i + 1) % 50 == 0:
-        print(f"  {i+1}/{len(story_meta)}")
-
-# Update DB with summaries
-summary_sql.append("COMMIT;")
-with open("/tmp/s3_summary.sql", "w") as f:
-    f.write("\n".join(summary_sql))
-psql_file("/tmp/s3_summary.sql")
-
-# Upsert summary vectors to Qdrant
-for i in range(0, len(points_summary), 100):
-    batch = points_summary[i:i+100]
-    data = json.dumps({"points": batch, "wait": True}).encode()
-    req = urllib.request.Request(f"{QDRANT_URL}?wait=true", data=data, headers={"Content-Type": "application/json"}, method="PUT")
-    urllib.request.urlopen(req, timeout=30)
-
-print(f"  Qdrant: {len(points_summary)} summary vectors")
-
-# ═══════════════════════════════════════════════════
-# Step 4: Verify
-# ═══════════════════════════════════════════════════
-print("\n" + "=" * 60)
-print("Done.")
-r1 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='sentence' AND text_content LIKE '[%'")
-r2 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story'")
-r3 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story' AND summary_text IS NOT NULL")
-print(f"Sentence chunks with speaker: {r1}")
-print(f"Story chunks: {r2}")
-print(f"Story chunks with summary: {r3}")
--- a/scripts/story_pipeline_full_v1.11.py
+++ b/scripts/story_pipeline_full_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/story_pipeline_full_v1.11.py
--- a/scripts/story_processor.py
+++ b/scripts/story_processor.py
@@ -1,325 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Story Processor - Generate parent-child chunk hierarchy for RAG
-Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks.
-NO cloud API calls - fully offline processing
-"""
-
-import sys
-import json
-import os
-import argparse
-from typing import Dict, List, Any
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-from redis_publisher import RedisPublisher
-
-
-def extract_video_metadata(video_path: str) -> Dict[str, Any]:
-    """Extract basic video metadata using ffprobe"""
-    import subprocess
-
-    try:
-        cmd = [
-            "ffprobe",
-            "-v",
-            "quiet",
-            "-print_format",
-            "json",
-            "-show_format",
-            "-show_streams",
-            video_path,
-        ]
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        if result.returncode == 0:
-            return json.loads(result.stdout)
-    except Exception:
-        pass
-    return {}
-
-
-def generate_parent_child_chunks(
-    asr_data: Dict,
-    cut_data: Dict,
-    yolo_data: Dict,
-    ocr_data: Dict,
-    scene_data: Dict,
-    parent_chunk_size: int = 5,
-) -> Dict:
-    """
-    Generate parent-child chunk hierarchy using LOCAL data only.
-    No LLM/API calls - uses template-based narrative generation.
-    """
-    child_chunks = []
-    parent_chunks = []
-
-    # Create child chunks from ASR
-    for seg in asr_data.get("segments", []):
-        child_chunks.append(
-            {
-                "chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}",
-                "chunk_type": "asr",
-                "source": "asr",
-                "start_time": seg.get("start", 0),
-                "end_time": seg.get("end", 0),
-                "text_content": seg.get("text", ""),
-                "content": {
-                    "text": seg.get("text", ""),
-                    "confidence": seg.get("confidence", 0),
-                },
-                "child_chunk_ids": [],
-                "parent_chunk_id": None,
-            }
-        )
-
-    # Create child chunks from CUT scenes
-    for scene in cut_data.get("scenes", []):
-        child_chunks.append(
-            {
-                "chunk_id": f"cut_{scene.get('scene_number', 0)}",
-                "chunk_type": "cut",
-                "source": "cut",
-                "start_time": scene.get("start_time", 0),
-                "end_time": scene.get("end_time", 0),
-                "text_content": f"Scene {scene.get('scene_number', 0)}",
-                "content": {
-                    "scene_number": scene.get("scene_number", 0),
-                    "duration": scene.get("duration", 0),
-                },
-                "child_chunk_ids": [],
-                "parent_chunk_id": None,
-            }
-        )
-
-    asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"]
-    cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"]
-
-    yolo_frames = yolo_data.get("frames", [])
-    ocr_frames = ocr_data.get("frames", [])
-
-    # Group ASR segments into parent chunks
-    for i in range(0, len(asr_child_ids), parent_chunk_size):
-        batch = asr_child_ids[i : i + parent_chunk_size]
-        if not batch:
-            continue
-
-        batch_texts = []
-        batch_objects = []
-        batch_times = []
-
-        for child_id in batch:
-            for child in child_chunks:
-                if child["chunk_id"] == child_id:
-                    if child["text_content"]:
-                        batch_texts.append(child["text_content"])
-                    batch_times.append((child["start_time"], child["end_time"]))
-                    break
-
-        start_time = batch_times[0][0] if batch_times else 0
-        end_time = batch_times[-1][1] if batch_times else 0
-
-        # Find objects in this time range
-        for frame in yolo_frames[:50]:
-            ts = frame.get("timestamp", 0)
-            if start_time <= ts <= end_time:
-                for obj in frame.get("objects", []):
-                    batch_objects.append(obj.get("class_name", "unknown"))
-
-        narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time)
-
-        parent_chunk = {
-            "chunk_id": f"story_asr_{i // parent_chunk_size:04d}",
-            "chunk_type": "story",
-            "source": "story_asr",
-            "start_time": start_time,
-            "end_time": end_time,
-            "text_content": narrative,
-            "content": {
-                "description": narrative,
-                "child_count": len(batch),
-                "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
-                "detected_objects": list(set(batch_objects))[:5],
-            },
-            "child_chunk_ids": batch,
-            "parent_chunk_id": None,
-        }
-        parent_chunks.append(parent_chunk)
-
-        for child_id in batch:
-            for child in child_chunks:
-                if child["chunk_id"] == child_id:
-                    child["parent_chunk_id"] = parent_chunk["chunk_id"]
-                    break
-
-    # Group CUT scenes into parent chunks
-    for i in range(0, len(cut_child_ids), parent_chunk_size):
-        batch = cut_child_ids[i : i + parent_chunk_size]
-        if not batch:
-            continue
-
-        batch_times = []
-        batch_objects = []
-
-        for child_id in batch:
-            for child in child_chunks:
-                if child["chunk_id"] == child_id:
-                    batch_times.append((child["start_time"], child["end_time"]))
-                    break
-
-        start_time = batch_times[0][0] if batch_times else 0
-        end_time = batch_times[-1][1] if batch_times else 0
-
-        for frame in yolo_frames[:50]:
-            ts = frame.get("timestamp", 0)
-            if start_time <= ts <= end_time:
-                for obj in frame.get("objects", []):
-                    batch_objects.append(obj.get("class_name", "unknown"))
-
-        narrative = generate_scene_narrative(
-            batch_objects, start_time, end_time, len(batch)
-        )
-
-        parent_chunk = {
-            "chunk_id": f"story_cut_{i // parent_chunk_size:04d}",
-            "chunk_type": "story",
-            "source": "story_cut",
-            "start_time": start_time,
-            "end_time": end_time,
-            "text_content": narrative,
-            "content": {
-                "description": narrative,
-                "child_count": len(batch),
-                "scenes": batch,
-                "detected_objects": list(set(batch_objects))[:5],
-            },
-            "child_chunk_ids": batch,
-            "parent_chunk_id": None,
-        }
-        parent_chunks.append(parent_chunk)
-
-        for child_id in batch:
-            for child in child_chunks:
-                if child["chunk_id"] == child_id:
-                    child["parent_chunk_id"] = parent_chunk["chunk_id"]
-                    break
-
-    return {
-        "child_chunks": child_chunks,
-        "parent_chunks": parent_chunks,
-        "stats": {
-            "total_child_chunks": len(child_chunks),
-            "total_parent_chunks": len(parent_chunks),
-            "asr_children": len(asr_child_ids),
-            "cut_children": len(cut_child_ids),
-        },
-    }
-
-
-def generate_narrative(
-    texts: List[str], objects: List[str], start: float, end: float
-) -> str:
-    """Generate narrative description from LOCAL text snippets and objects"""
-    if not texts and not objects:
-        return f"Video segment from {start:.1f}s to {end:.1f}s"
-
-    parts = []
-    if texts:
-        combined = " ".join(texts[:5])
-        if len(combined) > 150:
-            combined = combined[:150] + "..."
-        parts.append(f"Speech: {combined}")
-
-    if objects:
-        unique_objs = list(set(objects))[:5]
-        parts.append(f"Visuals: {', '.join(unique_objs)}")
-
-    return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}"
-
-
-def generate_scene_narrative(
-    objects: List[str], start: float, end: float, scene_count: int
-) -> str:
-    """Generate scene narrative from LOCAL detected objects"""
-    unique_objects = list(set(objects))[:5]
-
-    if unique_objects:
-        obj_str = ", ".join(unique_objects)
-        return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}."
-    else:
-        return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."
-
-
-def run_story(
-    video_path: str, output_path: str, uuid: str = "", parent_chunk_size: int = 5
-):
-    publisher = RedisPublisher(uuid) if uuid else None
-    if publisher:
-        publisher.info("story", "STORY_START")
-
-    base_path = os.path.dirname(output_path)
-    uuid_name = os.path.basename(output_path).split(".")[0]
-
-    asr_data = {"segments": []}
-    cut_data = {"scenes": []}
-    yolo_data = {"frames": []}
-    ocr_data = {"frames": []}
-    scene_data = {"scenes": []}
-
-    for name, data_var in [
-        ("asr", asr_data),
-        ("cut", cut_data),
-        ("yolo", yolo_data),
-        ("ocr", ocr_data),
-        ("scene", scene_data),
-    ]:
-        path = os.path.join(base_path, f"{uuid_name}.{name}.json")
-        if os.path.exists(path):
-            with open(path) as f:
-                data_var.update(json.load(f))
-
-    result = generate_parent_child_chunks(
-        asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size
-    )
-
-    result["video_metadata"] = extract_video_metadata(video_path)
-    result["processing"] = {
-        "method": "local_aggregation",
-        "cloud_api_used": False,
-        "parent_chunk_size": parent_chunk_size,
-    }
-
-    with open(output_path, "w") as f:
-        json.dump(result, f, indent=2, ensure_ascii=False)
-
-    if publisher:
-        publisher.complete(
-            "story",
-            f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)",
-        )
-
-    return result
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)"
-    )
-    parser.add_argument("video_path", help="Path to video file")
-    parser.add_argument("output_path", help="Output JSON path")
-    parser.add_argument("--uuid", help="UUID for progress tracking", default="")
-    parser.add_argument(
-        "--parent-chunk-size",
-        type=int,
-        default=5,
-        help="Number of child chunks per parent",
-    )
-
-    args = parser.parse_args()
-
-    result = run_story(
-        args.video_path, args.output_path, args.uuid, args.parent_chunk_size
-    )
-    print(
-        f"Story generated: {result['stats']['total_parent_chunks']} parent, "
-        f"{result['stats']['total_child_chunks']} child chunks (LOCAL)"
-    )
--- a/scripts/story_processor_contract_v1.py
+++ b/scripts/story_processor_contract_v1.py
@@ -1,848 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Story Processor - AI-Driven Processor Contract Version 1.0
-
-Compliant with AI-Driven Processor Contract v1.0
-Effective Date: 2025-03-27
-
-Features:
-1. Standardized command-line interface
-2. Redis progress reporting
-3. Signal handling (SIGTERM, SIGINT)
-4. Health check mode
-5. Resource monitoring
-6. Contract-compliant JSON output
-7. Unified configuration
-"""
-
-import sys
-import json
-import os
-import argparse
-import signal
-import time
-import traceback
-from datetime import datetime
-from typing import Dict, Any, List
-
-# Redis Publisher for progress reporting
-try:
-    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-    from redis_publisher import RedisPublisher
-
-    REDIS_AVAILABLE = True
-except ImportError:
-    REDIS_AVAILABLE = False
-    print(
-        "WARNING: RedisPublisher not available, progress reporting disabled",
-        file=sys.stderr,
-    )
-
-# Contract version
-CONTRACT_VERSION = "1.0"
-PROCESSOR_NAME = (
-    "/Users/accusys/momentry_core_0.1/scripts/story_processor_contract_v1.py"
-)
-PROCESSOR_VERSION = "1.0.0"
-MODEL_NAME = "gpt-4"
-MODEL_VERSION = "latest"
-
-# Unified configuration defaults
-DEFAULT_TIMEOUT = 3600  # 1 hour for story generation
-DEFAULT_PARENT_CHUNK_SIZE = 5
-DEFAULT_MIN_CHILD_CHUNKS = 3
-DEFAULT_MAX_CHILD_CHUNKS = 10
-DEFAULT_SUMMARY_LENGTH = 150
-DEFAULT_MODEL = "openai"  # openai, local, or template
-DEFAULT_MODEL_NAME = "gpt-4"
-DEFAULT_TEMPERATURE = 0.7
-DEFAULT_MAX_TOKENS = 500
-
-
-# Signal handling with timeout support
-class SignalHandler:
-    """Handle system signals for graceful shutdown"""
-
-    def __init__(self):
-        self.should_exit = False
-        self.exit_code = 0
-        signal.signal(signal.SIGTERM, self.handle_signal)
-        signal.signal(signal.SIGINT, self.handle_signal)
-
-    def handle_signal(self, signum, frame):
-        """Handle termination signals"""
-        print(f"\n收到信号 {signum}，正在优雅关闭...")
-        self.should_exit = True
-        self.exit_code = 128 + signum
-
-    def should_stop(self):
-        """Check if should stop processing"""
-        return self.should_exit
-
-
-# Timeout manager
-class TimeoutManager:
-    """Manage processing timeouts"""
-
-    def __init__(self, timeout_seconds: int):
-        self.timeout_seconds = timeout_seconds
-        self.start_time = time.time()
-        self.timer = None
-
-    def check_timeout(self) -> bool:
-        """Check if timeout has been reached"""
-        elapsed = time.time() - self.start_time
-        return elapsed > self.timeout_seconds
-
-    def get_remaining_time(self) -> float:
-        """Get remaining time in seconds"""
-        elapsed = time.time() - self.start_time
-        return max(0, self.timeout_seconds - elapsed)
-
-    def format_remaining_time(self) -> str:
-        """Format remaining time as HH:MM:SS"""
-        remaining = self.get_remaining_time()
-        hours = int(remaining // 3600)
-        minutes = int((remaining % 3600) // 60)
-        seconds = int(remaining % 60)
-        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
-
-
-# Health check functions
-def check_environment() -> Dict[str, Any]:
-    """Check environment and dependencies"""
-    checks = []
-
-    # Check 1: OpenAI API (optional)
-    try:
-        import openai
-
-        checks.append(
-            {
-                "name": "openai",
-                "status": "available",
-                "version": openai.__version__,
-            }
-        )
-    except ImportError:
-        checks.append({"name": "openai", "status": "optional", "version": None})
-
-    # Check 2: Redis (optional)
-    checks.append(
-        {
-            "name": "redis",
-            "status": "available" if REDIS_AVAILABLE else "optional",
-            "version": None,
-        }
-    )
-
-    # Check 3: Python version
-    checks.append(
-        {
-            "name": "python",
-            "status": "available",
-            "version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
-        }
-    )
-
-    return {
-        "timestamp": datetime.now().isoformat(),
-        "processor_name": PROCESSOR_NAME,
-        "processor_version": PROCESSOR_VERSION,
-        "contract_version": CONTRACT_VERSION,
-        "model_name": MODEL_NAME,
-        "model_version": MODEL_VERSION,
-        "checks": checks,
-    }
-
-
-def check_input_files(input_files: Dict[str, str]) -> Dict[str, Any]:
-    """Check input files exist and are valid JSON"""
-    results = {}
-
-    for file_type, file_path in input_files.items():
-        if not file_path:
-            results[file_type] = {
-                "exists": False,
-                "valid": False,
-                "error": "No path provided",
-            }
-            continue
-
-        if not os.path.exists(file_path):
-            results[file_type] = {
-                "exists": False,
-                "valid": False,
-                "error": "File not found",
-            }
-            continue
-
-        try:
-            with open(file_path, "r") as f:
-                data = json.load(f)
-
-            # Basic validation based on file type
-            if file_type == "asr":
-                valid = isinstance(data, dict) and "segments" in data
-            elif file_type == "cut":
-                valid = isinstance(data, dict) and "scenes" in data
-            elif file_type == "yolo":
-                valid = isinstance(data, dict) and "detections" in data
-            elif file_type == "ocr":
-                valid = isinstance(data, dict) and "texts" in data
-            else:
-                valid = isinstance(data, dict)
-
-            results[file_type] = {
-                "exists": True,
-                "valid": valid,
-                "size": os.path.getsize(file_path),
-                "data_keys": list(data.keys()) if isinstance(data, dict) else [],
-            }
-
-        except json.JSONDecodeError as e:
-            results[file_type] = {
-                "exists": True,
-                "valid": False,
-                "error": f"Invalid JSON: {e}",
-            }
-        except Exception as e:
-            results[file_type] = {"exists": True, "valid": False, "error": str(e)}
-
-    return results
-
-
-def load_input_data(input_files: Dict[str, str]) -> Dict[str, Any]:
-    """Load input data from JSON files"""
-    data = {}
-
-    for file_type, file_path in input_files.items():
-        if not file_path or not os.path.exists(file_path):
-            data[file_type] = None
-            continue
-
-        try:
-            with open(file_path, "r") as f:
-                data[file_type] = json.load(f)
-        except:
-            data[file_type] = None
-
-    return data
-
-
-def generate_parent_child_chunks(
-    asr_data: Dict,
-    cut_data: Dict,
-    yolo_data: Dict,
-    ocr_data: Dict,
-    parent_chunk_size: int = DEFAULT_PARENT_CHUNK_SIZE,
-    min_child_chunks: int = DEFAULT_MIN_CHILD_CHUNKS,
-    max_child_chunks: int = DEFAULT_MAX_CHILD_CHUNKS,
-    summary_length: int = DEFAULT_SUMMARY_LENGTH,
-    model: str = DEFAULT_MODEL,
-    **kwargs,
-) -> List[Dict[str, Any]]:
-    """Generate parent-child chunk hierarchy for RAG"""
-
-    parent_chunks = []
-
-    # Extract ASR segments
-    asr_segments = asr_data.get("segments", []) if asr_data else []
-
-    # Extract scenes from CUT data
-    scenes = cut_data.get("scenes", []) if cut_data else []
-
-    # Extract detections from YOLO data
-    yolo_detections = yolo_data.get("detections", []) if yolo_data else []
-
-    # Extract OCR texts
-    ocr_texts = ocr_data.get("texts", []) if ocr_data else []
-
-    # If we have scenes, use them to group content
-    if scenes:
-        for scene in scenes:
-            scene_start = scene.get("start_time", 0)
-            scene_end = scene.get("end_time", 0)
-            scene_duration = scene.get("duration", 0)
-
-            # Find ASR segments in this scene
-            scene_asr_segments = []
-            for segment in asr_segments:
-                seg_start = segment.get("start", 0)
-                if scene_start <= seg_start <= scene_end:
-                    scene_asr_segments.append(segment)
-
-            # Find YOLO detections in this scene
-            scene_yolo_detections = []
-            for detection in yolo_detections:
-                det_time = detection.get("timestamp", 0)
-                if scene_start <= det_time <= scene_end:
-                    scene_yolo_detections.append(detection)
-
-            # Find OCR texts in this scene
-            scene_ocr_texts = []
-            for text in ocr_texts:
-                text_time = text.get("timestamp", 0)
-                if scene_start <= text_time <= scene_end:
-                    scene_ocr_texts.append(text)
-
-            # Create child chunks
-            child_chunks = []
-
-            # Add ASR segments as child chunks
-            for segment in scene_asr_segments[:max_child_chunks]:
-                child_chunks.append(
-                    {
-                        "type": "asr",
-                        "content": segment.get("text", ""),
-                        "start_time": segment.get("start", 0),
-                        "end_time": segment.get("end", 0),
-                        "confidence": segment.get("confidence", 0),
-                        "metadata": {"speaker": segment.get("speaker")},
-                    }
-                )
-
-            # Add YOLO detections as child chunks
-            for detection in scene_yolo_detections[:max_child_chunks]:
-                child_chunks.append(
-                    {
-                        "type": "yolo",
-                        "content": f"Detected {detection.get('class', 'object')} with confidence {detection.get('confidence', 0):.2f}",
-                        "timestamp": detection.get("timestamp", 0),
-                        "confidence": detection.get("confidence", 0),
-                        "metadata": {
-                            "class": detection.get("class"),
-                            "bbox": detection.get("bbox"),
-                        },
-                    }
-                )
-
-            # Add OCR texts as child chunks
-            for text in scene_ocr_texts[:max_child_chunks]:
-                child_chunks.append(
-                    {
-                        "type": "ocr",
-                        "content": text.get("text", ""),
-                        "timestamp": text.get("timestamp", 0),
-                        "confidence": text.get("confidence", 0),
-                        "metadata": {
-                            "bbox": text.get("bbox"),
-                            "language": text.get("language"),
-                        },
-                    }
-                )
-
-            # Skip if not enough child chunks
-            if len(child_chunks) < min_child_chunks:
-                continue
-
-            # Generate parent summary
-            if model == "openai":
-                parent_summary = generate_openai_summary(child_chunks, scene, **kwargs)
-            elif model == "local":
-                parent_summary = generate_local_summary(child_chunks, scene, **kwargs)
-            else:
-                parent_summary = generate_template_summary(child_chunks, scene)
-
-            # Create parent chunk
-            parent_chunks.append(
-                {
-                    "parent_id": len(parent_chunks) + 1,
-                    "scene_id": scene.get("scene_id", 0),
-                    "start_time": scene_start,
-                    "end_time": scene_end,
-                    "duration": scene_duration,
-                    "summary": parent_summary[:summary_length]
-                    if summary_length > 0
-                    else parent_summary,
-                    "child_count": len(child_chunks),
-                    "child_types": list(set(chunk["type"] for chunk in child_chunks)),
-                    "child_chunks": child_chunks[
-                        :parent_chunk_size
-                    ],  # Limit child chunks in output
-                }
-            )
-
-    # If no scenes, create chunks based on time windows
-    elif asr_segments:
-        # Group ASR segments by time windows
-        time_window = 30  # seconds
-        current_window = 0
-
-        while current_window * time_window < (
-            asr_segments[-1].get("end", 0) if asr_segments else 0
-        ):
-            window_start = current_window * time_window
-            window_end = (current_window + 1) * time_window
-
-            # Find segments in this window
-            window_segments = []
-            for segment in asr_segments:
-                seg_start = segment.get("start", 0)
-                if window_start <= seg_start < window_end:
-                    window_segments.append(segment)
-
-            if len(window_segments) >= min_child_chunks:
-                # Create child chunks
-                child_chunks = []
-                for segment in window_segments[:max_child_chunks]:
-                    child_chunks.append(
-                        {
-                            "type": "asr",
-                            "content": segment.get("text", ""),
-                            "start_time": segment.get("start", 0),
-                            "end_time": segment.get("end", 0),
-                            "confidence": segment.get("confidence", 0),
-                            "metadata": {"speaker": segment.get("speaker")},
-                        }
-                    )
-
-                # Generate parent summary
-                parent_summary = generate_template_summary(
-                    child_chunks,
-                    {
-                        "start_time": window_start,
-                        "end_time": window_end,
-                        "duration": time_window,
-                    },
-                )
-
-                # Create parent chunk
-                parent_chunks.append(
-                    {
-                        "parent_id": len(parent_chunks) + 1,
-                        "time_window": current_window,
-                        "start_time": window_start,
-                        "end_time": window_end,
-                        "duration": time_window,
-                        "summary": parent_summary[:summary_length]
-                        if summary_length > 0
-                        else parent_summary,
-                        "child_count": len(child_chunks),
-                        "child_types": ["asr"],
-                        "child_chunks": child_chunks[:parent_chunk_size],
-                    }
-                )
-
-            current_window += 1
-
-    return parent_chunks
-
-
-def generate_openai_summary(child_chunks: List[Dict], scene: Dict, **kwargs) -> str:
-    """Generate summary using OpenAI"""
-    try:
-        import openai
-
-        # Prepare context from child chunks
-        context_parts = []
-        for chunk in child_chunks[:10]:  # Limit context size
-            if chunk["type"] == "asr":
-                context_parts.append(f"Speech: {chunk['content']}")
-            elif chunk["type"] == "yolo":
-                context_parts.append(f"Visual: {chunk['content']}")
-            elif chunk["type"] == "ocr":
-                context_parts.append(f"Text: {chunk['content']}")
-
-        context = "\n".join(context_parts)
-
-        # Prepare prompt
-        prompt = f"""Summarize this video scene ({scene.get("duration", 0):.1f} seconds) based on the following elements:
-
-{context}
-
-Provide a concise narrative summary that connects the speech, visual elements, and text into a coherent description."""
-
-        # Call OpenAI API
-        response = openai.chat.completions.create(
-            model=kwargs.get("model_name", DEFAULT_MODEL_NAME),
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a video analysis assistant that creates coherent narrative summaries from multiple data sources.",
-                },
-                {"role": "user", "content": prompt},
-            ],
-            max_tokens=kwargs.get("max_tokens", DEFAULT_MAX_TOKENS),
-            temperature=kwargs.get("temperature", DEFAULT_TEMPERATURE),
-        )
-
-        return response.choices[0].message.content
-
-    except ImportError:
-        return "OpenAI not available for summary generation"
-    except Exception as e:
-        return f"Summary generation error: {str(e)}"
-
-
-def generate_local_summary(child_chunks: List[Dict], scene: Dict, **kwargs) -> str:
-    """Generate summary using local model (placeholder)"""
-    # This is a placeholder for local model implementation
-    asr_count = sum(1 for chunk in child_chunks if chunk["type"] == "asr")
-    yolo_count = sum(1 for chunk in child_chunks if chunk["type"] == "yolo")
-    ocr_count = sum(1 for chunk in child_chunks if chunk["type"] == "ocr")
-
-    return f"Scene ({scene.get('duration', 0):.1f}s) with {asr_count} speech segments, {yolo_count} visual detections, and {ocr_count} text elements. Local summary model not implemented."
-
-
-def generate_template_summary(child_chunks: List[Dict], scene: Dict) -> str:
-    """Generate summary using template"""
-    asr_count = sum(1 for chunk in child_chunks if chunk["type"] == "asr")
-    yolo_count = sum(1 for chunk in child_chunks if chunk["type"] == "yolo")
-    ocr_count = sum(1 for chunk in child_chunks if chunk["type"] == "ocr")
-
-    # Extract some sample content
-    asr_samples = [
-        chunk["content"][:50] for chunk in child_chunks if chunk["type"] == "asr"
-    ][:2]
-    yolo_classes = list(
-        set(
-            chunk["metadata"].get("class", "object")
-            for chunk in child_chunks
-            if chunk["type"] == "yolo"
-        )
-    )
-
-    summary_parts = [f"Scene duration: {scene.get('duration', 0):.1f} seconds."]
-
-    if asr_count > 0:
-        summary_parts.append(f"Contains {asr_count} speech segments.")
-        if asr_samples:
-            summary_parts.append(f"Sample speech: {'; '.join(asr_samples)}...")
-
-    if yolo_count > 0:
-        summary_parts.append(
-            f"Detected {yolo_count} objects including: {', '.join(yolo_classes[:3])}."
-        )
-
-    if ocr_count > 0:
-        summary_parts.append(f"Extracted {ocr_count} text elements from the video.")
-
-    return " ".join(summary_parts)
-
-
-# Main processing function
-def process_story(
-    asr_path: str,
-    cut_path: str,
-    yolo_path: str,
-    ocr_path: str,
-    output_path: str,
-    uuid: str = "",
-    parent_chunk_size: int = DEFAULT_PARENT_CHUNK_SIZE,
-    min_child_chunks: int = DEFAULT_MIN_CHILD_CHUNKS,
-    max_child_chunks: int = DEFAULT_MAX_CHILD_CHUNKS,
-    summary_length: int = DEFAULT_SUMMARY_LENGTH,
-    model: str = DEFAULT_MODEL,
-    model_name: str = DEFAULT_MODEL_NAME,
-    temperature: float = DEFAULT_TEMPERATURE,
-    max_tokens: int = DEFAULT_MAX_TOKENS,
-    timeout: int = DEFAULT_TIMEOUT,
-) -> Dict[str, Any]:
-    """Process video analysis data to create parent-child chunk hierarchy"""
-
-    # Initialize
-    signal_handler = SignalHandler()
-    timeout_manager = TimeoutManager(timeout)
-    publisher = None
-    if REDIS_AVAILABLE and uuid:
-        try:
-            publisher = RedisPublisher(uuid)
-        except:
-            publisher = None
-
-    def publish(stage: str, message: str, data: Dict = None):
-        if publisher:
-            publisher.info(PROCESSOR_NAME, stage, message, data)
-
-    if publisher:
-        publish("STORY_START", "开始生成故事层次结构")
-
-    result = {
-        "processor_name": PROCESSOR_NAME,
-        "processor_version": PROCESSOR_VERSION,
-        "contract_version": CONTRACT_VERSION,
-        "model_name": MODEL_NAME,
-        "model_version": MODEL_VERSION,
-        "input_files": {
-            "asr": asr_path,
-            "cut": cut_path,
-            "yolo": yolo_path,
-            "ocr": ocr_path,
-        },
-        "output_path": output_path,
-        "uuid": uuid,
-        "timestamp": datetime.now().isoformat(),
-        "parameters": {
-            "parent_chunk_size": parent_chunk_size,
-            "min_child_chunks": min_child_chunks,
-            "max_child_chunks": max_child_chunks,
-            "summary_length": summary_length,
-            "model": model,
-            "model_name": model_name,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-            "timeout": timeout,
-        },
-        "success": False,
-        "error": None,
-        "parent_chunks": [],
-        "chunk_statistics": {},
-        "processing_time": 0,
-        "resource_usage": {},
-    }
-
-    start_time = time.time()
-
-    try:
-        # Check timeout
-        if timeout_manager.check_timeout():
-            raise TimeoutError(f"超时 ({timeout} 秒)")
-
-        # Check if should exit
-        if signal_handler.should_stop():
-            raise KeyboardInterrupt("收到停止信号")
-
-        # Check input files
-        if publisher:
-            publish("STORY_CHECK_FILES", "检查输入文件")
-
-        input_files = {
-            "asr": asr_path,
-            "cut": cut_path,
-            "yolo": yolo_path,
-            "ocr": ocr_path,
-        }
-
-        file_checks = check_input_files(input_files)
-        result["file_checks"] = file_checks
-
-        # Check if we have at least ASR data
-        if not file_checks.get("asr", {}).get("valid", False):
-            raise ValueError("缺少有效的 ASR 数据文件")
-
-        if publisher:
-            publish("STORY_FILES_VALID", "输入文件检查通过")
-
-        # Load input data
-        if publisher:
-            publish("STORY_LOAD_DATA", "加载输入数据")
-
-        input_data = load_input_data(input_files)
-
-        if publisher:
-            publish("STORY_DATA_LOADED", "数据加载完成")
-
-        # Generate parent-child chunks
-        if publisher:
-            publish("STORY_GENERATE_CHUNKS", "生成父-子块层次结构")
-
-        parent_chunks = generate_parent_child_chunks(
-            asr_data=input_data.get("asr"),
-            cut_data=input_data.get("cut"),
-            yolo_data=input_data.get("yolo"),
-            ocr_data=input_data.get("ocr"),
-            parent_chunk_size=parent_chunk_size,
-            min_child_chunks=min_child_chunks,
-            max_child_chunks=max_child_chunks,
-            summary_length=summary_length,
-            model=model,
-            model_name=model_name,
-            temperature=temperature,
-            max_tokens=max_tokens,
-        )
-
-        result["parent_chunks"] = parent_chunks
-        result["parent_chunk_count"] = len(parent_chunks)
-
-        # Calculate statistics
-        total_child_chunks = sum(chunk.get("child_count", 0) for chunk in parent_chunks)
-        child_types = {}
-        for chunk in parent_chunks:
-            for child_type in chunk.get("child_types", []):
-                child_types[child_type] = child_types.get(child_type, 0) + 1
-
-        result["chunk_statistics"] = {
-            "total_parent_chunks": len(parent_chunks),
-            "total_child_chunks": total_child_chunks,
-            "avg_children_per_parent": total_child_chunks / len(parent_chunks)
-            if parent_chunks
-            else 0,
-            "child_type_distribution": child_types,
-        }
-
-        result["success"] = True
-
-        if publisher:
-            publish("STORY_COMPLETE", f"完成: {len(parent_chunks)} 个父块")
-
-    except TimeoutError as e:
-        result["error"] = f"处理超时: {e}"
-        if publisher:
-            publish("STORY_TIMEOUT", f"超时: {e}")
-    except KeyboardInterrupt:
-        result["error"] = "处理被用户中断"
-        if publisher:
-            publish("STORY_INTERRUPTED", "处理被中断")
-    except ImportError as e:
-        result["error"] = f"依赖缺失: {e}"
-        if publisher:
-            publish("STORY_MISSING_DEPS", f"缺少依赖: {e}")
-    except Exception as e:
-        result["error"] = f"处理错误: {str(e)}"
-        if publisher:
-            publish("STORY_ERROR", f"错误: {str(e)}")
-        traceback.print_exc()
-
-    # Calculate processing time
-    processing_time = time.time() - start_time
-    result["processing_time"] = processing_time
-
-    # Add resource usage
-    try:
-        import psutil
-
-        process = psutil.Process()
-        memory_info = process.memory_info()
-        result["resource_usage"] = {
-            "cpu_percent": process.cpu_percent(),
-            "memory_mb": memory_info.rss / (1024 * 1024),
-            "user_time": process.cpu_times().user,
-            "system_time": process.cpu_times().system,
-        }
-    except ImportError:
-        result["resource_usage"] = {"error": "psutil not available"}
-
-    # Save result
-    try:
-        with open(output_path, "w") as f:
-            json.dump(result, f, indent=2, ensure_ascii=False)
-        if publisher:
-            publish("STORY_SAVED", f"结果保存到: {output_path}")
-    except Exception as e:
-        result["error"] = f"保存结果失败: {str(e)}"
-        if publisher:
-            publish("STORY_SAVE_ERROR", f"保存失败: {str(e)}")
-
-    return result
-
-
-def main():
-    """Main entry point"""
-    parser = argparse.ArgumentParser(
-        description=f"{PROCESSOR_NAME.upper()} Processor v{PROCESSOR_VERSION} - Parent-Child Chunk Generation"
-    )
-    parser.add_argument("--asr", help="Path to ASR JSON file", required=True)
-    parser.add_argument("--cut", help="Path to CUT JSON file", default="")
-    parser.add_argument("--yolo", help="Path to YOLO JSON file", default="")
-    parser.add_argument("--ocr", help="Path to OCR JSON file", default="")
-    parser.add_argument("--output", help="Path to output JSON file", required=True)
-    parser.add_argument("--uuid", help="UUID for progress tracking", default="")
-    parser.add_argument(
-        "--parent-chunk-size",
-        help=f"Maximum child chunks per parent (default: {DEFAULT_PARENT_CHUNK_SIZE})",
-        type=int,
-        default=DEFAULT_PARENT_CHUNK_SIZE,
-    )
-    parser.add_argument(
-        "--min-child-chunks",
-        help=f"Minimum child chunks to create parent (default: {DEFAULT_MIN_CHILD_CHUNKS})",
-        type=int,
-        default=DEFAULT_MIN_CHILD_CHUNKS,
-    )
-    parser.add_argument(
-        "--max-child-chunks",
-        help=f"Maximum child chunks per parent (default: {DEFAULT_MAX_CHILD_CHUNKS})",
-        type=int,
-        default=DEFAULT_MAX_CHILD_CHUNKS,
-    )
-    parser.add_argument(
-        "--summary-length",
-        help=f"Maximum summary length in characters (default: {DEFAULT_SUMMARY_LENGTH})",
-        type=int,
-        default=DEFAULT_SUMMARY_LENGTH,
-    )
-    parser.add_argument(
-        "--model",
-        help=f"Summary model to use (default: {DEFAULT_MODEL})",
-        default=DEFAULT_MODEL,
-        choices=["openai", "local", "template"],
-    )
-    parser.add_argument(
-        "--model-name",
-        help=f"Model name for OpenAI (default: {DEFAULT_MODEL_NAME})",
-        default=DEFAULT_MODEL_NAME,
-    )
-    parser.add_argument(
-        "--temperature",
-        help=f"Temperature for generation (default: {DEFAULT_TEMPERATURE})",
-        type=float,
-        default=DEFAULT_TEMPERATURE,
-    )
-    parser.add_argument(
-        "--max-tokens",
-        help=f"Maximum tokens per summary (default: {DEFAULT_MAX_TOKENS})",
-        type=int,
-        default=DEFAULT_MAX_TOKENS,
-    )
-    parser.add_argument(
-        "--timeout",
-        help=f"Timeout in seconds (default: {DEFAULT_TIMEOUT})",
-        type=int,
-        default=DEFAULT_TIMEOUT,
-    )
-    parser.add_argument(
-        "--health-check",
-        help="Run health check and exit",
-        action="store_true",
-    )
-
-    args = parser.parse_args()
-
-    # Health check mode
-    if args.health_check:
-        health = check_environment()
-        print(json.dumps(health, indent=2, ensure_ascii=False))
-        return (
-            0
-            if all(c["status"] in ["available", "optional"] for c in health["checks"])
-            else 1
-        )
-
-    # Normal processing mode
-    result = process_story(
-        asr_path=args.asr,
-        cut_path=args.cut,
-        yolo_path=args.yolo,
-        ocr_path=args.ocr,
-        output_path=args.output,
-        uuid=args.uuid,
-        parent_chunk_size=args.parent_chunk_size,
-        min_child_chunks=args.min_child_chunks,
-        max_child_chunks=args.max_child_chunks,
-        summary_length=args.summary_length,
-        model=args.model,
-        model_name=args.model_name,
-        temperature=args.temperature,
-        max_tokens=args.max_tokens,
-        timeout=args.timeout,
-    )
-
-    # Print result summary
-    if result.get("success", False):
-        print(f"✅ {PROCESSOR_NAME.upper()} 处理成功")
-        print(f"   父块数: {result.get('parent_chunk_count', 0)}")
-        stats = result.get("chunk_statistics", {})
-        print(f"   子块总数: {stats.get('total_child_chunks', 0)}")
-        print(f"   平均子块/父块: {stats.get('avg_children_per_parent', 0):.1f}")
-        print(f"   处理时间: {result.get('processing_time', 0):.1f} 秒")
-        print(f"   输出文件: {args.output}")
-        return 0
-    else:
-        print(f"❌ {PROCESSOR_NAME.upper()} 处理失败")
-        print(f"   错误: {result.get('error', '未知错误')}")
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/story_processor_contract_v1_v1.11.py
+++ b/scripts/story_processor_contract_v1_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/story_processor_contract_v1_v1.11.py
--- a/scripts/story_processor_v1.11.py
+++ b/scripts/story_processor_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/story_processor_v1.11.py
--- a/scripts/test_parent_chunk_generation.py
+++ b/scripts/test_parent_chunk_generation.py
@@ -1,121 +0,0 @@
-#!/opt/homebrew/bin/python3.11
-"""
-Test Parent Chunk Summary Generation (Gemma 4)
-"""
-
-import json
-import ollama
-import time
-
-# Configuration
-UUID = "384b0ff44aaaa1f1"
-ASR_PATH = f"output/{UUID}/{UUID}.asr.json"
-MODEL = "gemma4:latest"
-
-# The Prompt Template
-PARENT_SUMMARY_PROMPT = """
-You are an expert film analyst. Analyze the following movie dialogue segment (approx 60 seconds).
-Your task is to generate a structured JSON summary containing:
-1. **narrative_summary**: A one-sentence summary of the main event/plot point.
-2. **entities**: Key information extracted:
-   - `who`: List of characters involved.
-   - `where`: Inferred location (e.g., "Apartment", "Train").
-   - `objects`: Key props mentioned (e.g., "Ticket", "Money").
-3. **emotional_arc**: The emotional transition:
-   - `start_mood`: Mood at the beginning.
-   - `end_mood`: Mood at the end.
-4. **plot_sequence**:
-   - `scene_type`: Type of scene (e.g., "Confrontation", "Romance", "Discovery").
-   - `key_action`: The main action taking place.
-
-**IMPORTANT RULES:**
- Output **ONLY** valid JSON.
- Do NOT include "Thinking Process" or markdown formatting.
- If information is unknown, use "Unknown".
- Context: This is from the movie "Charade" (1963).
-
-Dialogue:
-{context}
-"""
-
-
-def load_sample(start_index, count=20):
-    """Load a slice of dialogue to simulate a Parent Chunk"""
-    try:
-        with open(ASR_PATH, "r") as f:
-            data = json.load(f)
-
-        segments = data.get("segments", [])
-        selected = segments[start_index : start_index + count]
-        text = " ".join([s.get("text", "") for s in selected])
-        print(f"📂 Loaded Sample {start_index}: {len(selected)} segments.")
-        return text
-    except Exception as e:
-        return f"Error: {e}"
-
-
-def run_test(name, context_text):
-    print(f"\n🧪 Testing: {name}")
-    print("-" * 50)
-    print(f"📖 Input Preview: {context_text[:100]}...")
-
-    prompt = PARENT_SUMMARY_PROMPT.format(context=context_text)
-
-    try:
-        start = time.time()
-        response = ollama.chat(
-            model=MODEL, messages=[{"role": "user", "content": prompt}]
-        )
-        duration = time.time() - start
-
-        content = response["message"]["content"]
-
-        # Clean up thinking tags if present
-        if "```json" in content:
-            content = content.split("```json")[1].split("```")[0]
-        elif "Thinking..." in content:
-            # crude cleanup for demo
-            content = content.split("...")[-1]
-
-        # Attempt parse
-        try:
-            result = json.loads(content.strip())
-            print(f"✅ Success ({duration:.2f}s)")
-            print(json.dumps(result, indent=2))
-            return True
-        except json.JSONDecodeError:
-            print(f"⚠️ JSON Parse Failed ({duration:.2f}s)")
-            print(content[:500])
-            return False
-
-    except Exception as e:
-        print(f"❌ API Error: {e}")
-        return False
-
-
-def main():
-    print(f"🚀 Starting Parent Chunk Summary Tests on '{UUID}'")
-
-    # Test 1: Early Dialogue (Entities & Narrative Focus)
-    # "possessed a ticket of passage..."
-    txt1 = load_sample(start_index=10)
-    res1 = run_test("Test 1: Early Plot (Entities & Narrative)", txt1)
-
-    time.sleep(2)  # Cool down
-
-    # Test 2: Middle Conflict (Emotional Arc Focus)
-    # "where did he keep his money..." (From previous context)
-    txt2 = load_sample(start_index=50)
-    res2 = run_test("Test 2: Conflict (Emotional Arc)", txt2)
-
-    time.sleep(2)  # Cool down
-
-    # Test 3: Later Dialogue (Plot Sequence Focus)
-    # Looking for a scene involving a conclusion or death aftermath
-    # Let's pick a later section to test robustness
-    txt3 = load_sample(start_index=150)
-    res3 = run_test("Test 3: Late Plot (Sequence)", txt3)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/test_parent_chunk_generation_v1.11.py
+++ b/scripts/test_parent_chunk_generation_v1.11.py
@@ -1 +0,0 @@
-../v1.1/scripts/test_parent_chunk_generation_v1.11.py
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/face_mediapipe_test_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/generate_parent_chunks_gemma4_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/mediapipe_holistic_processor_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/mediapipe_processor_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/rebuild_story_content_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/regenerate_parent_5w1h_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/story_pipeline_full_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/story_processor_contract_v1_v1.11.py`
				`@@ -1 +0,0 @@`
				`../v1.1/scripts/test_parent_chunk_generation_v1.11.py`