fix: face_detections INSERT in pipeline, add dependency graph doc
This commit is contained in:
@@ -4,6 +4,52 @@
|
||||
|
||||
## Pipeline
|
||||
|
||||
### Dependency Graph
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Processors["10 Processors"]
|
||||
Cut[Cut] --> ASR[ASR]
|
||||
ASR --> ASRX[ASRX]
|
||||
ASRX --> Story[Story]
|
||||
Cut --> Story
|
||||
YOLO[YOLO] --> VisualChunk[VisualChunk]
|
||||
VisualChunk --> Story
|
||||
Face[Face] --> Story
|
||||
Story --> FiveW1H[5W1H]
|
||||
OCR[OCR]
|
||||
Pose[Pose]
|
||||
end
|
||||
|
||||
subgraph Ingestion["入庫 (Post-Processing)"]
|
||||
ASR --> Rule1[Rule 1 Sentence]
|
||||
ASRX --> Rule1
|
||||
Rule1 --> Vectorize[Auto-Vectorize]
|
||||
Rule1 --> Phase1[Phase 1 Pack]
|
||||
|
||||
Cut --> Rule3[Rule 3 Scene]
|
||||
ASR --> Rule3
|
||||
|
||||
Face --> Trace[Face Trace]
|
||||
Trace --> Qdrant[Qdrant Sync]
|
||||
Trace --> TraceChunks[Trace Chunks]
|
||||
Trace --> TKG[TKG Builder]
|
||||
|
||||
Face --> TMDbMatch[TMDb Match]
|
||||
Face --> SceneMeta[Scene Metadata]
|
||||
YOLO --> SceneMeta
|
||||
Face --> IdentityAgent[Identity Agent]
|
||||
ASRX --> IdentityAgent
|
||||
|
||||
Cut --> Agent5W1H[5W1H Agent]
|
||||
ASR --> Agent5W1H
|
||||
Agent5W1H --> Phase2[Phase 2 Pack]
|
||||
end
|
||||
|
||||
style Processors fill:#1a1a2e,stroke:#e94560
|
||||
style Ingestion fill:#16213e,stroke:#0f3460
|
||||
```
|
||||
|
||||
### 10 Processor Stages
|
||||
|
||||
| # | Processor | Depends On | Description |
|
||||
@@ -16,7 +62,7 @@
|
||||
| 6 | `Face` | — | Face detection + recognition (InsightFace + CoreML) |
|
||||
| 7 | `Pose` | — | Pose estimation |
|
||||
| 8 | `VisualChunk` | YOLO | Visual object chunking |
|
||||
| 9 | `Story` | ASRX + Cut | Narrative scene summarization (LLM, with embedding) |
|
||||
| 9 | `Story` | ASRX + Cut + YOLO + Face | Narrative scene summarization (LLM, with embedding) |
|
||||
| 10 | `5W1H` | Story | Who/What/When/Where/Why extraction (LLM, with embedding) |
|
||||
|
||||
### Post-Processing (入庫)
|
||||
@@ -27,16 +73,17 @@ After all 10 processors complete, the pipeline runs the following storage & enri
|
||||
|---|------|----------|----------|
|
||||
| 1 | **Rule 1 Sentence Chunking** | ASR + ASRX | `chunk` table, `chunk_type = 'sentence'` |
|
||||
| 2 | **Auto-Vectorize** | Rule 1 | `chunk.embedding` IS NOT NULL (pgvector) |
|
||||
| 3 | **Rule 3 Scene Chunking** | Cut + ASR | `chunk` table, `chunk_type = 'cut'` |
|
||||
| 4 | **Face Trace + DB Store** | Face | `face_detections.trace_id` IS NOT NULL |
|
||||
| 5 | **Qdrant Face Sync** | Face Trace | Qdrant collection (face embeddings) |
|
||||
| 6 | **Trace Chunks** | Face Trace | `chunk` table, `chunk_type = 'trace'` |
|
||||
| 7 | **TKG Builder** | Face Trace | `tkg_nodes` + `tkg_edges` tables |
|
||||
| 8 | **TMDb Face Matching** | Face + TMDb enabled | `face_detections.identity_id` IS NOT NULL |
|
||||
| 9 | **Heuristic Scene Metadata** | Face + YOLO | `{file_uuid}.scene_meta.json` on disk |
|
||||
| 10 | **Identity Agent** | Face + ASRX | `identities` with `source = 'identity_agent'` |
|
||||
| 11 | **5W1H Agent** | Cut + ASR | `chunk.summary_text` IS NOT NULL (chunk_type = 'cut') |
|
||||
| 12 | **Release Pack** | 5W1H Agent | `release_pack.py --phase 2` output |
|
||||
| 3 | **Phase 1 Pack** | Rule 1 | `release_pack.py --phase 1` |
|
||||
| 4 | **Rule 3 Scene Chunking** | Cut + ASR | `chunk` table, `chunk_type = 'cut'` |
|
||||
| 5 | **Face Trace** | Face | `face_detections.trace_id` IS NOT NULL |
|
||||
| 6 | **Qdrant Face Sync** | Face Trace | Qdrant `face_embedding` collection |
|
||||
| 7 | **Trace Chunks** | Face Trace | `chunk` table, `chunk_type = 'trace'` |
|
||||
| 8 | **TKG Builder** | Face Trace | `tkg_nodes` + `tkg_edges` tables |
|
||||
| 9 | **TMDb Face Matching** | Face + TMDb enabled | `face_detections.identity_id` IS NOT NULL |
|
||||
| 10 | **Heuristic Scene Metadata** | Face + YOLO | `{file_uuid}.scene_meta.json` on disk |
|
||||
| 11 | **Identity Agent** | Face + ASRX | `identities` with `source = 'identity_agent'` |
|
||||
| 12 | **5W1H Agent** | Cut + ASR | `chunk.summary_text` IS NOT NULL (`chunk_type = 'cut'`) |
|
||||
| 13 | **Release Pack** | 5W1H Agent | `release_pack.py --phase 2` output |
|
||||
|
||||
### Ingestion Status
|
||||
|
||||
|
||||
@@ -30,6 +30,50 @@ a { color: #0066cc; }
|
||||
<!-- depends: 01_auth -->
|
||||
|
||||
<h2>Pipeline</h2>
|
||||
<h3>Dependency Graph</h3>
|
||||
<div class="codehilite"><pre><span></span><code><span class="n">flowchart</span><span class="w"> </span><span class="n">TB</span>
|
||||
<span class="w"> </span><span class="n">subgraph</span><span class="w"> </span><span class="n">Processors</span><span class="p">[</span><span class="s">"10 Processors"</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Cut</span><span class="p">[</span><span class="n">Cut</span><span class="p">]</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">ASR</span><span class="p">[</span><span class="n">ASR</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASR</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">ASRX</span><span class="p">[</span><span class="n">ASRX</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASRX</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Story</span><span class="p">[</span><span class="n">Story</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Cut</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Story</span>
|
||||
<span class="w"> </span><span class="n">YOLO</span><span class="p">[</span><span class="n">YOLO</span><span class="p">]</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">VisualChunk</span><span class="p">[</span><span class="n">VisualChunk</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">VisualChunk</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Story</span>
|
||||
<span class="w"> </span><span class="n">Face</span><span class="p">[</span><span class="n">Face</span><span class="p">]</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Story</span>
|
||||
<span class="w"> </span><span class="n">Story</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">FiveW1H</span><span class="p">[</span><span class="mi">5</span><span class="n">W1H</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">OCR</span><span class="p">[</span><span class="n">OCR</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Pose</span><span class="p">[</span><span class="n">Pose</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">end</span>
|
||||
|
||||
<span class="w"> </span><span class="n">subgraph</span><span class="w"> </span><span class="n">Ingestion</span><span class="p">[</span><span class="s">"入庫 (Post-Processing)"</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASR</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Rule1</span><span class="p">[</span><span class="n">Rule</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="n">Sentence</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASRX</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Rule1</span>
|
||||
<span class="w"> </span><span class="n">Rule1</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Vectorize</span><span class="p">[</span><span class="n">Auto</span><span class="o">-</span><span class="n">Vectorize</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Rule1</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Phase1</span><span class="p">[</span><span class="n">Phase</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="n">Pack</span><span class="p">]</span>
|
||||
|
||||
<span class="w"> </span><span class="n">Cut</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Rule3</span><span class="p">[</span><span class="n">Rule</span><span class="w"> </span><span class="mi">3</span><span class="w"> </span><span class="n">Scene</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASR</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Rule3</span>
|
||||
|
||||
<span class="w"> </span><span class="n">Face</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Trace</span><span class="p">[</span><span class="n">Face</span><span class="w"> </span><span class="n">Trace</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Trace</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Qdrant</span><span class="p">[</span><span class="n">Qdrant</span><span class="w"> </span><span class="n">Sync</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Trace</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">TraceChunks</span><span class="p">[</span><span class="n">Trace</span><span class="w"> </span><span class="n">Chunks</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Trace</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">TKG</span><span class="p">[</span><span class="n">TKG</span><span class="w"> </span><span class="n">Builder</span><span class="p">]</span>
|
||||
|
||||
<span class="w"> </span><span class="n">Face</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">TMDbMatch</span><span class="p">[</span><span class="n">TMDb</span><span class="w"> </span><span class="n">Match</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">Face</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">SceneMeta</span><span class="p">[</span><span class="n">Scene</span><span class="w"> </span><span class="n">Metadata</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">YOLO</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">SceneMeta</span>
|
||||
<span class="w"> </span><span class="n">Face</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">IdentityAgent</span><span class="p">[</span><span class="n">Identity</span><span class="w"> </span><span class="n">Agent</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASRX</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">IdentityAgent</span>
|
||||
|
||||
<span class="w"> </span><span class="n">Cut</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Agent5W1H</span><span class="p">[</span><span class="mi">5</span><span class="n">W1H</span><span class="w"> </span><span class="n">Agent</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">ASR</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Agent5W1H</span>
|
||||
<span class="w"> </span><span class="n">Agent5W1H</span><span class="w"> </span><span class="o">--></span><span class="w"> </span><span class="n">Phase2</span><span class="p">[</span><span class="n">Phase</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="n">Pack</span><span class="p">]</span>
|
||||
<span class="w"> </span><span class="n">end</span>
|
||||
|
||||
<span class="w"> </span><span class="n">style</span><span class="w"> </span><span class="n">Processors</span><span class="w"> </span><span class="n">fill</span><span class="o">:</span><span class="err">#</span><span class="mi">1</span><span class="n">a1a2e</span><span class="p">,</span><span class="n">stroke</span><span class="o">:</span><span class="err">#</span><span class="n">e94560</span>
|
||||
<span class="w"> </span><span class="n">style</span><span class="w"> </span><span class="n">Ingestion</span><span class="w"> </span><span class="n">fill</span><span class="o">:</span><span class="err">#</span><span class="mi">16213</span><span class="n">e</span><span class="p">,</span><span class="n">stroke</span><span class="o">:</span><span class="err">#</span><span class="mf">0f</span><span class="mi">3460</span>
|
||||
</code></pre></div>
|
||||
|
||||
<h3>10 Processor Stages</h3>
|
||||
<table class="table">
|
||||
<thead>
|
||||
@@ -92,7 +136,7 @@ a { color: #0066cc; }
|
||||
<tr>
|
||||
<td>9</td>
|
||||
<td><code>Story</code></td>
|
||||
<td>ASRX + Cut</td>
|
||||
<td>ASRX + Cut + YOLO + Face</td>
|
||||
<td>Narrative scene summarization (LLM, with embedding)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -129,60 +173,66 @@ a { color: #0066cc; }
|
||||
</tr>
|
||||
<tr>
|
||||
<td>3</td>
|
||||
<td><strong>Phase 1 Pack</strong></td>
|
||||
<td>Rule 1</td>
|
||||
<td><code>release_pack.py --phase 1</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>4</td>
|
||||
<td><strong>Rule 3 Scene Chunking</strong></td>
|
||||
<td>Cut + ASR</td>
|
||||
<td><code>chunk</code> table, <code>chunk_type = 'cut'</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>4</td>
|
||||
<td><strong>Face Trace + DB Store</strong></td>
|
||||
<td>5</td>
|
||||
<td><strong>Face Trace</strong></td>
|
||||
<td>Face</td>
|
||||
<td><code>face_detections.trace_id</code> IS NOT NULL</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>5</td>
|
||||
<td>6</td>
|
||||
<td><strong>Qdrant Face Sync</strong></td>
|
||||
<td>Face Trace</td>
|
||||
<td>Qdrant collection (face embeddings)</td>
|
||||
<td>Qdrant face_embedding collection</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>6</td>
|
||||
<td>7</td>
|
||||
<td><strong>Trace Chunks</strong></td>
|
||||
<td>Face Trace</td>
|
||||
<td><code>chunk</code> table, <code>chunk_type = 'trace'</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>7</td>
|
||||
<td>8</td>
|
||||
<td><strong>TKG Builder</strong></td>
|
||||
<td>Face Trace</td>
|
||||
<td><code>tkg_nodes</code> + <code>tkg_edges</code> tables</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>8</td>
|
||||
<td>9</td>
|
||||
<td><strong>TMDb Face Matching</strong></td>
|
||||
<td>Face + TMDb enabled</td>
|
||||
<td><code>face_detections.identity_id</code> IS NOT NULL</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>9</td>
|
||||
<td>10</td>
|
||||
<td><strong>Heuristic Scene Metadata</strong></td>
|
||||
<td>Face + YOLO</td>
|
||||
<td><code>{file_uuid}.scene_meta.json</code> on disk</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>10</td>
|
||||
<td>11</td>
|
||||
<td><strong>Identity Agent</strong></td>
|
||||
<td>Face + ASRX</td>
|
||||
<td><code>identities</code> with <code>source = 'identity_agent'</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>11</td>
|
||||
<td>12</td>
|
||||
<td><strong>5W1H Agent</strong></td>
|
||||
<td>Cut + ASR</td>
|
||||
<td><code>chunk.summary_text</code> IS NOT NULL (chunk_type = 'cut')</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>12</td>
|
||||
<td>13</td>
|
||||
<td><strong>Release Pack</strong></td>
|
||||
<td>5W1H Agent</td>
|
||||
<td><code>release_pack.py --phase 2</code> output</td>
|
||||
|
||||
@@ -2193,6 +2193,21 @@ impl PostgresDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn store_face_detections_batch(
|
||||
&self, uuid: &str, detections: &[(i64, f64, i32, i32, i32, i32, f32)]
|
||||
) -> Result<()> {
|
||||
let table = schema::table_name("face_detections");
|
||||
for (frame, ts, x, y, w, h, conf) in detections {
|
||||
sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, frame_number, timestamp_secs, x, y, width, height, confidence) \
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", table
|
||||
))
|
||||
.bind(uuid).bind(frame).bind(ts).bind(x).bind(y).bind(w).bind(h).bind(conf)
|
||||
.execute(&self.pool).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn store_scene_pre_chunks_batch(&self, uuid: &str, scenes: &[(i64, i64, i64, f64, f64, serde_json::Value)]) -> Result<()> {
|
||||
let table = schema::table_name("pre_chunks");
|
||||
for (_i, _sf, _ef, start, end, data) in scenes {
|
||||
|
||||
+14
-1
@@ -887,12 +887,14 @@ impl ProcessorPool {
|
||||
) -> Result<()> {
|
||||
let frames_count = face_result.frames.len();
|
||||
tracing::info!(
|
||||
"Storing {} Face pre-chunks for video {}",
|
||||
"Storing {} Face pre-chunks + {} detections for video {}",
|
||||
frames_count,
|
||||
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
|
||||
uuid
|
||||
);
|
||||
|
||||
let mut pre_chunks_to_store = Vec::new();
|
||||
let mut detections_to_store = Vec::new();
|
||||
|
||||
for frame in face_result.frames.iter() {
|
||||
let data = serde_json::json!({
|
||||
@@ -901,10 +903,21 @@ impl ProcessorPool {
|
||||
});
|
||||
|
||||
pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None));
|
||||
|
||||
for face in frame.faces.iter() {
|
||||
detections_to_store.push((
|
||||
frame.frame as i64,
|
||||
frame.timestamp,
|
||||
face.x, face.y, face.width, face.height,
|
||||
face.confidence,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
db.store_raw_pre_chunks_batch(uuid, "face", &pre_chunks_to_store)
|
||||
.await?;
|
||||
db.store_face_detections_batch(uuid, &detections_to_store)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user