feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)

Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
Accusys
2026-06-21 04:47:49 +08:00
parent 0afc70fc5b
commit 2cfcfdd1af
2926 changed files with 8311054 additions and 1390 deletions

View File

@@ -1,3 +1,7 @@
pub mod schema;
pub mod verifier;
pub use verifier::{verify_output, VerificationResult, VerifierError};
pub use schema::{FileVerificationReport, ProcessorVerification};
pub use verifier::{
cleanup_temp_files, verify_file, verify_output, VerificationResult, VerifierError,
};

355
src/verification/schema.rs Normal file
View File

@@ -0,0 +1,355 @@
use crate::core::db::ProcessorType;
use serde::{Deserialize, Serialize};
/// Required field definition for JSON schema validation.
///
/// Each entry names one value that `check_completeness` looks up in a
/// processor's JSON output, together with the JSON type it must have.
#[derive(Debug, Clone)]
pub struct RequiredField {
/// Dot-separated key path into the JSON document (e.g. `"metadata.fps"`).
pub path: &'static str,
/// JSON type the value at `path` is validated against.
pub field_type: FieldType,
/// For array fields, whether an empty array is acceptable. Combined with
/// `FieldType::OptionalNumber` it also allows the field to be absent.
pub allow_empty: bool,
}
/// JSON value kinds a [`RequiredField`] can be validated against.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldType {
/// Any JSON number.
Number,
/// A JSON number that is expected to be greater than zero.
PositiveNumber,
/// A JSON array.
Array,
/// A JSON array that is expected to contain at least one element.
NonEmptyArray,
/// A JSON object.
Object,
/// A JSON string.
String,
/// An optional value: the completeness check performs no type validation
/// on it, and it may be absent when `allow_empty` is set.
OptionalNumber,
}
/// Processor JSON schema: defines required fields and their types
/// for the output of a single processor.
#[derive(Debug, Clone)]
pub struct ProcessorJsonSchema {
/// Processor whose JSON output this schema describes.
pub processor: ProcessorType,
/// Fields that are validated in the processor's JSON output.
pub required_fields: &'static [RequiredField],
/// Minimum number of data items (as counted by `count_data_items` in the
/// verifier) the output must contain to be considered complete.
pub min_data_threshold: usize,
}
/// All processor schemas
pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
ProcessorJsonSchema {
processor: ProcessorType::Cut,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "scenes",
field_type: FieldType::NonEmptyArray,
allow_empty: false,
},
],
min_data_threshold: 1,
},
ProcessorJsonSchema {
processor: ProcessorType::Yolo,
required_fields: &[
RequiredField {
path: "metadata.fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "metadata.total_frames",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Object,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Ocr,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Face,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Pose,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Appearance,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Asr,
required_fields: &[
RequiredField {
path: "language",
field_type: FieldType::OptionalNumber,
allow_empty: true,
},
RequiredField {
path: "segments",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Asrx,
required_fields: &[
RequiredField {
path: "language",
field_type: FieldType::OptionalNumber,
allow_empty: true,
},
RequiredField {
path: "segments",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Scene,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "scenes",
field_type: FieldType::NonEmptyArray,
allow_empty: false,
},
],
min_data_threshold: 1,
},
ProcessorJsonSchema {
processor: ProcessorType::Story,
required_fields: &[
RequiredField {
path: "child_chunks",
field_type: FieldType::Array,
allow_empty: true,
},
RequiredField {
path: "parent_chunks",
field_type: FieldType::Array,
allow_empty: true,
},
RequiredField {
path: "stats",
field_type: FieldType::Object,
allow_empty: false,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::MediaPipe,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
];
/// Looks up the schema registered for `processor`.
///
/// Returns `None` when `PROCESSOR_SCHEMAS` has no entry for it.
pub fn get_schema(processor: &ProcessorType) -> Option<&'static ProcessorJsonSchema> {
    for schema in PROCESSOR_SCHEMAS {
        if schema.processor == *processor {
            return Some(schema);
        }
    }
    None
}
/// Verification result for a single processor.
///
/// The boolean flags are filled in by the verifier's layered checks and then
/// folded into `trust_level` by `update_trust_level`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessorVerification {
/// Name of the processor this result belongs to.
pub processor: String,
/// Whether the processor's JSON output file was found.
pub file_exists: bool,
/// Whether the file content parsed as JSON.
pub valid_json: bool,
/// Layer 1 result: required fields present and data threshold met.
pub completeness: bool,
/// Layer 2 result: all dependencies exist, parse and are complete.
pub dependency_ok: bool,
/// Layer 3 result: values are plausible against probe data and other outputs.
pub reasonableness: bool,
/// One of `"trusted"`, `"degraded"`, `"suspicious"` or `"untrusted"`.
pub trust_level: String,
/// Human-readable descriptions of every problem found.
pub issues: Vec<String>,
/// Per-processor statistics extracted from the JSON output.
pub data_summary: serde_json::Value,
}
impl ProcessorVerification {
    /// Creates a pessimistic result for `processor`: nothing is verified yet,
    /// while the dependency and reasonableness layers start out as passing.
    pub fn new(processor: &str) -> Self {
        Self {
            processor: processor.to_owned(),
            file_exists: false,
            valid_json: false,
            completeness: false,
            dependency_ok: true,
            reasonableness: true,
            trust_level: String::from("untrusted"),
            issues: Vec::new(),
            data_summary: serde_json::json!({}),
        }
    }

    /// Recomputes `trust_level` from the individual check flags.
    ///
    /// A result that is not loaded and complete is always `"untrusted"`;
    /// otherwise a failed dependency check yields `"degraded"`, a failed
    /// reasonableness check yields `"suspicious"`, and passing both yields
    /// `"trusted"`.
    pub fn update_trust_level(&mut self) {
        let usable = self.file_exists && self.valid_json && self.completeness;
        let level = match (usable, self.dependency_ok, self.reasonableness) {
            (true, true, true) => "trusted",
            // Dependency problems take precedence over reasonableness problems.
            (true, false, _) => "degraded",
            (true, true, false) => "suspicious",
            (false, _, _) => "untrusted",
        };
        self.trust_level = level.to_string();
    }
}
/// Overall file verification report, aggregating every processor's result
/// for one file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileVerificationReport {
/// Identifier of the verified file.
pub file_uuid: String,
/// Aggregated trust level, computed by `update_overall_trust`.
pub trust_level: String,
/// Individual results, one per verified processor.
pub processors: Vec<ProcessorVerification>,
/// JSON object with the total and the number of processors per trust level.
pub summary: serde_json::Value,
}
impl FileVerificationReport {
    /// Derives the file-level `trust_level` and `summary` from the
    /// per-processor trust levels.
    ///
    /// An empty report is `"untrusted"`. Otherwise the result is `"trusted"`
    /// when every processor is trusted, `"degraded"` when every processor is
    /// trusted or degraded, `"suspicious"` when at least one processor is
    /// suspicious, and `"untrusted"` in all remaining cases.
    pub fn update_overall_trust(&mut self) {
        let total = self.processors.len();
        let mut trusted = 0usize;
        let mut degraded = 0usize;
        let mut suspicious = 0usize;
        let mut untrusted = 0usize;
        for entry in &self.processors {
            match entry.trust_level.as_str() {
                "trusted" => trusted += 1,
                "degraded" => degraded += 1,
                "suspicious" => suspicious += 1,
                "untrusted" => untrusted += 1,
                _ => {}
            }
        }

        // NOTE(review): a mix of suspicious and untrusted processors reports
        // "suspicious", while untrusted-only reports "untrusted" — confirm
        // this precedence is intended.
        let overall = if total == 0 {
            "untrusted"
        } else if trusted == total {
            "trusted"
        } else if trusted + degraded == total {
            "degraded"
        } else if suspicious > 0 {
            "suspicious"
        } else {
            "untrusted"
        };
        self.trust_level = overall.to_string();

        self.summary = serde_json::json!({
            "total": total,
            "trusted": trusted,
            "degraded": degraded,
            "suspicious": suspicious,
            "untrusted": untrusted
        });
    }
}

View File

@@ -1,8 +1,13 @@
use crate::core::config::OUTPUT_DIR;
use crate::core::db::ProcessorType;
use anyhow::Result;
use std::collections::HashMap;
use std::path::PathBuf;
use tracing::info;
use super::schema::{
get_schema, FieldType, FileVerificationReport, ProcessorJsonSchema, ProcessorVerification,
RequiredField,
};
#[derive(Debug)]
pub struct VerificationResult {
@@ -37,6 +42,601 @@ pub struct VerifierError {
pub reason: String,
}
/// Expands a possibly shortened `file_uuid` to the full identifier.
///
/// A 32-character input is returned unchanged. Anything else is treated as a
/// prefix: the output directory is scanned for a `<uuid>.probe.json` file whose
/// name starts with it, and the part before the first `.` is returned. When no
/// such file exists (or the directory cannot be read) the input is returned
/// as-is.
// NOTE(review): the first matching directory entry wins, so an ambiguous or
// empty prefix resolves to an arbitrary file — confirm this is acceptable.
fn resolve_uuid(file_uuid: &str) -> String {
    if file_uuid.len() == 32 {
        return file_uuid.to_string();
    }

    std::fs::read_dir(OUTPUT_DIR.as_str())
        .ok()
        .and_then(|entries| {
            entries.flatten().find_map(|entry| {
                let file_name = entry.file_name();
                let name = file_name.to_str()?;
                if name.starts_with(file_uuid) && name.ends_with(".probe.json") {
                    Some(name.split('.').next().unwrap_or(file_uuid).to_string())
                } else {
                    None
                }
            })
        })
        .unwrap_or_else(|| file_uuid.to_string())
}
/// Layer 1: Check JSON structure and data completeness.
///
/// Validates `value` against the schema registered for `processor`:
/// every required field must be present with the expected JSON type, and the
/// number of data items must reach the schema's `min_data_threshold`.
///
/// Returns `(true, [])` when no schema is registered for the processor;
/// otherwise returns whether no issue was found together with the list of
/// issue descriptions.
fn check_completeness(processor: &ProcessorType, value: &serde_json::Value) -> (bool, Vec<String>) {
    let schema = match get_schema(processor) {
        Some(s) => s,
        None => return (true, Vec::new()), // No schema = pass
    };
    let mut issues = Vec::new();

    for field in schema.required_fields {
        let val = match get_value_at_path(value, field.path) {
            Some(val) => val,
            None => {
                // Only an optional field that explicitly allows emptiness may be absent.
                if !field.allow_empty || field.field_type != FieldType::OptionalNumber {
                    issues.push(format!("'{}' is missing", field.path));
                }
                continue;
            }
        };

        match field.field_type {
            FieldType::Number | FieldType::PositiveNumber => match val.as_f64() {
                None => issues.push(format!("'{}' is not a number", field.path)),
                Some(n) if field.field_type == FieldType::PositiveNumber && n <= 0.0 => {
                    // Negative values are just as invalid as zero for a positive field.
                    if n == 0.0 {
                        issues.push(format!("'{}' is zero (expected positive)", field.path));
                    } else {
                        issues.push(format!("'{}' is negative (expected positive)", field.path));
                    }
                }
                Some(_) => {}
            },
            FieldType::Array | FieldType::NonEmptyArray => match val.as_array() {
                Some(arr) => {
                    // `NonEmptyArray` demands content regardless of `allow_empty`.
                    let must_have_items =
                        field.field_type == FieldType::NonEmptyArray || !field.allow_empty;
                    if arr.is_empty() && must_have_items {
                        issues.push(format!("'{}' is empty", field.path));
                    }
                }
                None => issues.push(format!("'{}' is not an array", field.path)),
            },
            FieldType::Object => {
                if val.as_object().is_none() {
                    issues.push(format!("'{}' is not an object", field.path));
                }
            }
            FieldType::String => {
                if val.as_str().is_none() {
                    issues.push(format!("'{}' is not a string", field.path));
                }
            }
            FieldType::OptionalNumber => {
                // Optional: a present value is deliberately not type-checked.
            }
        }
    }

    // Check data threshold
    let data_count = count_data_items(processor, value);
    if data_count < schema.min_data_threshold {
        issues.push(format!(
            "data count {} below minimum threshold {}",
            data_count, schema.min_data_threshold
        ));
    }

    (issues.is_empty(), issues)
}
/// Counts the primary data items in a processor's JSON output.
///
/// Which key is counted depends on the processor: `scenes`, `frames`
/// (an object for YOLO, an array otherwise), `segments`, or the sum of
/// `child_chunks` and `parent_chunks` for Story. A missing or wrongly typed
/// key counts as 0, as does any processor not listed here.
fn count_data_items(processor: &ProcessorType, value: &serde_json::Value) -> usize {
    // Length of the array stored under `key`, or 0 when absent / not an array.
    let array_len = |key: &str| {
        value
            .get(key)
            .and_then(|v| v.as_array())
            .map_or(0, |items| items.len())
    };

    match processor {
        ProcessorType::Cut | ProcessorType::Scene => array_len("scenes"),
        // YOLO uses dict-based frames
        ProcessorType::Yolo => value
            .get("frames")
            .and_then(|v| v.as_object())
            .map_or(0, |frames| frames.len()),
        ProcessorType::Ocr
        | ProcessorType::Face
        | ProcessorType::Pose
        | ProcessorType::Appearance
        | ProcessorType::MediaPipe => array_len("frames"),
        ProcessorType::Asr | ProcessorType::Asrx => array_len("segments"),
        ProcessorType::Story => array_len("child_chunks") + array_len("parent_chunks"),
        _ => 0,
    }
}
/// Resolves a dot-separated key path (e.g. `"stats.total_child_chunks"`)
/// inside `value`.
///
/// Each path segment is looked up with `Value::get` on the result of the
/// previous one; `None` is returned as soon as a segment cannot be found.
fn get_value_at_path<'a>(
    value: &'a serde_json::Value,
    path: &str,
) -> Option<&'a serde_json::Value> {
    path.split('.').try_fold(value, |node, key| node.get(key))
}
/// Layer 2: verifies that everything `processor` depends on is usable.
///
/// For each dependency reported by `processor.dependencies()` the matching
/// entry in `all_results` must exist, have been loaded as valid JSON, and have
/// passed the completeness check. Trust levels are intentionally not consulted
/// because they are only computed after this function has run.
///
/// Returns whether all dependencies are fine plus one issue per failing
/// dependency.
fn check_dependencies(
    processor: &ProcessorType,
    all_results: &HashMap<String, &ProcessorVerification>,
) -> (bool, Vec<String>) {
    let deps = processor.dependencies();
    let mut issues = Vec::new();

    for dep in &deps {
        let dep_name = dep.as_str();
        let problem = match all_results.get(dep_name) {
            None => Some(format!("dependency '{}' not found", dep_name)),
            Some(found) if !found.file_exists || !found.valid_json => {
                Some(format!("dependency '{}' missing or invalid", dep_name))
            }
            Some(found) if !found.completeness => {
                Some(format!("dependency '{}' incomplete", dep_name))
            }
            Some(_) => None,
        };
        if let Some(message) = problem {
            issues.push(message);
        }
    }

    (issues.is_empty(), issues)
}
/// Layer 3: cross-checks a processor's JSON against probe data and the
/// outputs of other processors.
///
/// Checks performed:
/// - `fps` must be positive and within 0.5 of the probe's video frame rate;
/// - `frame_count` must not be far below what the probe's frame count suggests;
/// - Story must not have far fewer chunks than CUT has scenes;
/// - ASR / ASRX must not be empty while CUT reports more than five scenes;
/// - CUT / Scene entries must not end before they start.
///
/// Checks whose inputs are unavailable (no probe, no `cut` entry in
/// `all_values`, missing keys) are silently skipped. Returns whether no issue
/// was found together with the issue descriptions.
fn check_reasonableness(
    processor: &ProcessorType,
    value: &serde_json::Value,
    probe_value: Option<&serde_json::Value>,
    all_values: &HashMap<String, &serde_json::Value>,
) -> (bool, Vec<String>) {
    let mut issues = Vec::new();

    // First stream in the probe output whose codec_type is "video".
    let video_stream = probe_value
        .and_then(|probe| probe.get("streams"))
        .and_then(|streams| streams.as_array())
        .and_then(|streams| {
            streams
                .iter()
                .find(|stream| stream.get("codec_type").and_then(|c| c.as_str()) == Some("video"))
        });

    // `r_frame_rate` is a "num/den" ratio string; a non-positive denominator is rejected.
    let probe_fps = video_stream
        .and_then(|stream| stream.get("r_frame_rate"))
        .and_then(|rate| rate.as_str())
        .and_then(|ratio| {
            let (num, den) = ratio.split_once('/')?;
            let n = num.parse::<f64>().ok()?;
            let d = den.parse::<f64>().ok()?;
            if d > 0.0 {
                Some(n / d)
            } else {
                None
            }
        });

    // `nb_frames` is read as a string holding an integer.
    let probe_frames = video_stream
        .and_then(|stream| stream.get("nb_frames"))
        .and_then(|count| count.as_str())
        .and_then(|text| text.parse::<u64>().ok());

    // fps consistency with the probe
    if let Some(json_fps) = value.get("fps").and_then(|v| v.as_f64()) {
        if json_fps <= 0.0 {
            issues.push("fps is zero or negative".to_string());
        } else if let Some(p_fps) = probe_fps {
            if (json_fps - p_fps).abs() > 0.5 {
                issues.push(format!(
                    "fps mismatch: JSON={}, probe={:.2}",
                    json_fps, p_fps
                ));
            }
        }
    }

    // frame_count consistency with the probe. Sampled processors are expected
    // to report roughly an eighth of the probe's frames; the count is only
    // flagged when it falls below 10% of that expectation.
    // NOTE(review): the divisor 8 is described upstream as "8Hz" sampling —
    // confirm it means every 8th frame rather than 8 frames per second.
    let json_frame_count = value.get("frame_count").and_then(|v| v.as_u64());
    if let (Some(json_frames), Some(p_frames)) = (json_frame_count, probe_frames) {
        let is_sampled = matches!(
            processor,
            ProcessorType::Cut
                | ProcessorType::Yolo
                | ProcessorType::Ocr
                | ProcessorType::Face
                | ProcessorType::Pose
                | ProcessorType::Appearance
                | ProcessorType::Scene
        );
        let expected = if is_sampled {
            (p_frames as f64 / 8.0) as u64
        } else {
            p_frames
        };
        let min_expected = (expected as f64 * 0.1) as u64;
        if json_frames > 0 && min_expected > 0 && json_frames < min_expected {
            issues.push(format!(
                "frame_count {} much less than expected ~{} (probe={})",
                json_frames, expected, p_frames
            ));
        }
    }

    // Processor-specific checks; the arms are mutually exclusive.
    match processor {
        ProcessorType::Story => {
            if let Some(cut_value) = all_values.get("cut") {
                let story_chunks = count_data_items(processor, value);
                let cut_scenes = count_data_items(&ProcessorType::Cut, cut_value);
                // Flag only when chunks drop below half the number of scenes.
                if story_chunks > 0 && cut_scenes > 0 && story_chunks < cut_scenes / 2 {
                    issues.push(format!(
                        "story chunk count ({}) much less than cut scene count ({})",
                        story_chunks, cut_scenes
                    ));
                }
            }
        }
        ProcessorType::Asr | ProcessorType::Asrx => {
            if let Some(cut_value) = all_values.get("cut") {
                let segments = count_data_items(processor, value);
                let cut_scenes = count_data_items(&ProcessorType::Cut, cut_value);
                // Many scenes but no speech segments likely means a processing issue.
                if segments == 0 && cut_scenes > 5 {
                    let message = if matches!(processor, ProcessorType::Asr) {
                        format!("ASR has 0 segments but CUT has {} scenes", cut_scenes)
                    } else {
                        format!("ASRX has 0 segments but CUT has {} scenes", cut_scenes)
                    };
                    issues.push(message);
                }
            }
        }
        ProcessorType::Cut | ProcessorType::Scene => {
            if let Some(scenes) = value.get("scenes").and_then(|v| v.as_array()) {
                for (index, scene) in scenes.iter().enumerate() {
                    let start = scene.get("start_time").and_then(|v| v.as_f64());
                    let end = scene.get("end_time").and_then(|v| v.as_f64());
                    if let (Some(s), Some(e)) = (start, end) {
                        if e < s {
                            issues.push(format!("scene {}: end_time < start_time", index));
                        }
                    }
                }
            }
        }
        _ => {}
    }

    (issues.is_empty(), issues)
}
/// Builds a small JSON summary of a processor's output.
///
/// The summary always contains `data_count`; depending on the processor it
/// also gets scene boundaries, detection totals, segment / speaker counts, the
/// detected language, or a copy of the `stats` object.
fn build_data_summary(processor: &ProcessorType, value: &serde_json::Value) -> serde_json::Value {
    /// Length of the array stored under `key` in `node`, 0 if absent or not an array.
    fn array_len(node: &serde_json::Value, key: &str) -> usize {
        node.get(key)
            .and_then(|v| v.as_array())
            .map_or(0, |items| items.len())
    }

    let mut summary = serde_json::json!({
        "data_count": count_data_items(processor, value)
    });

    match processor {
        ProcessorType::Cut => {
            if let Some(scenes) = value.get("scenes").and_then(|v| v.as_array()) {
                summary["scene_count"] = serde_json::json!(scenes.len());
                if let Some(first) = scenes.first() {
                    let start = first.get("start_time").and_then(|v| v.as_f64());
                    summary["first_scene_start"] = start.into();
                }
                if let Some(last) = scenes.last() {
                    let end = last.get("end_time").and_then(|v| v.as_f64());
                    summary["last_scene_end"] = end.into();
                }
            }
        }
        ProcessorType::Face
        | ProcessorType::Ocr
        | ProcessorType::Pose
        | ProcessorType::Appearance => {
            if let Some(frames) = value.get("frames").and_then(|v| v.as_array()) {
                // Any of these per-frame arrays counts as detections.
                let detection_keys = ["faces", "objects", "texts", "persons"];
                let per_frame: Vec<usize> = frames
                    .iter()
                    .map(|frame| {
                        detection_keys
                            .iter()
                            .map(|&key| array_len(frame, key))
                            .sum::<usize>()
                    })
                    .collect();
                let total_detections: usize = per_frame.iter().sum();
                let frames_with_data = per_frame.iter().filter(|&&count| count > 0).count();
                summary["total_detections"] = serde_json::json!(total_detections);
                summary["frames_with_data"] = serde_json::json!(frames_with_data);
            }
        }
        ProcessorType::Yolo => {
            if let Some(frames) = value.get("frames").and_then(|v| v.as_object()) {
                let per_frame: Vec<usize> = frames
                    .values()
                    .map(|frame| array_len(frame, "objects"))
                    .collect();
                let total_detections: usize = per_frame.iter().sum();
                let frames_with_data = per_frame.iter().filter(|&&count| count > 0).count();
                summary["total_detections"] = serde_json::json!(total_detections);
                summary["frames_with_data"] = serde_json::json!(frames_with_data);
            }
        }
        ProcessorType::Asr => {
            if let Some(segments) = value.get("segments").and_then(|v| v.as_array()) {
                summary["segment_count"] = serde_json::json!(segments.len());
                if let Some(lang) = value.get("language").and_then(|v| v.as_str()) {
                    summary["language"] = serde_json::json!(lang);
                }
            }
        }
        ProcessorType::Asrx => {
            if let Some(segments) = value.get("segments").and_then(|v| v.as_array()) {
                // Distinct string `speaker_id` values across all segments.
                let speakers: std::collections::HashSet<&str> = segments
                    .iter()
                    .filter_map(|segment| segment.get("speaker_id").and_then(|v| v.as_str()))
                    .collect();
                summary["segment_count"] = serde_json::json!(segments.len());
                summary["speaker_count"] = serde_json::json!(speakers.len());
            }
        }
        ProcessorType::Story => {
            if let Some(stats) = value.get("stats") {
                summary["stats"] = stats.clone();
            }
        }
        _ => {}
    }

    summary
}
/// Reads and parses `<file_uuid>.probe.json` from the output directory.
///
/// Returns `None` when the file cannot be read or does not contain valid JSON.
fn load_probe_json(file_uuid: &str) -> Option<serde_json::Value> {
    let mut probe_path = PathBuf::from(OUTPUT_DIR.as_str());
    probe_path.push(format!("{}.probe.json", file_uuid));
    let content = std::fs::read_to_string(&probe_path).ok()?;
    serde_json::from_str(&content).ok()
}
/// Main verification function for a file.
///
/// Runs the three verification layers for every processor returned by
/// `ProcessorType::all()` and aggregates them into a report:
///
/// 1. load each processor's JSON output from the output directory;
/// 2. check completeness and build a data summary for everything that loaded;
/// 3. check dependencies and cross-JSON reasonableness, then derive the
///    per-processor and overall trust levels.
///
/// `file_uuid` may be a short prefix; it is expanded with `resolve_uuid`.
/// A file that exists but does not parse is reported with
/// `file_exists = true`, `valid_json = false` and an "invalid JSON" issue,
/// while a file that cannot be read at all is reported as not found.
pub fn verify_file(file_uuid: &str) -> FileVerificationReport {
    let full_uuid = resolve_uuid(file_uuid);
    let processors = ProcessorType::all();
    let output_dir = PathBuf::from(OUTPUT_DIR.as_str());

    // Load probe.json once
    let probe_value = load_probe_json(&full_uuid);

    // Phase 1: Load all JSON values, remembering files that failed to parse.
    let mut all_values: HashMap<String, serde_json::Value> = HashMap::new();
    let mut parse_errors: HashMap<String, String> = HashMap::new();
    for processor in &processors {
        let proc_name = processor.as_str();
        let filename = match processor {
            ProcessorType::Story => format!("{}.story_story.json", full_uuid),
            _ => format!("{}.{}.json", full_uuid, proc_name),
        };
        if let Ok(content) = std::fs::read_to_string(output_dir.join(&filename)) {
            match serde_json::from_str::<serde_json::Value>(&content) {
                Ok(value) => {
                    all_values.insert(proc_name.to_string(), value);
                }
                Err(err) => {
                    parse_errors.insert(proc_name.to_string(), err.to_string());
                }
            }
        }
    }

    // Phase 2: Layer 1 (completeness) and data summary for each processor.
    // `verifications` is built in the same order as `processors`.
    let mut verifications: Vec<ProcessorVerification> = Vec::new();
    for processor in &processors {
        let proc_name = processor.as_str();
        let mut pv = ProcessorVerification::new(proc_name);
        if let Some(value) = all_values.get(proc_name) {
            pv.file_exists = true;
            pv.valid_json = true;
            let (complete, issues) = check_completeness(processor, value);
            pv.completeness = complete;
            pv.issues.extend(issues);
            pv.data_summary = build_data_summary(processor, value);
        } else if let Some(err) = parse_errors.get(proc_name) {
            // The file is there but unusable; say so instead of "not found".
            pv.file_exists = true;
            pv.issues.push(format!("invalid JSON: {}", err));
        } else {
            pv.issues.push("JSON file not found".to_string());
        }
        verifications.push(pv);
    }

    // Phase 3: Layers 2 and 3. Results are collected first and applied
    // afterwards so that every dependency check sees the Layer 1 state only.
    let updates: Vec<(bool, bool, Vec<String>)> = {
        // Lookup tables are loop-invariant, so build them once.
        let verif_refs: HashMap<String, &ProcessorVerification> = verifications
            .iter()
            .map(|v| (v.processor.clone(), v))
            .collect();
        let all_value_refs: HashMap<String, &serde_json::Value> = all_values
            .iter()
            .map(|(name, value)| (name.clone(), value))
            .collect();
        let probe_ref = probe_value.as_ref();

        processors
            .iter()
            .zip(verifications.iter())
            .map(|(proc_type, pv)| {
                // Layer 2: Dependencies (check completeness, not trust_level)
                let (deps_ok, mut issues) = check_dependencies(proc_type, &verif_refs);
                // Layer 3: Reasonableness (only possible when the JSON loaded)
                let reasonable = match all_values.get(&pv.processor) {
                    Some(val) => {
                        let (ok, reason_issues) =
                            check_reasonableness(proc_type, val, probe_ref, &all_value_refs);
                        issues.extend(reason_issues);
                        ok
                    }
                    None => true,
                };
                (deps_ok, reasonable, issues)
            })
            .collect()
    };

    // Apply updates
    for (pv, (deps_ok, reasonable, issues)) in verifications.iter_mut().zip(updates) {
        pv.dependency_ok = deps_ok;
        pv.reasonableness = reasonable;
        pv.issues.extend(issues);
        pv.update_trust_level();
    }

    let mut report = FileVerificationReport {
        file_uuid: full_uuid,
        trust_level: "untrusted".to_string(),
        processors: verifications,
        summary: serde_json::json!({}),
    };
    report.update_overall_trust();
    report
}
/// Legacy verification function (backward compatible)
pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> VerificationResult {
let proc_name = processor.as_str();
let filename = match processor {
@@ -63,53 +663,16 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
}
};
match processor {
ProcessorType::Asrx => {
let segs = value.get("segments").and_then(|v| v.as_array());
match segs {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Cut => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Pose => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Scene => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Story => VerificationResult::ok(proc_name, file_uuid),
_ => VerificationResult::ok(proc_name, file_uuid),
// Use new completeness check
let (complete, issues) = check_completeness(processor, &value);
if !complete {
return VerificationResult::fail(proc_name, file_uuid, &issues.join("; "));
}
VerificationResult::ok(proc_name, file_uuid)
}
/// 清理通過驗收的 processor 暫存檔,只保留最終 .json
/// Clean up temp files for a processor
pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
let proc_name = processor.as_str();
let prefix = format!("{}.{}.", file_uuid, proc_name);
@@ -133,9 +696,11 @@ pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
}
}
if removed > 0 {
info!(
tracing::info!(
"Cleaned up {} temp files for {}.{}",
removed, file_uuid, proc_name
removed,
file_uuid,
proc_name
);
}
}