- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
452 lines
14 KiB
Rust
452 lines
14 KiB
Rust
//! Visual chunk proof-of-concept test.
//!
//! This test validates the data structures and basic functionality of visual chunks.
|
||
|
||
/// Chunk type.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used
/// as a `HashMap` / `HashSet` key; the enum carries no data, so equality is
/// total and hashing is trivial.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChunkType {
    TimeBased,
    Sentence,
    Cut,
    Trace,
    Story,
    Visual, // visual chunk (Phase 2.1)
}
|
||
|
||
impl ChunkType {
|
||
pub fn as_str(&self) -> &'static str {
|
||
match self {
|
||
ChunkType::TimeBased => "time",
|
||
ChunkType::Sentence => "sentence",
|
||
ChunkType::Cut => "cut",
|
||
ChunkType::Trace => "trace",
|
||
ChunkType::Story => "story",
|
||
ChunkType::Visual => "visual",
|
||
}
|
||
}
|
||
}
|
||
|
||
/// A single detected object.
#[derive(Clone, Debug)]
pub struct DetectedObject {
    /// Name of the object's class.
    pub class_name: String,
    /// Numeric ID of the object's class.
    pub class_id: u32,
    /// Confidence score (0.0-1.0).
    pub confidence: f32,
    /// Bounding box as (x, y, width, height), when available.
    pub bbox: Option<(i32, i32, i32, i32)>,
}
|
||
|
||
/// 關鍵幀的物件列表
|
||
#[derive(Debug, Clone)]
|
||
pub struct KeyframeObjects {
|
||
/// 關鍵幀時間 (秒)
|
||
pub timestamp: f64,
|
||
/// 關鍵幀幀號
|
||
pub frame_number: u64,
|
||
/// 檢測到的物件
|
||
pub objects: Vec<DetectedObject>,
|
||
}
|
||
|
||
/// 視覺分片內容
|
||
#[derive(Debug, Clone)]
|
||
pub struct VisualChunkContent {
|
||
pub start_time: f64,
|
||
pub end_time: f64,
|
||
pub keyframe_objects: Vec<KeyframeObjects>,
|
||
pub dominant_objects: Vec<String>,
|
||
pub scene_description: Option<String>,
|
||
pub metadata: VisualMetadata,
|
||
}
|
||
|
||
/// Aggregate statistics describing a visual chunk.
#[derive(Clone, Debug)]
pub struct VisualMetadata {
    /// Total number of detected objects.
    pub object_count: u32,
    /// Distinct class names among the detected objects.
    pub unique_classes: Vec<String>,
    /// Highest confidence among the detected objects.
    pub max_confidence: f32,
    /// Mean confidence of the detected objects.
    pub avg_confidence: f32,
    /// Objects per frame.
    pub spatial_density: f32,
}
|
||
|
||
impl VisualChunkContent {
|
||
/// 獲取視覺分片的摘要
|
||
pub fn summary(&self) -> String {
|
||
let duration = self.end_time - self.start_time;
|
||
let frame_count = self.keyframe_objects.len();
|
||
|
||
format!(
|
||
"視覺分片: {:.1}s 到 {:.1}s (持續時間: {:.1}s, {} 幀). 物件: {} 個總計, {} 個唯一. 主要物件: {}",
|
||
self.start_time,
|
||
self.end_time,
|
||
duration,
|
||
frame_count,
|
||
self.metadata.object_count,
|
||
self.metadata.unique_classes.len(),
|
||
if self.dominant_objects.is_empty() {
|
||
"無".to_string()
|
||
} else {
|
||
self.dominant_objects.join(", ")
|
||
}
|
||
)
|
||
}
|
||
|
||
/// 檢查是否包含特定物件類別
|
||
pub fn contains_object(&self, class_name: &str) -> bool {
|
||
self.keyframe_objects
|
||
.iter()
|
||
.any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
|
||
}
|
||
}
|
||
|
||
/// 模擬 YOLO 結果
|
||
#[derive(Debug, Clone)]
|
||
pub struct MockYoloResult {
|
||
pub frames: Vec<MockYoloFrame>,
|
||
}
|
||
|
||
#[derive(Debug, Clone)]
|
||
pub struct MockYoloFrame {
|
||
pub frame: u64,
|
||
pub timestamp: f64,
|
||
pub objects: Vec<MockYoloObject>,
|
||
}
|
||
|
||
/// One object reported in a mock YOLO frame.
#[derive(Clone, Debug)]
pub struct MockYoloObject {
    /// Name of the object's class.
    pub class_name: String,
    /// Numeric ID of the object's class.
    pub class_id: u32,
    /// Bounding-box x coordinate.
    pub x: i32,
    /// Bounding-box y coordinate.
    pub y: i32,
    /// Bounding-box width.
    pub width: i32,
    /// Bounding-box height.
    pub height: i32,
    /// Confidence score of the detection.
    pub confidence: f32,
}
|
||
|
||
impl MockYoloResult {
|
||
/// 從模擬 YOLO 結果創建視覺分片
|
||
pub fn to_visual_chunk(&self, start_frame: u64, end_frame: u64) -> Option<VisualChunkContent> {
|
||
let frames: Vec<_> = self
|
||
.frames
|
||
.iter()
|
||
.filter(|f| f.frame >= start_frame && f.frame <= end_frame)
|
||
.collect();
|
||
|
||
if frames.is_empty() {
|
||
return None;
|
||
}
|
||
|
||
// 轉換幀為關鍵幀物件
|
||
let keyframe_objects: Vec<KeyframeObjects> = frames
|
||
.iter()
|
||
.map(|frame| {
|
||
let objects: Vec<DetectedObject> = frame
|
||
.objects
|
||
.iter()
|
||
.map(|obj| DetectedObject {
|
||
class_name: obj.class_name.clone(),
|
||
class_id: obj.class_id,
|
||
confidence: obj.confidence,
|
||
bbox: Some((obj.x, obj.y, obj.width, obj.height)),
|
||
})
|
||
.collect();
|
||
KeyframeObjects {
|
||
timestamp: frame.timestamp,
|
||
frame_number: frame.frame,
|
||
objects,
|
||
}
|
||
})
|
||
.collect();
|
||
|
||
// 計算元數據
|
||
let total_objects: u32 = frames.iter().map(|f| f.objects.len() as u32).sum();
|
||
let all_classes: Vec<String> = frames
|
||
.iter()
|
||
.flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
|
||
.collect();
|
||
let unique_classes: Vec<String> = all_classes
|
||
.iter()
|
||
.cloned()
|
||
.collect::<std::collections::HashSet<_>>()
|
||
.into_iter()
|
||
.collect();
|
||
let confidences: Vec<f32> = frames
|
||
.iter()
|
||
.flat_map(|f| f.objects.iter().map(|o| o.confidence))
|
||
.collect();
|
||
let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
|
||
let avg_confidence = if !confidences.is_empty() {
|
||
confidences.iter().sum::<f32>() / confidences.len() as f32
|
||
} else {
|
||
0.0
|
||
};
|
||
|
||
let start_time = frames.first().map(|f| f.timestamp).unwrap_or(0.0);
|
||
let end_time = frames.last().map(|f| f.timestamp).unwrap_or(0.0);
|
||
|
||
// 查找主要物件(出現在大多數幀中的物件)
|
||
let mut object_counts = std::collections::HashMap::new();
|
||
for frame in &frames {
|
||
let frame_classes: std::collections::HashSet<_> =
|
||
frame.objects.iter().map(|o| o.class_name.clone()).collect();
|
||
for class in frame_classes {
|
||
*object_counts.entry(class).or_insert(0) += 1;
|
||
}
|
||
}
|
||
|
||
let mut dominant_objects: Vec<String> = object_counts
|
||
.into_iter()
|
||
.filter(|(_, count)| *count as f32 / frames.len() as f32 > 0.5) // 出現在 >50% 的幀中
|
||
.map(|(class, _)| class)
|
||
.collect();
|
||
dominant_objects.sort();
|
||
|
||
Some(VisualChunkContent {
|
||
start_time,
|
||
end_time,
|
||
keyframe_objects,
|
||
dominant_objects,
|
||
scene_description: None, // 可由 LLM 後期生成
|
||
metadata: VisualMetadata {
|
||
object_count: total_objects,
|
||
unique_classes,
|
||
max_confidence,
|
||
avg_confidence,
|
||
spatial_density: if frames.len() > 0 {
|
||
total_objects as f32 / frames.len() as f32
|
||
} else {
|
||
0.0
|
||
},
|
||
},
|
||
})
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Builds a mock YOLO object; `bbox` is (x, y, width, height).
    fn yolo_object(
        class_name: &str,
        class_id: u32,
        bbox: (i32, i32, i32, i32),
        confidence: f32,
    ) -> MockYoloObject {
        let (x, y, width, height) = bbox;
        MockYoloObject {
            class_name: class_name.to_string(),
            class_id,
            x,
            y,
            width,
            height,
            confidence,
        }
    }

    /// Builds a mock YOLO frame.
    fn yolo_frame(frame: u64, timestamp: f64, objects: Vec<MockYoloObject>) -> MockYoloFrame {
        MockYoloFrame {
            frame,
            timestamp,
            objects,
        }
    }

    /// Builds a detected object with a bounding box of (x, y, width, height).
    fn detected(
        class_name: &str,
        class_id: u32,
        confidence: f32,
        bbox: (i32, i32, i32, i32),
    ) -> DetectedObject {
        DetectedObject {
            class_name: class_name.to_string(),
            class_id,
            confidence,
            bbox: Some(bbox),
        }
    }

    /// Collects the distinct class names of a list of detections.
    fn class_names(objects: &[DetectedObject]) -> HashSet<String> {
        objects.iter().map(|o| o.class_name.clone()).collect()
    }

    #[test]
    fn test_chunk_type_visual() {
        let chunk_type = ChunkType::Visual;
        assert_eq!(chunk_type.as_str(), "visual");
        assert_eq!(chunk_type, ChunkType::Visual);
    }

    #[test]
    fn test_visual_chunk_creation() {
        // Build a mock YOLO result.
        let yolo_result = MockYoloResult {
            frames: vec![
                yolo_frame(
                    0,
                    0.0,
                    vec![
                        yolo_object("person", 0, (100, 200, 50, 100), 0.95),
                        yolo_object("car", 2, (300, 150, 80, 60), 0.87),
                    ],
                ),
                yolo_frame(
                    1,
                    0.033, // 1/30 second
                    vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
                ),
            ],
        };

        // Create a visual chunk from the YOLO result.
        let chunk = yolo_result.to_visual_chunk(0, 1).unwrap();

        // Verify the chunk's properties.
        assert_eq!(chunk.start_time, 0.0);
        assert_eq!(chunk.end_time, 0.033);
        assert_eq!(chunk.metadata.object_count, 3);
        assert_eq!(chunk.metadata.unique_classes.len(), 2);
        assert!(chunk
            .metadata
            .unique_classes
            .contains(&"person".to_string()));
        assert!(chunk.metadata.unique_classes.contains(&"car".to_string()));
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert_eq!(chunk.keyframe_objects.len(), 2);
    }

    #[test]
    fn test_visual_chunk_content_methods() {
        let content = VisualChunkContent {
            start_time: 0.0,
            end_time: 5.0,
            keyframe_objects: vec![KeyframeObjects {
                timestamp: 0.0,
                frame_number: 0,
                objects: vec![
                    detected("person", 0, 0.95, (100, 200, 50, 100)),
                    detected("car", 2, 0.87, (300, 150, 80, 60)),
                ],
            }],
            dominant_objects: vec!["person".to_string()],
            scene_description: Some("一個人站在車旁".to_string()),
            metadata: VisualMetadata {
                object_count: 2,
                unique_classes: vec!["person".to_string(), "car".to_string()],
                max_confidence: 0.95,
                avg_confidence: 0.91,
                spatial_density: 2.0,
            },
        };

        // Exercise the summary method.
        let summary = content.summary();
        assert!(summary.contains("視覺分片"));
        assert!(summary.contains("person"));
        // `summary()` reports times, counts and dominant objects only; it does
        // not include `scene_description`, so assert on the counts it emits
        // rather than on text that only appears in the description.
        assert!(summary.contains("2 個唯一"));

        // Exercise the contains_object method.
        assert!(content.contains_object("person"));
        assert!(content.contains_object("car"));
        assert!(!content.contains_object("dog"));
    }

    #[test]
    fn test_frame_similarity_concept() {
        // Demonstrates the frame-similarity calculation concept.
        let frame1_objects = vec![
            detected("person", 0, 0.95, (100, 200, 50, 100)),
            detected("car", 2, 0.87, (300, 150, 80, 60)),
        ];

        let frame2_objects = vec![
            detected("person", 0, 0.92, (110, 210, 52, 102)),
            detected("car", 2, 0.85, (310, 155, 82, 62)),
        ];

        // Build the class-name sets.
        let set1 = class_names(&frame1_objects);
        let set2 = class_names(&frame2_objects);

        // Compute the intersection and the union.
        let intersection: Vec<_> = set1.intersection(&set2).collect();
        let union: Vec<_> = set1.union(&set2).collect();

        // Verify the similarity.
        assert_eq!(intersection.len(), 2); // person, car
        assert_eq!(union.len(), 2); // person, car
        assert_eq!(intersection.len() as f32 / union.len() as f32, 1.0); // identical
    }

    #[test]
    fn test_dominant_objects_detection() {
        let yolo_result = MockYoloResult {
            frames: vec![
                yolo_frame(
                    0,
                    0.0,
                    vec![yolo_object("person", 0, (100, 200, 50, 100), 0.95)],
                ),
                yolo_frame(
                    1,
                    0.033,
                    vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
                ),
                yolo_frame(
                    2,
                    0.066,
                    vec![yolo_object("car", 2, (300, 150, 80, 60), 0.87)],
                ),
            ],
        };

        let chunk = yolo_result.to_visual_chunk(0, 2).unwrap();

        // person appears in 2/3 frames (67% > 50%); car appears in 1/3 frames (33% < 50%).
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert!(!chunk.dominant_objects.contains(&"car".to_string()));
    }
}
|