Files
momentry_core/src/core/chunk/splitter.rs

75 lines
2.0 KiB
Rust

use super::types::{Chunk, ChunkRule, ChunkType};
/// Builds `Chunk` lists for a media item, either by cutting its timeline into
/// fixed-length windows or by mapping transcript segments one-to-one.
#[derive(Debug, Clone)]
pub struct ChunkSplitter {
    /// Length of each time-based chunk, in seconds (set from the
    /// `time_based_duration_seconds` constructor argument).
    time_based_duration: f64,
    /// Frame rate forwarded to `Chunk::from_seconds` for every chunk produced.
    fps: f64,
}
impl ChunkSplitter {
    /// Frame rate used by [`ChunkSplitter::new`].
    const DEFAULT_FPS: f64 = 24.0;

    /// Creates a splitter whose time-based chunks are
    /// `time_based_duration_seconds` long and whose frame rate is 24 fps.
    pub fn new(time_based_duration_seconds: f64) -> Self {
        Self {
            time_based_duration: time_based_duration_seconds,
            fps: Self::DEFAULT_FPS,
        }
    }

    /// Returns the splitter with its frame rate replaced by `fps`.
    ///
    /// The value is not validated here; it is forwarded unchanged to
    /// `Chunk::from_seconds`.
    #[must_use]
    pub fn with_fps(mut self, fps: f64) -> Self {
        self.fps = fps;
        self
    }

    /// Cuts the range `[0, duration)` into consecutive chunks of the
    /// configured length; the last chunk is shortened to end at `duration`.
    ///
    /// Every chunk is created with `file_id` 0, `ChunkType::TimeBased` and
    /// `ChunkRule::Rule1`, and is numbered from 0 in timeline order.
    ///
    /// Degenerate inputs never loop forever:
    /// * a `duration` that is NaN, infinite, zero or negative yields no chunks;
    /// * a configured chunk length that cannot advance the cursor (zero,
    ///   negative, NaN, or too small to change the current position) yields a
    ///   single chunk covering the remainder of the timeline.
    pub fn split_time_based(&self, uuid: &str, duration: f64) -> Vec<Chunk> {
        let mut chunks = Vec::new();

        // The negated form also rejects NaN, for which every comparison is false.
        if !(duration.is_finite() && duration > 0.0) {
            return chunks;
        }

        let step = self.time_based_duration;
        let mut index = 0;
        let mut current_time = 0.0;

        while current_time < duration {
            let candidate = (current_time + step).min(duration);
            // Guarantee forward progress: if the step did not move the cursor,
            // close the timeline with one final chunk instead of spinning.
            let end_time = if candidate > current_time {
                candidate
            } else {
                duration
            };

            chunks.push(Chunk::from_seconds(
                0, // file_id
                uuid.to_string(),
                index,
                ChunkType::TimeBased,
                ChunkRule::Rule1,
                current_time,
                end_time,
                self.fps,
                serde_json::json!({
                    "source": "time_based",
                    "duration": self.time_based_duration,
                }),
            ));

            current_time = end_time;
            index += 1;
        }

        chunks
    }

    /// Produces one chunk per transcript segment, in input order.
    ///
    /// Each chunk spans `segment.start..segment.end`, is numbered by its
    /// position in `asr_segments`, and carries the segment's `text` and
    /// `speaker_id` in its JSON payload. Chunks are created with `file_id` 0,
    /// `ChunkType::Sentence` and `ChunkRule::Rule1`. An empty slice yields an
    /// empty vector.
    pub fn split_sentence(&self, uuid: &str, asr_segments: &[AsrSegment]) -> Vec<Chunk> {
        // Zipping with a `u32` counter avoids a truncating `usize as u32` cast.
        (0u32..)
            .zip(asr_segments)
            .map(|(index, segment)| {
                Chunk::from_seconds(
                    0, // file_id
                    uuid.to_string(),
                    index,
                    ChunkType::Sentence,
                    ChunkRule::Rule1,
                    segment.start,
                    segment.end,
                    self.fps,
                    serde_json::json!({
                        "text": segment.text,
                        "speaker_id": segment.speaker_id,
                    }),
                )
            })
            .collect()
    }
}
/// One timed transcript segment, consumed by `ChunkSplitter::split_sentence`
/// to build a single sentence chunk.
#[derive(Debug, Clone)]
pub struct AsrSegment {
    /// Start of the segment; passed to `Chunk::from_seconds` as the chunk
    /// start (presumably seconds from the beginning of the media — confirm
    /// with the producer of these segments).
    pub start: f64,
    /// End of the segment; passed to `Chunk::from_seconds` as the chunk end
    /// (same unit as `start`).
    pub end: f64,
    /// Transcript text, written under the `"text"` key of the chunk's JSON payload.
    pub text: String,
    /// Optional speaker identifier, written under the `"speaker_id"` key of
    /// the chunk's JSON payload.
    pub speaker_id: Option<String>,
}