75 lines
2.0 KiB
Rust
75 lines
2.0 KiB
Rust
use super::types::{Chunk, ChunkRule, ChunkType};
|
|
|
|
/// Settings used to build `Chunk` lists, either from a plain time range or
/// from ASR segments.
#[derive(Debug, Clone, PartialEq)]
pub struct ChunkSplitter {
    /// Length of each time-based chunk, in seconds (taken from the
    /// `time_based_duration_seconds` argument of `new`).
    time_based_duration: f64,
    /// Frame rate forwarded to every chunk this splitter builds.
    fps: f64,
}
|
|
|
|
impl ChunkSplitter {
|
|
pub fn new(time_based_duration_seconds: f64) -> Self {
|
|
Self {
|
|
time_based_duration: time_based_duration_seconds,
|
|
fps: 24.0,
|
|
}
|
|
}
|
|
|
|
pub fn split_time_based(&self, uuid: &str, duration: f64) -> Vec<Chunk> {
|
|
let mut chunks = Vec::new();
|
|
let mut index = 0;
|
|
let mut current_time = 0.0;
|
|
|
|
while current_time < duration {
|
|
let end_time = (current_time + self.time_based_duration).min(duration);
|
|
chunks.push(Chunk::from_seconds(
|
|
0, // file_id
|
|
uuid.to_string(),
|
|
index,
|
|
ChunkType::TimeBased,
|
|
ChunkRule::Rule1,
|
|
current_time,
|
|
end_time,
|
|
self.fps,
|
|
serde_json::json!({
|
|
"source": "time_based",
|
|
"duration": self.time_based_duration,
|
|
}),
|
|
));
|
|
current_time = end_time;
|
|
index += 1;
|
|
}
|
|
|
|
chunks
|
|
}
|
|
|
|
pub fn split_sentence(&self, uuid: &str, asr_segments: &[AsrSegment]) -> Vec<Chunk> {
|
|
let mut chunks = Vec::new();
|
|
|
|
for (index, segment) in asr_segments.iter().enumerate() {
|
|
chunks.push(Chunk::from_seconds(
|
|
0, // file_id
|
|
uuid.to_string(),
|
|
index as u32,
|
|
ChunkType::Sentence,
|
|
ChunkRule::Rule1,
|
|
segment.start,
|
|
segment.end,
|
|
self.fps,
|
|
serde_json::json!({
|
|
"text": segment.text,
|
|
"speaker_id": segment.speaker_id,
|
|
}),
|
|
));
|
|
}
|
|
|
|
chunks
|
|
}
|
|
}
|
|
|
|
/// One segment of speech-recognition output, consumed by
/// `ChunkSplitter::split_sentence` to build a sentence chunk.
#[derive(Debug, Clone, PartialEq)]
pub struct AsrSegment {
    /// Segment start; passed as the chunk's start to `Chunk::from_seconds`
    /// (presumably seconds — confirm against the ASR producer).
    pub start: f64,
    /// Segment end; passed as the chunk's end to `Chunk::from_seconds`
    /// (presumably seconds — confirm against the ASR producer).
    pub end: f64,
    /// Recognized text, stored under the `"text"` key of the chunk payload.
    pub text: String,
    /// Optional speaker label, stored under the `"speaker_id"` key of the
    /// chunk payload.
    pub speaker_id: Option<String>,
}
|