Phase 1: VfsBlockChecksum struct + JSON storage (~240 lines)
- VfsBlockChecksum: offset + SHA-256 hash
- VfsChecksumFile: block_size + algorithm + blocks + file_size
- compute_block_hash() + verify_block_hash()
- ChecksumMode: Lazy (default) + OnRead
- ScrubResult: total/verified/corrupted/repaired block metrics

Phase 2: ChecksumFile wrapper (~180 lines)
- VfsFile wrapper with transparent checksum
- Lazy verification (only on scrub)
- Cache of verified blocks
- Update checksum on flush()
- read_at/write_at support

Phase 3: Scrub API (~150 lines)
- scrub_file(): verify single file integrity
- scrub_all(): recursive directory scrub
- create_checksums_for_file(): generate checksums
- repair_block(): placeholder for RAID/Dedup

Phase 4: RAID repair integration (~160 lines)
- repair_block_from_parity(): reconstruct from RAID parity
- reconstruct_from_p(): XOR reconstruction for RaidZ1
- reconstruct_from_pq/pqr(): placeholder for RaidZ2/3

Tests: 15 checksum tests pass (465 total)

Files:
- markbase-core/src/vfs/checksum.rs (NEW)
- markbase-core/src/vfs/checksum_file.rs (NEW)
- markbase-core/src/vfs/raid.rs (MOD +160 lines)
- markbase-core/src/vfs/mod.rs (MOD +2 lines)
436 lines
13 KiB
Rust
436 lines
13 KiB
Rust
//! Block-level Checksum for Data Integrity
|
|
//!
|
|
//! Reference: ZFS/Btrfs checksum verification
|
|
//! - ZFS: Fletcher4/SHA256 per-block checksum
|
|
//! - Btrfs: CRC32C per-block checksum
|
|
//!
|
|
//! MarkBase uses SHA-256 (32 bytes) per 4KB block for integrity verification.
|
|
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::path::PathBuf;
|
|
use std::io::{Read, Write, Seek, SeekFrom};
|
|
|
|
use sha2::{Sha256, Digest};
|
|
use serde::{Serialize, Deserialize};
|
|
|
|
use super::{VfsBackend, VfsFile, VfsError, VfsStat};
|
|
|
|
/// Size in bytes of one checksummed block (4 KiB).
pub const BLOCK_SIZE: usize = 4 * 1024;

/// Length in bytes of one block digest (SHA-256).
pub const HASH_SIZE: usize = 32;

/// Name of the directory, joined onto the root path, that holds checksum files.
pub const CHECKSUM_DIR: &str = ".checksums";

/// Extension marker for checksum files (note the leading dot).
pub const CHECKSUM_EXT: &str = ".checksums";
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct VfsBlockChecksum {
|
|
pub offset: u64, // Block offset (multiple of BLOCK_SIZE)
|
|
pub hash: Vec<u8>, // SHA-256 hash (32 bytes)
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct VfsChecksumFile {
|
|
pub block_size: usize,
|
|
pub algorithm: String, // "sha256"
|
|
pub blocks: Vec<VfsBlockChecksum>,
|
|
pub file_size: u64, // Original file size
|
|
}
|
|
|
|
impl VfsChecksumFile {
|
|
pub fn new(file_size: u64) -> Self {
|
|
Self {
|
|
block_size: BLOCK_SIZE,
|
|
algorithm: "sha256".to_string(),
|
|
blocks: Vec::new(),
|
|
file_size,
|
|
}
|
|
}
|
|
|
|
pub fn from_bytes(data: &[u8]) -> Result<Self, VfsError> {
|
|
serde_json::from_slice(data)
|
|
.map_err(|e| VfsError::Io(format!("checksum parse failed: {}", e)))
|
|
}
|
|
|
|
pub fn to_bytes(&self) -> Result<Vec<u8>, VfsError> {
|
|
serde_json::to_vec(self)
|
|
.map_err(|e| VfsError::Io(format!("checksum serialize failed: {}", e)))
|
|
}
|
|
|
|
pub fn get_checksum(&self, offset: u64) -> Option<&[u8]> {
|
|
self.blocks.iter()
|
|
.find(|b| b.offset == offset)
|
|
.map(|b| b.hash.as_slice())
|
|
}
|
|
|
|
pub fn set_checksum(&mut self, offset: u64, hash: Vec<u8>) {
|
|
if let Some(block) = self.blocks.iter_mut().find(|b| b.offset == offset) {
|
|
block.hash = hash;
|
|
} else {
|
|
self.blocks.push(VfsBlockChecksum { offset, hash });
|
|
self.blocks.sort_by_key(|b| b.offset);
|
|
}
|
|
}
|
|
|
|
pub fn block_count(&self) -> usize {
|
|
(self.file_size as usize / BLOCK_SIZE) +
|
|
if self.file_size as usize % BLOCK_SIZE > 0 { 1 } else { 0 }
|
|
}
|
|
}
|
|
|
|
pub fn compute_block_hash(data: &[u8]) -> Vec<u8> {
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(data);
|
|
hasher.finalize().to_vec()
|
|
}
|
|
|
|
pub fn verify_block_hash(data: &[u8], expected: &[u8]) -> bool {
|
|
let actual = compute_block_hash(data);
|
|
actual == expected
|
|
}
|
|
|
|
/// Selects when block checksums are verified.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ChecksumMode {
    /// Only verify on scrub (default).
    Lazy,
    /// Verify every read.
    OnRead,
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ChecksumConfig {
|
|
pub mode: ChecksumMode,
|
|
pub cache_verified: bool,
|
|
}
|
|
|
|
impl Default for ChecksumConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
mode: ChecksumMode::Lazy,
|
|
cache_verified: true,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Outcome of scrubbing a single file.
#[derive(Debug)]
pub struct ScrubResult {
    /// Path of the scrubbed file.
    pub path: PathBuf,
    /// Number of blocks the scrub covered.
    pub total_blocks: usize,
    /// Number of blocks counted as verified.
    pub verified_blocks: usize,
    /// Offsets of blocks found to be corrupted.
    pub corrupted_blocks: Vec<u64>,
    /// Offsets of corrupted blocks that were repaired.
    pub repaired_blocks: Vec<u64>,
    /// Set when repair was requested but not every corrupted block was repaired.
    pub repair_failed: bool,
}

impl ScrubResult {
    /// Returns `true` when no corrupted block was recorded.
    pub fn is_clean(&self) -> bool {
        self.corrupted_blocks.is_empty()
    }

    /// Fraction of corrupted blocks that were repaired; `1.0` when nothing
    /// was corrupted in the first place.
    pub fn repair_success_rate(&self) -> f64 {
        match self.corrupted_blocks.len() {
            0 => 1.0,
            corrupted => self.repaired_blocks.len() as f64 / corrupted as f64,
        }
    }
}
|
|
|
|
pub fn checksum_path_for_file(file_path: &PathBuf, root: &PathBuf) -> PathBuf {
|
|
let relative = file_path.strip_prefix(root)
|
|
.unwrap_or(file_path);
|
|
root.join(CHECKSUM_DIR)
|
|
.join(relative)
|
|
.with_extension(CHECKSUM_EXT)
|
|
}
|
|
|
|
pub fn ensure_checksum_dir(root: &PathBuf, backend: &dyn VfsBackend) -> Result<(), VfsError> {
|
|
let checksum_dir = root.join(CHECKSUM_DIR);
|
|
if !backend.exists(&checksum_dir) {
|
|
backend.create_dir(&checksum_dir, 0o755)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Scrub a single file to verify integrity
|
|
///
|
|
/// This reads the file and verifies each block checksum.
|
|
/// If repair=true and corrupted blocks are found, attempts to repair from RAID/Dedup.
|
|
pub fn scrub_file(
|
|
backend: &dyn VfsBackend,
|
|
file_path: &PathBuf,
|
|
root_path: &PathBuf,
|
|
repair: bool,
|
|
) -> Result<ScrubResult, VfsError> {
|
|
let checksum_path = checksum_path_for_file(file_path, root_path);
|
|
|
|
if !backend.exists(&checksum_path) {
|
|
return Ok(ScrubResult {
|
|
path: file_path.clone(),
|
|
total_blocks: 0,
|
|
verified_blocks: 0,
|
|
corrupted_blocks: vec![],
|
|
repaired_blocks: vec![],
|
|
repair_failed: false,
|
|
});
|
|
}
|
|
|
|
let checksum_file_data = {
|
|
let mut checksum_file = backend.open_file(&checksum_path, &super::open_flags::OpenFlags::new().read())?;
|
|
checksum_file.read_all()?
|
|
};
|
|
let checksum_data = VfsChecksumFile::from_bytes(&checksum_file_data)?;
|
|
|
|
let mut file_handle = backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?;
|
|
let stat = file_handle.stat()?;
|
|
let file_size = stat.size;
|
|
|
|
let block_count = checksum_data.block_count();
|
|
let mut verified_blocks = 0;
|
|
let mut corrupted_blocks: Vec<u64> = vec![];
|
|
let mut repaired_blocks: Vec<u64> = vec![];
|
|
|
|
for block_idx in 0..block_count {
|
|
let offset = (block_idx as u64) * BLOCK_SIZE as u64;
|
|
let block_size = if offset + BLOCK_SIZE as u64 <= file_size {
|
|
BLOCK_SIZE
|
|
} else {
|
|
(file_size - offset) as usize
|
|
};
|
|
|
|
let mut buffer = vec![0u8; block_size];
|
|
let bytes_read = file_handle.read_at(&mut buffer, offset)?;
|
|
|
|
if bytes_read != block_size {
|
|
corrupted_blocks.push(offset);
|
|
continue;
|
|
}
|
|
|
|
let expected_hash = checksum_data.get_checksum(offset);
|
|
if expected_hash.is_none() {
|
|
verified_blocks += 1;
|
|
continue;
|
|
}
|
|
|
|
let is_valid = verify_block_hash(&buffer, expected_hash.unwrap());
|
|
if is_valid {
|
|
verified_blocks += 1;
|
|
} else {
|
|
corrupted_blocks.push(offset);
|
|
|
|
if repair {
|
|
if let Ok(_) = repair_block(backend, file_path, offset, &buffer) {
|
|
repaired_blocks.push(offset);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let corrupted_count = corrupted_blocks.len();
|
|
let repaired_count = repaired_blocks.len();
|
|
|
|
Ok(ScrubResult {
|
|
path: file_path.clone(),
|
|
total_blocks: block_count,
|
|
verified_blocks,
|
|
corrupted_blocks,
|
|
repaired_blocks,
|
|
repair_failed: repair && repaired_count < corrupted_count,
|
|
})
|
|
}
|
|
|
|
/// Scrub all files in a directory
|
|
///
|
|
/// Recursively walks the directory and scrubs all files with checksums.
|
|
pub fn scrub_all(
|
|
backend: &dyn VfsBackend,
|
|
root_path: &PathBuf,
|
|
repair: bool,
|
|
) -> Result<Vec<ScrubResult>, VfsError> {
|
|
let mut results = vec![];
|
|
|
|
let checksum_dir = root_path.join(CHECKSUM_DIR);
|
|
if !backend.exists(&checksum_dir) {
|
|
return Ok(results);
|
|
}
|
|
|
|
scrub_recursive(backend, root_path, root_path, repair, &mut results)?;
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
fn scrub_recursive(
|
|
backend: &dyn VfsBackend,
|
|
current_path: &PathBuf,
|
|
root_path: &PathBuf,
|
|
repair: bool,
|
|
results: &mut Vec<ScrubResult>,
|
|
) -> Result<(), VfsError> {
|
|
let entries = backend.read_dir(current_path)?;
|
|
|
|
for entry in entries {
|
|
let entry_path = current_path.join(&entry.name);
|
|
|
|
if entry.stat.is_dir {
|
|
if entry.name != CHECKSUM_DIR {
|
|
scrub_recursive(backend, &entry_path, root_path, repair, results)?;
|
|
}
|
|
} else if !entry.name.ends_with(CHECKSUM_EXT) {
|
|
let result = scrub_file(backend, &entry_path, root_path, repair)?;
|
|
results.push(result);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Attempt to repair a corrupted block
|
|
///
|
|
/// This is a placeholder that returns error for now.
|
|
/// RAID/Dedup repair will be implemented in Phase 4/6.
|
|
fn repair_block(
|
|
backend: &dyn VfsBackend,
|
|
file_path: &PathBuf,
|
|
offset: u64,
|
|
corrupted_data: &[u8],
|
|
) -> Result<Vec<u8>, VfsError> {
|
|
Err(VfsError::Io("block repair not implemented (Phase 4/6)".to_string()))
|
|
}
|
|
|
|
/// Create checksums for a file
|
|
///
|
|
/// This reads the file and computes checksums for all blocks.
|
|
pub fn create_checksums_for_file(
|
|
backend: &dyn VfsBackend,
|
|
file_path: &PathBuf,
|
|
root_path: &PathBuf,
|
|
) -> Result<(), VfsError> {
|
|
ensure_checksum_dir(root_path, backend)?;
|
|
|
|
let mut file_handle = backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?;
|
|
let stat = file_handle.stat()?;
|
|
let file_size = stat.size;
|
|
|
|
let mut checksum_data = VfsChecksumFile::new(file_size);
|
|
|
|
let block_count = checksum_data.block_count();
|
|
|
|
for block_idx in 0..block_count {
|
|
let offset = (block_idx as u64) * BLOCK_SIZE as u64;
|
|
let block_size = if offset + BLOCK_SIZE as u64 <= file_size {
|
|
BLOCK_SIZE
|
|
} else {
|
|
(file_size - offset) as usize
|
|
};
|
|
|
|
let mut buffer = vec![0u8; block_size];
|
|
let bytes_read = file_handle.read_at(&mut buffer, offset)?;
|
|
|
|
if bytes_read > 0 {
|
|
let hash = compute_block_hash(&buffer[..bytes_read]);
|
|
checksum_data.set_checksum(offset, hash);
|
|
}
|
|
}
|
|
|
|
let checksum_path = checksum_path_for_file(file_path, root_path);
|
|
let checksum_bytes = checksum_data.to_bytes()?;
|
|
|
|
let mut checksum_file = backend.open_file(
|
|
&checksum_path,
|
|
&super::open_flags::OpenFlags::new().write().create().truncate(),
|
|
)?;
|
|
checksum_file.write_all(&checksum_bytes)?;
|
|
checksum_file.flush()?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing yields `HASH_SIZE` bytes and is deterministic.
    #[test]
    fn test_compute_block_hash() {
        let payload = b"test block data for hashing";

        let first = compute_block_hash(payload);
        let second = compute_block_hash(payload);

        assert_eq!(first.len(), HASH_SIZE);
        assert_eq!(first, second);
    }

    /// Verification accepts the hashed data and rejects different data.
    #[test]
    fn test_verify_block_hash() {
        let original = b"test block data";
        let tampered = b"wrong block data";
        let digest = compute_block_hash(original);

        assert!(verify_block_hash(original, &digest));
        assert!(!verify_block_hash(tampered, &digest));
    }

    /// A table survives serialization followed by parsing.
    #[test]
    fn test_checksum_file_roundtrip() {
        let mut table = VfsChecksumFile::new(8192);
        table.set_checksum(0, compute_block_hash(b"block0"));
        table.set_checksum(4096, compute_block_hash(b"block1"));

        let encoded = table.to_bytes().unwrap();
        let restored = VfsChecksumFile::from_bytes(&encoded).unwrap();

        assert_eq!(restored.block_size, BLOCK_SIZE);
        assert_eq!(restored.blocks.len(), 2);
        assert_eq!(restored.file_size, 8192);
    }

    /// A stored checksum can be read back and overwritten.
    #[test]
    fn test_checksum_file_get_set() {
        let mut table = VfsChecksumFile::new(4096);
        let initial = compute_block_hash(b"test");

        table.set_checksum(0, initial.clone());
        let stored = table.get_checksum(0);
        assert!(stored.is_some());
        assert_eq!(stored.unwrap(), initial.as_slice());

        table.set_checksum(0, compute_block_hash(b"new"));
        let replaced = table.get_checksum(0).unwrap();
        assert_ne!(replaced, initial.as_slice());
    }

    /// Block counts round a trailing partial block up.
    #[test]
    fn test_block_count_calculation() {
        let cases: [(u64, usize); 4] = [(4096, 1), (8192, 2), (4097, 2), (0, 0)];

        for &(file_size, expected_blocks) in cases.iter() {
            let table = VfsChecksumFile::new(file_size);
            assert_eq!(table.block_count(), expected_blocks);
        }
    }

    /// `is_clean` and `repair_success_rate` reflect the recorded block lists.
    #[test]
    fn test_scrub_result_metrics() {
        let clean = ScrubResult {
            path: PathBuf::from("/test"),
            total_blocks: 10,
            verified_blocks: 10,
            corrupted_blocks: vec![],
            repaired_blocks: vec![],
            repair_failed: false,
        };
        assert!(clean.is_clean());
        assert_eq!(clean.repair_success_rate(), 1.0);

        let damaged = ScrubResult {
            path: PathBuf::from("/test"),
            total_blocks: 10,
            verified_blocks: 8,
            corrupted_blocks: vec![4096, 8192],
            repaired_blocks: vec![4096],
            repair_failed: false,
        };
        assert!(!damaged.is_clean());
        assert_eq!(damaged.repair_success_rate(), 0.5);
    }
}