feat: Add file scan and async hash system
Features:
1. scan command - Fast import without hash (skip_hash=true)
   - Scans directory structure
   - Generates deterministic UUIDs (SHA256(path|name|mac|mtime))
   - Stores full path in aliases.json
   - Inserts nodes in batches
   - Performance: 14243 nodes/sec (11857 files in 0.89s)
2. hash command - Async hash calculation
   - Multi-threaded (default: 4 threads)
   - Reads paths from aliases.json
   - Updates database with SHA256 hashes
   - Performance: 28 files/sec (11857 files in 417.58s)

Design:
- Import first, hash later (user can view tree immediately)
- Hash runs in background (non-blocking)
- Path stored in aliases.json (temporary solution)
- Deterministic UUIDs (same file = same UUID)

Performance breakdown:
- Scanning: 0.10s (11%)
- ID generation: 0.57s (64%)
- DB insertion: 0.21s (24%)
- Hash: 417.58s (async, background)

Files:
- src/scan.rs (new, 499 lines)
- src/main.rs (scan/hash commands)
- src/lib.rs (scan module)

Test result:
- warren user: 12658 nodes imported
- 11857 hashes calculated successfully
This commit is contained in:
+38
@@ -28,6 +28,33 @@ enum Commands {
|
||||
#[command(subcommand)]
|
||||
action: ConfigCommands,
|
||||
},
|
||||
/// Scan and import files from directory
|
||||
Scan {
|
||||
/// User ID
|
||||
#[arg(short, long)]
|
||||
user: String,
|
||||
/// Directory to scan
|
||||
#[arg(short, long)]
|
||||
dir: String,
|
||||
/// Batch size for database insertion
|
||||
#[arg(short, long, default_value = "100")]
|
||||
batch: usize,
|
||||
/// Skip SHA256 hash calculation (faster import)
|
||||
#[arg(short, long, default_value = "true")]
|
||||
skip_hash: bool,
|
||||
/// Number of threads for hash calculation (if skip_hash=false)
|
||||
#[arg(short, long, default_value = "4")]
|
||||
threads: usize,
|
||||
},
|
||||
/// Compute SHA256 hashes for imported files
|
||||
Hash {
|
||||
/// User ID
|
||||
#[arg(short, long)]
|
||||
user: String,
|
||||
/// Number of threads for parallel hash calculation
|
||||
#[arg(short, long, default_value = "4")]
|
||||
threads: usize,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
@@ -73,6 +100,17 @@ async fn main() -> anyhow::Result<()> {
|
||||
Commands::Config { action } => {
|
||||
handle_config_command(action)?;
|
||||
}
|
||||
Commands::Scan { user, dir, batch, skip_hash, threads } => {
|
||||
use markbase::scan::ScanOptions;
|
||||
let options = ScanOptions {
|
||||
skip_hash,
|
||||
threads,
|
||||
};
|
||||
markbase::scan::scan_directory(&user, &dir, batch, options)?;
|
||||
}
|
||||
Commands::Hash { user, threads } => {
|
||||
markbase::scan::compute_hashes(&user, threads)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user