feat: Add file scan and async hash system

Features:
1. scan command - Fast import without hash (skip_hash=true)
   - Scans directory structure
   - Generates deterministic UUIDs from SHA256(path|name|mac|mtime)
   - Stores full path in aliases.json
   - Inserts nodes in batches
   - Performance: 14243 nodes/sec (12658 nodes, 11857 of them files, in 0.89s)

2. hash command - Async hash calculation
   - Multi-threaded (default: 4 threads)
   - Reads paths from aliases.json
   - Updates database with SHA256 hashes
   - Performance: 28 files/sec (11857 files in 417.58s)

Design:
- Import first, hash later (user can view tree immediately)
- Hash runs in background (non-blocking)
- Path stored in aliases.json (temporary solution)
- Deterministic UUIDs (same file = same UUID)

Performance breakdown:
- Scanning: 0.10s (11%)
- ID generation: 0.57s (64%)
- DB insertion: 0.21s (24%)
- Hash: 417.58s (async, background; not included in the percentages above)

Files:
- src/scan.rs (new, 499 lines)
- src/main.rs (scan/hash commands)
- src/lib.rs (scan module)

Test result:
- warren user: 12658 nodes imported
- 11857 hashes calculated successfully
This commit is contained in:
Warren
2026-05-17 03:20:35 +08:00
parent e3bf885b6b
commit 05f89ea1ac
7 changed files with 562 additions and 42 deletions

View File

@@ -831,6 +831,11 @@ async fn upload_file(
// Save to database (user-specific SQLite)
let db_path = crate::filetree::FileTree::user_db_path(&user_id);
let file_uuid_clone = file_uuid.clone();
let file_hash_clone = file_hash.clone();
let filename_clone = filename.clone();
let file_path_clone = file_path.clone();
let db_result = tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
let conn = crate::filetree::FileTree::open_user_db(&db_path)?;
@@ -844,8 +849,8 @@ async fn upload_file(
"INSERT INTO file_registry (file_uuid, sha256, file_size, mime_type, registered_at)
VALUES (?1, ?2, ?3, ?4, ?5)",
rusqlite::params![
&file_uuid,
&file_hash,
&file_uuid_clone,
&file_hash_clone,
file_size,
"", // mime_type (optional)
now
@@ -856,20 +861,20 @@ async fn upload_file(
conn.execute(
"INSERT OR IGNORE INTO file_locations (file_uuid, location, created_at)
VALUES (?1, ?2, ?3)",
rusqlite::params![&file_uuid, &file_path, now],
rusqlite::params![&file_uuid_clone, &file_path_clone, now],
)?;
// Create file node
let node_id = format!("node-{}", uuid::Uuid::new_v4().to_string().replace('-', "")[0..8]);
let uuid_str = uuid::Uuid::new_v4().to_string().replace('-', "");
let node_id = format!("node-{}", &uuid_str[0..8]);
conn.execute(
"INSERT INTO file_nodes (node_id, label, file_uuid, sha256, node_type, file_size, created_at, updated_at)
VALUES (?1, ?2, ?3, ?4, 'file', ?5, ?6, ?7)",
rusqlite::params![
&node_id,
&filename,
&file_uuid,
&file_hash,
&filename_clone,
&file_uuid_clone,
&file_hash_clone,
file_size,
now,
now
@@ -880,40 +885,6 @@ async fn upload_file(
})
.await;
// Add to file tree
let sha_clone = file_hash.clone();
let fname_clone = filename.clone();
let fuuid_clone = file_uuid.clone();
let fpath_clone = file_path.clone();
let db_result = tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
let conn = FileTree::open_user_db("demo")?;
let other_id: Option<String> = conn
.query_row(
"SELECT node_id FROM file_nodes WHERE label = 'Other' AND node_type = 'folder' LIMIT 1",
[],
|row| row.get(0),
)
.ok();
let nid = uuid::Uuid::new_v4().to_string();
let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
conn.execute(
"INSERT INTO file_nodes (node_id, label, aliases_json, file_uuid, sha256, node_type, parent_id, file_size, created_at, updated_at) VALUES (?1, ?2, '{}', ?3, ?4, 'file', ?5, ?6, ?7, ?8)",
rusqlite::params![nid, fname_clone, fuuid_clone, sha_clone, other_id, file_size, now, now],
)?;
conn.execute(
"INSERT OR IGNORE INTO file_locations (file_uuid, location, label) VALUES (?1, ?2, 'origin')",
rusqlite::params![fuuid_clone, fpath_clone],
)?;
Ok(())
})
.await;
match db_result {
Ok(Ok(())) => {}
Ok(Err(e)) => {