Files
markbase/markbase-core/src/import_markdown.rs
Warren d94cb2df4c Fix code quality: trailing whitespace, unused imports, clippy warnings
- Fix trailing whitespace in kex.rs and s3.rs
- Add missing KexProposal import in kex_complete.rs
- Auto-fix clippy warnings across all crates
- All 153 tests pass
2026-06-19 05:21:38 +08:00

477 lines
16 KiB
Rust

use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
/// A single downloadable file entry extracted from a download-links Markdown document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownFile {
/// File name taken from the bold part of a `**<name>** (<size>)` entry line.
pub filename: String,
/// Size text taken from the parentheses of the entry line (kept as text, not parsed).
/// The parsers in this file always set it; importers fall back to `"Unknown"` when `None`.
pub size: Option<String>,
/// Download URL from the line following the entry, with surrounding backticks removed.
pub download_url: String,
}
/// One `## <product>` section of a by-category Markdown document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorySection {
/// Text of the `## ` heading that opened the section.
pub product: String,
/// Files listed under that heading, in document order.
pub files: Vec<MarkdownFile>,
}
/// One `## <category>` section of a by-series Markdown document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeriesSection {
/// Text of the `## ` heading that opened the section.
pub category: String,
/// Files listed under that heading, in document order.
pub files: Vec<MarkdownFile>,
}
/// Parsed form of a by-category Markdown document (see `parse_category_markdown`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoryMarkdown {
/// Category name read from the `**Category**:` line; empty if no such line was found.
pub category: String,
/// Product sections that contained at least one file.
pub sections: Vec<CategorySection>,
}
/// Parsed form of a by-series Markdown document (see `parse_series_markdown`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeriesMarkdown {
/// Series name read from the `# <series> Download Links` title; empty if no title was found.
pub series: String,
/// Category sections that contained at least one file.
pub sections: Vec<SeriesSection>,
}
/// Parses a by-category download-links Markdown document.
///
/// The parser is line-oriented; every line is trimmed before it is matched:
///
/// * a line containing `**Category**:` sets the category name;
/// * a line starting with `## ` opens a new product section;
/// * a line of the form `**<filename>** (<size>)` records a pending file entry;
/// * a line containing the download endpoint URL completes the pending entry
///   (surrounding backticks are stripped from the URL). URL lines without a
///   pending entry are ignored.
///
/// A section is emitted only when it has a non-empty product name and at least
/// one completed file.
///
/// The size is taken from the *last* parenthesised group on the entry line, so
/// file names that themselves contain parentheses (e.g. `Driver (x64).zip`) are
/// kept intact.
///
/// # Errors
///
/// The current implementation never returns `Err`; the `Result` return type is
/// kept so callers can keep using `?`.
pub fn parse_category_markdown(content: &str) -> Result<CategoryMarkdown> {
    const DOWNLOAD_URL_MARKER: &str = "https://download.accusys.ddns.net/api/v2/download";

    let mut category = String::new();
    let mut sections: Vec<CategorySection> = Vec::new();
    let mut current_product = String::new();
    let mut current_files: Vec<MarkdownFile> = Vec::new();
    let mut pending_file: Option<(String, String)> = None;

    for line in content.lines().map(str::trim) {
        if line.contains("**Category**:") {
            category = line
                .replace("**Category**:", "")
                .replace("**", "")
                .trim()
                .to_string();
        } else if let Some(heading) = line.strip_prefix("## ") {
            // Flush the section that just ended before starting the next one.
            if !current_product.is_empty() && !current_files.is_empty() {
                sections.push(CategorySection {
                    product: std::mem::take(&mut current_product),
                    files: std::mem::take(&mut current_files),
                });
            }
            // Only the leading marker is removed; any later "## " stays in the name.
            current_product = heading.trim().to_string();
        } else if line.starts_with("**") && line.contains("** (") {
            let clean = line.replace("**", "");
            // Split at the LAST '(' so parentheses inside the file name survive.
            if let Some((name, size)) = clean.rsplit_once('(') {
                pending_file = Some((
                    name.trim().to_string(),
                    size.trim_end_matches(')').trim().to_string(),
                ));
            }
        } else if line.contains(DOWNLOAD_URL_MARKER) {
            // `take` consumes the pending entry so one entry pairs with one URL.
            if let Some((filename, size)) = pending_file.take() {
                current_files.push(MarkdownFile {
                    filename,
                    size: Some(size),
                    download_url: line
                        .trim_start_matches('`')
                        .trim_end_matches('`')
                        .trim()
                        .to_string(),
                });
            }
        }
    }

    // The last section has no following heading to flush it.
    if !current_product.is_empty() && !current_files.is_empty() {
        sections.push(CategorySection {
            product: current_product,
            files: current_files,
        });
    }

    Ok(CategoryMarkdown { category, sections })
}
/// Parses a by-series download-links Markdown document.
///
/// The parser is line-oriented; every line is trimmed before it is matched:
///
/// * a `# <series> Download Links` title sets the series name;
/// * a line starting with `## ` opens a new category section;
/// * a line starting with `**` that contains `(` is read as `**<filename>** (<size>)`
///   and recorded as a pending file entry;
/// * a line containing the download endpoint URL completes the pending entry
///   (surrounding backticks are stripped from the URL). URL lines without a
///   pending entry are ignored.
///
/// A section is emitted only when it has a non-empty category name and at least
/// one completed file.
///
/// The size is taken from the *last* parenthesised group on the entry line, so
/// file names that themselves contain parentheses are kept intact. Only the
/// leading `# ` / `## ` marker is removed from titles and headings, so names
/// such as `C# SDK` are preserved.
///
/// # Errors
///
/// The current implementation never returns `Err`; the `Result` return type is
/// kept so callers can keep using `?`.
pub fn parse_series_markdown(content: &str) -> Result<SeriesMarkdown> {
    const DOWNLOAD_URL_MARKER: &str = "https://download.accusys.ddns.net/api/v2/download";

    let mut series = String::new();
    let mut sections: Vec<SeriesSection> = Vec::new();
    let mut current_category = String::new();
    let mut current_files: Vec<MarkdownFile> = Vec::new();
    let mut pending_file: Option<(String, String)> = None;

    for line in content.lines().map(str::trim) {
        let title = line
            .strip_prefix("# ")
            .filter(|rest| rest.contains("Download Links"));

        if let Some(title) = title {
            series = title.replace(" Download Links", "").trim().to_string();
        } else if let Some(heading) = line.strip_prefix("## ") {
            // Flush the section that just ended before starting the next one.
            if !current_category.is_empty() && !current_files.is_empty() {
                sections.push(SeriesSection {
                    category: std::mem::take(&mut current_category),
                    files: std::mem::take(&mut current_files),
                });
            }
            current_category = heading.trim().to_string();
        } else if line.starts_with("**") && line.contains('(') {
            let clean = line.replace("**", "");
            // Split at the LAST '(' so parentheses inside the file name survive.
            if let Some((name, size)) = clean.rsplit_once('(') {
                pending_file = Some((
                    name.trim().to_string(),
                    size.trim_end_matches(')').trim().to_string(),
                ));
            }
        } else if line.contains(DOWNLOAD_URL_MARKER) {
            // `take` consumes the pending entry so one entry pairs with one URL.
            if let Some((filename, size)) = pending_file.take() {
                current_files.push(MarkdownFile {
                    filename,
                    size: Some(size),
                    download_url: line
                        .trim_start_matches('`')
                        .trim_end_matches('`')
                        .trim()
                        .to_string(),
                });
            }
        }
    }

    // The last section has no following heading to flush it.
    if !current_category.is_empty() && !current_files.is_empty() {
        sections.push(SeriesSection {
            category: current_category,
            files: current_files,
        });
    }

    Ok(SeriesMarkdown { series, sections })
}
/// Reads every `*.md` file directly inside `dir`, except `README.md`.
///
/// Returns `(file name, file contents)` pairs in the order the directory
/// iterator yields them. Sub-directories are not descended into.
///
/// # Errors
///
/// Fails if the directory cannot be listed, an entry cannot be read, or a
/// selected file cannot be read as UTF-8 text.
pub fn read_category_files(dir: &Path) -> Result<Vec<(String, String)>> {
    let readme = std::ffi::OsStr::new("README.md");
    let mut collected = Vec::new();

    for dir_entry in fs::read_dir(dir)? {
        let md_path = dir_entry?.path();
        let is_markdown = md_path.extension().is_some_and(|ext| ext == "md");

        match md_path.file_name() {
            Some(name) if is_markdown && name != readme => {
                let name = name.to_string_lossy().into_owned();
                let body = fs::read_to_string(&md_path)?;
                collected.push((name, body));
            }
            // Not a Markdown file, or the README: nothing to collect.
            _ => {}
        }
    }

    Ok(collected)
}
/// Reads every `*.md` file directly inside `dir`, except `README.md`.
///
/// Returns `(file name, file contents)` pairs in the order the directory
/// iterator yields them. Sub-directories are not descended into.
///
/// # Errors
///
/// Fails if the directory cannot be listed, an entry cannot be read, or a
/// selected file cannot be read as UTF-8 text.
pub fn read_series_files(dir: &Path) -> Result<Vec<(String, String)>> {
    let mut series_docs = Vec::new();

    for item in fs::read_dir(dir)? {
        let candidate = item?.path();

        // A path with an extension always has a file name, so this only
        // skips entries that could never have matched anyway.
        let Some(file_name) = candidate.file_name() else {
            continue;
        };
        let has_md_ext = candidate.extension().is_some_and(|ext| ext == "md");
        if !has_md_ext || file_name == "README.md" {
            continue;
        }

        let text = fs::read_to_string(&candidate)?;
        series_docs.push((file_name.to_string_lossy().to_string(), text));
    }

    Ok(series_docs)
}
pub fn import_categories_to_db(
conn: &rusqlite::Connection,
user_id: &str,
tree_type: &str,
) -> Result<()> {
use crate::FileTree;
use filetree::node::{Aliases, FileNode, NodeType};
use std::collections::HashMap;
use uuid::Uuid;
let category_dir = Path::new("/Users/accusys/markbase/data/downloads/by_category");
let files = read_category_files(category_dir)?;
println!("Found {} Markdown files", files.len());
let mut tree = FileTree::load(conn, user_id, tree_type)?;
for (_filename, content) in files {
let parsed = parse_category_markdown(&content)?;
println!(
"Parsed category: '{}', sections: {}",
parsed.category,
parsed.sections.len()
);
if parsed.category.is_empty() {
println!("Warning: category is empty, skipping");
continue;
}
let category_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("category_type".to_string(), "category".to_string());
let category_node = FileNode {
node_id: category_node_id.clone(),
label: parsed.category.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: None,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
println!(
"Inserting category node: {} (id: {})",
category_node.label, category_node_id
);
tree.insert_node(conn, &category_node)?;
println!("Category node inserted successfully");
for section in parsed.sections {
println!(
"Processing section: {} with {} files",
section.product,
section.files.len()
);
let product_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("product".to_string(), section.product.clone());
let product_node = FileNode {
node_id: product_node_id.clone(),
label: section.product.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: Some(category_node_id.clone()),
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
tree.insert_node(conn, &product_node)?;
for file in section.files {
let file_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("download_url".to_string(), file.download_url.clone());
aliases_map.insert(
"file_size_display".to_string(),
file.size.clone().unwrap_or_else(|| "Unknown".to_string()),
);
let file_node = FileNode {
node_id: file_node_id.clone(),
label: file.filename.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: Some(product_node_id.clone()),
children: Vec::new(),
node_type: NodeType::File,
icon: Some("📄".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
tree.insert_node(conn, &file_node)?;
}
}
}
Ok(())
}
pub fn import_series_to_db(
conn: &rusqlite::Connection,
user_id: &str,
tree_type: &str,
) -> Result<()> {
use crate::FileTree;
use filetree::node::{Aliases, FileNode, NodeType};
use std::collections::HashMap;
use uuid::Uuid;
let series_dir = Path::new("/Users/accusys/markbase/data/downloads/by_series");
let files = read_series_files(series_dir)?;
println!("Found {} Markdown files for series", files.len());
let mut tree = FileTree::load(conn, user_id, tree_type)?;
for (_filename, content) in files {
let parsed = parse_series_markdown(&content)?;
println!(
"Parsed series: '{}', sections: {}",
parsed.series,
parsed.sections.len()
);
if parsed.series.is_empty() {
println!("Warning: series is empty, skipping");
continue;
}
let series_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("series_type".to_string(), "series".to_string());
let series_node = FileNode {
node_id: series_node_id.clone(),
label: parsed.series.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: None,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
tree.insert_node(conn, &series_node)?;
println!("Series node inserted successfully");
for section in parsed.sections {
println!(
"Processing section: {} with {} files",
section.category,
section.files.len()
);
let category_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("category".to_string(), section.category.clone());
let category_node = FileNode {
node_id: category_node_id.clone(),
label: section.category.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: Some(series_node_id.clone()),
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
tree.insert_node(conn, &category_node)?;
for file in section.files {
let file_node_id = Uuid::new_v4().to_string();
let mut aliases_map = HashMap::new();
aliases_map.insert("download_url".to_string(), file.download_url.clone());
aliases_map.insert(
"file_size_display".to_string(),
file.size.clone().unwrap_or_else(|| "Unknown".to_string()),
);
let file_node = FileNode {
node_id: file_node_id.clone(),
label: file.filename.clone(),
aliases: Aliases { map: aliases_map },
file_uuid: None,
sha256: None,
parent_id: Some(category_node_id.clone()),
children: Vec::new(),
node_type: NodeType::File,
icon: Some("📄".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
sort_order: 0,
};
tree.insert_node(conn, &file_node)?;
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The category parser must extract the category name AND the product
    /// section with its file entry (name, size, URL without backticks).
    #[test]
    fn test_parse_category_markdown() {
        let content = r#"# GUI Download Links
**Category**: GUI
---
## ExaSAN-DAS
**C2M-QIG20170906.zip** (353.7KB)
```https://download.accusys.ddns.net/api/v2/download/products/ExaSAN-DAS/C1M_C2M/User%20Guide/C2M-QIG20170906.zip
```
"#;
        let result = parse_category_markdown(content).unwrap();
        assert_eq!(result.category, "GUI");

        assert_eq!(result.sections.len(), 1);
        let section = &result.sections[0];
        assert_eq!(section.product, "ExaSAN-DAS");

        assert_eq!(section.files.len(), 1);
        let file = &section.files[0];
        assert_eq!(file.filename, "C2M-QIG20170906.zip");
        assert_eq!(file.size.as_deref(), Some("353.7KB"));
        assert_eq!(
            file.download_url,
            "https://download.accusys.ddns.net/api/v2/download/products/ExaSAN-DAS/C1M_C2M/User%20Guide/C2M-QIG20170906.zip"
        );
    }

    /// The series parser must extract the series name from the title and the
    /// category section with its file entry.
    #[test]
    fn test_parse_series_markdown() {
        let content = r#"# ExaSAN-DAS Download Links
## GUI
**C2M-QIG20170906.zip** (353.7KB)
`https://download.accusys.ddns.net/api/v2/download/products/ExaSAN-DAS/C1M_C2M/User%20Guide/C2M-QIG20170906.zip`
"#;
        let result = parse_series_markdown(content).unwrap();
        assert_eq!(result.series, "ExaSAN-DAS");

        assert_eq!(result.sections.len(), 1);
        let section = &result.sections[0];
        assert_eq!(section.category, "GUI");

        assert_eq!(section.files.len(), 1);
        let file = &section.files[0];
        assert_eq!(file.filename, "C2M-QIG20170906.zip");
        assert_eq!(file.size.as_deref(), Some("353.7KB"));
        assert_eq!(
            file.download_url,
            "https://download.accusys.ddns.net/api/v2/download/products/ExaSAN-DAS/C1M_C2M/User%20Guide/C2M-QIG20170906.zip"
        );
    }

    /// Headings without any completed file entry must not produce sections.
    #[test]
    fn test_sections_without_files_are_dropped() {
        let content = "**Category**: Docs\n## Empty product\n";
        let result = parse_category_markdown(content).unwrap();
        assert_eq!(result.category, "Docs");
        assert!(result.sections.is_empty());
    }
}