#!/usr/bin/env python3
"""
Architecture documentation consistency checker.

Features:
1. Check that links between architecture documents are valid
2. Check terminology consistency
3. Check that design terms carry a design-vs-implementation note
4. Generate a documentation quality report

Usage:
    python3 scripts/check_architecture_docs.py [--report FILE] [--verbose] [--check-only]
"""

import argparse
import glob
import json
import re
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set
from urllib.parse import unquote

# Configuration
ARCHITECTURE_DIR = Path(__file__).parent.parent / "docs_v1.0" / "ARCHITECTURE"
DOC_EXTENSIONS = [".md"]
IGNORE_FILES = ["README.md", "index.md"]

# Terminology patterns, grouped by canonical term.
# NOTE: the character class is ``[_\s]`` (underscore or whitespace).  In a raw
# string ``[_\\s]`` would instead match a literal backslash or the letter "s".
TERMINOLOGY_PATTERNS = {
    "chunk_type": [
        r"chunk[_\s]?type",
        r"分片類型",
        r"ChunkType",
    ],
    "sentence": [
        r"sentence",
        r"句子",
        r"Rule 1",
    ],
    "visual": [
        r"visual",
        r"視覺",
        r"Rule 2",
    ],
    "scene": [
        r"scene",
        r"場景",
        r"Rule 3",
    ],
    "summary": [
        r"summary",
        r"摘要",
        r"Rule 4",
    ],
    "time_based": [
        r"time[_\s]?based",
        r"時間基準",
        r"TimeBased",
    ],
    "cut": [
        r"cut",
        r"CUT",
        r"場景分割",
    ],
    "trace": [
        r"trace",
        r"軌跡",
        r"Trace",
    ],
    "story": [
        r"story",
        r"故事",
        r"Story",
    ],
}

# Inline Markdown link: [text](destination)
_LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Any URL scheme ("http:", "mailto:", "ftp:", ...).  At least two characters are
# required before the colon so that Windows drive letters ("C:") do not match.
_URL_SCHEME = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]+:")

# Terms used by the design documents and by the implementation respectively.
_DESIGN_TERMS = ("visual", "scene", "summary")
_IMPL_TERMS = ("TimeBased", "Cut", "Trace", "Story")


# eq=False keeps identity-based equality/hashing, as a plain class would have.
@dataclass(eq=False)
class DocumentIssue:
    """A single problem found in a document."""

    file_path: Path
    line_number: int
    issue_type: str  # "broken_link", "terminology", "format", "consistency"
    description: str
    severity: str  # "error", "warning", "info"
    suggested_fix: Optional[str] = None


@dataclass(eq=False)
class DocumentStats:
    """Per-document counters plus the list of issues found in that document."""

    file_path: Path
    total_lines: int = 0
    total_links: int = 0
    broken_links: int = 0
    terminology_issues: int = 0
    format_issues: int = 0
    consistency_issues: int = 0
    issues: List[DocumentIssue] = field(default_factory=list)


class ArchitectureDocChecker:
    """Runs link, terminology, format and consistency checks over a docs tree."""

    def __init__(self, architecture_dir: Path):
        self.architecture_dir = architecture_dir
        self.all_md_files: List[Path] = []
        self.file_contents: Dict[Path, List[str]] = {}
        self.document_stats: Dict[Path, DocumentStats] = {}

    def load_all_documents(self) -> None:
        """Discover and read every document under ``architecture_dir``.

        Files whose name is in ``IGNORE_FILES`` are skipped.  Unreadable files
        are reported on stdout and stay in ``all_md_files`` (so they still
        count as link targets) but get no content or stats entry.
        """
        print(f"📁 掃描架構文檔目錄: {self.architecture_dir}")

        # Start from a clean slate so a second call does not duplicate entries.
        self.all_md_files = []
        self.file_contents = {}
        self.document_stats = {}

        discovered: Set[Path] = set()
        for ext in DOC_EXTENSIONS:
            pattern = self.architecture_dir / "**" / f"*{ext}"
            for match in glob.glob(str(pattern), recursive=True):
                candidate = Path(match)
                if candidate.name in IGNORE_FILES or not candidate.is_file():
                    continue
                discovered.add(candidate)
        # Sorted for deterministic report ordering (glob order is unspecified).
        self.all_md_files = sorted(discovered)

        for file_path in self.all_md_files:
            try:
                # utf-8-sig transparently drops a leading BOM, which would
                # otherwise defeat the "first line starts with '# '" check.
                with open(file_path, "r", encoding="utf-8-sig") as f:
                    content = f.readlines()
            except (OSError, UnicodeDecodeError) as e:
                print(f"❌ 無法讀取文件 {file_path}: {e}")
                continue
            self.file_contents[file_path] = content
            stats = DocumentStats(file_path=file_path)
            stats.total_lines = len(content)
            self.document_stats[file_path] = stats

        print(f"✅ 加載了 {len(self.file_contents)} 個文檔文件")

    def check_links(self) -> None:
        """Validate every inline Markdown link in the loaded documents."""
        print("\n🔗 檢查文檔鏈接...")

        # Known documents, as POSIX paths relative to the architecture dir.
        # A lower-cased variant is stored as well for lenient matching.
        available_files: Set[str] = set()
        for file_path in self.all_md_files:
            rel = file_path.relative_to(self.architecture_dir).as_posix()
            available_files.add(rel)
            available_files.add(rel.lower())

        for file_path, content_lines in self.file_contents.items():
            stats = self.document_stats[file_path]
            for line_num, line in enumerate(content_lines, 1):
                matches = _LINK_PATTERN.findall(line)
                stats.total_links += len(matches)
                for link_text, link_url in matches:
                    issue = self._check_single_link(
                        file_path, line_num, link_text, link_url, available_files
                    )
                    if issue:
                        stats.issues.append(issue)
                        stats.broken_links += 1

    def _check_single_link(
        self,
        file_path: Path,
        line_num: int,
        link_text: str,
        link_url: str,
        available_files: Set[str],
    ) -> Optional[DocumentIssue]:
        """Check one link and return an issue if its target cannot be found.

        Args:
            file_path: Document containing the link.
            line_num: 1-based line number of the link.
            link_text: Link label (unused; kept for interface stability).
            link_url: Raw text between the parentheses of the link.
            available_files: Architecture-dir-relative paths of known documents.

        Returns:
            A ``broken_link`` issue, or ``None`` when the link is external,
            points inside the same document, starts with ``/`` (not checked),
            or resolves to an existing target.
        """
        destination = link_url.strip()
        if not destination:
            return None
        # Drop an optional link title: [t](path "title") or [t](<path> "title").
        if destination.startswith("<") and ">" in destination:
            destination = destination[1 : destination.index(">")]
        else:
            destination = destination.split(None, 1)[0]

        # External links and pure in-page anchors are not validated.
        if destination.startswith("#") or _URL_SCHEME.match(destination):
            return None

        # Strip anchor and query string, then decode %xx escapes.
        clean_url = unquote(destination.split("#")[0].split("?")[0])
        if not clean_url or clean_url.startswith("/"):
            return None

        # Relative destinations are resolved against the linking file's
        # directory, with or without a leading "./".
        candidate = file_path.parent / clean_url
        if candidate.exists():
            return None

        resolved = candidate.resolve()
        try:
            # Resolve both sides so symlinked directories compare equal.
            rel_path: Optional[Path] = resolved.relative_to(
                self.architecture_dir.resolve()
            )
        except ValueError:
            rel_path = None  # target lies outside the architecture dir

        if rel_path is not None and rel_path.as_posix() in available_files:
            return None
        # Legacy convention: a bare path given relative to the architecture root.
        if not clean_url.startswith("./") and clean_url in available_files:
            return None

        if rel_path is not None:
            description = f"鏈接目標不存在: {link_url} (解析為: {rel_path})"
            suggested_fix = f"檢查文件是否存在: {resolved}"
        else:
            description = f"鏈接目標不存在: {link_url}"
            suggested_fix = f"創建文件或修正鏈接: {resolved}"
        return DocumentIssue(
            file_path=file_path,
            line_number=line_num,
            issue_type="broken_link",
            description=description,
            severity="error",
            suggested_fix=suggested_fix,
        )

    def check_terminology(self) -> None:
        """Flag design terms that lack a nearby implementation-term mention.

        A line containing a design term (case-insensitive substring match) is
        flagged unless an implementation term appears within two lines before
        or after it.  ``DESIGN_IMPLEMENTATION_GAP.md`` is exempt.
        """
        print("\n📝 檢查術語一致性...")

        for file_path, content_lines in self.file_contents.items():
            if file_path.name == "DESIGN_IMPLEMENTATION_GAP.md":
                continue
            stats = self.document_stats[file_path]
            for line_num, line in enumerate(content_lines, 1):
                lowered = line.lower()
                if not any(term in lowered for term in _DESIGN_TERMS):
                    continue
                # line_num is 1-based: this window is two lines either side.
                context_start = max(0, line_num - 3)
                context_end = min(len(content_lines), line_num + 2)
                context_text = "".join(content_lines[context_start:context_end])
                if any(term in context_text for term in _IMPL_TERMS):
                    continue
                stats.terminology_issues += 1
                stats.issues.append(
                    DocumentIssue(
                        file_path=file_path,
                        line_number=line_num,
                        issue_type="terminology",
                        description="設計術語缺少實現狀態說明",
                        severity="warning",
                        suggested_fix="添加實現狀態說明或參考 DESIGN_IMPLEMENTATION_GAP.md",
                    )
                )

    def check_format(self) -> None:
        """Check each document for an H1 first line and a version-history marker."""
        print("\n📋 檢查文檔格式...")

        version_markers = ("版本歷史", "版本记录", "Version History")
        for file_path, content_lines in self.file_contents.items():
            stats = self.document_stats[file_path]

            # An empty file has no H1 either, so it is flagged as well.
            if not content_lines or not content_lines[0].startswith("# "):
                stats.format_issues += 1
                stats.issues.append(
                    DocumentIssue(
                        file_path=file_path,
                        line_number=1,
                        issue_type="format",
                        description="文件缺少 H1 標題",
                        severity="warning",
                        suggested_fix="在第一行添加 # 標題",
                    )
                )

            has_version_table = any(
                marker in line for line in content_lines for marker in version_markers
            )
            if not has_version_table:
                stats.format_issues += 1
                stats.issues.append(
                    DocumentIssue(
                        file_path=file_path,
                        line_number=1,
                        issue_type="format",
                        description="文件缺少版本歷史表格",
                        severity="info",
                        suggested_fix="添加版本歷史表格",
                    )
                )

    def check_consistency(self) -> None:
        """Check that ARCHITECTURE_OVERVIEW.md mentions every other document by name."""
        print("\n🔄 檢查文檔間一致性...")

        overview_file = self.architecture_dir / "ARCHITECTURE_OVERVIEW.md"
        if overview_file not in self.file_contents:
            return

        overview_content = "".join(self.file_contents[overview_file])
        stats = self.document_stats[overview_file]
        for other_file in self.all_md_files:
            if other_file == overview_file:
                continue
            other_filename = other_file.name
            if other_filename in overview_content:
                continue
            stats.consistency_issues += 1
            stats.issues.append(
                DocumentIssue(
                    file_path=overview_file,
                    line_number=1,
                    issue_type="consistency",
                    description=f"總覽文件未引用: {other_filename}",
                    severity="info",
                    suggested_fix=f"在相關文件索引中添加對 {other_filename} 的引用",
                )
            )

    def generate_report(self, output_file: Optional[Path] = None) -> Dict:
        """Build the report; write it as JSON or print a console summary.

        Args:
            output_file: When given, the report is saved there as JSON;
                otherwise a summary is printed to stdout.

        Returns:
            The report dict with ``summary`` and ``files`` keys.
        """
        print("\n📊 生成檢查報告...")

        total_files = len(self.document_stats)
        summary = {
            "total_files": total_files,
            "total_issues": 0,
            "issues_by_type": defaultdict(int),
            "issues_by_severity": defaultdict(int),
        }
        report = {"summary": summary, "files": []}

        report_root = self.architecture_dir.parent.parent
        for file_path, stats in self.document_stats.items():
            try:
                display_path = str(file_path.relative_to(report_root))
            except ValueError:
                # File is not under the expected root; report the path as-is.
                display_path = str(file_path)

            file_report = {
                "file": display_path,
                "total_lines": stats.total_lines,
                "total_links": stats.total_links,
                "broken_links": stats.broken_links,
                "terminology_issues": stats.terminology_issues,
                "format_issues": stats.format_issues,
                "consistency_issues": stats.consistency_issues,
                "issues": [],
            }
            for issue in stats.issues:
                file_report["issues"].append(
                    {
                        "line": issue.line_number,
                        "type": issue.issue_type,
                        "severity": issue.severity,
                        "description": issue.description,
                        "suggested_fix": issue.suggested_fix,
                    }
                )
                summary["total_issues"] += 1
                summary["issues_by_type"][issue.issue_type] += 1
                summary["issues_by_severity"][issue.severity] += 1
            report["files"].append(file_report)

        total_issues = summary["total_issues"]

        if output_file:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(report, f, ensure_ascii=False, indent=2)
            print(f"✅ 報告已保存到: {output_file}")
        else:
            print(f"\n{'=' * 60}")
            print("架構文檔檢查報告")
            print(f"{'=' * 60}")
            print(f"📁 檢查文件數: {total_files}")
            print(f"⚠️ 發現問題數: {total_issues}")

            print("\n問題分類:")
            for issue_type, count in summary["issues_by_type"].items():
                print(f" - {issue_type}: {count}")

            print("\n嚴重程度:")
            for severity, count in summary["issues_by_severity"].items():
                print(f" - {severity}: {count}")

            if total_issues > 0:
                print("\n🔍 詳細問題:")
                for file_report in report["files"]:
                    if file_report["issues"]:
                        print(f"\n文件: {file_report['file']}")
                        for issue in file_report["issues"]:
                            print(
                                f" 行 {issue['line']} [{issue['severity']}] {issue['type']}: {issue['description']}"
                            )

        return report

    def run_all_checks(self, output_file: Optional[Path] = None) -> Dict:
        """Load documents, run every check, and return the generated report.

        Args:
            output_file: Forwarded to ``generate_report``.
        """
        print("🚀 開始架構文檔一致性檢查")
        print(f"檢查目錄: {self.architecture_dir}")

        self.load_all_documents()
        self.check_links()
        self.check_terminology()
        self.check_format()
        self.check_consistency()

        return self.generate_report(output_file)


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    Args:
        argv: Argument list; ``None`` makes argparse read ``sys.argv[1:]``.

    Exits with status 1 when the architecture directory is missing or any
    issue is found, and 0 when all checks pass.  ``--check-only`` returns
    without setting an exit status.
    """
    parser = argparse.ArgumentParser(description="架構文檔一致性檢查工具")
    parser.add_argument("--report", type=str, help="生成 JSON 報告文件")
    # NOTE(review): --verbose is accepted but not consulted anywhere yet.
    parser.add_argument("--verbose", "-v", action="store_true", help="詳細輸出")
    parser.add_argument("--check-only", action="store_true", help="只檢查不生成報告")

    args = parser.parse_args(argv)

    if not ARCHITECTURE_DIR.exists():
        print(f"❌ 架構目錄不存在: {ARCHITECTURE_DIR}")
        sys.exit(1)

    checker = ArchitectureDocChecker(ARCHITECTURE_DIR)

    if args.check_only:
        checker.load_all_documents()
        checker.check_links()
        checker.check_terminology()
        print("\n✅ 檢查完成(僅檢查模式)")
        return

    # Forward the --report path so the JSON file is actually written.
    output_file = Path(args.report) if args.report else None
    report = checker.run_all_checks(output_file=output_file)

    if report["summary"]["total_issues"] > 0:
        print(f"\n❌ 發現 {report['summary']['total_issues']} 個問題,請修復")
        sys.exit(1)

    print("\n✅ 所有檢查通過!")
    sys.exit(0)


if __name__ == "__main__":
    main()