- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
197 lines
6.4 KiB
Python
197 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
代碼與文檔一致性檢查工具 - Phase 1.2 成果
|
|
|
|
功能:檢查 Rust 代碼定義與架構文檔的一致性
|
|
核心原則:當設計與實現出現矛盾時,以實際的 Rust 代碼實現為最高權威
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _extract_chunk_type_body(content):
    """Return the comment-free body of ``pub enum ChunkType { ... }``, or None."""
    header = re.search(r"pub\s+enum\s+ChunkType\s*\{", content)
    if header is None:
        return None

    def _blank_out_comment(match):
        # String literals are matched only so that "//" inside them is not
        # mistaken for a comment; they are put back unchanged.
        token = match.group(0)
        return token if token.startswith('"') else " "

    tail = re.sub(
        r'"(?:\\.|[^"\\])*"|//[^\n]*|/\*.*?\*/',
        _blank_out_comment,
        content[header.end():],
        flags=re.DOTALL,
    )

    # Walk to the brace that closes the enum so struct-like variants
    # (``Cut { start: u64 }``) do not end the body early.
    depth = 1
    for index, char in enumerate(tail):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return tail[:index]
    return None


def _strip_leading_attributes(item):
    """Drop any ``#[...]`` attributes that precede a variant declaration."""
    item = item.lstrip()
    while item.startswith("#["):
        depth = 0
        for index, char in enumerate(item):
            if char == "[":
                depth += 1
            elif char == "]":
                depth -= 1
                if depth == 0:
                    item = item[index + 1:].lstrip()
                    break
        else:
            # Unbalanced attribute: nothing usable is left in this item.
            return ""
    return item


def _parse_variant_names(enum_body):
    """Return the variant identifiers declared in a comment-free enum body."""
    # Split on commas that are not nested inside a payload or an attribute.
    items = []
    current = []
    depth = 0
    for char in enum_body:
        if char in "([{":
            depth += 1
        elif char in ")]}":
            depth -= 1
        if char == "," and depth == 0:
            items.append("".join(current))
            current = []
        else:
            current.append(char)
    items.append("".join(current))

    names = []
    for item in items:
        # Only the leading identifier is the variant name; payloads such as
        # ``(u32)`` / ``{ ... }`` and discriminants such as ``= 1`` are dropped.
        identifier = re.match(r"[A-Za-z_]\w*", _strip_leading_attributes(item))
        if identifier:
            names.append(identifier.group(0))
    return names


def load_code_definitions(src_dir=None):
    """Load the ``ChunkType`` variant names from the Rust sources.

    Every ``*.rs`` file below ``src_dir`` is scanned (in sorted order, so the
    result is reproducible) and the first ``pub enum ChunkType`` definition
    found is parsed.

    Args:
        src_dir: Directory to scan. Defaults to the ``src`` directory next to
            the parent directory of this script.

    Returns:
        The list of variant names in declaration order, or an empty list when
        no definition was found.
    """
    print("🔍 解析 Rust 代碼定義...")

    if src_dir is None:
        src_dir = Path(__file__).parent.parent / "src"
    else:
        src_dir = Path(src_dir)

    for file_path in sorted(src_dir.glob("**/*.rs")):
        # Only reading can fail here; an unreadable file is reported and skipped.
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            print(f"⚠️ 解析文件 {file_path} 時出錯: {e}")
            continue

        enum_body = _extract_chunk_type_body(content)
        if enum_body is None:
            continue

        variants = _parse_variant_names(enum_body)
        print(f"📝 找到 ChunkType 定義: {', '.join(variants)}")
        return variants

    print("❌ 未找到 ChunkType 定義")
    return []
|
|
|
|
|
|
def check_terminology_consistency(implemented_variants, project_root=None):
    """Check the key architecture documents for terms lacking a status note.

    For each key document that exists, two checks are made:

    * If the document mentions a design term that needs an implementation
      note ("visual", "scene", "summary") and contains none of the known
      status markers anywhere, a ``warning`` issue is recorded per term.
    * If the document mentions an implemented variant but does not contain
      the status string returned by ``get_status`` for it, an ``info`` issue
      is recorded.

    Args:
        implemented_variants: Variant names extracted from the Rust code.
        project_root: Root that contains ``docs_v1.0/ARCHITECTURE``. Defaults
            to the parent of the directory holding this script.

    Returns:
        A list of issue dicts with the keys ``file``, ``type``,
        ``description``, ``severity`` and ``suggested_fix``.
    """
    print("\n📝 檢查術語一致性...")

    if project_root is None:
        project_root = Path(__file__).parent.parent
    else:
        project_root = Path(project_root)
    architecture_dir = project_root / "docs_v1.0" / "ARCHITECTURE"

    # Design vocabulary, kept in a fixed order so the report is reproducible
    # from run to run (a set would iterate in hash order).
    design_terms = ("sentence", "visual", "scene", "summary", "time")
    terms_needing_note = {"visual", "scene", "summary"}

    status_markers = (
        "✅",
        "⚠️",
        "❌",
        "🔄",
        "已實現",
        "未實現",
        "部分實現",
        "概念調整",
    )

    key_files = (
        "ARCHITECTURE_OVERVIEW.md",
        "CHUNKING_ARCHITECTURE.md",
        "DESIGN_IMPLEMENTATION_GAP.md",
    )

    issues = []

    for filename in key_files:
        file_path = architecture_dir / filename
        if not file_path.exists():
            print(f" ⚠️ 文件不存在: {filename}")
            continue

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            print(f" ❌ 無法讀取文件 {file_path}: {e}")
            continue

        relative_name = str(file_path.relative_to(project_root))

        # Both values depend only on the document, so compute them once.
        lowered = content.lower()
        has_status_marker = any(marker in content for marker in status_markers)

        if not has_status_marker:
            for design_term in design_terms:
                if design_term not in terms_needing_note:
                    continue
                if design_term not in lowered:
                    continue

                # "visual" has no implementation term (empty string), but its
                # status is stored under the design term itself; fall back to
                # it so the suggestion is not "status unknown".
                impl_term = get_implementation_term(design_term)
                status = get_status(impl_term or design_term)

                issues.append(
                    {
                        "file": relative_name,
                        "type": "terminology",
                        "description": f"設計術語 '{design_term}' 缺少實現狀態說明",
                        "severity": "warning",
                        "suggested_fix": f"添加狀態說明,例如: '{status}' 或參考 TERMINOLOGY_MAPPING.md",
                    }
                )

        for impl_term in implemented_variants:
            if impl_term not in content:
                continue

            expected_status = get_status(impl_term)
            if expected_status and expected_status not in content:
                issues.append(
                    {
                        "file": relative_name,
                        "type": "terminology",
                        "description": f"實現術語 '{impl_term}' 缺少正確的狀態標記",
                        "severity": "info",
                        "suggested_fix": f"添加狀態標記: {expected_status}",
                    }
                )

    return issues
|
|
|
|
|
|
def get_implementation_term(design_term):
    """Translate a design-level term into its implementation-level name.

    Args:
        design_term: Lower-case design term such as ``"scene"``.

    Returns:
        The matching implementation term, or an empty string when the term is
        unknown or has no implementation ("visual").
    """
    design_to_impl = dict(
        sentence="Sentence",
        visual="",  # not implemented
        scene="Cut",
        summary="Story",
        time="TimeBased",
    )
    if design_term in design_to_impl:
        return design_to_impl[design_term]
    return ""
|
|
|
|
|
|
def get_status(impl_term):
    """Look up the status label recorded for a term.

    Args:
        impl_term: Implementation term (or ``"visual"``) to look up.

    Returns:
        The status label for the term, or the "unknown status" label when the
        term has no entry.
    """
    labels = {name: "✅ 已實現" for name in ("TimeBased", "Sentence", "Trace")}
    labels["Cut"] = "⚠️ 部分實現"
    labels["Story"] = "⚠️ 概念調整"
    labels["visual"] = "❌ 未實現"

    try:
        return labels[impl_term]
    except KeyError:
        return "❓ 狀態未知"
|
|
|
|
|
|
def main():
    """Run the consistency check end to end and print a report.

    Loads the code definitions, stops early when none were found, then runs
    the terminology check and prints every reported issue.
    """
    rule = "=" * 50

    print("🚀 開始代碼與文檔一致性檢查 - Phase 1.2")
    print(rule)

    # Step 1: load the definitions from the code.
    variants = load_code_definitions()
    if not variants:
        print("❌ 無法繼續檢查,請先確保 Rust 代碼正常編譯")
        return

    print("✅ 加載了 {} 個代碼定義".format(len(variants)))

    # Step 2: compare the documents against those definitions.
    found = check_terminology_consistency(variants)

    # Step 3: report.
    print("\n📊 檢查完成:")
    print(" 發現問題數: {}".format(len(found)))

    if found:
        print("\n🔍 詳細問題列表:")
    for entry in found:
        severity = entry["severity"].upper()
        print(" [{}] {}".format(severity, entry["file"]))
        print(" 描述: {}".format(entry["description"]))
        print(" 建議: {}".format(entry["suggested_fix"]))
        print()

    print(rule)
    print("✅ 檢查完成。請參考 TERMINOLOGY_MAPPING.md 進行修復。")
|
|
|
|
|
|
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|