Files
momentry_core/scripts/compare_asr_models.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

106 lines
2.8 KiB
Python
Executable File

#!/opt/homebrew/bin/python3.11
"""
ASR 模型比對工具
對比不同模型的輸出結果
"""
import json
import sys
from pathlib import Path
from datetime import datetime
def load_results(paths):
    """Load the JSON output of several ASR models.

    Args:
        paths: Mapping of model name to the path of that model's JSON
            result file.

    Returns:
        A dict mapping each model name to its parsed JSON document, in the
        same iteration order as *paths*.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    results = {}
    for name, path in paths.items():
        # Decode explicitly as UTF-8: the default for open() is the platform
        # locale encoding, which can mangle or reject non-ASCII transcripts.
        with open(path, encoding="utf-8") as f:
            results[name] = json.load(f)
    return results
def find_keyword(segments, keyword):
    """Return the first segment whose text contains *keyword*.

    Segments are scanned in order and each one's ``"text"`` entry is tested
    for a substring match. ``None`` is returned when no segment matches.
    """
    matching = (entry for entry in segments if keyword in entry["text"])
    return next(matching, None)
def compare_models(results):
    """Print a Markdown comparison report for several ASR model outputs.

    The report is written to standard output and has three parts: a summary
    line per model, a table showing which of a fixed list of keywords the
    ``tiny`` / ``base`` / ``small`` models recognised, and a side-by-side
    listing of the first ten segments of every model.

    Args:
        results: Mapping of model name to its parsed result dict. Each result
            may carry ``language``, ``language_probability`` and ``segments``
            (missing keys fall back to defaults); every listed segment is
            read for ``text``, ``start`` and ``end``.

    Returns:
        None.
    """
    print("# ASR 模型對比報告\n")
    print(f"**生成時間**: {datetime.now().isoformat()}\n")

    # Model summary: detected language, its probability and segment count.
    print("## 模型資訊\n")
    for name, result in results.items():
        print(
            f"- **{name}**: {result.get('language', 'unknown')} "
            f"({result.get('language_probability', 0) * 100:.1f}%), "
            f"{len(result.get('segments', []))} 片段"
        )
    print()

    # Keyword recognition table. The columns are fixed, so a model that was
    # not supplied is shown as "-".
    keywords = ["剪輯師", "調光師", "錄音師", "特效", "套片"]
    table_models = ("tiny", "base", "small")
    print("## 關鍵詞彙識別\n")
    print("| 詞彙 | tiny | base | small |")
    print("|------|------|------|-------|")
    for keyword in keywords:
        row = [keyword]
        for model_name in table_models:
            if model_name in results:
                # Tolerate a result without "segments", as the rest of the
                # report already does.
                segments = results[model_name].get("segments", [])
                found = find_keyword(segments, keyword)
                # Distinct markers: a hit and a miss must be distinguishable
                # in the rendered table.
                row.append("✅" if found is not None else "❌")
            else:
                row.append("-")
        print(f"| {' | '.join(row)} |")
    print()

    # Side-by-side listing of the first 10 segments.
    print("## 前 10 句對比\n")
    # default=0 keeps an empty `results` mapping from raising ValueError.
    max_segments = max(
        (len(r.get("segments", [])) for r in results.values()),
        default=0,
    )
    for i in range(min(10, max_segments)):
        print(f"### 片段 {i + 1}\n")
        for model_name, result in results.items():
            segments = result.get("segments", [])
            # Models with fewer segments are simply skipped for this index.
            if i < len(segments):
                seg = segments[i]
                print(
                    f"**{model_name}**: {seg['text']} "
                    f"({seg['start']:.1f}s - {seg['end']:.1f}s)"
                )
        print()
def main():
    """Command-line entry point: compare two or three ASR result files.

    Reads the result paths from ``sys.argv`` (``tiny`` and ``base`` are
    required, ``small`` is optional), loads them and prints the comparison
    report to standard output.

    Exits with status 1, after printing a message to standard error, when
    fewer than two paths are given or any given path does not exist.
    """
    # Diagnostics go to stderr so they never end up inside a report that was
    # redirected from stdout into a file.
    if len(sys.argv) < 3:
        print(
            "Usage: python3 compare_asr_models.py <tiny.json> <base.json> [small.json]",
            file=sys.stderr,
        )
        print("Note: small.json is optional", file=sys.stderr)
        sys.exit(1)

    paths = {"tiny": sys.argv[1], "base": sys.argv[2]}
    if len(sys.argv) > 3:
        paths["small"] = sys.argv[3]

    # Check that every input exists before loading anything.
    for name, path in paths.items():
        if not Path(path).exists():
            print(f"Error: {path} ({name}) not found", file=sys.stderr)
            sys.exit(1)

    results = load_results(paths)
    compare_models(results)


if __name__ == "__main__":
    main()