- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
106 lines
2.8 KiB
Python
Executable File
106 lines
2.8 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
ASR 模型比對工具
|
|
對比不同模型的輸出結果
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
|
|
def load_results(paths):
    """Load the JSON output of several models.

    Args:
        paths: Mapping of model name to the path of that model's JSON
            result file.

    Returns:
        dict: Mapping of model name to the parsed JSON content, in the
        iteration order of ``paths``.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    results = {}
    for name, path in paths.items():
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default encoding, which can mis-decode non-ASCII transcript text.
        with open(path, encoding="utf-8") as f:
            results[name] = json.load(f)
    return results
|
|
|
|
|
|
def find_keyword(segments, keyword):
    """Return the first segment whose text contains ``keyword``.

    Args:
        segments: Iterable of segment dicts, each read via its ``"text"``
            entry.
        keyword: Substring to look for.

    Returns:
        The first matching segment, or ``None`` when no segment matches.
    """
    # Lazy scan: stops at the first hit, exactly like an early-return loop.
    matches = (entry for entry in segments if keyword in entry["text"])
    return next(matches, None)
|
|
|
|
|
|
def compare_models(results):
    """Print a Markdown report comparing the output of several ASR models.

    The report is written to standard output in three parts: a per-model
    summary (language, language probability, segment count), a
    keyword-recognition table for the ``tiny``/``base``/``small`` models,
    and a side-by-side listing of up to the first 10 segments.

    Args:
        results: Mapping of model name to that model's parsed result dict.
            The keys read here are ``language``, ``language_probability``
            and ``segments``; each listed segment is read for ``text``,
            ``start`` and ``end``. May be empty.

    Returns:
        None.
    """
    print("# ASR 模型對比報告\n")
    print(f"**生成時間**: {datetime.now().isoformat()}\n")

    # Model list
    print("## 模型資訊\n")
    for name, result in results.items():
        # `or 0` also covers an explicit null probability in the input.
        probability = result.get("language_probability") or 0
        print(
            f"- **{name}**: {result.get('language', 'unknown')} "
            + f"({probability * 100:.1f}%), "
            + f"{len(result.get('segments', []))} 片段"
        )
    print()

    # Keyword comparison
    keywords = ["剪輯師", "調光師", "錄音師", "特效", "套片"]
    print("## 關鍵詞彙識別\n")
    print("| 詞彙 | tiny | base | small |")
    print("|------|------|------|-------|")

    for keyword in keywords:
        row = [keyword]
        for model_name in ["tiny", "base", "small"]:
            if model_name in results:
                # Tolerate a result without "segments", as the other
                # sections of this report already do.
                segments = results[model_name].get("segments", [])
                found = find_keyword(segments, keyword)
                row.append("✅" if found else "❌")
            else:
                # Model was not supplied on the command line.
                row.append("-")
        print(f"| {' | '.join(row)} |")

    print()

    # Detailed comparison (first 10 segments)
    print("## 前 10 句對比\n")
    # default=0 keeps max() from raising when no results were supplied.
    max_segments = max(
        (len(r.get("segments", [])) for r in results.values()), default=0
    )

    for i in range(min(10, max_segments)):
        print(f"### 片段 {i + 1}\n")
        for model_name, result in results.items():
            segments = result.get("segments", [])
            # Models with fewer segments are simply omitted for this index.
            if i < len(segments):
                seg = segments[i]
                print(
                    f"**{model_name}**: {seg['text']} "
                    + f"({seg['start']:.1f}s - {seg['end']:.1f}s)"
                )
        print()
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Reads two or three result-file paths from ``sys.argv`` (tiny, base and
    optionally small), checks that each one is an existing regular file,
    loads them and prints the comparison report.

    Exits with status 1, after printing a message to standard error, when
    fewer than two paths are given or when a path is not an existing file.
    """
    if len(sys.argv) < 3:
        # Diagnostics go to stderr so they never end up inside a report
        # that was redirected from stdout.
        print(
            "Usage: python3 compare_asr_models.py <tiny.json> <base.json> [small.json]",
            file=sys.stderr,
        )
        print("Note: small.json is optional", file=sys.stderr)
        sys.exit(1)

    paths = {"tiny": sys.argv[1], "base": sys.argv[2]}

    if len(sys.argv) > 3:
        paths["small"] = sys.argv[3]

    # Check that the files exist
    for name, path in paths.items():
        # is_file() rather than exists(): a directory would pass exists()
        # and then fail later when opened.
        if not Path(path).is_file():
            print(f"Error: {path} ({name}) not found", file=sys.stderr)
            sys.exit(1)

    results = load_results(paths)
    compare_models(results)
|
|
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|