Files
momentry_core/scripts/face_statistics_report.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

247 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""
人臉統計報告生成
"""
import psycopg2
import json
from datetime import datetime
import sys
def get_face_statistics():
    """Collect aggregate face-detection statistics from the database.

    Runs five read-only queries against the ``face_detections`` table and
    returns the raw rows without post-processing.

    Returns:
        dict with the following keys:
            "total_stats": one row ``(total_faces, male_count, female_count,
                avg_age, min_age, max_age)``.
            "video_stats": rows ``(video_uuid, total_faces, male_count,
                female_count, avg_age)``, most faces first.
            "age_gender_dist": rows ``(age_group, gender, count)``.
            "confidence_stats": one row ``(avg_confidence, min_confidence,
                max_confidence, high_confidence, medium_confidence,
                low_confidence)``.
            "time_dist": rows ``(minute_mark, faces_in_minute,
                males_in_minute, females_in_minute)`` ordered by minute.

    Raises:
        Whatever ``psycopg2`` raises on connection or query failure; the
        cursor and connection are closed before the error propagates.
    """
    # Local import keeps this block self-contained; the module does not
    # import contextlib at the top.
    from contextlib import closing

    # NOTE(review): connection settings, including the password, are
    # hard-coded here — consider moving them to configuration.
    connection = psycopg2.connect(
        host="localhost",
        port=5432,
        database="momentry",
        user="accusys",
        password="accusys",
    )

    # closing() guarantees close() on both objects even when a query fails;
    # previously an exception from execute() leaked the connection.
    with closing(connection) as conn, closing(conn.cursor()) as cursor:
        # 1. Overall statistics. Ages are only cast when they are purely
        #    numeric strings, so malformed values are ignored.
        cursor.execute("""
            SELECT
                COUNT(*) as total_faces,
                SUM(CASE WHEN attributes->>'gender' = 'male' THEN 1 ELSE 0 END) as male_count,
                SUM(CASE WHEN attributes->>'gender' = 'female' THEN 1 ELSE 0 END) as female_count,
                ROUND(AVG(CASE WHEN attributes->>'age' ~ '^[0-9]+$' THEN (attributes->>'age')::numeric ELSE NULL END)::numeric, 1) as avg_age,
                MIN(CASE WHEN attributes->>'age' ~ '^[0-9]+$' THEN (attributes->>'age')::numeric ELSE NULL END) as min_age,
                MAX(CASE WHEN attributes->>'age' ~ '^[0-9]+$' THEN (attributes->>'age')::numeric ELSE NULL END) as max_age
            FROM face_detections
        """)
        total_stats = cursor.fetchone()

        # 2. Per-video statistics.
        cursor.execute("""
            SELECT
                video_uuid,
                COUNT(*) as total_faces,
                SUM(CASE WHEN attributes->>'gender' = 'male' THEN 1 ELSE 0 END) as male_count,
                SUM(CASE WHEN attributes->>'gender' = 'female' THEN 1 ELSE 0 END) as female_count,
                ROUND(AVG(CASE WHEN attributes->>'age' ~ '^[0-9]+$' THEN (attributes->>'age')::numeric ELSE NULL END)::numeric, 1) as avg_age
            FROM face_detections
            GROUP BY video_uuid
            ORDER BY total_faces DESC
        """)
        video_stats = cursor.fetchall()

        # 3. Age/gender distribution.
        #    NOTE: the '10-19' bucket receives every age below 20, including
        #    ages under 10, and '60+' receives everything from 60 upward.
        cursor.execute("""
            WITH age_groups AS (
                SELECT
                    CASE
                        WHEN (attributes->>'age')::numeric < 20 THEN '10-19'
                        WHEN (attributes->>'age')::numeric < 30 THEN '20-29'
                        WHEN (attributes->>'age')::numeric < 40 THEN '30-39'
                        WHEN (attributes->>'age')::numeric < 50 THEN '40-49'
                        WHEN (attributes->>'age')::numeric < 60 THEN '50-59'
                        ELSE '60+'
                    END as age_group,
                    attributes->>'gender' as gender
                FROM face_detections
                WHERE attributes->>'gender' IN ('male', 'female')
                AND attributes->>'age' ~ '^[0-9]+$'
            )
            SELECT
                age_group,
                gender,
                COUNT(*) as count
            FROM age_groups
            GROUP BY age_group, gender
            ORDER BY
                CASE age_group
                    WHEN '10-19' THEN 1
                    WHEN '20-29' THEN 2
                    WHEN '30-39' THEN 3
                    WHEN '40-49' THEN 4
                    WHEN '50-59' THEN 5
                    ELSE 6
                END,
                gender DESC
        """)
        age_gender_dist = cursor.fetchall()

        # 4. Detection-confidence statistics (high >= 0.8, medium 0.6-0.8,
        #    low < 0.6).
        cursor.execute("""
            SELECT
                ROUND(AVG(confidence)::numeric, 3) as avg_confidence,
                MIN(confidence) as min_confidence,
                MAX(confidence) as max_confidence,
                COUNT(CASE WHEN confidence >= 0.8 THEN 1 END) as high_confidence,
                COUNT(CASE WHEN confidence >= 0.6 AND confidence < 0.8 THEN 1 END) as medium_confidence,
                COUNT(CASE WHEN confidence < 0.6 THEN 1 END) as low_confidence
            FROM face_detections
        """)
        confidence_stats = cursor.fetchone()

        # 5. Distribution over time, bucketed per minute of timestamp_secs.
        #    Rows from all videos share the same buckets (no video_uuid
        #    grouping in this query).
        cursor.execute("""
            SELECT
                FLOOR(timestamp_secs / 60) * 60 as minute_mark,
                COUNT(*) as faces_in_minute,
                SUM(CASE WHEN attributes->>'gender' = 'male' THEN 1 ELSE 0 END) as males_in_minute,
                SUM(CASE WHEN attributes->>'gender' = 'female' THEN 1 ELSE 0 END) as females_in_minute
            FROM face_detections
            GROUP BY FLOOR(timestamp_secs / 60) * 60
            ORDER BY minute_mark
        """)
        time_dist = cursor.fetchall()

    return {
        "total_stats": total_stats,
        "video_stats": video_stats,
        "age_gender_dist": age_gender_dist,
        "confidence_stats": confidence_stats,
        "time_dist": time_dist,
    }
def _percent(part, whole):
    """Return ``part / whole`` as a percentage; 0.0 when ``whole`` is 0/None."""
    if not whole:
        return 0.0
    return (part or 0) / whole * 100


def _format_score(value):
    """Format a confidence value with three decimals, or 'N/A' when missing."""
    return "N/A" if value is None else f"{value:.3f}"


def _age_group_start(label):
    """Return the lower bound of an age-group label such as '20-29' or '60+'."""
    return int(label.rstrip("+").split("-")[0])


def generate_report(stats):
    """Render the statistics dictionary as a plain-text report.

    Args:
        stats: dict with the keys "total_stats", "video_stats",
            "age_gender_dist", "confidence_stats" and "time_dist", each
            holding row tuples in the column order this function unpacks
            below.

    Returns:
        The report as a single newline-joined string.

    Missing aggregates (``None`` counts or a zero face total, as produced by
    an empty table) are rendered as 0 / 0.0% / N/A instead of raising.
    """
    report = []
    report.append("=" * 70)
    report.append("人臉識別統計報告")
    report.append("=" * 70)
    report.append(f"生成時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    report.append("")

    # Overall statistics.
    total_stats = stats["total_stats"]
    total_faces = total_stats[0] or 0
    # SUM() over zero rows yields NULL, so counts may arrive as None.
    male_total = total_stats[1] or 0
    female_total = total_stats[2] or 0
    report.append("📊 總體統計")
    report.append("-" * 40)
    report.append(f"總人臉數: {total_faces}")
    report.append(
        f"男性: {male_total} ({_percent(male_total, total_faces):.1f}%)"
    )
    report.append(
        f"女性: {female_total} ({_percent(female_total, total_faces):.1f}%)"
    )
    report.append(f"平均年齡: {total_stats[3]}")
    report.append(f"年齡範圍: {total_stats[4]} - {total_stats[5]}")
    report.append("")

    # Per-video statistics.
    report.append("🎬 視頻統計")
    report.append("-" * 40)
    for video_uuid, total, male, female, avg_age in stats["video_stats"]:
        # NOTE(review): only one UUID is mapped explicitly; every other
        # video is labelled "ExaSAN PCIe series".
        video_name = (
            "Old_Time_Movie_Show_-_Charade_1963.HD.mov"
            if video_uuid == "384b0ff44aaaa1f1"
            else "ExaSAN PCIe series"
        )
        report.append(f"視頻: {video_name}")
        report.append(f" UUID: {video_uuid}")
        report.append(f" 總人臉: {total}")
        report.append(f" 男性: {male} ({_percent(male, total):.1f}%)")
        report.append(f" 女性: {female} ({_percent(female, total):.1f}%)")
        report.append(f" 平均年齡: {avg_age}")
        report.append("")

    # Age/gender distribution.
    report.append("👥 年齡性別分布")
    report.append("-" * 40)
    # Pivot (age_group, gender, count) rows into one entry per age group.
    age_groups = {}
    for age_group, gender, count in stats["age_gender_dist"]:
        age_groups.setdefault(age_group, {"male": 0, "female": 0})[gender] = count
    # Sort by the numeric lower bound; the key must cope with the open-ended
    # '60+' label, which has no '-' separator.
    for age_group in sorted(age_groups, key=_age_group_start):
        male = age_groups[age_group]["male"]
        female = age_groups[age_group]["female"]
        total = male + female
        if total > 0:
            report.append(f"{age_group}歲: {total}人 (男{male}/女{female})")
    report.append("")

    # Detection-confidence statistics; percentages are relative to the
    # overall face count.
    conf_stats = stats["confidence_stats"]
    report.append("🎯 檢測置信度")
    report.append("-" * 40)
    report.append(f"平均置信度: {_format_score(conf_stats[0])}")
    report.append(
        f"範圍: {_format_score(conf_stats[1])} - {_format_score(conf_stats[2])}"
    )
    report.append(
        f"高置信度(≥0.8): {conf_stats[3]} ({_percent(conf_stats[3], total_faces):.1f}%)"
    )
    report.append(
        f"中置信度(0.6-0.8): {conf_stats[4]} ({_percent(conf_stats[4], total_faces):.1f}%)"
    )
    report.append(
        f"低置信度(<0.6): {conf_stats[5]} ({_percent(conf_stats[5], total_faces):.1f}%)"
    )
    report.append("")

    # Distribution over time; minute_mark is the bucket start in seconds.
    report.append("⏰ 時間分布 (每分鐘)")
    report.append("-" * 40)
    for minute_mark, total, male, female in stats["time_dist"]:
        minutes = int(minute_mark // 60)
        seconds = int(minute_mark % 60)
        report.append(f"{minutes:02d}:{seconds:02d} - {total}人 (男{male}/女{female})")
    report.append("")
    report.append("=" * 70)
    return "\n".join(report)
def main():
    """Generate the face statistics report, print it and save it to a file.

    The report is written to ``/tmp/face_statistics_report.txt``. Any
    exception raised while querying, rendering or writing is reported on
    standard output and the process exits with status 1.
    """
    output_path = "/tmp/face_statistics_report.txt"
    print("正在生成人臉統計報告...")
    try:
        stats = get_face_statistics()
        report = generate_report(stats)
        # Echo to the console.
        print(report)
        # Save to file. The report contains CJK text and emoji, so the
        # encoding is pinned to UTF-8 rather than the locale default, which
        # may be unable to encode those characters.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(report)
        print(f"\n報告已保存到: {output_path}")
    except Exception as e:
        # Top-level boundary of the script: report the failure and exit
        # with a non-zero status.
        print(f"❌ 生成報告時出錯: {e}")
        sys.exit(1)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()