- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
140 lines
4.6 KiB
Python
140 lines
4.6 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Weather Sound Detector (Rain & Thunder)
|
|
職責:使用聲學特徵 (Librosa) 辨識雨聲 (Rain) 與雷聲 (Thunder)。
|
|
"""
|
|
|
|
import librosa
|
|
import numpy as np
|
|
import os
|
|
import json
|
|
|
|
# Configuration (each value can be overridden through the environment).
UUID = os.environ.get("UUID", "384b0ff44aaaa1f1")
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "./output")

# Input audio and output JSON both live in the per-UUID sub-directory.
_ASSET_DIR = os.path.join(OUTPUT_DIR, UUID)
AUDIO_PATH = os.path.join(_ASSET_DIR, UUID + ".wav")
OUTPUT_JSON = os.path.join(_ASSET_DIR, UUID + ".weather_events.json")
|
|
|
|
|
|
# Analysis settings and empirical detection thresholds (values unchanged).
_SAMPLE_RATE = 16000          # Hz; audio is resampled to this rate on load
_WINDOW_SECONDS = 10.0        # one analysis frame per 10 seconds
_LOW_FREQ_CUTOFF_HZ = 200     # upper bound of the "low frequency" band
_THUNDER_MIN_RMS = 0.08
_THUNDER_MIN_LFE = 0.4
_RAIN_MIN_FLATNESS = 0.30
_RAIN_MIN_RMS = 0.015
_RAIN_CENTROID_RANGE_HZ = (800, 3000)


def classify_weather_frame(rms, flatness, centroid, lfe):
    """Classify one analysis frame as thunder, rain, or neither.

    Args:
        rms: Frame energy (loudness).
        flatness: Spectral flatness of the frame (noise-likeness).
        centroid: Spectral centroid of the frame in Hz.
        lfe: Share of spectral magnitude below the low-frequency cutoff.

    Returns:
        ``None`` when no rule matches, otherwise a dict with the keys
        ``type``, ``confidence`` and ``reason``.  ``confidence`` is always
        a built-in ``float`` so the result can be JSON-encoded.
    """
    centroid_lo, centroid_hi = _RAIN_CENTROID_RANGE_HZ

    if rms > _THUNDER_MIN_RMS and lfe > _THUNDER_MIN_LFE:
        # Thunder: loud and dominated by low-frequency rumble.
        event_type = "Thunder"
        reason = f"High LFE ({lfe:.2f}) & Loud"
    elif (
        flatness > _RAIN_MIN_FLATNESS
        and rms > _RAIN_MIN_RMS  # flatness is unreliable on near-silence
        and centroid_lo < centroid < centroid_hi  # reject very low bands (wind, idle hum)
    ):
        # Rain: noise-like spectrum, sustained energy, mid-range centroid.
        event_type = "Rain"
        reason = f"High Flatness ({flatness:.2f}) & Mid Centroid"
    else:
        return None

    return {
        "type": event_type,
        # Simple combined score.  The features arrive as NumPy scalars, and
        # round() on those keeps the NumPy type, which json cannot encode;
        # convert to a built-in float first.
        "confidence": round(float(rms + lfe + flatness), 4),
        "reason": reason,
    }


def detect_weather_sounds(audio_path):
    """Scan an audio file for rain and thunder in 10-second frames.

    The audio is loaded as 16 kHz mono and described per frame by RMS
    energy, spectral flatness, spectral centroid and the share of
    magnitude below 200 Hz.  Each frame is then passed to
    ``classify_weather_frame``.  Progress messages are printed to stdout.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A list of event dicts in chronological order, each with the keys
        ``start`` and ``end`` (seconds, one decimal), ``type`` ("Rain" or
        "Thunder"), ``confidence`` and ``reason``.  Empty when nothing is
        detected or the file holds no samples.
    """
    print(f"🔍 Loading audio: {audio_path}")
    y, sr = librosa.load(audio_path, sr=_SAMPLE_RATE, mono=True)
    if y.size == 0:
        return []
    total_dur = len(y) / sr

    # Non-overlapping analysis windows: hop == frame length.
    frame_length = int(_WINDOW_SECONDS * sr)
    hop_length = frame_length
    half_window = _WINDOW_SECONDS / 2

    print("📊 Analyzing spectral features...")

    # RMS energy; shape (1, frames) -> (frames,)
    rms = librosa.feature.rms(
        y=y, frame_length=frame_length, hop_length=hop_length
    )[0]

    # One magnitude STFT feeds flatness, centroid and the low-frequency
    # ratio, instead of recomputing the same large transform three times.
    magnitude = np.abs(librosa.stft(y, n_fft=frame_length, hop_length=hop_length))
    flatness = librosa.feature.spectral_flatness(
        S=magnitude, n_fft=frame_length, hop_length=hop_length
    )[0]
    centroid = librosa.feature.spectral_centroid(
        S=magnitude, sr=sr, n_fft=frame_length, hop_length=hop_length
    )[0]

    # Low-frequency energy: share of magnitude in the bins below the cutoff.
    n_low_bins = int(_LOW_FREQ_CUTOFF_HZ * frame_length / sr)
    lfe = magnitude[:n_low_bins].sum(axis=0) / (magnitude.sum(axis=0) + 1e-10)

    print("🕵️♂️ Scanning for patterns...")

    weather_events = []
    for index, (r, f, c, low) in enumerate(zip(rms, flatness, centroid, lfe)):
        verdict = classify_weather_frame(r, f, c, low)
        if verdict is None:
            continue

        # librosa centers frame i on sample i * hop_length, so the analysed
        # audio spans half a window on either side of that instant.  Clamp
        # to the real signal so no event starts before 0 or ends after it.
        centre = index * hop_length / sr
        start = max(0.0, centre - half_window)
        end = min(total_dur, centre + half_window)

        weather_events.append(
            {"start": round(start, 1), "end": round(end, 1), **verdict}
        )

    return weather_events
|
|
|
|
|
|
def merge_weather_events(events, max_gap=5.0):
    """Merge consecutive events of the same type into single segments.

    Two neighbouring events are merged when they share a ``type`` and the
    later one starts less than ``max_gap`` seconds after the earlier one
    ends.  The merged segment keeps the first event's ``start`` and
    ``reason``, takes the later ``end``, and keeps the higher
    ``confidence``.

    Args:
        events: Event dicts in chronological order.
        max_gap: Largest gap in seconds that still counts as continuous.

    Returns:
        A new list of merged event dicts; the input dicts are not modified.
    """
    merged = []
    for ev in events:
        last = merged[-1] if merged else None
        if (
            last is not None
            and ev["type"] == last["type"]
            and (ev["start"] - last["end"]) < max_gap
        ):
            last["end"] = ev["end"]
            last["confidence"] = max(last["confidence"], ev["confidence"])
        else:
            # Copy so that extending a segment never edits the caller's data.
            merged.append(dict(ev))
    return merged


if __name__ == "__main__":
    if not os.path.exists(AUDIO_PATH):
        print(f"❌ No audio found at {AUDIO_PATH}")
        # exit() is injected by the site module and would report success;
        # signal the failure with a non-zero status instead.
        raise SystemExit(1)

    print(f"🌦️ Starting Weather Sound Analysis for {UUID}...")
    events = detect_weather_sounds(AUDIO_PATH)

    # Merge consecutive segments (e.g. three rain frames -> one long rain).
    merged_events = merge_weather_events(events)

    print("\n🎉 Analysis Complete!")
    print(f"✅ Found {len(merged_events)} weather segments.")

    # Statistics
    rain_count = sum(1 for e in merged_events if e["type"] == "Rain")
    thunder_count = sum(1 for e in merged_events if e["type"] == "Thunder")
    print(f" 🌧️ Rain events: {rain_count}")
    print(f" ⚡ Thunder events: {thunder_count}")

    # Save.  default=float converts any NumPy scalar left in the events,
    # which json cannot encode on its own.
    with open(OUTPUT_JSON, "w", encoding="utf-8") as out_file:
        json.dump({"weather_events": merged_events}, out_file, indent=2, default=float)

    # Show the top 20
    print("\n🔥 Top Weather Moments (Sorted by Confidence):")
    sorted_ev = sorted(merged_events, key=lambda x: x["confidence"], reverse=True)
    for rank, ev in enumerate(sorted_ev[:20], start=1):
        m, s = divmod(ev["start"], 60)
        print(f" {rank:02d}. [{int(m):02d}:{s:05.2f}] {ev['type']} ({ev['reason']})")
|