Files
momentry_core/monitor/service/health_check.sh
accusys 75edf0aa71 Initial commit: Momentry Core v0.1
- Rust-based digital asset management system
- Video analysis: ASR, OCR, YOLO, Face, Pose
- RAG capabilities with Qdrant vector database
- Multi-database support: PostgreSQL, Redis, MongoDB
- Monitoring system with launchd plists
- n8n workflow automation integration
2026-03-25 14:53:41 +08:00

371 lines
10 KiB
Bash
Executable File

#!/bin/bash
# Momentry service health check (Layer 2)
# Path: /Users/accusys/momentry_core_0.1/monitor/service/health_check.sh

# Absolute directory containing this script, and that directory's parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MONITOR_DIR="$(dirname "$SCRIPT_DIR")"

# Log location; the directory is (re)created on every run.
LOG_DIR="/Users/accusys/momentry/log/monitor"
LOG_FILE="${LOG_DIR}/service_check.log"
mkdir -p "${LOG_DIR}"
# Print a timestamped message on stdout and append the same line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '%s\n' "[${stamp}] $1" | tee -a "$LOG_FILE"
}
# Colors: ANSI escape sequences, stored as literal backslash text and
# turned into real escapes by `echo -e` when a status line is printed.
NC='\033[0m'          # reset
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Record one check result in the database (table monitor_services).
# Arguments:
#   $1 - service name
#   $2 - status string (callers in this file pass "up", "down" or "degraded")
#   $3 - response time in milliseconds
#   $4 - error message, may be empty
# Returns: exit status of psql
#
# The values are handed to psql as variables and interpolated with :'name',
# so psql performs the SQL quoting. A message containing a single quote can
# therefore neither break nor inject into the INSERT. A response time that
# is not a plain non-negative integer is stored as 0 instead of producing
# invalid SQL. psql's stderr is discarded.
record_service() {
  local service=$1
  local status=$2
  local response_time=$3
  local error_msg=$4

  [[ "$response_time" =~ ^[0-9]+$ ]] || response_time=0

  # The here-doc delimiter is quoted: the shell must not expand anything in
  # the SQL text, psql substitutes the :variables itself.
  psql -U accusys -h localhost -d momentry \
    -v svc="$service" -v svc_status="$status" \
    -v rt_ms="$response_time" -v err_msg="$error_msg" << 'EOF' 2>/dev/null
INSERT INTO monitor_services (service_name, service_type, status, response_time_ms, error_message, checked_at)
VALUES (:'svc', 'service', :'svc_status', :rt_ms, :'err_msg', NOW());
EOF
}
# Check PostgreSQL
# Probes localhost:5432 with pg_isready (user accusys) and records the result
# through record_service.
# Outputs: one colored status line on stdout
# Returns: 0 when pg_isready succeeds, 1 otherwise
check_postgresql() {
  local start end ms
  start=$(date +%s%N)
  if pg_isready -h localhost -p 5432 -U accusys > /dev/null 2>&1; then
    end=$(date +%s%N)
    # %N (nanoseconds) is not supported by every date(1); where it is missing
    # the output carries a non-digit tail (e.g. "1700000000N") that would make
    # the arithmetic fail. Fall back to whole-second resolution in that case.
    if [[ "$start$end" =~ ^[0-9]+$ ]]; then
      ms=$(( (end - start) / 1000000 ))
    else
      start=${start%%[!0-9]*}
      end=${end%%[!0-9]*}
      ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
    fi
    echo -e "${GREEN}${NC} PostgreSQL (5432) - ${ms}ms"
    record_service "postgresql" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} PostgreSQL (5432) - Down"
    record_service "postgresql" "down" "0" "Connection failed"
    return 1
  fi
}
# Check Redis
# Sends PING through redis-cli (default host/port) and expects "PONG" in the
# reply; records the result through record_service.
# Outputs: one colored status line on stdout
# Returns: 0 when PONG is received, 1 otherwise
check_redis() {
  local start end ms
  start=$(date +%s%N)
  # The password is passed via REDISCLI_AUTH instead of "-a": command-line
  # arguments are visible to other local users through ps.
  # NOTE(review): the password is still hard-coded here; consider reading it
  # from the environment or a protected file.
  if REDISCLI_AUTH=accusys redis-cli ping 2>/dev/null | grep -q "PONG"; then
    end=$(date +%s%N)
    # %N (nanoseconds) is not supported by every date(1); where it is missing
    # the output carries a non-digit tail (e.g. "1700000000N") that would make
    # the arithmetic fail. Fall back to whole-second resolution in that case.
    if [[ "$start$end" =~ ^[0-9]+$ ]]; then
      ms=$(( (end - start) / 1000000 ))
    else
      start=${start%%[!0-9]*}
      end=${end%%[!0-9]*}
      ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
    fi
    echo -e "${GREEN}${NC} Redis (6379) - ${ms}ms"
    record_service "redis" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} Redis (6379) - Down"
    record_service "redis" "down" "0" "Connection failed"
    return 1
  fi
}
# Check MariaDB
# Runs "SELECT 1" through the mysql client as user accusys (client default
# connection settings) and records the result through record_service.
# Outputs: one colored status line on stdout
# Returns: 0 when the query succeeds, 1 otherwise
check_mariadb() {
  local start end ms
  start=$(date +%s%N)
  if mysql -u accusys -e "SELECT 1" > /dev/null 2>&1; then
    end=$(date +%s%N)
    # %N (nanoseconds) is not supported by every date(1); where it is missing
    # the output carries a non-digit tail (e.g. "1700000000N") that would make
    # the arithmetic fail. Fall back to whole-second resolution in that case.
    if [[ "$start$end" =~ ^[0-9]+$ ]]; then
      ms=$(( (end - start) / 1000000 ))
    else
      start=${start%%[!0-9]*}
      end=${end%%[!0-9]*}
      ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
    fi
    echo -e "${GREEN}${NC} MariaDB (3306) - ${ms}ms"
    record_service "mariadb" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} MariaDB (3306) - Down"
    record_service "mariadb" "down" "0" "Connection failed"
    return 1
  fi
}
# Check n8n
# Requests http://localhost:8085/ with curl (5 second limit) and records the
# result through record_service. HTTP 200 and 302 both count as healthy.
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_n8n() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8085/ --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  case "$http_code" in
    200 | 302)
      echo -e "${GREEN}${NC} n8n (8085) - ${ms}ms"
      record_service "n8n" "up" "$ms" ""
      return 0
      ;;
    *)
      echo -e "${RED}${NC} n8n (8085) - HTTP $http_code"
      record_service "n8n" "down" "0" "HTTP $http_code"
      return 1
      ;;
  esac
}
# Check Caddy
# Requests http://localhost:2019/config/ with curl (5 second limit) and
# records the result through record_service. Only HTTP 200 counts as healthy.
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_caddy() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:2019/config/ --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  if [[ "$http_code" == "200" ]]; then
    echo -e "${GREEN}${NC} Caddy (2019) - ${ms}ms"
    record_service "caddy" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} Caddy (2019) - HTTP $http_code"
    record_service "caddy" "down" "0" "HTTP $http_code"
    return 1
  fi
}
# Check Gitea
# Requests http://localhost:3000/ with curl (5 second limit) and records the
# result through record_service. Only HTTP 200 counts as healthy.
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_gitea() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:3000/ --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  if [[ "$http_code" == "200" ]]; then
    echo -e "${GREEN}${NC} Gitea (3000) - ${ms}ms"
    record_service "gitea" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} Gitea (3000) - HTTP $http_code"
    record_service "gitea" "down" "0" "HTTP $http_code"
    return 1
  fi
}
# Check SFTPGo
# Requests http://localhost:8080 with curl (5 second limit) and records the
# result through record_service. HTTP 200, 301 and 302 count as healthy.
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_sftpgo() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080 --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  case "$http_code" in
    200 | 301 | 302)
      echo -e "${GREEN}${NC} SFTPGo (8080) - ${ms}ms"
      record_service "sftpgo" "up" "$ms" ""
      return 0
      ;;
    *)
      echo -e "${RED}${NC} SFTPGo (8080) - HTTP $http_code"
      record_service "sftpgo" "down" "0" "HTTP $http_code"
      return 1
      ;;
  esac
}
# Check Ollama
# Requests http://localhost:11434/api/tags with curl (5 second limit) and
# records the result through record_service. Only HTTP 200 counts as healthy.
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_ollama() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:11434/api/tags --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  if [[ "$http_code" == "200" ]]; then
    echo -e "${GREEN}${NC} Ollama (11434) - ${ms}ms"
    record_service "ollama" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} Ollama (11434) - HTTP $http_code"
    record_service "ollama" "down" "0" "HTTP $http_code"
    return 1
  fi
}
# Check Qdrant
# Requests http://localhost:6333/collections with curl (5 second limit) and
# records the result through record_service. HTTP 200 and 401 both count as
# healthy (presumably because a 401 still shows the service is answering,
# just with authentication enabled; confirm).
# Outputs: one colored status line on stdout
# Returns: 0 when healthy, 1 otherwise
check_qdrant() {
  local start end ms http_code
  start=$(date +%s%N)
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:6333/collections --max-time 5)
  end=$(date +%s%N)
  # %N (nanoseconds) is not supported by every date(1); where it is missing
  # the output carries a non-digit tail (e.g. "1700000000N") that would make
  # the arithmetic fail. Fall back to whole-second resolution in that case.
  if [[ "$start$end" =~ ^[0-9]+$ ]]; then
    ms=$(( (end - start) / 1000000 ))
  else
    start=${start%%[!0-9]*}
    end=${end%%[!0-9]*}
    ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
  fi
  case "$http_code" in
    200 | 401)
      echo -e "${GREEN}${NC} Qdrant (6333) - ${ms}ms"
      record_service "qdrant" "up" "$ms" ""
      return 0
      ;;
    *)
      echo -e "${RED}${NC} Qdrant (6333) - HTTP $http_code"
      record_service "qdrant" "down" "0" "HTTP $http_code"
      return 1
      ;;
  esac
}
# Check MongoDB
# Runs db.adminCommand('ping') through mongosh (default connection settings)
# and records the result through record_service.
# Outputs: one colored status line on stdout
# Returns: 0 when mongosh exits successfully, 1 otherwise
check_mongodb() {
  local start end ms
  start=$(date +%s%N)
  if mongosh --quiet --eval "db.adminCommand('ping')" > /dev/null 2>&1; then
    end=$(date +%s%N)
    # %N (nanoseconds) is not supported by every date(1); where it is missing
    # the output carries a non-digit tail (e.g. "1700000000N") that would make
    # the arithmetic fail. Fall back to whole-second resolution in that case.
    if [[ "$start$end" =~ ^[0-9]+$ ]]; then
      ms=$(( (end - start) / 1000000 ))
    else
      start=${start%%[!0-9]*}
      end=${end%%[!0-9]*}
      ms=$(( (${end:-0} - ${start:-0}) * 1000 ))
    fi
    echo -e "${GREEN}${NC} MongoDB (27017) - ${ms}ms"
    record_service "mongodb" "up" "$ms" ""
    return 0
  else
    echo -e "${RED}${NC} MongoDB (27017) - Down"
    record_service "mongodb" "down" "0" "Connection failed"
    return 1
  fi
}
# Check PHP-FPM
# Looks for any process whose command line matches "php-fpm" (pgrep -f) and
# records the result through record_service. No latency is measured; a fixed
# response time of 1 is recorded when the process exists.
# Outputs: one colored status line on stdout
# Returns: 0 when a matching process exists, 1 otherwise
check_php() {
  if ! pgrep -f "php-fpm" > /dev/null 2>&1; then
    echo -e "${RED}${NC} PHP-FPM - Not running"
    record_service "php" "down" "0" "Process not found"
    return 1
  fi

  echo -e "${GREEN}${NC} PHP-FPM - Running"
  record_service "php" "up" "1" ""
  return 0
}
# Check RustDesk
# Probes two local TCP ports with `nc -z`: 21116 (tracked as hbbs) and
# 21117 (tracked as hbbr), then records the result through record_service.
#   both reachable   -> "up"
#   exactly one      -> "degraded" (partial)
#   neither          -> "down"; this case used to be reported as "Partial",
#                       which hid a complete outage behind a warning
# Outputs: one colored status line on stdout
# Returns: 0 only when both ports are reachable, 1 otherwise
check_rustdesk() {
  # Kept as the strings "true"/"false" because they appear verbatim in the
  # status line and in the recorded error message.
  local hbbs_ok=false
  local hbbr_ok=false
  if nc -z localhost 21116 > /dev/null 2>&1; then
    hbbs_ok=true
  fi
  if nc -z localhost 21117 > /dev/null 2>&1; then
    hbbr_ok=true
  fi

  if [[ "$hbbs_ok" == true && "$hbbr_ok" == true ]]; then
    echo -e "${GREEN}${NC} RustDesk (21116/21117) - Running"
    record_service "rustdesk" "up" "1" ""
    return 0
  elif [[ "$hbbs_ok" == false && "$hbbr_ok" == false ]]; then
    echo -e "${RED}${NC} RustDesk (21116/21117) - Down"
    record_service "rustdesk" "down" "0" "hbbs:$hbbs_ok hbbr:$hbbr_ok"
    return 1
  else
    echo -e "${YELLOW}${NC} RustDesk - Partial (hbbs: $hbbs_ok, hbbr: $hbbr_ok)"
    record_service "rustdesk" "degraded" "0" "hbbs:$hbbs_ok hbbr:$hbbr_ok"
    return 1
  fi
}
# Check Node.js version
# For every process matching "n8n" (pgrep -f), looks up the node binary among
# the process's lsof "txt" entries, runs it with --version and compares the
# major version with the locked one (22). Processes for which no node binary
# can be resolved are skipped and do not count as issues.
# Outputs: one colored status line on stdout
# Returns: 0 when no mismatch was found, 1 when n8n is not running or at
#          least one mismatch was found
check_node() {
  local LOCKED_NODE_VERSION="22"
  local version_issues=0
  local node_pids node_path node_major

  node_pids=$(pgrep -f "n8n" 2>/dev/null)
  if [ -z "$node_pids" ]; then
    echo -e "${YELLOW}${NC} Node.js - n8n not running"
    record_service "node" "degraded" "1" "n8n not running"
    return 1
  fi

  for pid in $node_pids; do
    # Last column of the first "txt" line mentioning node; shared libraries
    # (dylib) are filtered out.
    node_path=$(lsof -p "$pid" 2>/dev/null \
      | grep "txt" \
      | grep "node" \
      | head -1 \
      | awk '{print $NF}' \
      | grep -v "dylib")
    if [[ -z "$node_path" || ! -f "$node_path" ]]; then
      continue
    fi
    # "v22.3.0" -> "22"
    node_major=$("$node_path" --version 2>/dev/null | sed 's/v//' | cut -d. -f1)
    if [ "$node_major" != "$LOCKED_NODE_VERSION" ]; then
      version_issues=$((version_issues + 1))
    fi
  done

  if [ "$version_issues" -le 0 ]; then
    echo -e "${GREEN}${NC} Node.js (${LOCKED_NODE_VERSION}.x) - Running"
    record_service "node" "up" "1" ""
    return 0
  fi
  echo -e "${RED}${NC} Node.js - Version issues detected"
  record_service "node" "degraded" "1" "$version_issues version issues"
  return 1
}
# Check Python version
# Reads the first line of each known processing script and counts those whose
# shebang does not mention "python3.11". Scripts that do not exist are
# skipped. The result is recorded through record_service.
# Outputs: one colored status line on stdout
# Returns: 0 when no script has a mismatching shebang, 1 otherwise
check_python() {
  local LOCKED_PYTHON_VERSION="3.11.14"
  local script_issues=0
  local first_line

  for script in \
    "/Users/accusys/momentry_core_0.1/scripts/asr_processor.py" \
    "/Users/accusys/momentry_core_0.1/scripts/thumbnail_extractor.py"; do
    [ -f "$script" ] || continue
    first_line=$(head -1 "$script")
    case "$first_line" in
      *"python3.11"*) ;;
      *) script_issues=$((script_issues + 1)) ;;
    esac
  done

  if [ "$script_issues" -le 0 ]; then
    echo -e "${GREEN}${NC} Python (${LOCKED_PYTHON_VERSION}) - Configured"
    record_service "python" "up" "1" ""
    return 0
  fi
  echo -e "${RED}${NC} Python - Script version issues"
  record_service "python" "degraded" "1" "$script_issues script issues"
  return 1
}
# Main program
# Runs every check in a fixed order, counts how many returned success, prints
# a summary and writes it to the log via log().
echo "========================================"
echo "Layer 2: Service Health Check"
echo "Time: $(date)"
echo "========================================"
echo ""

total=0
passed=0
for check in \
  check_postgresql \
  check_redis \
  check_mariadb \
  check_n8n \
  check_caddy \
  check_gitea \
  check_sftpgo \
  check_ollama \
  check_qdrant \
  check_mongodb \
  check_php \
  check_rustdesk \
  check_node \
  check_python; do
  total=$((total + 1))
  # A check counts as passed only when it returns 0.
  if "$check"; then
    passed=$((passed + 1))
  fi
done

echo ""
echo "========================================"
echo "Result: $passed / $total services healthy"
echo "========================================"
log "Service check completed: $passed/$total healthy"