Files
momentry_core/final_shutdown_tool.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

417 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/opt/homebrew/bin/python3.11
"""
最终关机工具 - Final Shutdown Tool
解决所有关机问题认证、超时、进程树、sudo权限
"""
import os
import sys
import time
import signal
import subprocess
import psutil
from datetime import datetime
def run_command_with_auth(cmd, timeout=30, use_sudo=False, password=None):
    """Run a shell command, optionally through sudo, and capture its output.

    Args:
        cmd: Command line to execute; it is interpreted by the shell.
        timeout: Seconds to wait before giving up on the command.
        use_sudo: When true, the command is prefixed with ``sudo``.
        password: sudo password. When given together with ``use_sudo`` it is
            fed to ``sudo -S`` on standard input.

    Returns:
        A ``(success, stdout, stderr)`` tuple. ``success`` is true only when
        the command exited with status 0. On timeout the timeout message is
        returned in both text slots (callers in this file only display
        ``stderr``); on any other failure ``stderr`` holds the exception text.
    """
    stdin_data = None
    if use_sudo and password:
        # Feed the password over stdin instead of interpolating it into an
        # `echo "..." | sudo -S` pipeline: that keeps it out of the shell's
        # argument list and stops quotes, `$` or backticks in the password
        # from being interpreted by the shell.
        shell_cmd = f"sudo -S {cmd}"
        stdin_data = f"{password}\n"
    elif use_sudo:
        # No password supplied: plain sudo, which may need a terminal.
        shell_cmd = f"sudo {cmd}"
    else:
        shell_cmd = cmd

    try:
        result = subprocess.run(
            shell_cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
            input=stdin_data,
        )
    except subprocess.TimeoutExpired:
        message = f"超时 ({timeout}s)"
        return False, message, message
    except Exception as e:
        return False, "", str(e)
    return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
def find_processes_by_keywords(keywords):
    """Find running processes whose name or command line contains a keyword.

    Matching is a case-insensitive substring test. Processes owned by
    ``root`` are skipped unless their command line mentions ``caddy``. The
    process running this tool is never returned, so a caller that kills the
    matches cannot terminate the tool itself.

    Args:
        keywords: Iterable of substrings to look for.

    Returns:
        A list of ``psutil.Process`` objects whose ``info`` dict carries the
        ``pid``, ``name``, ``cmdline`` and ``username`` fields.
    """
    # Lower-case the keywords once instead of once per process.
    needles = [keyword.lower() for keyword in keywords]
    own_pid = os.getpid()
    matches = []
    for proc in psutil.process_iter(["pid", "name", "cmdline", "username"]):
        try:
            if proc.pid == own_pid:
                continue
            info = proc.info
            cmdline = " ".join(info["cmdline"] or []).lower()
            name = (info["name"] or "").lower()
            username = info["username"] or ""
            # Skip root-owned (system) processes, except for caddy.
            if username == "root" and "caddy" not in cmdline:
                continue
            if any(needle in cmdline or needle in name for needle in needles):
                matches.append(proc)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return matches
def stop_process_tree_completely(pid, timeout=15):
    """Terminate a process and all of its descendants, escalating to SIGKILL.

    Every process in the tree is sent SIGTERM first. Processes that have not
    exited within ``timeout`` seconds are then sent SIGKILL and given a short
    grace period to disappear.

    Args:
        pid: PID of the root of the process tree.
        timeout: Maximum number of seconds to wait after SIGTERM before
            escalating to SIGKILL. The wait ends early once every process
            has exited.

    Returns:
        True when no process of the tree is left running (or the root process
        did not exist in the first place), False otherwise.
    """
    kill_grace = 2  # seconds allowed for SIGKILL to take effect
    try:
        parent = psutil.Process(pid)
        # Snapshot the whole tree before signalling anything.
        all_processes = [parent] + parent.children(recursive=True)
        print(f" 停止进程树: PID {pid} (共 {len(all_processes)} 个进程)")

        # 1. Ask every process to exit (best effort: a process may already be
        #    gone or may not be ours to signal).
        for proc in all_processes:
            try:
                proc.terminate()
            except psutil.Error:
                pass

        # 2. Wait for them to exit, up to `timeout` seconds.
        _, still_running = psutil.wait_procs(all_processes, timeout=timeout)

        # 3. Force-kill whatever ignored SIGTERM.
        if still_running:
            print(f" {len(still_running)} 个进程仍在运行,发送 SIGKILL...")
            for proc in still_running:
                try:
                    proc.kill()
                except psutil.Error:
                    pass
            _, still_running = psutil.wait_procs(still_running, timeout=kill_grace)

        return not still_running
    except psutil.NoSuchProcess:
        # The root process is already gone: nothing left to stop.
        return True
    except Exception as e:
        print(f" 停止进程树失败: {e}")
        return False
def stop_service_comprehensive(
    service_name, keywords, stop_commands=None, sudo_commands=None, password="accusys"
):
    """Stop one service using its stop commands, then by killing its processes.

    The steps are: run ``stop_commands`` (through sudo when the command
    mentions ``pg_ctl`` or ``mongod``), run ``sudo_commands`` through sudo,
    wait, stop the process tree of every process still matching ``keywords``,
    and finally check whether anything matching is left.

    Args:
        service_name: Display name used in the progress output.
        keywords: Substrings identifying the service's processes.
        stop_commands: Optional shell commands that ask the service to stop.
        sudo_commands: Optional shell commands that are always run via sudo.
        password: sudo password handed to ``run_command_with_auth``.

    Returns:
        True when no matching process remains afterwards, False otherwise.
    """
    # NOTE(review): the default value of `password` is a hard-coded
    # credential; it is kept only so existing callers keep working.
    print(f"\n停止 {service_name}...")

    # 1. Report how many matching processes exist up front.
    processes = find_processes_by_keywords(keywords)
    print(f" 找到 {len(processes)} 个进程")

    # 2. Run the service's own stop commands, if any.
    for cmd in stop_commands or ():
        print(f" 执行命令: {cmd}")
        use_sudo = "pg_ctl" in cmd or "mongod" in cmd
        success, _stdout, stderr = run_command_with_auth(
            cmd,
            timeout=20,
            use_sudo=use_sudo,
            password=password if use_sudo else None,
        )
        if not success:
            print(" 命令失败")
            if stderr:
                print(f" 错误: {stderr[:100]}")

    # 3. Run the commands that always need sudo, if any.
    for cmd in sudo_commands or ():
        print(f" 执行 sudo 命令: {cmd}")
        success, _stdout, stderr = run_command_with_auth(
            cmd, timeout=15, use_sudo=True, password=password
        )
        if not success:
            print(f" sudo 命令失败: {stderr[:100] if stderr else '未知错误'}")

    # 4. Give the commands time to take effect.
    time.sleep(5)

    # 5. Stop the process tree of everything that is still matching.
    processes = find_processes_by_keywords(keywords)
    if processes:
        print(f" 仍有 {len(processes)} 个进程在运行,停止进程树...")
        for proc in processes:
            stop_process_tree_completely(proc.pid, timeout=10)

    # 6. Final check.
    time.sleep(3)
    remaining = find_processes_by_keywords(keywords)
    if not remaining:
        print(f"{service_name} 已停止")
        return True

    print(f"{service_name} 仍在运行 ({len(remaining)} 个进程)")
    # Show at most the first five survivors.
    for proc in remaining[:5]:
        try:
            cmdline = (
                " ".join(proc.info["cmdline"])
                if proc.info["cmdline"]
                else proc.info["name"]
            )
            print(f" PID {proc.pid}: {cmdline[:80]}...")
        except Exception:
            # Process details may be missing; still list the PID.
            print(f" PID {proc.pid}: (无法获取信息)")
    if len(remaining) > 5:
        print(f" ... 还有 {len(remaining) - 5} 个进程")
    return False
def main():
    """Stop every configured service, print a summary and save a report.

    The sudo password is read from the ``SUDO_PASSWORD`` environment variable
    (falling back to a built-in default). Each service in the table below is
    stopped in order with ``stop_service_comprehensive``; the outcome is
    printed and also written to a timestamped text file under ``/tmp``.

    Returns:
        True when every service was reported as stopped, False otherwise.
    """
    banner = "=" * 70
    print(banner)
    print("最终关机工具 - 解决所有关机问题")
    print(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)

    # Password: from the environment, or the default value.
    password = os.getenv("SUDO_PASSWORD", "accusys")

    # Service definitions, processed in this order.
    services = [
        {
            "name": "Redis",
            "keywords": ["redis-server"],
            "stop_commands": ["redis-cli -a accusys shutdown"],
            "sudo_commands": None,
        },
        {
            "name": "PostgreSQL",
            "keywords": ["postgres"],
            "stop_commands": [
                "pg_ctl -D /Users/accusys/momentry/var/postgresql stop -m fast -t 60"
            ],
            "sudo_commands": None,
        },
        {
            "name": "AI 处理器",
            "keywords": [
                "asr_processor",
                "ocr_processor",
                "yolo_processor",
                "face_processor",
                "pose_processor",
                "cut_processor",
            ],
            "stop_commands": None,
            "sudo_commands": None,
        },
        {
            "name": "Momentry 服务",
            "keywords": ["momentry server", "momentry worker", "momentry_playground"],
            "stop_commands": None,
            "sudo_commands": None,
        },
        {
            "name": "MCP 服务器",
            "keywords": [
                "mcp-server-redis",
                "mcp-server-postgres",
                "mcp-server-filesystem",
                "mcp-server-qdrant",
                "mongodb-mcp-server",
                "gitea-mcp-server",
            ],
            "stop_commands": None,
            "sudo_commands": None,
        },
        {
            "name": "应用服务",
            "keywords": ["php-fpm", "n8n", "ollama", "gitea web", "sftpgo serve"],
            "stop_commands": None,
            "sudo_commands": None,
        },
        {
            "name": "Caddy",
            "keywords": ["caddy"],
            "stop_commands": None,
            "sudo_commands": ["pkill -TERM caddy", "pkill -KILL caddy"],
        },
        {
            "name": "MongoDB",
            "keywords": ["mongod"],
            "stop_commands": None,
            "sudo_commands": [
                "mongod --dbpath /opt/homebrew/var/mongodb --shutdown",
                "pkill -TERM mongod",
                "pkill -KILL mongod",
            ],
        },
        {
            "name": "MariaDB",
            "keywords": ["mariadbd"],
            "stop_commands": ["mysqladmin -u root -paccusys shutdown"],
            "sudo_commands": None,
        },
        {
            "name": "Qdrant",
            "keywords": ["qdrant"],
            "stop_commands": None,
            "sudo_commands": None,
        },
    ]

    # Stop every service and remember each outcome.
    results = []
    for service in services:
        success = stop_service_comprehensive(
            service["name"],
            service["keywords"],
            service.get("stop_commands"),
            service.get("sudo_commands"),
            password,
        )
        results.append((service["name"], success))

    # Console summary.
    print("\n" + banner)
    print("关机完成报告")
    print(banner)
    for service_name, success in results:
        if success:
            print(f"{service_name}: 已停止")
        else:
            print(f"{service_name}: 仍在运行")
    stopped_count = sum(1 for _, success in results if success)
    all_stopped = stopped_count == len(results)
    print(f"\n停止进度: {stopped_count}/{len(services)} 个服务已停止")

    # Scan for leftovers once so the console listing and the saved report
    # describe the same set of processes.
    remaining = []
    if not all_stopped:
        all_keywords = []
        for service in services:
            all_keywords.extend(service["keywords"])
        remaining = find_processes_by_keywords(all_keywords)

        print("\n⚠️ 仍在运行的进程:")
        print("-" * 50)
        for proc in remaining:
            try:
                cmdline = (
                    " ".join(proc.info["cmdline"])
                    if proc.info["cmdline"]
                    else proc.info["name"]
                )
                # The key can be present with a None value.
                username = proc.info.get("username") or "unknown"
                print(f" PID {proc.pid} ({username}): {cmdline[:80]}...")
            except Exception:
                print(f" PID {proc.pid}: (无法获取信息)")

    # Final advice.
    print("\n" + banner)
    if all_stopped:
        print("🎉 所有服务已成功停止!")
        print("系统可以安全关机。")
        print("\n建议关机命令:")
        print(" sudo shutdown -h now # 立即关机")
        print(" sudo reboot # 重启")
    else:
        print("⚠️ 部分服务仍在运行。")
        print("\n下一步建议:")
        print("1. 手动检查并停止剩余进程")
        print("2. 使用以下命令强制关机:")
        print(" sudo shutdown -h now")
        print("3. 系统会在关机时自动处理剩余进程")
        print("\n注意: 强制关机可能会导致数据丢失,建议先保存重要工作。")

    # Save the detailed report. The text is non-ASCII, so the encoding is
    # pinned instead of depending on the process locale.
    report_file = f"/tmp/final_shutdown_report_{int(time.time())}.txt"
    with open(report_file, "w", encoding="utf-8") as f:
        f.write("最终关机工具报告\n")
        f.write(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("=" * 50 + "\n")
        f.write(f"结果: {'完全成功' if all_stopped else '部分成功'}\n")
        f.write(f"停止进度: {stopped_count}/{len(services)} 个服务\n\n")
        f.write("服务状态:\n")
        for service_name, success in results:
            f.write(f" {service_name}: {'✅ 已停止' if success else '❌ 仍在运行'}\n")
        if not all_stopped:
            f.write("\n仍在运行的进程:\n")
            for proc in remaining:
                try:
                    cmdline = (
                        " ".join(proc.info["cmdline"])
                        if proc.info["cmdline"]
                        else proc.info["name"]
                    )
                    f.write(f" PID {proc.pid}: {cmdline}\n")
                except Exception:
                    f.write(f" PID {proc.pid}: (无法获取信息)\n")

    print(f"\n详细报告保存到: {report_file}")
    print(banner)
    return all_stopped
if __name__ == "__main__":
    # Script entry point. Exit status: 0 when everything stopped, 1 when
    # something is still running or an error occurred, 130 on Ctrl-C.
    try:
        exit_code = 0 if main() else 1
    except KeyboardInterrupt:
        print("\n\n操作被用户中断")
        exit_code = 130
    except Exception as exc:
        import traceback

        print(f"\n错误: {exc}")
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)