diff --git a/.env.development b/.env.development index c8cf21c..861e3f8 100644 --- a/.env.development +++ b/.env.development @@ -41,8 +41,8 @@ MOMENTRY_PYTHON_PATH=/Users/accusys/momentry_core/venv/bin/python MOMENTRY_SCRIPTS_DIR=/Users/accusys/momentry_core/scripts # Logging -RUST_LOG=debug -MOMENTRY_LOG_LEVEL=debug +RUST_LOG=info +MOMENTRY_LOG_LEVEL=info # Media MOMENTRY_MEDIA_BASE_URL=https://wp.momentry.ddns.net @@ -77,5 +77,27 @@ MOMENTRY_LLM_SUMMARY_URL=http://127.0.0.1:8082/v1/chat/completions MOMENTRY_LLM_SUMMARY_MODEL=google_gemma-4-26B-A4B-it-Q5_K_M.gguf MOMENTRY_LLM_SUMMARY_ENABLED=true +# LLM Chat (A4B on port 8082) +MOMENTRY_LLM_CHAT_URL=http://127.0.0.1:8082/v1/chat/completions +MOMENTRY_LLM_CHAT_MODEL=google_gemma-4-26B-A4B-it-Q5_K_M.gguf + +# LLM Vision (E4B on port 8083) +MOMENTRY_LLM_VISION_URL=http://127.0.0.1:8083/v1/chat/completions +MOMENTRY_LLM_VISION_MODEL=gemma-4-E4B-it-Q4_K_M.gguf + # Embedding (ANE CoreML server) MOMENTRY_EMBED_URL=http://localhost:11436 + +# === Binary & Data Paths (for start_momentry.sh) === +MOMENTRY_LOG_DIR=/Users/accusys/momentry/logs +MOMENTRY_PG_BIN_DIR=/Users/accusys/pgsql/18.3/bin +MOMENTRY_PG_DATA_DIR=/Users/accusys/pgsql/data +MOMENTRY_QDRANT_BIN=/Users/accusys/.cargo/bin/qdrant +MOMENTRY_QDRANT_STORAGE_DIR=/Users/accusys/momentry/qdrant_storage +MOMENTRY_LLAMACPP_BIN=/Users/accusys/llama/bin/llama-server +MOMENTRY_LLM_A4B_MODEL_PATH=/Users/accusys/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf +MOMENTRY_LLM_A4B_MMPROJ_PATH=/Users/accusys/models/gemma-4-26B-A4B-it.mmproj-f16.gguf +MOMENTRY_LLM_E4B_MODEL_PATH=/Users/accusys/models/gemma-4-E4B-it-Q4_K_M.gguf +MOMENTRY_LLM_E4B_MMPROJ_PATH=/Users/accusys/models/mmproj-gemma-4-E4B-it-BF16.gguf +MOMENTRY_OLLAMA_BIN=/Users/accusys/bin/ollama +MOMENTRY_PLAYGROUND_BIN=target/debug/momentry_playground diff --git a/.env.example b/.env.example index 1cfbbdb..24d2e75 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,16 @@ 
MOMENTRY_LLM_SUMMARY_URL=http://127.0.0.1:8082/v1/chat/completions MOMENTRY_LLM_SUMMARY_MODEL=google_gemma-4-26B-A4B-it-Q5_K_M.gguf MOMENTRY_LLM_SUMMARY_TIMEOUT=120 +# LLM Chat (A4B) +MOMENTRY_LLM_CHAT_URL=http://127.0.0.1:8082/v1/chat/completions +MOMENTRY_LLM_CHAT_MODEL=google_gemma-4-26B-A4B-it-Q5_K_M.gguf +MOMENTRY_LLM_CHAT_TIMEOUT=120 + +# LLM Vision (E4B) +MOMENTRY_LLM_VISION_URL=http://127.0.0.1:8083/v1/chat/completions +MOMENTRY_LLM_VISION_MODEL=gemma-4-E4B-it-Q4_K_M.gguf +MOMENTRY_LLM_VISION_TIMEOUT=120 + # === Paths === MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev MOMENTRY_BACKUP_DIR=/Users/accusys/momentry/backup diff --git a/.gitignore b/.gitignore index b5a894d..7753297 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,35 @@ __pycache__/ node_modules/ *.log /tmp/ -*.log +*.diff +*.bundle +*.probe.json +*.cut.json +.qdrant-initialized +dump.rdb +fix55.js +checksums.sha256 scripts/swift_processors/.build/ +.opencode/ +.vscode/ +backups/ +logs/ +output/ +models/ +data/ +storage/ +thumbnails/ +services/ +model_checkpoints/ +release/delivery/ +release/system/ +release/phase*/ +release/dev_*.sql +release/migrate_*.sql +release/files/ +package-lock.json +package.json +portal/dist/ +portal/src-tauri/icons/ +momentry_runtime/logs/ diff --git a/AGENTS.md b/AGENTS.md index ebdf62a..dde43ed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,7 @@ Rust-based digital asset management system with video analysis and RAG capabilit - **🔴 DELETE / REMOVE / DROP / CLEAR 任何資料前必須先問使用者「要刪嗎?」獲得明確同意後才能執行** - **🔴 Qdrant collection 刪除、DB truncate、檔案刪除、資料清空 — 一律要先問** - **🔴 不確定是否該刪 → 先問,不要自己決定** +- **🔴 改變議題前必須先存檔紀錄**:使用 `todowrite` 工具或建立紀錄文件(如 `docs_v1.0/M4_workspace/YYYY-MM-DD_topic_handoff.md`),確保上下文不丟失 ### 開發範圍界定 | 範圍 | 狀態 | 說明 | diff --git a/Cargo.toml b/Cargo.toml index c5fb540..b712fa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,6 +134,14 @@ path = "src/bin/integrated_player.rs" name = "release" path = "src/bin/release.rs" +[[bin]] +name = 
"vectorize_missing" +path = "src/bin/vectorize_missing.rs" + +[[bin]] +name = "sync_qdrant_from_pg" +path = "src/bin/sync_qdrant_from_pg.rs" + [[bin]] name = "service" path = "src/bin/service.rs" diff --git a/config/README.md b/config/README.md index 4306b33..f55b39f 100644 --- a/config/README.md +++ b/config/README.md @@ -1,105 +1,178 @@ -# Momentry Core 配置管理 +# Momentry Core Config Management -## 目錄結構 +## Directory Structure ``` momentry_core_0.1/ -├── .env.example # 配置模板(已納入版本控制) -├── .env # 本地配置(已從版本控制排除) -├── .env.local # 本地覆蓋配置(已從版本控制排除) +├── .env.example # Template (version controlled) +├── .env # Local config (gitignored) +├── .env.development # Playground dev overrides (gitignored) +├── .env.local # Local overrides (gitignored) ├── config/ -│ └── README.md # 本文件 -└── src/core/config.rs # 配置代碼 +│ ├── README.md # This file +│ └── port_registry.tsv # Central port registry +└── src/core/config.rs # Config code with lazy_static env reading ``` -## 配置加載順序 +## Load Order -1. `.env` - 默認本地配置 -2. `.env.local` - 本地覆蓋(最高優先級) +For `momentry_playground` (development): +1. `.env` — shared defaults +2. `.env.development` — dev-specific overrides (loaded by playground binary) -## 環境變數列表 +For `momentry` (production): +1. `.env` — production config -### 數據庫配置 +In Rust: `config.rs` reads env vars with lazy_static, falling back to hardcoded defaults. 
-| 變數 | 說明 | 默認值 | -|------|------|--------| -| `DATABASE_URL` | PostgreSQL 連接字串 | `postgres://accusys@localhost:5432/momentry` | +## Environment Variables -### Redis 配置 +### Server -| 變數 | 說明 | 默認值 | -|------|------|--------| -| `REDIS_URL` | Redis 連接字串 | `redis://:accusys@localhost:6379` | -| `REDIS_PASSWORD` | Redis 密碼 | `accusys` | +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_SERVER_PORT` | Server port (3002=prod, 3003=dev) | `3002` | +| `MOMENTRY_REDIS_PREFIX` | Redis key prefix | `momentry:` (prod), `momentry_dev:` (dev) | -### 存儲路徑 +### Database -| 變數 | 說明 | 默認值 | -|------|------|--------| -| `MOMENTRY_OUTPUT_DIR` | 輸出目錄 | `/Users/accusys/momentry/output` | -| `MOMENTRY_BACKUP_DIR` | 備份目錄 | `/Users/accusys/momentry/backup/momentry` | -| `MOMENTRY_SCRIPTS_DIR` | 腳本目錄 | `/Users/accusys/momentry_core_0.1/scripts` | -| `MOMENTRY_PYTHON_PATH` | Python 路徑 | `/opt/homebrew/bin/python3.11` | +| Variable | Description | Default | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection string | `postgres://accusys@localhost:5432/momentry` | +| `DATABASE_SCHEMA` | Schema for dev isolation | `dev` | +| `MONGODB_URL` | MongoDB connection string | `mongodb://localhost:27017` | +| `MONGODB_DATABASE` | MongoDB database name | `momentry` (prod), `momentry_dev` (dev) | +| `MONGODB_CACHE_ENABLED` | MongoDB cache toggle | `true` | +| `MONGODB_CACHE_TTL_VIDEOS` | Cache TTL for videos | `300` | +| `MONGODB_CACHE_TTL_SEARCH` | Cache TTL for search | `300` | +| `MONGODB_CACHE_TTL_HYBRID_SEARCH` | Cache TTL for hybrid search | `600` | +| `MONGODB_CACHE_TTL_VIDEO_META` | Cache TTL for video metadata | `3600` | -### 處理器超時(秒) +### Redis -| 變數 | 說明 | 默認值 | -|------|------|--------| -| `MOMENTRY_ASR_TIMEOUT` | ASR 處理超時 | `3600` | -| `MOMENTRY_CUT_TIMEOUT` | CUT 處理超時 | `3600` | -| `MOMENTRY_DEFAULT_TIMEOUT` | 默認超時 | `7200` | +| Variable | Description | Default | +|----------|-------------|---------| +| `REDIS_URL` | Redis 
connection string | `redis://:accusys@localhost:6379` | +| `REDIS_PASSWORD` | Redis password | `accusys` | +| `REDIS_CACHE_TTL_HEALTH` | Health check cache TTL | `30` | +| `REDIS_CACHE_TTL_VIDEO_META` | Video metadata cache TTL | `3600` | -### 日誌 +### Qdrant -| 變數 | 說明 | 默認值 | -|------|------|--------| -| `RUST_LOG` | 日誌級別 | `info` | -| `MOMENTRY_LOG_LEVEL` | 日誌級別(備選) | `info` | +| Variable | Description | Default | +|----------|-------------|---------| +| `QDRANT_URL` | Qdrant server URL | `http://localhost:6333` | +| `QDRANT_API_KEY` | Qdrant API key | `Test3200Test3200Test3200` | +| `QDRANT_COLLECTION` | Collection name | `momentry_rule1` (prod), `momentry_dev_rule1_v2` (dev) | -## 使用方式 +### LLM -### 1. 首次設置 +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_LLM_CHAT_URL` | Chat/function-calling endpoint | `http://127.0.0.1:8082/v1/chat/completions` | +| `MOMENTRY_LLM_CHAT_MODEL` | Chat model name | `google_gemma-4-26B-A4B-it-Q5_K_M.gguf` | +| `MOMENTRY_LLM_VISION_URL` | Vision LLM endpoint (E4B) | falls back to CHAT_URL | +| `MOMENTRY_LLM_VISION_MODEL` | Vision model name (E4B) | falls back to CHAT_MODEL | +| `MOMENTRY_LLM_SUMMARY_URL` | Summary LLM endpoint (5W1H) | falls back to CHAT_URL | +| `MOMENTRY_LLM_SUMMARY_MODEL` | Summary model name | falls back to CHAT_MODEL | +| `MOMENTRY_LLM_SUMMARY_ENABLED` | Toggle 5W1H summary generation | `true` | +| `MOMENTRY_LLM_SUMMARY_TIMEOUT` | 5W1H timeout in seconds | `120` | +| `MOMENTRY_LLM_CHAT_TIMEOUT` | Chat LLM timeout in seconds | `120` | +| `MOMENTRY_LLM_VISION_TIMEOUT` | Vision LLM timeout in seconds | `120` | + +### Embedding + +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_EMBED_URL` | Embedding server URL | `http://localhost:11436` | + +### TMDb Integration + +| Variable | Description | Default | +|----------|-------------|---------| +| `TMDB_API_KEY` | TMDb API key (required for probe) | (none) | +| `MOMENTRY_TMDB_PROBE_ENABLED` 
| Enable TMDb probe during register | `false` | + +### Paths + +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_OUTPUT_DIR` | Output directory for processing | `/Users/accusys/momentry/output` | +| `MOMENTRY_BACKUP_DIR` | Backup directory | `/Users/accusys/momentry/backup/momentry` | +| `MOMENTRY_SCRIPTS_DIR` | Python scripts directory | `/Users/accusys/momentry_core_0.1/scripts` | +| `MOMENTRY_PYTHON_PATH` | Python interpreter path | `/opt/homebrew/bin/python3.11` | +| `MOMENTRY_MEDIA_BASE_URL` | Base URL for media serving | (none) | + +### Processor Timeouts + +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_ASR_TIMEOUT` | ASR timeout in seconds | `3600` | +| `MOMENTRY_CUT_TIMEOUT` | CUT timeout in seconds | `3600` | +| `MOMENTRY_DEFAULT_TIMEOUT` | Default timeout in seconds | `7200` | + +### Logging + +| Variable | Description | Default | +|----------|-------------|---------| +| `RUST_LOG` | Rust log level (tracing) | `info` | +| `MOMENTRY_LOG_LEVEL` | Fallback log level | `info` | + +### Worker + +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_WORKER_ENABLED` | Enable background worker | `true` | +| `MOMENTRY_MAX_CONCURRENT` | Max concurrent jobs | `6` | +| `MOMENTRY_POLL_INTERVAL` | Poll interval in seconds | `10` | +| `MOMENTRY_WORKER_BATCH_SIZE` | Batch size | `5` | + +### Synonym Expansion + +| Variable | Description | Default | +|----------|-------------|---------| +| `MOMENTRY_SYNONYM_FILES` | Comma-separated paths to synonym JSON files | (none) | +| `MOMENTRY_SYNONYM_FILE` | Single synonym file (deprecated) | (none) | + +### Encryption + +| Variable | Description | Default | +|----------|-------------|---------| +| `AUDIT_ENCRYPTION_KEY` | 32-byte hex encryption key (64 hex chars) | (none) | + +## Port Registry + +See `config/port_registry.tsv` for the authoritative list of all ports and their owners. 
+ +| Port | Service | Owner | Config Key | +|------|---------|-------|------------| +| 5432 | PostgreSQL | postgres | `DATABASE_URL` | +| 6379 | Redis | redis-server | `REDIS_URL` | +| 6333 | Qdrant | qdrant | `QDRANT_URL` | +| 8082 | LLM Chat (A4B) | llama-server | `MOMENTRY_LLM_CHAT_URL` | +| 8083 | LLM Vision (E4B) | llama-server | `MOMENTRY_LLM_VISION_URL` | +| 11434 | Ollama | ollama | `MOMENTRY_OLLAMA_URL` | +| 11436 | Embedding | embeddinggemma_server.py | `MOMENTRY_EMBED_URL` | +| 27017 | MongoDB | mongod | `MONGODB_URL` | +| 3002 | Production API | momentry | `MOMENTRY_SERVER_PORT` | +| 3003 | Playground API | momentry_playground | `MOMENTRY_SERVER_PORT` | + +## Quick Start ```bash -# 複製模板 +# 1. Copy template cp .env.example .env -# 編輯配置 -nano .env +# 2. Edit .env for production or use .env.development for playground +# 3. Start all services +./scripts/start_momentry.sh ``` -### 2. 本地覆蓋 +## Version Control -創建 `.env.local` 設置僅本地適用的配置: - -```bash -# .env.local 示例 -DATABASE_URL=postgres://local:password@localhost:5432/momentry_dev -MOMENTRY_LOG_LEVEL=debug -``` - -### 3. 
運行應用 - -```bash -# 加載配置並運行 -source .env && cargo run - -# 或使用 direnv -direnv allow -``` - -## 版本控制策略 - -| 文件 | 版本控制 | 說明 | -|------|---------|------| -| `.env.example` | ✅ 追蹤 | 模板,包含所有選項 | -| `.env` | ❌ 忽略 | 本地敏感配置 | -| `.env.local` | ❌ 忽略 | 本地覆蓋配置 | - -## 部署檢查清單 - -- [ ] 複製 `.env.example` 到 `.env` -- [ ] 設置數據庫連接 -- [ ] 設置 Redis 密碼 -- [ ] 配置目錄路徑 -- [ ] 確認日誌級別 +| File | Tracked | Purpose | +|------|---------|---------| +| `.env.example` | ✅ Yes | Template with all options documented | +| `.env` | ❌ No | Local sensitive config | +| `.env.development` | ❌ No | Dev-specific overrides | +| `.env.local` | ❌ No | Local overrides (highest priority) | diff --git a/config/port_registry.tsv b/config/port_registry.tsv index dc1b202..cba1061 100644 --- a/config/port_registry.tsv +++ b/config/port_registry.tsv @@ -16,7 +16,9 @@ 6379 redis redis-server REDIS_URL redis://...:6379 start_momentry.sh 6333 qdrant qdrant QDRANT_URL http://...:6333 start_momentry.sh 8081 wordpress Caddy - - Caddyfile -8082 llm llama-server MOMENTRY_LLM_CHAT_URL http://...:8082 start_momentry.sh +8082 llm-chat llama-server MOMENTRY_LLM_CHAT_URL http://...:8082 start_momentry.sh +8083 llm-vision llama-server MOMENTRY_LLM_VISION_URL http://...:8083 start_momentry.sh 9000 php-fpm php-fpm - 9000 brew services 11434 ollama ollama MOMENTRY_OLLAMA_URL http://...:11434 start_momentry.sh 11436 embedding embeddinggemma MOMENTRY_EMBED_URL http://...:11436 start_momentry.sh +27017 mongodb mongod MONGODB_URL mongodb://...:27017 start_momentry.sh diff --git a/deliverable_v1.1.0/AGENTS.md b/deliverable_v1.1.0/AGENTS.md new file mode 100644 index 0000000..179b760 --- /dev/null +++ b/deliverable_v1.1.0/AGENTS.md @@ -0,0 +1,761 @@ +# AGENTS.md - Momentry Core + +Rust-based digital asset management system with video analysis and RAG capabilities. 
+ +--- + +## ⚠️ CRITICAL: 開發隔離原則 + +### 絕對禁止事項 +- **絕對不可修改 `/Users/accusys/wordpress/` 目錄下的任何檔案** +- **絕對不可修改 n8n 工作流或設定** +- **絕對不可修改 WordPress 或 n8n 的資料庫 table** +- **除非是 release 作業,絕對不可動 port 3002 (production)** +- **🔴 DELETE / REMOVE / DROP / CLEAR 任何資料前必須先問使用者「要刪嗎?」獲得明確同意後才能執行** +- **🔴 Qdrant collection 刪除、DB truncate、檔案刪除、資料清空 — 一律要先問** +- **🔴 不確定是否該刪 → 先問,不要自己決定** + +### 開發範圍界定 +| 範圍 | 狀態 | 說明 | +|------|------|------| +| `momentry_core_0.1/` | ✅ **可開發** | Momentry Core 主要開發目錄 | +| `momentry_core_0.1/portal/` | ✅ **可開發** | Tauri Portal 前端 | +| `momentry_core_0.1/src/` | ✅ **可開發** | Rust 後端程式碼 | +| `/Users/accusys/wordpress/` | ❌ **禁止修改** | WordPress/Marcom 團隊負責 | +| n8n 工作流 | ❌ **禁止修改** | 自動化流程,與 dev 無關 | +| WordPress/n8n 資料庫 table | ❌ **禁止修改** | Marcom 團隊管理,與 dev 無關 | + +### 開發環境 +| 服務 | Port | 用途 | 命令 | +|------|------|------|------| +| Playground | 3003 | **唯一開發環境** | `cargo run --bin momentry_playground -- server` | +| Production | 3002 | ❌ 禁止修改 | `cargo run -- server` (僅 release 時) | +| Portal (Tauri) | 1420 | 前端開發 | `npm run tauri dev` | + +## ⚠️ 交叉污染防制 (Cross-Contamination Prevention) + +**每個執行前必須評估是否會汙染其他獨立作業。** + +### Scope Isolation Matrix + +| 執行內容 | 允許的 Scope | 禁止影響 | 檢查事項 | +|----------|-------------|----------|----------| +| M4 delivery binary | `target/release/momentry` | Playground (3003), Production (3002) | 確認舊 process 未被誤殺 | +| Playground server | `localhost:3003`, `dev.*` schema | Production (3002), `public.*` schema | `DATABASE_SCHEMA=dev` | +| Production deploy | `localhost:3002`, `public.*` schema | Playground (3003), `dev.*` schema | 先停 production,不影響 playground | +| Git commit | 只包含意圖修改的檔案 | 無關的 untracked files | `git status` 確認 stage 內容正確 | +| CI / packaged tests | 測試環境 | 正式資料 | 測試用 DB 不能連到 production | +| Doc changes | 指定文件 | 其他文件、程式碼 | `git diff --stat` 檢查 scope | +| SQL migration | 目標 schema | 其他 schema、無關 table | `WHERE` clause 要精準 | +| `sed` / `grep` / mass edit | 目標檔案集 | 非目標檔案 | 先用 `grep -c` 確認只有目標檔案匹配 | + +### Recent 
Violations / Near-Misses + +| 事件 | 問題 | 防止方式 | +|------|------|----------| +| `sed` API doc 編號 | `sed -i '' 's/.../.../g'` 改到所有行 | 先 `grep -c` 確認匹配,`git diff` 再提交 | +| 亂加 `/api/v1/register` route | 不必要的 API 別名,汙染路由表 | 角色切換:路由設計不該由實作方決定 | +| `API_WORKSPACE/` vs `GUIDES/` vs `REFERENCE/` vs `DESIGN/` vs `OPERATIONS/` vs `INTEGRATIONS/` | 文件放到錯誤分類 | API 文件改在 API_WORKSPACE/modules/ 編輯,`make deploy` 生成到 GUIDES/ | +| Build release binary in plan mode | 浪費時間,無意義 | 嚴格遵守 plan/build mode 規定 | + +### ⛔ 嚴格測試隔離規則 (Strict Test Isolation) +- **所有測試 (Test) 必須在 Dev (3003) 進行**。 +- **絕對禁止 (ABSOLUTELY FORBIDDEN)** 在任何測試指令、Demo 流程或 API 檢查中使用 `localhost:3002`。 +- 即使是「測試 Unregister」或「檢查版本」,若未明確標示為 "Production Deployment",一律視為違規。 +- **預設行為**: 所有 curl, CLI, 或程式碼測試指令,預設 URL 必須為 `http://localhost:3003`。 + +### 違反後果 +- 修改 WordPress/n8n 可能影響 marcom 團隊工作與生產環境 +- 修改 WordPress/n8n 資料庫 table 可能破壞自動化流程與資料完整性 +- 修改 port 3002 可能中斷正在使用的服務 (這是非常嚴重的錯誤) +- 所有 dev 測試必須在 playground (3003) 進行 + +--- + +## AI Coding Principles (Karpathy-Inspired) + +Behavioral guidelines to reduce common LLM coding mistakes. +Source: [andrej-karpathy-skills](https://github.com/forrestchang/andrej-karpathy-skills) (94K stars) + +**Tradeoff:** These guidelines bias toward caution over speed. For trivial tasks, use judgment. + +### 1. Think Before Coding + +**Don't assume. Don't hide confusion. Surface tradeoffs.** + +- State your assumptions explicitly. If uncertain, ask. +- If multiple interpretations exist, present them - don't pick silently. +- If a simpler approach exists, say so. Push back when warranted. +- If something is unclear, stop. Name what's confusing. Ask. + +### 2. Simplicity First + +**Minimum code that solves the problem. Nothing speculative.** + +- No features beyond what was asked. +- No abstractions for single-use code. +- No "flexibility" or "configurability" that wasn't requested. +- No error handling for impossible scenarios. +- If you write 200 lines and it could be 50, rewrite it. 
+ +Ask yourself: "Would a senior engineer say this is overcomplicated?" If yes, simplify. + +### 3. Surgical Changes + +**Touch only what you must. Clean up only your own mess.** + +When editing existing code: +- Don't "improve" adjacent code, comments, or formatting. +- Don't refactor things that aren't broken. +- Match existing style, even if you'd do it differently. +- If you notice unrelated dead code, mention it - don't delete it. + +When your changes create orphans: +- Remove imports/variables/functions that YOUR changes made unused. +- Don't remove pre-existing dead code unless asked. + +The test: Every changed line should trace directly to the user's request. + +### 4. Goal-Driven Execution + +**Define success criteria. Loop until verified.** + +Transform tasks into verifiable goals: +- "Add validation" -> "Write tests for invalid inputs, then make them pass" +- "Fix the bug" -> "Write a test that reproduces it, then make it pass" +- "Refactor X" -> "Ensure tests pass before and after" + +For multi-step tasks, state a brief plan: +``` +1. [Step] -> verify: [check] +2. [Step] -> verify: [check] +3. [Step] -> verify: [check] +``` + +Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification. + +--- + +These guidelines are working if: fewer unnecessary changes in diffs, fewer rewrites due to overcomplication, and clarifying questions come before implementation rather than after mistakes. 
+ +--- + +## Terminology (V4.0) + +| Term | Scope | Description | Example | +|------|-------|-------------|---------| +| **file_uuid** | Video file | Video file identifier (renamed from `video_uuid`) | `384b0ff44aaaa1f1` | +| **identity_uuid** | Global identity | Global person identity (cross-file) | `a9a90105-6d6b-46ff-92da-0c3c1a57dff4` | +| **face_id** | Single detection | Single face detection (frame-level) | `face_100` | +| **trace_id** | Face tracking | Face tracking ID (Face Tracker output) | `2` | +| **chunk_id** | Sentence chunk | Sentence chunk (from pre_chunks via rules) | `chunk_1` | +| **speaker_id** | Speaker segment | Speaker ID (from ASRX) | `SPEAKER_0` | +| **person_id** | ❌ **Deprecated** | Video-local person ID (removed in V4.0) | - | + +### Architecture (V4.0) + +``` +Face → Identity (Two-layer, direct binding) + ↓ + person_identities table: REMOVED + file_identities table: ADDED (N:N relationship) +``` + +### Key Changes (V3.x → V4.0) + +| Change | V3.x | V4.0 | +|--------|------|------| +| **video_uuid** | Used everywhere | **file_uuid** | +| **person_identities** | Required (303 records) | **Removed** | +| **person_id APIs** | 28 endpoints | **Removed** (except register/bind) | +| **Face binding** | Person → Identity | **Face → Identity** (direct) | +| **Chunk binding** | Manual | **Auto** (time alignment) | + +--- + +## Build & Run Commands + +```bash +# Build project (use debug builds for development/testing) +cargo build +cargo build --bin momentry +cargo build --bin momentry_playground + +# Build all binaries +cargo build --bins + +# Run CLI +cargo run -- --help +cargo run -- register /path/to/video.mp4 +cargo run -- server --host 0.0.0.0 --port 3002 + +# Run playground (development binary) +cargo run --bin momentry_playground -- server +cargo run --bin momentry_playground -- --help +``` + +### ⚠️ CRITICAL: `cargo build --release` PROHIBITION +- **NEVER run `cargo build --release` unless the user explicitly says "release the binary" or 
"正式 release"** +- `cargo build --release` is SLOW and only needed when producing a production binary for deployment +- For all development, testing, debugging, and linting: use `cargo build` or `cargo check` +- If uncertain, ALWAYS ask the user first + +## Binaries + +| Binary | Purpose | Port | Redis Prefix | Environment | +|--------|---------|------|--------------|-------------| +| `momentry` | Production | 3002 | `momentry:` | `.env` | +| `momentry_playground` | Development | 3003 | `momentry_dev:` | `.env.development` | +| `momentry_player` | Video player | - | - | - | + +## Testing + +```bash +# Run all tests +cargo test + +# Run single test by name +cargo test test_name + +# Run with output +cargo test -- --nocapture + +# Doc tests +cargo test --doc +``` + +## Linting & Formatting + +```bash +# Format code (edition=2021, max_width=100, tab_spaces=4) +cargo fmt +cargo fmt -- --check + +# Lint +cargo clippy +cargo clippy --all-features + +# Check for errors +cargo check +cargo check --all-features +``` + +## Code Style + +### General +- Use Rust 2021 edition +- Use tracing for logging (not println!) 
+- Keep lines under 100 characters + +### Imports (order: std → external → local) +```rust +use std::path::Path; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +use crate::core::chunk::Chunk; +``` + +### Error Handling +- Use `anyhow::Result` for application code +- Use `thiserror` for library code +- Use `.context()` for error context +- Use `anyhow::bail!()` for early returns + +```rust +fn example() -> Result<String> { + let output = Command::new("ffprobe") + .args([...]) + .output() + .context("Failed to run ffprobe")?; + + if !output.status.success() { + anyhow::bail!("Command failed"); + } + Ok(result) +} +``` + +### Naming +- Types/Enums: PascalCase (`VideoRecord`, `ChunkType`) +- Functions/Variables: snake_case (`get_video_by_uuid`) +- Traits: PascalCase (`Database`, `ChunkStore`) +- Files: snake_case (`postgres_db.rs`) + +### Types +- Use `serde::{Deserialize, Serialize}` for serializable types +- Use `#[serde(rename_all = "snake_case")]` for enum variants +- Use explicit numeric types (i64, u32, f64) + +```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VideoRecord { + pub id: i64, + pub uuid: String, + pub duration: f64, + pub width: u32, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum ChunkType { + TimeBased, + Sentence, + Cut, +} +``` + +### Async Programming +- Use `tokio` runtime with full features +- Use `#[async_trait]` for async trait methods + +```rust +#[async_trait] +pub trait Database: Send + Sync { + async fn init() -> Result<Self> + where Self: Sized; +} +``` + +## Code Structure + +``` +src/ +├── main.rs # CLI entry point +├── lib.rs # Library exports +├── core/ +│ ├── api_key/ # API key management (anomaly, blacklist, encryption, etc.) 
+│ ├── chunk/ # Chunking logic +│ ├── config.rs # Centralized configuration (env vars) +│ ├── db/ # Database (PostgreSQL, MongoDB, Redis, Qdrant) +│ ├── embedding/ # Vector embeddings +│ ├── overlay/ # Video overlay +│ ├── probe/ # ffprobe integration +│ ├── processor/ # ASR, OCR, YOLO, Face, Pose, CUT, ASRX +│ │ └── executor.rs # Unified Python script executor +│ ├── storage/ # File management +│ └── thumbnail/ # Thumbnail extraction +├── api/ # HTTP API (axum) +├── player/ # Video player +├── ui/ # TUI components +└── watcher/ # File system watcher +``` + +## Key Dependencies + +- **Error handling**: `anyhow`, `thiserror` +- **Async**: `tokio` (full features), `async-trait` +- **CLI**: `clap` (derive) +- **Serialization**: `serde`, `serde_json`, `chrono` +- **Database**: `sqlx`, `mongodb`, `redis` (1.0), `qdrant-client` +- **HTTP**: `axum`, `tower` +- **Logging**: `tracing`, `tracing-subscriber` +- **Config**: `once_cell` (lazy static config) + +## Environment Variables + +### Server +- `MOMENTRY_SERVER_PORT` - API server port (default: `3002` for production, `3003` for playground) +- `MOMENTRY_REDIS_PREFIX` - Redis key prefix (default: `momentry:` for production, `momentry_dev:` for playground) +- `MOMENTRY_API_KEY` - API key for Player online mode testing + +### Testing API Key +```bash +export MOMENTRY_API_KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" + +# Test Player online mode +cargo run --features player --bin momentry_player -- -o +``` + +### Database +- `DATABASE_URL` - PostgreSQL (default: `postgres://accusys@localhost:5432/momentry`) + +### Redis +- `REDIS_URL` - Redis URL (default: `redis://:accusys@localhost:6379`) +- `REDIS_PASSWORD` - Redis password (default: `accusys`) + +### Paths +- `MOMENTRY_OUTPUT_DIR` - Output directory (default: `/Users/accusys/momentry/output`) +- `MOMENTRY_BACKUP_DIR` - Backup directory +- `MOMENTRY_PYTHON_PATH` - Python path (default: `/opt/homebrew/bin/python3.11`) +- `MOMENTRY_SCRIPTS_DIR` - Scripts 
directory + +### Processor Timeouts +- `MOMENTRY_ASR_TIMEOUT` - ASR timeout in seconds (default: 3600) +- `MOMENTRY_CUT_TIMEOUT` - CUT timeout in seconds (default: 3600) +- `MOMENTRY_DEFAULT_TIMEOUT` - Default timeout (default: 7200) + +### TMDb Integration (Face Clustering) +- `TMDB_API_KEY` - TMDb API key for movie metadata lookup (required for `MOMENTRY_TMDB_PROBE_ENABLED=true`) +- `MOMENTRY_TMDB_PROBE_ENABLED` - Enable TMDb probe during registration (default: `false`) + - Register phase: searches TMDb by filename, creates identities with tmdb_id/tmdb_profile + - Post-process phase: matches detected faces against TMDb identities via cosine similarity + +### Synonym Expansion +- `MOMENTRY_SYNONYM_FILES` - Comma-separated paths to synonym JSON files (e.g., `data/english_synonyms.json,data/llm_synonyms.json`) +- `MOMENTRY_SYNONYM_FILE` - Single synonym JSON file path (deprecated, use above) + +### Logging +- `RUST_LOG` or `MOMENTRY_LOG_LEVEL` - Log level (default: `info`) + +## Notes + +- Unit tests exist (86 library tests) +- Video processing uses external tools (ffprobe, Python scripts) +- Multi-database architecture (PostgreSQL, MongoDB, Redis, Qdrant) +- Monitor directory is a separate system (not Rust) +- PythonExecutor provides unified script execution with timeout support +- Redis 1.0.x for improved performance +- FaceNet CoreML model (`models/facenet512.mlpackage`) replaces InsightFace for embedding extraction (MIT license, ANE-accelerated) + +### LLM Synonym Generation + +Generate synonym database using llama.cpp (Gemma4): + +```bash +# Generate full database (162 entries, ~5 minutes) +python3 scripts/generate_synonyms_llamacpp.py + +# Quick test +python3 scripts/generate_synonyms_llamacpp.py --test + +# Resume from existing file +python3 scripts/generate_synonyms_llamacpp.py --resume + +# Output: data/llm_synonyms.json (27 Chinese + 135 English words) +``` + +## Task Management + +### 使用 todowrite 追蹤任務 +```bash +# 創建任務清單 +/todo 建立配置模組 [in_progress] +/todo 
添加單元測試 [pending] + +# 更新狀態 +/todo 完成標記 [completed] +``` + +### 任務批次建議 +- 一次處理 1-2 個功能 +- 每個功能完成後驗證 (clippy + test) +- 驗證通過後再繼續下一個 + +## Code Review Checklist + +完成任務後檢查: +- [ ] `cargo clippy --lib` 通過 +- [ ] `cargo test --lib` 通過 +- [ ] `cargo fmt -- --check` 通過 +- [ ] 文檔已更新 (如需要) +- [ ] 新功能有單元測試 + +## Commit Guidelines + +```bash +# feat: 新功能 +git commit -m "feat: add monitor_jobs table" + +# fix: 錯誤修復 +git commit -m "fix: resolve SQL injection in store_vector" + +# refactor: 重構 +git commit -m "refactor: use parameterized queries" + +# docs: 文檔更新 +git commit -m "docs: update AGENTS.md with new modules" +``` + +## Pre-commit Hook + +專案已配置 `.git/hooks/pre-commit`,提交前自動檢查: + +```bash +# 檢查內容 +1. cargo fmt --check # Rust 格式化檢查 +2. cargo clippy --lib # Rust Lint 檢查 +3. cargo test --lib # Rust 單元測試 +4. ruff check # Python Lint 檢查 +5. ruff format --check # Python 格式化檢查 +6. markdownlint # Markdown 格式檢查 +7. shellcheck # Shell 腳本檢查 + +# 跳過檢查(不建議) +git commit --no-verify + +# 跳過特定檢查 +git commit --skip-checks +``` + +**注意**: Hook 僅檢查已暫存的 Rust/Python/Markdown 文件。 + +### Python 環境設置 +```bash +# 安裝 ruff +pip install ruff==0.11.2 + +# 格式化 Python 文件 +ruff format scripts/ + +# Lint Python 文件 +ruff check scripts/ +``` + +### Markdown 環境設置 +```bash +# 安裝 markdownlint-cli (使用系統 Node.js) +npm install -g markdownlint-cli + +# 檢查 Markdown 文件 +markdownlint docs/ + +# 配置檔案 +.markdownlint.json +``` + +### Shell 環境設置 +```bash +# 安裝 shellcheck +brew install shellcheck + +# 檢查 Shell 腳本 +shellcheck scripts/*.sh monitor/**/*.sh +``` + +**注意**: Hook 只檢查 error 等級的 shellcheck 問題,style 警告會顯示但不阻擋提交。 + +## Release Workflow + +### Release 前準備 +每次 release production binary 前,必須: + +1. **建立 Release Tag** + ```bash + git tag -a v0.X.X -m "Release vX.X.X - YYYY-MM-DD" + git push origin v0.X.X + ``` + +2. 
**備份獨立 Source Code** + ```bash + # 建立 release 獨立目錄 + RELEASE_DIR="/Users/accusys/momentry_core_releases/v0.X.X" + mkdir -p "$RELEASE_DIR" + + # 複製完整原始碼(排除不必要的檔案) + rsync -av --exclude='.git' --exclude='target' --exclude='node_modules' \ + /Users/accusys/momentry_core_0.1/ "$RELEASE_DIR/" + + # 記錄 release 資訊 + echo "Release: v0.X.X" > "$RELEASE_DIR/RELEASE_INFO.txt" + echo "Date: $(date)" >> "$RELEASE_DIR/RELEASE_INFO.txt" + echo "Git Commit: $(git rev-parse HEAD)" >> "$RELEASE_DIR/RELEASE_INFO.txt" + echo "Binary: $(ls -la target/release/momentry)" >> "$RELEASE_DIR/RELEASE_INFO.txt" + ``` + +3. **備份 Binary** + ```bash + cp target/release/momentry "$RELEASE_DIR/momentry_v0.X.X" + cp target/release/momentry_playground "$RELEASE_DIR/momentry_playground_v0.X.X" 2>/dev/null + ``` + +4. **記錄資料庫 Schema** + ```bash + pg_dump -U accusys -d momentry --schema-only > "$RELEASE_DIR/schema_v0.X.X.sql" + ``` + +### 重要性 +- 避免 release binary 與 current source code 不一致 +- 方便追蹤特定 release 的程式碼狀態 +- 必要時可快速復原或比對差異 +- 確保資料庫 schema 與程式碼版本對應 + +## Reference Documents + +| 文件 | 用途 | +|------|------| +| `docs/OPENCODE_GUIDE.md` | OpenCode 使用規範 | +| `docs/ARCHITECTURE_EVALUATION.md` | 架構優化待評估項目 (含 GraphRAG) | +| `docs/PENDING_ISSUES.md` | 待解決問題追蹤 | +| `docs/MOMENTRY_CORE_MONITORING.md` | 監控系統規範 | +| `docs/MOMENTRY_CORE_REDIS_KEYS.md` | Redis Key 設計規範 | +| `docs/PYTHON.md` | Python 腳本規範 | +| `docs/FILE_CHANGE_MANAGEMENT.md` | 文件修改管理規範 | +| `docs/YOLO_RESUME_INTEGRATION.md` | YOLO Resume 功能整合記錄 | +| `docs/DOCUMENT_EMBEDDING_STRATEGY.md` | Parent-Child 嵌入策略 | +| `docs/PROCESSING_PIPELINE.md` | 處理流程文檔 | +| `docs/N8N_DEMO_WORKFLOW.md` | n8n 工作流文檔 | +| `docs/FRESH_MAC_INSTALLATION.md` | 全新 Mac 安裝指南 | +| `docs/SERVICES.md` | 服務總覽與管理 | +| `docs/SFTPGO_DEMO_USER.md` | SFTPGo 用戶指南 | + +## Document Change Workflow + +修改文件前請參考 `docs/FILE_CHANGE_MANAGEMENT.md`,確保: + +1. **修改前**:完整閱讀文件、執行預檢清單 +2. **修改中**:提供變更計畫、取得確認 +3. **修改後**:展示 diff、更新版本歷史 +4. 
**驗證**:執行 lint/test、提交前審查 + +### AI 工具修改規範 + +AI 工具修改文件時: +- 必須先完整閱讀文件(不可只讀取部分章節) +- 修改前先提出變更計畫供確認 +- 修改後展示 diff 內容 +- 更新版本歷史表 + +## PHP Development + +WordPress 作為 Momentry Portal,負責 n8n 自動化與 sftpgo 檔案服務的頁面整合。 + +### 編輯器設定 + +| 編輯器 | LSP 方案 | 安裝方式 | +|--------|----------|----------| +| VS Code | Intelephense | Extension Marketplace (推薦) | +| Cursor | Intelephense | Extension Marketplace (推薦) | +| CLI | phpactor | `~/bin/phpactor` | + +### Intelephense (VS Code/Cursor) + +1. 安裝 Extension: 搜尋 "Intelephense" +2. 設定: +```json +{ + "intelephense.stubs": ["wordpress"] +} +``` + +### phpactor (CLI) + +```bash +# 安裝方式 +brew install composer +curl -sSL https://github.com/phpactor/phpactor/releases/latest/download/phpactor.phar -o ~/bin/phpactor +chmod +x ~/bin/phpactor + +# 安裝 WordPress Stubs +cd /Users/accusys/wordpress/web +composer require --dev php-stubs/wordpress-stubs + +# 建立 WordPress 索引 +cd /Users/accusys/wordpress/web +~/bin/phpactor index:build --reset + +# 常用指令 +~/bin/phpactor class:search "WP_User" # 搜尋類別 +~/bin/phpactor index:query WP_User # 查看類別資訊 +~/bin/phpactor navigate /path/to/file.php # 導航到定義 +``` + +### WordPress 程式碼位置 +| 類型 | 路徑 | +|------|------| +| 主題 | `/Users/accusys/wordpress/web/wp-content/themes/` | +| 插件 | `/Users/accusys/wordpress/web/wp-content/plugins/` | + +### 與 marcom 團隊協作 +| 角色 | 負責 | +|------|------| +| marcom 團隊 | Figma 設計 / Elementor 建構 | +| OpenCode | 程式碼實作 / 重構 | + +### 開發時程 +``` +Phase 1: marcom 建構 (現在) → Elementor 頁面建構 +Phase 2: 交付審視 (TBD) → 功能確認 / 重構評估 +Phase 3: OpenCode 重構 → 純程式碼實作,交付無 Elementor 依賴版本 +``` + +## M4 通知規範 + +### 固定通知方式 + +通知 M4 的唯一管道:**`M4_workspace/` 下建立回覆文件 + `git commit`**。不需口頭、即時訊息、郵件。 + +### 命名規則 + +``` +docs_v1.0/M4_workspace/YYYY-MM-DD__response.md (回覆 M4 問題) +docs_v1.0/M4_workspace/YYYY-MM-DD_.md (主動通報) +docs_v1.0/M4_workspace/YYYY-MM-DD__test_report.md (測試報告) +``` + +### 觸發時機 + +| 情境 | 動作 | +|------|------| +| M4 提交問題報告到 `M4_workspace/` | 修復後,回覆 `*_response.md` | +| 完成 M4 要求的任務 | 回覆 `*_response.md` | +| 
重大變更(模型替換、架構變更) | 主動通知 `*.md` | +| 新測試包產出 | `*_test_report.md` | + +### 交付檢查 + +1. 文件寫入 `docs_v1.0/M4_workspace/` +2. `git add` 包含該文件 +3. `git commit` 含相關變更 +4. M4 透過 git log 查看 + +詳細規範見 `docs_v1.0/M4_workspace/M4_NOTIFICATION_PROTOCOL.md`。 + +## UUID Naming Rule + +**Never use bare `uuid` in API route paths, query params, JSON keys, or code variable names. Always qualify:** + +| Context | Must use | Never | +|---------|----------|-------| +| Video/file resource | `file_uuid` | `uuid` | +| Identity resource | `identity_uuid` | `uuid` | +| Query parameter | `file_uuid=`, `identity_uuid=` | `uuid=` | +| Route path | `:file_uuid`, `:identity_uuid` | `:uuid` | +| JSON key | `"file_uuid"`, `"identity_uuid"` | `"uuid"` | + +This applies to docs, code, API responses, and curl examples. Exceptions: internal database primary key names (e.g. `identities.uuid` column). + +## Document Compliance Checklist + +Before creating any file in `docs_v1.0/` (API_WORKSPACE, GUIDES, REFERENCE, DESIGN, OPERATIONS, INTEGRATIONS), verify all items below. +**IMPORTANT**: API functional documents are generated from `API_WORKSPACE/modules/`. Edit modules there, then run `make deploy` in `API_WORKSPACE/` to update `GUIDES/`. Never edit generated files in `GUIDES/` directly. See `DESIGN/Modular_Doc_System_V1.0.md` for the full system design. + +### P0 — Mandatory (7 items) + +| # | Check | Rule | +|---|-------|------| +| 1 | YAML frontmatter | `title`, `version`, `date`, `author`, `status` present | +| 2 | Version history | Table at bottom of file tracking changes | +| 3 | Top info table | scope, status, applicable to, etc. | +| 4 | PascalCase filename | e.g. 
`DetectorRegistry.md`, not `detector_registry.md` | +| 5 | `_` separator | Within filenames use `_`, never spaces or other chars | +| 6 | English content | Entire file in English | +| 7 | Correct directory | File must reside in appropriate directory: `API_WORKSPACE/modules/` (API endpoint modules), `GUIDES/` (user docs, generated), `REFERENCE/` (data models), `DESIGN/` (architecture), `OPERATIONS/` (infra/release), `INTEGRATIONS/` (n8n/tests) | + +### P0b — UUID Naming + +| # | Check | Rule | +|---|-------|------| +| 8 | `file_uuid` not bare `uuid` | All file references use `file_uuid` (see UUID Naming Rule above) | +| 9 | `identity_uuid` not bare `uuid` | All identity references use `identity_uuid` | + +### P1 — Suggested (3 items) + +| # | Check | Note | +|---|-------|------| +| 1 | Cross-references | Link to related docs in API_WORKSPACE/, GUIDES/, REFERENCE/, DESIGN/, OPERATIONS/ | +| 2 | Glossary terms | Define non-obvious terms inline or link glossary | +| 3 | Diagrams | Include Mermaid/ASCII diagram for complex topics | + +### Exception + +`M4_workspace/` files are exempt from this checklist (free-format reply documents). 
+ +--- + +## Delivery Procedure + +完整交付程序(M4_workspace → M5 → Release → Deploy → Public)見: + +`docs_v1.0/OPERATIONS/DELIVERY_PROCEDURE.md` diff --git a/deliverable_v1.1.0/SYSTEM_AUDIT_2026-05-17.md b/deliverable_v1.1.0/SYSTEM_AUDIT_2026-05-17.md new file mode 100644 index 0000000..eca8e02 --- /dev/null +++ b/deliverable_v1.1.0/SYSTEM_AUDIT_2026-05-17.md @@ -0,0 +1,71 @@ +# System Audit — 2026-05-17 + +## Current State + +### Embedding Storage (三重冗余,無主) + +| 資料類型 | PG pgvector | Qdrant | JSON 檔案 | +|---------|------------|--------|-----------| +| Sentence 向量 | `chunk.embedding` ✅ | `dev_v1` / `rule1_v2` / `sentence_*` ✅ | ❌ 無 | +| Story 向量 | `chunk.embedding` ✅ | `dev_v1` / `dev_stories` ✅ | `.story_llm.json` ✅ | +| Face 向量 | ❌ 已清除(依使用者指示) | `dev_faces` ✅ (97K) | `.face.json` ✅ | +| Voice 向量 | ❌ 無 | `dev_voice` ✅ (4K) | ❌ 無 | + +### Pipeline 問題 + +| 問題 | 影響 | +|------|------| +| `processor_results.duration_secs` 全為 0 | 無法查各步驟耗時 | +| `processor_results.started_at/completed_at` 全 NULL | 時間線遺失 | +| Redis timing 在 job 完成後被清掉 | 唯一 timing 來源消失 | +| `get_chunk_by_chunk_id_and_uuid` 原本是 stub(已修) | Smart search 找不到 PG chunk | +| `server.rs::search()` 未 mount 但仍編譯 | Dead code,混淆 Qdrant 用途 | +| Face embedding 只寫 Qdrant 不寫 PG | 已刪除則全失 | + +### Qdrant Collections 現況 + +| Collection | Points | 來源 | UUID | +|-----------|--------|------|------| +| `dev_v1` | 9,936 | PG rebuild | ✅ bd80fec... | +| `dev_faces` | 97,000 | face.json rebuild | ✅ bd80fec... | +| `dev_stories` | 560 | Snapshot | ✅ bd80fec... | +| `dev_voice` | 4,188 | Snapshot | ✅ bd80fec... | +| `dev_rule1_v2` | 3,417 | Snapshot | ✅ bd80fec... | +| `sentence_story` | 4,188 | Snapshot | ✅ bd80fec... | +| `sentence_summary` | 4,188 | Snapshot | ✅ bd80fec... 
| + +## Safeguards & Fixes + +### P0 — 必須修 + +| # | Fix | 做法 | +|---|-----|------| +| 1 | **Pipeline timing 寫入 DB** | `update_processor_result()` 加入 `started_at`、`completed_at`、`duration_secs` | +| 2 | **Qdrant 不當主要儲存** | Embedding 以 PG `chunk.embedding` 為 source of truth,Qdrant 唯讀 cache | +| 3 | **Smart search 只走 PG pgvector** | `search_parent_chunks_semantic` 已正確,無需 Qdrant | +| 4 | **移除 `server.rs::search()` dead code** | 或 mount 到正式 route 並確認可用 | + +### P1 — 建議修 + +| # | Fix | 做法 | +|---|-----|------| +| 5 | **刪除 Qdrant 前先 snapshot** | 自動 snapshot script | +| 6 | **清理多餘 Qdrant collections** | `dev_voice` / `dev_stories` / `dev_rule1_v2` / `sentence_*` 無 server reader,可移除 | +| 7 | **Face embedding 寫入 PG 或移除 dead code** | 目前 face Qdrant write 無人讀取,可移除 `sync_face_embeddings` | +| 8 | **UUID 一致性檢查** | 同一 content 不應產生不同 UUID | + +### P2 — 可選 + +| # | Fix | 做法 | +|---|-----|------| +| 9 | `chunk_selector.rs` (player binary)hardcode `momentry_rule1` | 改讀 env var 或 PG | +| 10 | AGENTS.md 已加入 delete 安全規則 | ✅ Done | + +## Data Recovery Path + +| 資料來源 | 可恢復到 | 方法 | +|---------|---------|------| +| `chunk.embedding` (PG) | Qdrant `dev_v1` | SQL → Qdrant upsert | +| `face.json` (磁碟) | Qdrant `dev_faces` | Python script | +| `story_llm.json` (磁碟) | Qdrant `dev_stories` | Python script | +| Qdrant snapshots (phase1) | Qdrant collections | Snapshot upload API | diff --git a/deliverable_v1.1.0/html_docs/doc/01_auth.html b/deliverable_v1.1.0/html_docs/doc/01_auth.html new file mode 100644 index 0000000..11a47c9 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/01_auth.html @@ -0,0 +1,388 @@ + + + + +01 Auth - Momentry API Docs + + + +
+← Back to index + + + + +

Base URL

+ + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+

All examples in this documentation use these environment variables:

+
API="http://localhost:3002"
+KEY="your-api-key-here"
+
+ +

Authentication

+

All endpoints under /api/v1/* require authentication. +The following endpoints are public (no auth needed):

+
    +
  • GET /health
  • +
  • POST /api/v1/auth/login
  • +
  • POST /api/v1/auth/logout
  • +
+

Three Authentication Modes

+

The system supports three authentication methods, checked in priority order by the middleware:

+
Middleware priority:
+  1. Session Cookie (Portal/browser)
+  2. JWT Bearer (API clients, CLI)
+  3. API Key Header (legacy compatibility)
+  4. API Key Query Param (?api_key=)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<session_id>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients, CLI, scripts
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+
+

Login

+

Default accounts & API keys:

+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+

The demo API key is set via MOMENTRY_DEMO_API_KEY env var and can be used in place of JWT for marcom integrations:

+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ +

Success Response

+
{
+  "success": true,
+  "jwt": "eyJhbGciOiJIUzI1NiIs...",
+  "api_key": "muser_...",
+  "user": {
+    "username": "admin",
+    "role": "admin"
+  },
+  "expires_at": "2026-05-18T13:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+

The login endpoint also sets a Set-Cookie header for browser-based clients:

+
Set-Cookie: session_id=<session_id>; Path=/; HttpOnly; SameSite=Strict; Max-Age=86400
+
+ +

Error Response (401)

+
{
+  "success": false,
+  "message": "Invalid username or password"
+}
+
+ +
+

Using JWT

+

JWT is preferred for API clients (CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless).

+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +

JWT is short-lived (1 hour). When it expires, request a new one via login.

+
+

Using Session Cookie (Browser)

+

Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed.

+
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep -i "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<session_id>
+# No manual header needed for subsequent requests
+
+ +

The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF).

+
+

Using Legacy API Key

+
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +

API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation.

+

Obtaining an API Key (CLI)

+
momentry api-key create "My API Key" --key-type user
+
+ +
+

Logout

+
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<session_id>"
+
+ +

What logout does

+ + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWTs are stateless, so logout does not revoke them; a JWT is added to a blacklist only when API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+

Example: full session lifecycle

+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep -i "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +
+

Authentication Flow Summary

+
Login Request
+     │
+     ▼
+┌──────────────────┐
+│  1. Check users  │ ← users table (argon2 password verify)
+│     table        │
+└──────┬───────────┘
+       │
+   ┌───┴───┐
+   │ match │
+   └───┬───┘
+       │
+       ▼
+┌──────────────────┐
+│  2. Create JWT   │ ← 1h expiry, signed with JWT_SECRET
+├──────────────────┤
+│  3. Create       │ ← 24h expiry, stored in sessions table
+│     session      │
+├──────────────────┤
+│  4. Set-Cookie   │ ← HttpOnly, SameSite=Strict, Path=/
+├──────────────────┤
+│  5. Return       │ ← JWT + api_key + user info to client
+└──────────────────┘
+
+ +
Protected Request
+     │
+     ▼
+┌──────────────────────┐
+│  Middleware checks:  │
+│                      │
+│  1. Cookie session?  │ → DB lookup session → get api_key → verify
+│                      │
+│  2. JWT Bearer?      │ → verify JWT signature → decode claims
+│                      │
+│  3. X-API-Key?       │ → SHA256 hash → DB lookup → verify
+│                      │
+│  4. ?api_key=?       │ → same as #3
+│                      │
+│  5. None → 401       │
+└──────────────────────┘
+
+ +
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+

Related

+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/02_health.html b/deliverable_v1.1.0/html_docs/doc/02_health.html new file mode 100644 index 0000000..0ed4b92 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/02_health.html @@ -0,0 +1,277 @@ + + + + +02 Health - Momentry API Docs + + + +
+← Back to index + + + + +

Health Check

+

GET /health

+

Auth: Public +Scope: system-level

+

Returns basic server health status — used by load balancers and monitoring.

+

Example

+
curl "$API/health" | jq '{status, version}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "build_git_hash": "3a6c1865",
+  "build_timestamp": "2026-05-16T13:38:15Z",
+  "uptime_ms": 3015
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok or degraded
versionstringSemver version
build_git_hashstringGit commit hash
build_timestampstringBinary build time
uptime_msintegerMilliseconds since server start
+
+

GET /health/detailed

+

Auth: Required +Scope: system-level

+

Returns full system health including each service status, resource utilization, pipeline readiness, schema migration status, identity file sync status, and external integrations.

+
+

Requires authentication (JWT, session cookie, or API key). The basic /health endpoint remains public for load balancer checks.

+
+

Example

+
curl "$API/health/detailed" -H "X-API-Key: $KEY" | jq '{status, services, resources: {cpu: .resources.cpu_used_percent, memory: .resources.memory_used_percent}}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "services": {
+    "postgres": {"status": "ok", "latency_ms": 3},
+    "redis": {"status": "ok", "latency_ms": 1},
+    "qdrant": {"status": "ok", "latency_ms": 5}
+  },
+  "resources": {
+    "cpu_used_percent": 12.5,
+    "memory_available_mb": 32768,
+    "memory_used_percent": 31.7
+  },
+  "pipeline": {
+    "scripts_ready": true,
+    "scripts_count": 345,
+    "processors": {
+      "asr": true,
+      "yolo": true,
+      "face": true,
+      "pose": true,
+      "ocr": true,
+      "cut": true,
+      "scene": true,
+      "asrx": true,
+      "visual_chunk": true
+    },
+    "models_ready": true,
+    "models_count": 42,
+    "scripts_integrity": {"matched": 332, "total": 345, "ok": false},
+    "ffmpeg": true
+  },
+  "schema": {
+    "table_exists": true,
+    "applied": [{"filename": "migrate_add_users_table.sql"}],
+    "required": [],
+    "ok": true
+  },
+  "identities": {
+    "directory_exists": true,
+    "files_count": 3481,
+    "index_ok": true,
+    "db_count": 3481,
+    "synced": true
+  },
+  "integrations": {
+    "tmdb": {
+      "api_key_configured": false,
+      "enabled": false,
+      "api_reachable": null
+    }
+  }
+}
+
+ +

Response Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok if all essential services healthy
servicesobjectPer-service status (postgres, redis, qdrant)
services.*.statusstringok, error, or degraded
services.*.latency_msintResponse time in milliseconds
resourcesobjectCPU, memory usage
pipeline.scripts_readybooleanScripts directory accessible
pipeline.scripts_countintNumber of Python processor scripts
pipeline.processorsobjectPer-processor availability
pipeline.models_readybooleanModels directory accessible
pipeline.scripts_integrityobjectSHA256 checksum verification results
schema.okbooleanAll required migrations applied
identities.syncedbooleanIdentity file count matches DB count
integrations.tmdbobjectTMDB API key config and reachability
+

Health status rules

+ + + + + + + + + + + + + + + + + + + + + +
Conditionstatus
All services okok
Any service errordegraded
Postgres or Redis errordegraded (server still responds)
+
+

Stats Endpoints

+ + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/sftpgoNoSFTPGo service status
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/03_register.html b/deliverable_v1.1.0/html_docs/doc/03_register.html new file mode 100644 index 0000000..8e211ba --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/03_register.html @@ -0,0 +1,444 @@ + + + + +03 Register - Momentry API Docs + + + +
+← Back to index + + + + +

File Registration

+

POST /api/v1/files/register

+

Auth: Required +Scope: file-level

+

Register a video file for processing. Returns the file's metadata and UUID.

+

New in v0.1.2: Registration now automatically triggers the processing pipeline — no need to call POST /api/v1/file/:file_uuid/process separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H)

+

If the file already exists (same content hash), returns the existing record with already_exists: true.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+

Example

+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "file_type": "video",
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "already_exists": false,
+  "message": "File registered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+
+

GET /api/v1/files/scan

+

Auth: Required +Scope: file-level

+

Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+

Example

+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ +

Response (200)

+
{
+  "files": [
+    {
+      "file_name": "video.mp4",
+      "file_size": 12345678,
+      "is_registered": true,
+      "file_uuid": "3a6c1865...",
+      "status": "completed",
+      "registration_time": "2026-05-16T12:00:00Z",
+      "job_id": 42
+    }
+  ],
+  "total": 107,
+  "filtered_total": 80,
+  "page": 1,
+  "page_size": 20,
+  "total_pages": 4,
+  "registered_count": 26,
+  "unregistered_count": 81
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+

Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/04_lookup.html b/deliverable_v1.1.0/html_docs/doc/04_lookup.html new file mode 100644 index 0000000..1ce9106 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/04_lookup.html @@ -0,0 +1,291 @@ + + + + +04 Lookup - Momentry API Docs + + + +
+← Back to index + + + + +

File Lookup

+

GET /api/v1/files/lookup

+

Auth: Required +Scope: file-level

+

Search registered files by file name. Performs a case-insensitive LIKE search on the file name column. Returns basic info about matching files.

+

Query Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_namestringYesFile name to search for (partial matches supported)
+

Example

+
# Look up a specific file
+curl -s "$API/api/v1/files/lookup?file_name=video.mp4" \
+  -H "X-API-Key: $KEY"
+
+# Partial name search
+curl -s "$API/api/v1/files/lookup?file_name=charade" \
+  -H "X-API-Key: $KEY" | jq '.matches[].file_name'
+
+ +

Response (200)

+
{
+  "file_name": "video.mp4",
+  "exists": true,
+  "matches": [
+    {
+      "file_uuid": "3a6c1865...",
+      "file_name": "video.mp4",
+      "file_type": "video",
+      "status": "completed"
+    }
+  ],
+  "next_name": "video (2).mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_namestringSearched name
existsbooleanExact name match exists
matchesarrayArray of matching registered files
matches[].file_uuidstring32-char hex UUID
matches[].file_namestringRegistered file name
matches[].file_typestring"video", "audio", or null
matches[].statusstringRegistration/processing status
next_namestringSuggested name for avoiding conflicts
+
+

Unregister

+

POST /api/v1/unregister

+

Auth: Required +Scope: file-level

+

Delete a registered file from the system. Supports single file by UUID, or batch by directory + regex pattern.

+

What gets deleted

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Removed (default)Not removed
Database records (videos, chunks, embeddings, processor_results, pre_chunks)The original source video file on disk
Processor output JSON files ({uuid}.*.json) — unless delete_output_files: falseTemp/working directories
In-memory cache entries
MongoDB cached lists
+
+

⚠️ Database deletion is irreversible. To keep output files, set "delete_output_files": false.

+
+

Request Parameters

+

At least one mode must be specified: either file_uuid alone, or file_path + pattern together.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstring*Single file UUID to delete
file_pathstring*Directory path (for batch delete)
patternstring*Regex pattern (requires file_path)
delete_output_filesbooleanNotrueIf true, also delete processor output JSON files ({uuid}.*.json). Set to false to keep them.
+

Example

+
# Delete a single file by UUID (default: also deletes output JSON files)
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# Keep output JSON files, only delete DB records
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "delete_output_files": false}'
+
+# Batch delete all mp4 files in a directory
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "message": "Video unregistered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanTrue if deletion succeeded
file_uuidstringUUID of the deleted file (single mode)
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Neither file_uuid nor file_path+pattern provided
404File UUID not found
401Missing or invalid API key
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/05_process.html b/deliverable_v1.1.0/html_docs/doc/05_process.html new file mode 100644 index 0000000..69cba53 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/05_process.html @@ -0,0 +1,505 @@ + + + + +05 Process - Momentry API Docs + + + +
+← Back to index + + + + +

Processing Pipeline

+

POST /api/v1/file/:file_uuid/process

+

Auth: Required +Scope: file-level

+

Trigger the processing pipeline for a registered file. Creates a monitor job that the worker picks up and processes sequentially. Returns immediately with the job info—processing runs asynchronously in the background.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
processorsstring[]NoallSpecific processors to run: ["cut","asr","asrx","yolo","ocr","face","pose","visual_chunk","story","5w1h"]
rulesstring[]NoallRule names to apply (currently unused)
+

Example

+
# Run all processors
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" -d '{}'
+
+# Run specific processors only
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"processors": ["asr", "face", "yolo"]}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "job_id": 42,
+  "file_uuid": "3a6c1865...",
+  "status": "processing",
+  "pids": [12345, 12346],
+  "message": "Processing triggered for video.mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
job_idintegerMonitor job ID (for job tracking)
file_uuidstring32-char hex UUID of the file
statusstring"processing"
pidsinteger[]Process IDs of started processors
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404File UUID not found
401Missing or invalid API key
+
+

GET /api/v1/file/:file_uuid/probe

+

Auth: Required +Scope: file-level

+

Get ffprobe metadata for a registered file. Returns video/audio stream info, codec details, duration, resolution, and frame rate.

+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_size": 794863677,
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "cached": true,
+  "format": {
+    "filename": "/path/to/video.mp4",
+    "format_name": "mov,mp4,m4a,3gp",
+    "duration": "120.5",
+    "size": "12345678",
+    "bit_rate": "819200"
+  },
+  "streams": [
+    {
+      "index": 0,
+      "codec_name": "h264",
+      "codec_type": "video",
+      "width": 1920,
+      "height": 1080,
+      "r_frame_rate": "24/1",
+      "duration": "120.5"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
file_namestringFile name
file_sizeintegerFile size in bytes (from filesystem)
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerEstimated total frames
cachedbooleanTrue if result was from cached probe JSON
formatobjectContainer format info (ffprobe format section)
streamsarrayArray of stream info objects
+
+

GET /api/v1/progress/:file_uuid

+

Auth: Required +Scope: file-level

+

Get real-time processing progress for a file via Redis pub/sub. Includes per-processor status, current/total frames, ETA, and system resource stats.

+

Pipeline Order

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OrderProcessorDependenciesDescription
1cutScene detection
2asrcutSpeech-to-text (per scene)
3asrxasrSpeaker diarization
4yoloObject detection
5ocrText recognition
6faceFace detection & embedding
7posePose estimation
8visual_chunkyoloVisual scene chunks
9storyasr, asrx, cut, yolo, faceScene summaries (template)
105w1hstory5W1H analysis (Gemma4 LLM)
+

All processors except story and 5w1h run concurrently when their dependencies are met. Story and 5W1H run sequentially after their prerequisites.

+

Example

+
curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}'
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "overall_progress": 71,
+  "cpu_percent": 45.2,
+  "gpu_percent": 30.1,
+  "memory_percent": 62.4,
+  "processors": [
+    {"processor_type": "asr", "status": "complete", "progress": 100},
+    {"processor_type": "yolo", "status": "running", "progress": 65},
+    {"processor_type": "face", "status": "pending", "progress": 0}
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
overall_progressintegerOverall progress percentage (0–100)
processorsarrayPer-processor status list
processors[].processor_typestringProcessor name (asr, cut, yolo, etc.)
processors[].statusstring"pending", "running", "complete", or "failed"
processors[].progressintegerPer-processor progress (0–100)
processors[].eta_secondsintegerEstimated seconds remaining (running processors)
processors[].currentintegerCurrent frame count
processors[].totalintegerTotal frame count
cpu_percentfloatCurrent CPU usage
gpu_percentfloatCurrent GPU utilization
memory_percentfloatCurrent memory usage
+
+

GET /api/v1/jobs

+

Auth: Required +Scope: system-level

+

List all processing jobs (monitor jobs) in the system. Shows job status, which file each job is processing, and current processor info.

+

Example

+
curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | {uuid, status}]}'
+
+ +

Response (200)

+
{
+  "jobs": [
+    {
+      "id": 42,
+      "uuid": "3a6c1865...",
+      "status": "running",
+      "current_processor": "yolo",
+      "created_at": "2026-05-16T12:00:00Z",
+      "started_at": "2026-05-16T12:01:00Z"
+    }
+  ],
+  "count": 15,
+  "page": 1,
+  "page_size": 20
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jobsarrayArray of job info objects
jobs[].idintegerJob ID
jobs[].uuidstringFile UUID being processed
jobs[].statusstring"pending", "running", "completed", "failed"
jobs[].current_processorstringCurrently active processor, or null
countintegerTotal job count
pageintegerCurrent page number
page_sizeintegerJobs per page
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/06_search.html b/deliverable_v1.1.0/html_docs/doc/06_search.html new file mode 100644 index 0000000..8f62e1a --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/06_search.html @@ -0,0 +1,280 @@ + + + + +06 Search - Momentry API Docs + + + +
+← Back to index + + + + +

Search APIs

+

POST /api/v1/search/smart

+

Auth: Required +Scope: file-level

+

Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector story_parent and llm_parent chunks by cosine similarity.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringYesFile UUID to search within
querystringYesSearch text
limitintegerNo5Max results to return
pageintegerNo1Page number
page_sizeintegerNo5Items per page
+

Example

+
curl -s -X POST "$API/api/v1/search/smart" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $JWT" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Audrey Hepburn"}'
+
+ +

Response (200)

+
{
+  "query": "Audrey Hepburn",
+  "results": [
+    {
+      "parent_id": 1087822,
+      "scene_order": 1087822,
+      "start_frame": 104438,
+      "end_frame": 104538,
+      "fps": 24.0,
+      "start_time": 4351.6,
+      "end_time": 4355.76,
+      "summary": "[4352s-4356s, 4s] Cast: Audrey Hepburn. Total: 2 lines, 10 words. Speakers: Audrey Hepburn (2 lines)",
+      "similarity": 0.67
+    }
+  ],
+  "page": 1,
+  "page_size": 5,
+  "strategy": "semantic_vector_search"
+}
+
+ +
+

POST /api/v1/search/universal

+

Auth: Required +Scope: file-level

+

Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL tsvector.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict to specific file
typesstring[]No["chunk","frame","person"]Search types
limitintegerNo10Max results per type
pageintegerNo1Page number
page_sizeintegerNo20Items per page
+

Example

+
curl -s -X POST "$API/api/v1/search/universal" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $JWT" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Cary Grant"}'
+
+ +

Response (200)

+
{
+  "results": [
+    {
+      "type": "chunk",
+      "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2",
+      "chunk_type": "story_child",
+      "start_frame": 5103,
+      "end_frame": 5127,
+      "start_time": 212.64,
+      "end_time": 213.64,
+      "text": "[213s-214s] Cary Grant: \"Olá!\"",
+      "score": 0.9
+    }
+  ],
+  "total": 20,
+  "took_ms": 18
+}
+
+ +
+

POST /api/v1/search/frames

+

Auth: Required +Scope: file-level

+

Search face detection frames by identity name or trace ID.

+
+

POST /api/v1/search/identity_text

+

Auth: Required +Scope: file-level

+

Search text chunks spoken by a specific identity.

+
+

Visual Search

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/search/visualSearch visual chunks
POST/api/v1/search/visual/classSearch by object class
POST/api/v1/search/visual/densitySearch by object density
POST/api/v1/search/visual/combinationSearch by object combination
POST/api/v1/search/visual/statsVisual chunk statistics
+

Embedding Model

+ + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
ModelEmbeddingGemma-300m
EndpointPOST /api/v1/embeddings on port 11436
Dimension768
Storagepgvector (chunk.embedding column)
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/07_identity.html b/deliverable_v1.1.0/html_docs/doc/07_identity.html new file mode 100644 index 0000000..cfeaaa4 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/07_identity.html @@ -0,0 +1,510 @@ + + + + +07 Identity - Momentry API Docs + + + +
+← Back to index + + + + +

Global Identities

+

GET /api/v1/identities

+

Auth: Required +Scope: identity-level

+

List all registered identities with pagination.

+

Example

+
curl -s "$API/api/v1/identities?page=1&page_size=20" -H "X-API-Key: $KEY" | jq '{count, identities: [.identities[] | {name}]}'
+
+ +
+

GET /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Get detailed information for a specific identity, including metadata and TMDb references.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "identity_type": "people",
+  "source": "tmdb",
+  "status": "confirmed",
+  "tmdb_id": 112,
+  "tmdb_profile": "{output}/identities/{identity_uuid}/profile.jpg",
+  "metadata": {},
+  "reference_data": {},
+  "created_at": "2026-05-16T12:00:00Z",
+  "updated_at": null
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
identity_uuidstringIdentity identifier
namestringIdentity name
identity_typestring"people" or null
sourcestring.json, auto, tmdb, user_defined, or merged
statusstring"confirmed", "pending", or "inactive"
tmdb_idintegerTMDb person ID (only if source = tmdb)
tmdb_profilestringLocal profile image path ({output}/identities/{uuid}/profile.jpg)
metadataobjectMetadata JSON (tmdb_character, cast_order, etc.)
created_atstringCreation timestamp
+
+

DELETE /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Delete an identity permanently.

+
+

GET /api/v1/identity/:identity_uuid/files

+

Auth: Required +Scope: identity-level

+

Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY"
+
+ +
+

GET /api/v1/identity/:identity_uuid/faces

+

Auth: Required +Scope: identity-level

+

Get all face detection records associated with this identity.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile where face was detected
frame_numberintegerFrame number of detection
face_idstringFace ID (format: face_{frame_number})
confidencefloatDetection confidence
+
+

GET /api/v1/identity/:identity_uuid/chunks

+

Auth: Required +Scope: identity-level

+

Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "data": [
+    {
+      "id": 0,
+      "file_uuid": "bd80fec92b0b6963d177a2c55bf713e2",
+      "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2",
+      "chunk_type": "sentence",
+      "start_frame": 5103,
+      "end_frame": 5127,
+      "fps": 24.0,
+      "start_time": 212.64,
+      "end_time": 213.64,
+      "text_content": "[213s-214s] Cary Grant: \"Olá!\""
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile identifier
chunk_idstringSentence chunk identifier
start_frameintegerFrame-accurate start position
end_frameintegerFrame-accurate end position
fpsfloatFrames per second
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
text_contentstringSpoken text content
+
+

POST /api/v1/identity/:identity_uuid/bind

+

Auth: Required +Scope: identity-level

+

Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile where face is detected
face_idstringYesFace ID (format: {frame}_{idx})
+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}'
+
+ +
+

POST /api/v1/identity/:identity_uuid/unbind

+

Auth: Required +Scope: identity-level

+

Unbind a face detection from an identity. Removes the identity association from the face record.

+
+

GET /api/v1/identities/search

+

Auth: Required +Scope: identity-level

+

Search identities by name using a case-insensitive ILIKE match. Returns matching identity records.

+

Example

+
curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
namestringIdentity name
sourcestringIdentity source
tmdb_idintegerTMDb ID (if source = tmdb)
file_uuidstringAssociated file
+
+
+

POST /api/v1/identity/upload

+

Auth: Required +Scope: identity-level

+

Upload an identity.json file to create or update an identity. Accepts the same format as the identity.json files stored on disk.

+

If an identity with the same name already exists, it will be updated with the new values.

+

Request

+

The request body is an IdentityFile object:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
identity_uuidstringYesIdentity identifier
namestringYesIdentity display name
identity_typestringNo"people" or null
sourcestringNo.json, auto, tmdb, user_defined, or merged
statusstringNo"confirmed", "pending", or "inactive"
tmdb_idintegerNoTMDb person ID
tmdb_profilestringNoTMDb profile image URL
metadataobjectNoArbitrary metadata JSON
file_bindingsarrayNoArray of { file_uuid, trace_ids, face_count } (informational)
+

Example

+
curl -s -X POST "$API/api/v1/identity/upload" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "version": 1,
+    "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+    "name": "Cary Grant",
+    "identity_type": "people",
+    "source": ".json",
+    "status": "confirmed",
+    "metadata": {},
+    "file_bindings": []
+  }'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "message": "Identity uploaded successfully"
+}
+
+ +
+
+

POST /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Upload a profile image (JPEG or PNG) for an identity. The image is saved to {output}/identities/{uuid}/profile.{ext}.

+

Uses multipart/form-data with field name image.

+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" \
+  -F "image=@/path/to/photo.jpg"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "path": "/path/to/output/identities/.../profile.jpg",
+  "message": "Profile image saved: profile.jpg"
+}
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Missing image field or unsupported format
404Identity not found
415Unsupported image type (use JPEG or PNG)
+
+

GET /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Retrieve the profile image for an identity. Returns the raw image data with appropriate Content-Type header.

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" -o profile.jpg
+
+ + + + + + + + + + + + + + +
Response HeaderValue
content-typeimage/jpeg or image/png
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/08_identity_agent.html b/deliverable_v1.1.0/html_docs/doc/08_identity_agent.html new file mode 100644 index 0000000..64a8bed --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/08_identity_agent.html @@ -0,0 +1,97 @@ + + + + +08 Identity Agent - Momentry API Docs + + + +
+← Back to index + + + + +

Identity Agent

+

POST /api/v1/agents/identity/match-from-photo

+

Auth: Required +Scope: file-level

+

Upload a face photo to match against known identities. Detects face via InsightFace, extracts 512D embedding via CoreML FaceNet, then searches pgvector for the closest identity.

+

Request

+

multipart/form-data with field image (JPEG/PNG) and optional file_uuid.

+

Example

+
curl -s -X POST "$API/api/v1/agents/identity/match-from-photo" \
+  -H "Authorization: Bearer $JWT" \
+  -F "image=@/path/to/face.jpg" \
+  -F "file_uuid=$FILE_UUID"
+
+ +

Response (200)

+
{
+  "success": true,
+  "matches": [
+    {
+      "identity_uuid": "a9a90105...",
+      "name": "Cary Grant",
+      "similarity": 0.87
+    }
+  ]
+}
+
+ +
+

POST /api/v1/agents/identity/match-from-trace

+

Auth: Required +Scope: file-level

+

Match a face trace (tracked face across frames) against known identities. Samples 3 angles from the trace, generates embeddings, and searches pgvector.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile containing the trace
trace_idintegerYesFace trace ID to match
+

Example

+
curl -s -X POST "$API/api/v1/agents/identity/match-from-trace" \
+  -H "Authorization: Bearer $JWT" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "trace_id": 10}'
+
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/08_media.html b/deliverable_v1.1.0/html_docs/doc/08_media.html new file mode 100644 index 0000000..fae655d --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/08_media.html @@ -0,0 +1,303 @@ + + + + +08 Media - Momentry API Docs + + + +
+← Back to index + + + + +

Video Streaming & Frame Extraction

+

All video streaming endpoints support the following common query parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
modestringNonormalnormal or debug (draws detection overlays)
audiostringNoonon or off
+
+

GET /api/v1/file/:file_uuid/video

+

Stream the full video file with range support for seeking.

+

Auth: Required +Scope: file-level

+

Response

+
    +
  • 200: Video stream (Content-Type based on file extension)
  • +
  • 206: Partial content (range request)
  • +
  • Supports Range header for seeking
  • +
+
+

GET /api/v1/file/:file_uuid/trace/:trace_id/video

+

Stream video with highlights for a specific face trace (follows a single person across frames with bounding box overlay).

+

Auth: Required +Scope: file-level

+
+

GET /api/v1/file/:file_uuid/video/bbox

+

Stream video with bounding box overlay for all detected objects/faces.

+

Auth: Required +Scope: file-level

+

Uses a built-in 5×7 bitmap font renderer to draw labels directly on video frames via FFmpeg drawtext filter.

+
+

GET /api/v1/file/:file_uuid/thumbnail

+

Extract a single frame from a video as a JPEG image. Uses FFmpeg select filter.

+

Auth: Required +Scope: file-level

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
frameintegerYesZero-based frame number to extract
xintegerNoCrop start X (left edge). Requires y, w, h.
yintegerNoCrop start Y (top edge). Requires x, w, h.
wintegerNoCrop width in pixels. Requires x, y, h.
hintegerNoCrop height in pixels. Requires x, y, w.
+

All four crop params (x, y, w, h) must be provided together or omitted.

+

Example

+
# Extract frame 1000 (full frame)
+curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000" \
+  -H "Authorization: Bearer $JWT" -o frame_1000.jpg
+
+# Extract and crop face region (x=320, y=240, w=160, h=160)
+curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000&x=320&y=240&w=160&h=160" \
+  -H "Authorization: Bearer $JWT" -o face_crop.jpg
+
+ +

Response

+
    +
  • 200: image/jpeg binary data
  • +
  • 404: File not found
  • +
  • 500: FFmpeg error (e.g., frame number is beyond the last frame of the video)
  • +
+

GET /api/v1/file/:file_uuid/clip

+

Extract a video clip (time range) as an MPEG-TS stream. Uses FFmpeg -ss fast seek.

+

Auth: Required +Scope: file-level

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
start_frameintegerNo*Start frame (zero-based). Frame-accurate — use this for precision.
end_frameintegerNo*End frame (zero-based, inclusive). Requires start_frame.
start_timefloatNo*Start time in seconds. Approximate (FPS-dependent). Fallback if frames not given.
end_timefloatNo*End time in seconds. Approximate (FPS-dependent). Fallback if frames not given.
fpsfloatNovideo FPSOverride frames-per-second for frame↔time calculation. Defaults to video's detected FPS.
modestringNonormalnormal or debug (draws "CLIP" overlay)
audiostringNoonon or off
+

Either (start_frame+end_frame) OR (start_time+end_time) must be provided.

+

Example

+
# Clip by frame range (primary)
+curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/clip?start_frame=0&end_frame=47" \
+  -H "Authorization: Bearer $JWT" -o clip.ts
+
+# Clip by time range (fallback)
+curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/clip?start_time=30&end_time=45" \
+  -H "Authorization: Bearer $JWT" -o clip.ts
+
+ +

Response

+
    +
  • 200: video/mp2t MPEG-TS stream
  • +
  • 400: Missing/invalid range parameters
  • +
  • 404: File not found
  • +
  • 500: FFmpeg error
  • +
+

Technical Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
BackendFFmpeg (ffmpeg-full)
Seek-ss before -i (fast keyframe seek)
FormatMPEG-TS (mpegts muxer, pipe-safe)
CodecH.264 + AAC
CacheCache-Control: public, max-age=86400 (24h)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
BackendFFmpeg (ffmpeg-full)
Filterselect=eq(n\,FRAME) to select frame, optional crop=W:H:X:Y
OutputSingle JPEG via pipe (image2pipe, mjpeg codec)
CacheCache-Control: public, max-age=86400 (24h)
Frame numberZero-based (frame=0 = first frame of video)
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/09_tmdb.html b/deliverable_v1.1.0/html_docs/doc/09_tmdb.html new file mode 100644 index 0000000..0f48d89 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/09_tmdb.html @@ -0,0 +1,123 @@ + + + + +09 Tmdb - Momentry API Docs + + + +
+← Back to index + + + + +

TMDb Enrichment

+
+

Offline operation: TMDb prefetch now checks local identity files first (identities/_index.json + *.tmdb.json). +If local files exist, no external API call is made. Internet is only needed for initial data seeding.

+
+

Overview

+

TMDb enrichment is an optional identity-enrichment step that can be run after the pipeline's face detection completes. The workflow is:

+
    +
  1. Prefetch (requires internet): Download movie cast data from TMDb API → cache to {file_uuid}.tmdb.json
  2. +
  3. Probe: Read local cache → create identities for all cast members (source='tmdb') + save identity.json + download profile image to {OUTPUT}/identities/{uuid}/profile.jpg
  4. +
  5. Match: The worker automatically matches video faces against TMDb identities when MOMENTRY_TMDB_PROBE_ENABLED=true
  6. +
+

POST /api/v1/agents/tmdb/prefetch

+

Auth: Required +Scope: file-level

+

Fetch TMDb cast data for a registered file and cache it locally. This is the only step requiring internet access.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to enrich
+

Example

+
curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +

Response (200)

+
{"success": true, "file_uuid": "...", "cache_path": "/output/...tmdb.json"}
+
+ +

POST /api/v1/file/:file_uuid/tmdb-probe

+

Auth: Required +Scope: file-level

+

Read local TMDb cache and create/update identities. Requires prefetch to have been run first.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \
+  -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}'
+
+ +

Response (200 — identities created)

+
{"success": true, "identities_created": 15, "movie_title": "Charade"}
+
+ +

Response (200 — no cache)

+
{"success": false, "message": "No TMDb cache found. Run tmdb-prefetch first."}
+
+ +

GET /api/v1/resource/tmdb

+

Auth: Required +Scope: system-level

+

View TMDb resource status including configuration, identity counts, and cache file count.

+

Example

+
curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \
+  | jq '{identities_seeded, cache_files}'
+
+ +

POST /api/v1/resource/tmdb/check

+

Auth: Required +Scope: system-level

+

Ping the TMDb API to verify connectivity and measure latency.

+

Example

+
curl -s -X POST "$API/api/v1/resource/tmdb/check" \
+  -H "X-API-Key: $KEY" | jq '.api_reachable'
+
+ +

Response

+
{
+  "api_key_configured": true,
+  "enabled": false,
+  "api_reachable": true,
+  "api_latency_ms": 120
+}
+
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/10_pipeline.html b/deliverable_v1.1.0/html_docs/doc/10_pipeline.html new file mode 100644 index 0000000..319e394 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/10_pipeline.html @@ -0,0 +1,364 @@ + + + + +10 Pipeline - Momentry API Docs + + + +
+← Back to index + + + + +

Pipeline

+

Dependency Graph

+
flowchart TB
+    subgraph Processors["10 Processors"]
+        Cut[Cut] --> ASR[ASR]
+        ASR --> ASRX[ASRX]
+        ASRX --> Story[Story]
+        Cut --> Story
+        YOLO[YOLO] --> VisualChunk[VisualChunk]
+        VisualChunk --> Story
+        Face[Face] --> Story
+        Story --> FiveW1H[5W1H]
+        OCR[OCR]
+        Pose[Pose]
+    end
+
+    subgraph Ingestion["入庫 (Post-Processing)"]
+        ASR --> Rule1[Rule 1 Sentence]
+        ASRX --> Rule1
+        Rule1 --> Vectorize[Auto-Vectorize]
+        Rule1 --> Phase1[Phase 1 Pack]
+
+        Cut --> Rule3[Rule 3 Scene]
+        ASR --> Rule3
+
+        Face --> Trace[Face Trace]
+        Trace --> Qdrant[Qdrant Sync]
+        Trace --> TraceChunks[Trace Chunks]
+        Trace --> TKG[TKG Builder]
+
+        Face --> TMDbMatch[TMDb Match]
+        Face --> SceneMeta[Scene Metadata]
+        YOLO --> SceneMeta
+        Face --> IdentityAgent[Identity Agent]
+        ASRX --> IdentityAgent
+
+        Cut --> Agent5W1H[5W1H Agent]
+        ASR --> Agent5W1H
+        Agent5W1H --> Phase2[Phase 2 Pack]
+    end
+
+    style Processors fill:#1a1a2e,stroke:#e94560
+    style Ingestion fill:#16213e,stroke:#0f3460
+
+ +

Pipeline Completion Flow

+

The pipeline is not complete until both the 10 processors AND the 入庫 (ingestion) steps have finished. The worker polls every 3 seconds and only marks the job as completed when all ingestion steps verify OK.

+
10 processors done
+       (job status stays "running")
+Algorithm 1 Trigger: Rule 1 + Vectorize + Phase 1 Pack
+       (job runs in parallel)
+Algorithm 2 Trigger: Face Trace  TKG, Scene Metadata, Identity Agent, 5W1H Agent
+       (poll checks every 3s)
+Ingestion verification: rule1  vectorize  rule3  face_trace  tkg  scene_meta  5w1h 
+     
+job status = "completed"
+
+ +

10 Processor Stages

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#ProcessorDepends OnDescription
1CutScene boundary detection (PySceneDetect)
2ASRCutAutomatic speech recognition (faster-whisper)
3ASRXASRSpeaker diarization + ASR refinement
4YOLOObject detection (YOLOv8)
5OCROptical character recognition
6FaceFace detection + recognition (InsightFace + CoreML)
7PosePose estimation
8VisualChunkYOLOVisual object chunking
9StoryASRX + Cut + YOLO + FaceNarrative scene summarization (LLM, with embedding)
105W1HStoryWho/What/When/Where/Why extraction (LLM, with embedding)
+

入庫 (Post-Processing / Ingestion)

+

These steps run after the 10 processors and are required for pipeline completion. The worker checks all of them before marking the job as done.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#StepTriggers WhenVerification
1Rule 1 Sentence ChunkingASR + ASRX donechunk table has rows with chunk_type = 'sentence'
2Auto-VectorizeRule 1 donechunk.embedding IS NOT NULL for sentence chunks
3Phase 1 PackRule 1 donerelease_pack.py --phase 1 executed
4Rule 3 Scene ChunkingAll 10 processors done + Cut + ASRchunk table has rows with chunk_type = 'cut'
5Face TraceAll 10 processors done + Faceface_detections.trace_id IS NOT NULL
6Qdrant Face SyncFace Trace doneQdrant face_embedding collection populated
7Trace ChunksFace Trace donechunk table has rows with chunk_type = 'trace'
8TKG BuilderFace Trace donetkg_nodes + tkg_edges tables have rows
9TMDb Face MatchingTMDb enabled + Face doneface_detections.identity_id IS NOT NULL
10Heuristic Scene MetadataFace + YOLO done{file_uuid}.scene_meta.json exists on disk
11Identity AgentFace + ASRX doneidentities with source = 'identity_agent'
125W1H AgentCut + ASR donechunk.summary_text IS NOT NULL for cut chunks
13Release Pack5W1H Agent donerelease_pack.py --phase 2 executed
+

Ingestion Status

+

Check real-time ingestion status for a file:

+
curl "$API/api/v1/stats/ingestion-status/{file_uuid}"
+
+ +

Returns per-step done / pending status with detail counts.

+

Example

+
curl "$API/api/v1/stats/ingestion-status/bd80fec9c42afb0307eb28f22c64c76a" | jq '.steps[] | {name, status, detail}'
+
+ +

Response

+
{
+  "file_uuid": "bd80fec9c42afb0307eb28f22c64c76a",
+  "steps": [
+    { "name": "rule1_sentence", "status": "pending", "detail": "0 sentence chunks" },
+    { "name": "auto_vectorize",  "status": "pending", "detail": "0 embedded" },
+    { "name": "rule3_scene",     "status": "pending", "detail": "0 scene chunks" },
+    { "name": "face_trace",      "status": "pending", "detail": "0 traces" },
+    { "name": "trace_chunks",    "status": "pending", "detail": "0 trace chunks" },
+    { "name": "tkg",             "status": "pending", "detail": "0 nodes, 0 edges" },
+    { "name": "identity_match",  "status": "pending", "detail": "0 identities" },
+    { "name": "scene_metadata",  "status": "pending", "detail": null },
+    { "name": "5w1h",            "status": "pending", "detail": "0 scenes with 5W1H" }
+  ]
+}
+
+ +

Stats Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/ingestion-status/:file_uuidNoPer-file ingestion checklist
+

Configuration

+

POST /api/v1/config/cache

+

Auth: Required +Scope: system-level

+

Toggle the Redis cache on or off.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
enabledbooleanYestrue to enable, false to disable
+

Example

+
curl -s -X POST "$API/api/v1/config/cache" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"enabled": false}'
+
+ +

Unmounted Routes

+

The following routes are defined in source code but are NOT currently mounted in the router:

+ + + + + + + + + + + + + + + + + + + + + +
EndpointSource file
/api/v1/search/personsuniversal_search.rs (not mounted)
/api/v1/whowho.rs
/api/v1/who/candidateswho.rs
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/12_agent.html b/deliverable_v1.1.0/html_docs/doc/12_agent.html new file mode 100644 index 0000000..79f83ee --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/12_agent.html @@ -0,0 +1,207 @@ + + + + +12 Agent - Momentry API Docs + + + +
+← Back to index +

Agent Endpoints

+

Agent endpoints provide AI-powered capabilities including translation, identity analysis, and 5W1H extraction.

+

POST /api/v1/agents/translate

+

Translate text between languages using Gemma4 (llama.cpp, port 8082).

+

Request

+
{
+  "text": "Hello, welcome to Momentry Core.",
+  "target_language": "Traditional Chinese",
+  "source_language": "English"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
textstringYesText to translate
target_languagestringYesTarget language name (e.g. "Traditional Chinese", "Japanese")
source_languagestringNoSource language (default: "auto")
+

Response

+
{
+  "success": true,
+  "translated_text": "您好,歡迎使用 Momentry Core。",
+  "source_language_detected": "English",
+  "model_used": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf"
+}
+
+ +

Supported Language Pairs (tested)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SourceTargetQuality
EnglishTraditional Chinese
EnglishJapanese
ChineseEnglish
EnglishFrench
ChineseJapanese
+

Model

+
    +
  • Model: Gemma4 26B (Q5_K_M)
  • +
  • Engine: llama.cpp at localhost:8082
  • +
  • Endpoint: /v1/chat/completions (OpenAI-compatible)
  • +
  • Temperature: 0.1
  • +
  • Max tokens: 1024
  • +
+

Errors

+ + + + + + + + + + + + + + + + + +
StatusCondition
500LLM unreachable or response parse failure
401Missing/invalid auth
+
+

POST /api/v1/agents/5w1h/analyze

+

Extract 5W1H (Who, What, When, Where, Why, How) from a scene. Uses Gemma4 LLM on port 8082.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94",
+  "scene_id": 42
+}
+
+ +

Response

+
{
+  "success": true,
+  "5w1h": {
+    "who": ["Cary Grant"],
+    "what": ["discussing plans"],
+    "when": ["1963"],
+    "where": ["Paris"],
+    "why": ["vacation"],
+    "how": ["in person"]
+  }
+}
+
+ +

POST /api/v1/agents/5w1h/batch

+

Batch analyze all scenes in a file for 5W1H extraction. Uses the pipeline's parent_chunk_5w1h.py --mode llm.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94"
+}
+
+ +

GET /api/v1/agents/5w1h/status

+

Get status of the 5W1H agent pipeline for a file.

+
+

Embedding Model

+ + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
ModelEmbeddingGemma-300m
EndpointPOST /v1/embeddings on port 11436
Dimension768
Used byparent_chunk_5w1h.py --embed, story, 5W1H, search
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/index.html b/deliverable_v1.1.0/html_docs/doc/index.html new file mode 100644 index 0000000..62b15f8 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/index.html @@ -0,0 +1,29 @@ + + + + +Momentry API 文件 + + + +
+

Momentry API 文件

+

API 參考手冊 — 登入後可瀏覽各模組文件

+
安全認證Authentication
健康檢查Health
檔案註冊File Registration
檔案屬性查詢File Lookup
處理流程Processing
搜尋功能Search
身份識別Identity
智能身份綁定Smart Identity Binding
串流與截圖Streaming & Thumbnails
TMDb 整合TMDb Integration
生產線Pipeline
智慧代理AI Agents
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc/login.html b/deliverable_v1.1.0/html_docs/doc/login.html new file mode 100644 index 0000000..c199107 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc/login.html @@ -0,0 +1,46 @@ + + + + +Login - Momentry Docs + + + +
+

Momentry Docs

+
+ + +
Invalid credentials
+ +
+
+ + + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc_developer/11_error_codes.html b/deliverable_v1.1.0/html_docs/doc_developer/11_error_codes.html new file mode 100644 index 0000000..e1bd41c --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc_developer/11_error_codes.html @@ -0,0 +1,180 @@ + + + + +11 Error Codes - Momentry API Docs + + + +
+← Back to index + + + + +

Error Response Format

+

All API errors follow this JSON structure:

+
{
+  "success": false,
+  "error": {
+    "code": "E001_NOT_FOUND",
+    "message": "Resource not found",
+    "details": {"resource": "file_uuid", "value": "abc"}
+  }
+}
+
+ +

Error Code List

+

Generic Errors (E0xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E001_NOT_FOUND404Resource not found (file, identity, chunk)
E002_DUPLICATE409Resource already exists
E003_VALIDATION400Request parameter validation failed
E004_UNAUTHORIZED401Invalid API key or token
E005_INTERNAL500Internal server error
+

Processor Errors (E1xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E101_PROCESSOR_FAIL500Python script execution failed
E102_TIMEOUT504Processing timeout
E103_RESUME_FAIL500Resume failed (checkpoint not found)
E104_NO_VIDEO400Video file path not found
+

Identity Errors (E2xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E201_FACE_NOT_FOUND404Face detection not found
E202_MERGE_CONFLICT409Identity merge conflict
E203_CANDIDATE_EMPTY404No candidates available for confirmation
+

TMDb Errors (E3xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E301_TMDB_NO_KEY400TMDB_API_KEY environment variable not set
E302_TMDB_UNREACHABLE502TMDb API unreachable or timed out
E303_TMDB_CACHE_NOT_FOUND200No local TMDb cache; run prefetch first
E304_TMDB_PROBE_FAILED500TMDb probe execution failed
E305_TMDB_MOVIE_NOT_FOUND404No matching TMDb movie found from filename
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc_developer/index.html b/deliverable_v1.1.0/html_docs/doc_developer/index.html new file mode 100644 index 0000000..6a66aa7 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc_developer/index.html @@ -0,0 +1,29 @@ + + + + +Momentry API 文件 + + + +
+

Momentry API 文件

+

API 參考手冊 — 登入後可瀏覽各模組文件

+
錯誤碼Error Codes
+
+ + \ No newline at end of file diff --git a/deliverable_v1.1.0/html_docs/doc_developer/login.html b/deliverable_v1.1.0/html_docs/doc_developer/login.html new file mode 100644 index 0000000..c199107 --- /dev/null +++ b/deliverable_v1.1.0/html_docs/doc_developer/login.html @@ -0,0 +1,46 @@ + + + + +Login - Momentry Docs + + + +
+

Momentry Docs

+
+ + +
Invalid credentials
+ +
+
+ + + \ No newline at end of file diff --git a/deliverable_v1.1.0/modules/01_auth.md b/deliverable_v1.1.0/modules/01_auth.md new file mode 100644 index 0000000..96b8979 --- /dev/null +++ b/deliverable_v1.1.0/modules/01_auth.md @@ -0,0 +1,280 @@ + + + + +## Base URL + +| Environment | URL | Purpose | +|-------------|-----|---------| +| Production | `http://localhost:3002` | Production deployment | +| External (M5) | `https://m5api.momentry.ddns.net` | Remote access | + +## Variables + +All examples in this documentation use these environment variables: + +```bash +API="http://localhost:3002" +KEY="your-api-key-here" +``` + +## Authentication + +All endpoints under `/api/v1/*` require authentication. +The following endpoints are public (no auth needed): + +- `GET /health` +- `POST /api/v1/auth/login` +- `POST /api/v1/auth/logout` + +### Three Authentication Modes + +The system supports three authentication methods, checked in **priority order** by the middleware: + +``` +Middleware priority: + 1. Session Cookie (Portal/browser) + 2. JWT Bearer (API clients, CLI) + 3. API Key Header (legacy compatibility) + 4. 
API Key Query Param (?api_key=) +``` + +| Mode | Transport | Expiry | Scope | Best for | +|------|-----------|--------|-------|----------| +| **Session Cookie** | `Cookie: session_id=` | 24h | per-browser session | Portal (browser) | +| **JWT** | `Authorization: Bearer ` | 1h | per-login token | API clients, CLI, scripts | +| **API Key** | `X-API-Key: ` | 90d | fixed key for automation | Legacy scripts, WordPress | + +--- + +### Login + +**Default accounts & API keys:** + +| Username | Password | API Key | Role | +|----------|----------|---------|------| +| `admin` | `admin` | — | admin | +| `demo` | `demo` | `muser_demo_key_32chars_abcdef1234567890` | user | + +The demo API key is set via `MOMENTRY_DEMO_API_KEY` env var and can be used in place of JWT for marcom integrations: + +```bash +# Using API key instead of JWT +curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890" +``` + +```bash +# Login as admin +curl -s -X POST "$API/api/v1/auth/login" \ + -H "Content-Type: application/json" \ + -d '{"username": "admin", "password": "admin"}' + +# Login as demo user +curl -s -X POST "$API/api/v1/auth/login" \ + -H "Content-Type: application/json" \ + -d '{"username": "demo", "password": "demo"}' +``` + +#### Success Response + +```json +{ + "success": true, + "jwt": "eyJhbGciOiJIUzI1NiIs...", + "api_key": "muser_...", + "user": { + "username": "admin", + "role": "admin" + }, + "expires_at": "2026-05-18T13:00:00Z" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `jwt` | string | JWT access token. Use as `Authorization: Bearer `. Expires in 1 hour. | +| `api_key` | string | Legacy API key. Use as `X-API-Key: `. Good for 90 days. 
| +| `user.username` | string | Username | +| `user.role` | string | Role: `admin`, `user`, or `readonly` | +| `expires_at` | string | ISO8601 timestamp of JWT expiration | + +The login endpoint also sets a `Set-Cookie` header for browser-based clients: + +``` +Set-Cookie: session_id=; Path=/; HttpOnly; SameSite=Strict; Max-Age=86400 +``` + +#### Error Response (401) + +```json +{ + "success": false, + "message": "Invalid username or password" +} +``` + +--- + +### Using JWT + +JWT is preferred for API clients (CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless). + +```bash +# Login and capture JWT +JWT=$(curl -s -X POST "$API/api/v1/auth/login" \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])") + +# Use JWT for all subsequent requests +curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan" +curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb" +``` + +JWT is short-lived (1 hour). When it expires, request a new one via login. + +--- + +### Using Session Cookie (Browser) + +Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed. + +```bash +# Login captures the session cookie from Set-Cookie header +curl -v -X POST "$API/api/v1/auth/login" \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie" + +# Browser automatically sends: Cookie: session_id= +# No manual header needed for subsequent requests +``` + +The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF). 
+ +--- + +### Using Legacy API Key + +```bash +curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan" + +# Also accepted via Bearer header (non-JWT format) or query parameter: +curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan" +curl "$API/api/v1/files/scan?api_key=$KEY" +``` + +API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation. + +### Obtaining an API Key (CLI) + +```bash +momentry api-key create "My API Key" --key-type user +``` + +--- + +### Logout + +```bash +# Logout using the session cookie (browser) +curl -X POST "$API/api/v1/auth/logout" \ + -H "Cookie: session_id=" +``` + +#### What logout does + +| Auth mode | Effect | +|-----------|--------| +| **Session Cookie** | Session deleted from database. Same cookie returns 401 on subsequent requests. | +| **JWT** | JWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.) | +| **API Key** | API key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.) | + +#### Example: full session lifecycle + +```bash +# 1. Login +SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/') + +# 2. Use session (works) +curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \ + -H "Cookie: session_id=$SESSION_ID" +# → HTTP 200 + +# 3. Logout +curl -s -X POST "$API/api/v1/auth/logout" \ + -H "Cookie: session_id=$SESSION_ID" +# → {"success": true} + +# 4. Use session again (rejected) +curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \ + -H "Cookie: session_id=$SESSION_ID" +# → HTTP 401 +``` + +--- + +### Authentication Flow Summary + +``` +Login Request + │ + ▼ +┌──────────────────┐ +│ 1. 
Check users │ ← users table (argon2 password verify) +│ table │ +└──────┬───────────┘ + │ + ┌───┴───┐ + │ match │ + └───┬───┘ + │ + ▼ +┌──────────────────┐ +│ 2. Create JWT │ ← 1h expiry, signed with JWT_SECRET +├──────────────────┤ +│ 3. Create │ ← 24h expiry, stored in sessions table +│ session │ +├──────────────────┤ +│ 4. Set-Cookie │ ← HttpOnly, SameSite=Strict, Path=/ +├──────────────────┤ +│ 5. Return │ ← JWT + api_key + user info to client +└──────────────────┘ +``` + +``` +Protected Request + │ + ▼ +┌──────────────────────┐ +│ Middleware checks: │ +│ │ +│ 1. Cookie session? │ → DB lookup session → get api_key → verify +│ │ +│ 2. JWT Bearer? │ → verify JWT signature → decode claims +│ │ +│ 3. X-API-Key? │ → SHA256 hash → DB lookup → verify +│ │ +│ 4. ?api_key=? │ → same as #3 +│ │ +│ 5. None → 401 │ +└──────────────────────┘ +``` + +--- + +### Error Responses + +| HTTP | When | +|------|------| +| `401` | Missing or invalid authentication | +| `401` | Session expired or logged out | +| `401` | JWT expired | +| `401` | API key revoked or inactive | + +--- + +### Related + +- `POST /api/v1/resource/tmdb/check` — test authentication + TMDb API connectivity +- `GET /health/detailed` — view auth status (integrations section) diff --git a/deliverable_v1.1.0/modules/02_health.md b/deliverable_v1.1.0/modules/02_health.md new file mode 100644 index 0000000..46f7df0 --- /dev/null +++ b/deliverable_v1.1.0/modules/02_health.md @@ -0,0 +1,147 @@ + + + + +## Health Check + +### `GET /health` + +**Auth**: Public +**Scope**: system-level + +Returns basic server health status — used by load balancers and monitoring. 
+ +#### Example + +```bash +curl "$API/health" | jq '{status, version}' +``` + +#### Response (200) + +```json +{ + "status": "ok", + "version": "1.0.0", + "build_git_hash": "3a6c1865", + "build_timestamp": "2026-05-16T13:38:15Z", + "uptime_ms": 3015 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `status` | string | `ok` or `degraded` | +| `version` | string | Semver version | +| `build_git_hash` | string | Git commit hash | +| `build_timestamp` | string | Binary build time | +| `uptime_ms` | integer | Milliseconds since server start | + +--- + +### `GET /health/detailed` + +**Auth**: Required +**Scope**: system-level + +Returns full system health including each service status, resource utilization, pipeline readiness, schema migration status, identity file sync status, and external integrations. + +> Requires authentication (JWT, session cookie, or API key). The basic `/health` endpoint remains public for load balancer checks. + +#### Example + +```bash +curl "$API/health/detailed" | jq '{status, services, resources: {cpu: .resources.cpu_used_percent, memory: .resources.memory_used_percent}}' +``` + +#### Response (200) + +```json +{ + "status": "ok", + "version": "1.0.0", + "services": { + "postgres": {"status": "ok", "latency_ms": 3}, + "redis": {"status": "ok", "latency_ms": 1}, + "qdrant": {"status": "ok", "latency_ms": 5} + }, + "resources": { + "cpu_used_percent": 12.5, + "memory_available_mb": 32768, + "memory_used_percent": 31.7 + }, + "pipeline": { + "scripts_ready": true, + "scripts_count": 345, + "processors": { + "asr": true, + "yolo": true, + "face": true, + "pose": true, + "ocr": true, + "cut": true, + "scene": true, + "asrx": true, + "visual_chunk": true + }, + "models_ready": true, + "models_count": 42, + "scripts_integrity": {"matched": 332, "total": 345, "ok": false}, + "ffmpeg": true + }, + "schema": { + "table_exists": true, + "applied": [{"filename": "migrate_add_users_table.sql"}], + "required": [], + "ok": true 
+ }, + "identities": { + "directory_exists": true, + "files_count": 3481, + "index_ok": true, + "db_count": 3481, + "synced": true + }, + "integrations": { + "tmdb": { + "api_key_configured": false, + "enabled": false, + "api_reachable": null + } + } +} +``` + +#### Response Fields + +| Field | Type | Description | +|-------|------|-------------| +| `status` | string | `ok` if all essential services healthy | +| `services` | object | Per-service status (postgres, redis, qdrant) | +| `services.*.status` | string | `ok`, `error`, or `degraded` | +| `services.*.latency_ms` | int | Response time in milliseconds | +| `resources` | object | CPU, memory usage | +| `pipeline.scripts_ready` | boolean | Scripts directory accessible | +| `pipeline.scripts_count` | int | Number of Python processor scripts | +| `pipeline.processors` | object | Per-processor availability | +| `pipeline.models_ready` | boolean | Models directory accessible | +| `pipeline.scripts_integrity` | object | SHA256 checksum verification results | +| `schema.ok` | boolean | All required migrations applied | +| `identities.synced` | boolean | Identity file count matches DB count | +| `integrations.tmdb` | object | TMDB API key config and reachability | + +#### Health status rules + +| Condition | status | +|-----------|--------| +| All services ok | `ok` | +| Any service error | `degraded` | +| Postgres or Redis error | `degraded` (server still responds) | + +--- + +### Stats Endpoints + +| Method | Endpoint | Auth | Description | +|--------|----------|------|-------------| +| GET | `/api/v1/stats/sftpgo` | No | SFTPGo service status | diff --git a/deliverable_v1.1.0/modules/03_register.md b/deliverable_v1.1.0/modules/03_register.md new file mode 100644 index 0000000..9bae26f --- /dev/null +++ b/deliverable_v1.1.0/modules/03_register.md @@ -0,0 +1,184 @@ + + + + +## File Registration + +### `POST /api/v1/files/register` + +**Auth**: Required +**Scope**: file-level + +Register a video file for processing. 
Returns the file's metadata and UUID. + +**New in v0.1.2**: Registration now **automatically triggers the processing pipeline** — no need to call `POST /api/v1/file/:file_uuid/process` separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H) + +If the file already exists (same content hash), returns the existing record with `already_exists: true`. + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `file_path` | string | Yes | — | Path to video file on disk | +| `pattern` | string | No | — | Regex pattern for batch register (requires `file_path` to be a directory) | +| `user_id` | integer | No | — | User ID to associate with registration | +| `content_hash` | string | No | — | Pre-computed SHA-256 hash (skips computation) | + +#### Example + +```bash +# Register a single file +curl -s -X POST "$API/api/v1/files/register" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_path": "/path/to/video.mp4"}' + +# Batch register files matching a pattern in a directory +curl -s -X POST "$API/api/v1/files/register" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}' +``` + +#### Response (200) + +```json +{ + "success": true, + "file_uuid": "3a6c1865...", + "file_name": "video.mp4", + "file_path": "/path/to/video.mp4", + "file_type": "video", + "duration": 120.5, + "width": 1920, + "height": 1080, + "fps": 24.0, + "total_frames": 2892, + "already_exists": false, + "message": "File registered successfully" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `success` | boolean | Always true on 200 | +| `file_uuid` | string | 32-char 
hex UUID of the registered file | +| `file_name` | string | File name (auto-renamed if name conflict) | +| `file_path` | string | Canonical path on disk | +| `file_type` | string | `"video"`, `"audio"`, or `"unknown"` | +| `duration` | float | Duration in seconds | +| `width` | integer | Video width in pixels | +| `height` | integer | Video height in pixels | +| `fps` | float | Frames per second | +| `total_frames` | integer | Total frame count | +| `already_exists` | boolean | True if same content was already registered | +| `message` | string | Human-readable status | + +#### Error Responses + +| HTTP | When | +|------|------| +| `401` | Missing or invalid API key | +| `400` | Invalid request body | +| `404` | File path does not exist | + +--- + +### `GET /api/v1/files/scan` + +**Auth**: Required +**Scope**: file-level + +Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered. + +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `page` | integer | No | 1 | Page number (1-based) | +| `page_size` | integer | No | all | Items per page (alias: `limit`) | +| `limit` | integer | No | all | Max items (alias for `page_size`) | +| `pattern` | string | No | — | Regex filter on file name (e.g., `.*\\.mp4$`) | +| `sort_by` | string | No | `name` | Sort field: `name`, `size`, `modified`, `status` | +| `sort_order` | string | No | `asc` | Sort direction: `asc` or `desc` | + +#### Example + +```bash +# Full scan +curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}' + +# Paginated (page 1, 5 per page) +curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}' + +# Regex filter: only mp4 files +curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}' 
+ +# Sort by file size (largest first) +curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]' + +# Sort by modified time (most recent first) +curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]' + +# Sort by status +curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]' +``` + +#### Response (200) + +```json +{ + "files": [ + { + "file_name": "video.mp4", + "file_size": 12345678, + "is_registered": true, + "file_uuid": "3a6c1865...", + "status": "completed", + "registration_time": "2026-05-16T12:00:00Z", + "job_id": 42 + } + ], + "total": 107, + "filtered_total": 80, + "page": 1, + "page_size": 20, + "total_pages": 4, + "registered_count": 26, + "unregistered_count": 81 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `files` | array | Array of file info objects (paginated) | +| `files[].file_name` | string | File name | +| `files[].relative_path` | string | Path relative to scan root | +| `files[].file_path` | string | Absolute path on disk | +| `files[].file_size` | integer | File size in bytes | +| `files[].modified_time` | string | Last modified timestamp (ISO8601) | +| `files[].is_registered` | boolean | Whether file is registered in DB | +| `files[].file_uuid` | string | 32-char hex UUID (only if registered) | +| `files[].status` | string | `"completed"`, `"processing"`, `"registered"`, `"unregistered"`, or `null` | +| `files[].registration_time` | string | DB registration timestamp (only if registered) | +| `files[].job_id` | integer | Processing job ID (only if a job exists) | +| `total` | integer | Total files found on disk (unfiltered) | +| `filtered_total` | integer | Files matching regex filter | +| `page` | integer | Current page number | +| `page_size` | integer | Items per page | +| 
`total_pages` | integer | Total pages | +| `registered_count` | integer | Files registered in DB | +| `unregistered_count` | integer | Files not yet registered | + +#### Notes + +| Feature | Behavior | +|---------|----------| +| **Regex** | Case-insensitive (`(?i)` prefix auto-applied). Applied to `file_name`. | +| **Sort order** | Default (`sort_by=name`): registered files first, then alphabetically. `sort_by=status`: alphabetical by status string. | +| **Pagination** | `page_size` and `limit` are aliases. Default: show all results. | +| **Processing order** | `pattern` regex filter → `sort_by`/`sort_order` → `page`/`page_size` slice. | diff --git a/deliverable_v1.1.0/modules/04_lookup.md b/deliverable_v1.1.0/modules/04_lookup.md new file mode 100644 index 0000000..019c8af --- /dev/null +++ b/deliverable_v1.1.0/modules/04_lookup.md @@ -0,0 +1,138 @@ + + + + +## File Lookup + +### `GET /api/v1/files/lookup` + +**Auth**: Required +**Scope**: file-level + +Search registered files by file name. Performs a case-insensitive LIKE search on the file name column. Returns basic info about matching files. 
+ +#### Query Parameters + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `file_name` | string | Yes | File name to search for (partial matches supported) | + +#### Example + +```bash +# Look up a specific file +curl -s "$API/api/v1/files/lookup?file_name=video.mp4" \ + -H "X-API-Key: $KEY" + +# Partial name search +curl -s "$API/api/v1/files/lookup?file_name=charade" \ + -H "X-API-Key: $KEY" | jq '.matches[].file_name' +``` + +#### Response (200) + +```json +{ + "file_name": "video.mp4", + "exists": true, + "matches": [ + { + "file_uuid": "a03485a40b2df2d3", + "file_name": "video.mp4", + "file_type": "video", + "status": "completed" + } + ], + "next_name": "video (2).mp4" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `file_name` | string | Searched name | +| `exists` | boolean | Exact name match exists | +| `matches` | array | Array of matching registered files | +| `matches[].file_uuid` | string | 32-char hex UUID | +| `matches[].file_name` | string | Registered file name | +| `matches[].file_type` | string | `"video"`, `"audio"`, or `null` | +| `matches[].status` | string | Registration/processing status | +| `next_name` | string | Suggested name for avoiding conflicts | + +--- + +## Unregister + +### `POST /api/v1/unregister` + +**Auth**: Required +**Scope**: file-level + +Delete a registered file from the system. Supports single file by UUID, or batch by directory + regex pattern. + +#### What gets deleted + +| Removed (default) | Not removed | +|---------|-------------| +| Database records (videos, chunks, embeddings, processor_results, pre_chunks) | The original source video file on disk | +| Processor output JSON files (`{uuid}.*.json`) — unless `delete_output_files: false` | Temp/working directories | +| In-memory cache entries | | +| MongoDB cached lists | | + +> ⚠️ Database deletion is **irreversible**. To keep output files, set `"delete_output_files": false`. 
+ +#### Request Parameters + +At least one mode must be specified: either `file_uuid` alone, or `file_path` + `pattern` together. + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `file_uuid` | string | * | — | Single file UUID to delete | +| `file_path` | string | * | — | Directory path (for batch delete) | +| `pattern` | string | * | — | Regex pattern (requires `file_path`) | +| `delete_output_files` | boolean | No | `true` | If `true`, also delete processor output JSON files (`{uuid}.*.json`). Set to `false` to keep them. | + +#### Example + +```bash +# Delete a single file by UUID (default: also deletes output JSON files) +curl -s -X POST "$API/api/v1/unregister" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_uuid": "'"$FILE_UUID"'"}' + +# Keep output JSON files, only delete DB records +curl -s -X POST "$API/api/v1/unregister" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "delete_output_files": false}' + +# Batch delete all mp4 files in a directory +curl -s -X POST "$API/api/v1/unregister" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}' +``` + +#### Response (200) + +```json +{ + "success": true, + "file_uuid": "a03485a40b2df2d3", + "message": "Video unregistered successfully" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `success` | boolean | True if deletion succeeded | +| `file_uuid` | string | UUID of the deleted file (single mode) | +| `message` | string | Human-readable status | + +#### Error Responses + +| HTTP | When | +|------|------| +| `400` | Neither `file_uuid` nor `file_path`+`pattern` provided | +| `404` | File UUID not found | +| `401` | Missing or invalid API key | diff --git a/deliverable_v1.1.0/modules/05_process.md b/deliverable_v1.1.0/modules/05_process.md new file mode 100644 
index 0000000..9f9baf8 --- /dev/null +++ b/deliverable_v1.1.0/modules/05_process.md @@ -0,0 +1,236 @@ + + + + +## Processing Pipeline + +### `POST /api/v1/file/:file_uuid/process` + +**Auth**: Required +**Scope**: file-level + +Trigger the processing pipeline for a registered file. Creates a monitor job that the worker picks up and processes sequentially. Returns immediately with the job info—processing runs asynchronously in the background. + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `processors` | string[] | No | all | Specific processors to run: `["cut","asr","asrx","yolo","ocr","face","pose","visual_chunk","story","5w1h"]` | +| `rules` | string[] | No | all | Rule names to apply (currently unused) | + +#### Example + +```bash +# Run all processors +curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" -d '{}' + +# Run specific processors only +curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"processors": ["asr", "face", "yolo"]}' +``` + +#### Response (200) + +```json +{ + "success": true, + "job_id": 42, + "file_uuid": "3a6c1865...", + "status": "processing", + "pids": [12345, 12346], + "message": "Processing triggered for video.mp4" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `success` | boolean | Always true on 200 | +| `job_id` | integer | Monitor job ID (for job tracking) | +| `file_uuid` | string | 32-char hex UUID of the file | +| `status` | string | `"processing"` | +| `pids` | integer[] | Process IDs of started processors | +| `message` | string | Human-readable status | + +#### Error Responses + +| HTTP | When | +|------|------| +| `404` | File UUID not found | +| `401` | Missing or invalid API key | + +--- + +### `GET /api/v1/file/:file_uuid/probe` + +**Auth**: Required +**Scope**: 
file-level + +Get ffprobe metadata for a registered file. Returns video/audio stream info, codec details, duration, resolution, and frame rate. + +#### Example + +```bash +curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "file_uuid": "3a6c1865...", + "file_name": "video.mp4", + "file_size": 794863677, + "duration": 120.5, + "width": 1920, + "height": 1080, + "fps": 24.0, + "total_frames": 2892, + "cached": true, + "format": { + "filename": "/path/to/video.mp4", + "format_name": "mov,mp4,m4a,3gp", + "duration": "120.5", + "size": "12345678", + "bit_rate": "819200" + }, + "streams": [ + { + "index": 0, + "codec_name": "h264", + "codec_type": "video", + "width": 1920, + "height": 1080, + "r_frame_rate": "24/1", + "duration": "120.5" + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `file_uuid` | string | 32-char hex UUID | +| `file_name` | string | File name | +| `file_size` | integer | File size in bytes (from filesystem) | +| `duration` | float | Duration in seconds | +| `width` | integer | Video width in pixels | +| `height` | integer | Video height in pixels | +| `fps` | float | Frames per second | +| `total_frames` | integer | Estimated total frames | +| `cached` | boolean | True if result was from cached probe JSON | +| `format` | object | Container format info (ffprobe format section) | +| `streams` | array | Array of stream info objects | + +--- + +### `GET /api/v1/progress/:file_uuid` + +**Auth**: Required +**Scope**: file-level + +Get real-time processing progress for a file via Redis pub/sub. Includes per-processor status, current/total frames, ETA, and system resource stats. 
+ +#### Pipeline Order + +| Order | Processor | Dependencies | Description | +|-------|-----------|-------------|-------------| +| 1 | `cut` | — | Scene detection | +| 2 | `asr` | cut | Speech-to-text (per scene) | +| 3 | `asrx` | asr | Speaker diarization | +| 4 | `yolo` | — | Object detection | +| 5 | `ocr` | — | Text recognition | +| 6 | `face` | — | Face detection & embedding | +| 7 | `pose` | — | Pose estimation | +| 8 | `visual_chunk` | yolo | Visual scene chunks | +| 9 | `story` | asr, asrx, cut, yolo, face | Scene summaries (template) | +| 10 | `5w1h` | story | 5W1H analysis (Gemma4 LLM) | + +All processors except `story` and `5w1h` run concurrently when their dependencies are met. Story and 5W1H run sequentially after their prerequisites. + +#### Example + +```bash +curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}' +``` + +#### Response (200) + +```json +{ + "file_uuid": "3a6c1865...", + "overall_progress": 71, + "cpu_percent": 45.2, + "gpu_percent": 30.1, + "memory_percent": 62.4, + "processors": [ + {"processor_type": "asr", "status": "complete", "progress": 100}, + {"processor_type": "yolo", "status": "running", "progress": 65}, + {"processor_type": "face", "status": "pending", "progress": 0} + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `file_uuid` | string | 32-char hex UUID | +| `overall_progress` | integer | Overall progress percentage (0–100) | +| `processors` | array | Per-processor status list | +| `processors[].processor_type` | string | Processor name (`asr`, `cut`, `yolo`, etc.) 
| +| `processors[].status` | string | `"pending"`, `"running"`, `"complete"`, or `"failed"` | +| `processors[].progress` | integer | Per-processor progress (0–100) | +| `processors[].eta_seconds` | integer | Estimated seconds remaining (running processors) | +| `processors[].current` | integer | Current frame count | +| `processors[].total` | integer | Total frame count | +| `cpu_percent` | float | Current CPU usage | +| `gpu_percent` | float | Current GPU utilization | +| `memory_percent` | float | Current memory usage | + +--- + +### `GET /api/v1/jobs` + +**Auth**: Required +**Scope**: system-level + +List all processing jobs (monitor jobs) in the system. Shows job status, which file each job is processing, and current processor info. + +#### Example + +```bash +curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | {uuid, status}]}' +``` + +#### Response (200) + +```json +{ + "jobs": [ + { + "id": 42, + "uuid": "3a6c1865...", + "status": "running", + "current_processor": "yolo", + "created_at": "2026-05-16T12:00:00Z", + "started_at": "2026-05-16T12:01:00Z" + } + ], + "count": 15, + "page": 1, + "page_size": 20 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `jobs` | array | Array of job info objects | +| `jobs[].id` | integer | Job ID | +| `jobs[].uuid` | string | File UUID being processed | +| `jobs[].status` | string | `"pending"`, `"running"`, `"completed"`, `"failed"` | +| `jobs[].current_processor` | string | Currently active processor, or null | +| `count` | integer | Total job count | +| `page` | integer | Current page number | +| `page_size` | integer | Jobs per page | diff --git a/deliverable_v1.1.0/modules/06_search.md b/deliverable_v1.1.0/modules/06_search.md new file mode 100644 index 0000000..e5b13c3 --- /dev/null +++ b/deliverable_v1.1.0/modules/06_search.md @@ -0,0 +1,145 @@ + + + + +## Search APIs + +### `POST /api/v1/search/smart` + +**Auth**: Required +**Scope**: file-level + +Semantic vector 
search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity. + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `file_uuid` | string | Yes | — | File UUID to search within | +| `query` | string | Yes | — | Search text | +| `limit` | integer | No | 5 | Max results to return | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 5 | Items per page | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/search/smart" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Audrey Hepburn"}' +``` + +#### Response (200) + +```json +{ + "query": "Audrey Hepburn", + "results": [ + { + "parent_id": 1087822, + "scene_order": 1087822, + "start_frame": 104438, + "end_frame": 104538, + "fps": 24.0, + "start_time": 4351.6, + "end_time": 4355.76, + "summary": "[4352s-4356s, 4s] Cast: Audrey Hepburn. Total: 2 lines, 10 words. Speakers: Audrey Hepburn (2 lines)", + "similarity": 0.67 + } + ], + "page": 1, + "page_size": 5, + "strategy": "semantic_vector_search" +} +``` + +--- + +### `POST /api/v1/search/universal` + +**Auth**: Required +**Scope**: file-level + +Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`. 
+ +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `query` | string | Yes | — | Search text | +| `file_uuid` | string | No | — | Restrict to specific file | +| `types` | string[] | No | `["chunk","frame","person"]` | Search types | +| `limit` | integer | No | 10 | Max results per type | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 20 | Items per page | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/search/universal" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Cary Grant"}' +``` + +#### Response (200) + +```json +{ + "results": [ + { + "type": "chunk", + "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2", + "chunk_type": "story_child", + "start_frame": 5103, + "end_frame": 5127, + "start_time": 212.64, + "end_time": 213.64, + "text": "[213s-214s] Cary Grant: \"Olá!\"", + "score": 0.9 + } + ], + "total": 20, + "took_ms": 18 +} +``` + +--- + +### `POST /api/v1/search/frames` + +**Auth**: Required +**Scope**: file-level + +Search face detection frames by identity name or trace ID. + +--- + +### `POST /api/v1/search/identity_text` + +**Auth**: Required +**Scope**: file-level + +Search text chunks spoken by a specific identity. 
+ +--- + +### Visual Search + +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/api/v1/search/visual` | Search visual chunks | +| POST | `/api/v1/search/visual/class` | Search by object class | +| POST | `/api/v1/search/visual/density` | Search by object density | +| POST | `/api/v1/search/visual/combination` | Search by object combination | +| POST | `/api/v1/search/visual/stats` | Visual chunk statistics | + +#### Embedding Model + +| Detail | Value | +|--------|-------| +| **Model** | EmbeddingGemma-300m | +| **Endpoint** | `POST /api/v1/embeddings` on port 11436 | +| **Dimension** | 768 | +| **Storage** | pgvector (`chunk.embedding` column) | diff --git a/deliverable_v1.1.0/modules/08_identity_agent.md b/deliverable_v1.1.0/modules/08_identity_agent.md new file mode 100644 index 0000000..f9c86e0 --- /dev/null +++ b/deliverable_v1.1.0/modules/08_identity_agent.md @@ -0,0 +1,65 @@ + + + + +## Identity Agent + +### `POST /api/v1/agents/identity/match-from-photo` + +**Auth**: Required +**Scope**: file-level + +Upload a face photo to match against known identities. Detects face via InsightFace, extracts 512D embedding via CoreML FaceNet, then searches pgvector for the closest identity. + +#### Request + +`multipart/form-data` with field `image` (JPEG/PNG) and optional `file_uuid`. + +#### Example + +```bash +curl -s -X POST "$API/api/v1/agents/identity/match-from-photo" \ + -H "Authorization: Bearer $JWT" \ + -F "image=@/path/to/face.jpg" \ + -F "file_uuid=$FILE_UUID" +``` + +#### Response (200) + +```json +{ + "success": true, + "matches": [ + { + "identity_uuid": "a9a90105...", + "name": "Cary Grant", + "similarity": 0.87 + } + ] +} +``` + +--- + +### `POST /api/v1/agents/identity/match-from-trace` + +**Auth**: Required +**Scope**: file-level + +Match a face trace (tracked face across frames) against known identities. Samples 3 angles from the trace, generates embeddings, and searches pgvector. 
+ +#### Request Parameters + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `file_uuid` | string | Yes | File containing the trace | +| `trace_id` | integer | Yes | Face trace ID to match | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/agents/identity/match-from-trace" \ + -H "Authorization: Bearer $JWT" \ + -H "Content-Type: application/json" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "trace_id": 10}' +``` diff --git a/deliverable_v1.1.0/modules/09_tmdb.md b/deliverable_v1.1.0/modules/09_tmdb.md new file mode 100644 index 0000000..7ea3f27 --- /dev/null +++ b/deliverable_v1.1.0/modules/09_tmdb.md @@ -0,0 +1,109 @@ + + + + +## TMDb Enrichment + +> **Offline operation**: TMDb prefetch now checks local identity files first (`identities/_index.json` + `*.tmdb.json`). +> If local files exist, no external API call is made. Internet is only needed for initial data seeding. + +### Overview + +TMDb enrichment is an optional identity enrichment step that can be run after Pipeline face detection completes. The workflow is: + +1. **Prefetch** (requires internet): Download movie cast data from TMDb API → cache to `{file_uuid}.tmdb.json` +2. **Probe**: Read local cache → create identities for **all** cast members (`source='tmdb'`) + save `identity.json` + download profile image to `{OUTPUT}/identities/{uuid}/profile.jpg` +3. **Match**: The worker automatically matches video faces against TMDb identities when `MOMENTRY_TMDB_PROBE_ENABLED=true` + +### `POST /api/v1/agents/tmdb/prefetch` + +**Auth**: Required +**Scope**: file-level + +Fetch TMDb cast data for a registered file and cache it locally. This is the only step requiring internet access. 
+ +#### Request Parameters + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `file_uuid` | string | Yes | File UUID to enrich | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"file_uuid": "'"$FILE_UUID"'"}' +``` + +#### Response (200) + +```json +{"success": true, "file_uuid": "...", "cache_path": "/output/...tmdb.json"} +``` + +### `POST /api/v1/file/:file_uuid/tmdb-probe` + +**Auth**: Required +**Scope**: file-level + +Read local TMDb cache and create/update identities. Requires prefetch to have been run first. + +#### Example + +```bash +curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \ + -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}' +``` + +#### Response (200 — identities created) + +```json +{"success": true, "identities_created": 15, "movie_title": "Charade"} +``` + +#### Response (200 — no cache) + +```json +{"success": false, "message": "No TMDb cache found. Run tmdb-prefetch first."} +``` + +### `GET /api/v1/resource/tmdb` + +**Auth**: Required +**Scope**: system-level + +View TMDb resource status including configuration, identity counts, and cache file count. + +#### Example + +```bash +curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \ + | jq '{identities_seeded, cache_files}' +``` + +### `POST /api/v1/resource/tmdb/check` + +**Auth**: Required +**Scope**: system-level + +Ping the TMDb API to verify connectivity and measure latency. 
+ +#### Example + +```bash +curl -s -X POST "$API/api/v1/resource/tmdb/check" \ + -H "X-API-Key: $KEY" | jq '.status' +``` + +#### Response + +```json +{ + "api_key_configured": true, + "enabled": false, + "api_reachable": true, + "api_latency_ms": 120 +} +``` diff --git a/deliverable_v1.1.0/modules/10_pipeline.md b/deliverable_v1.1.0/modules/10_pipeline.md new file mode 100644 index 0000000..05c1f4a --- /dev/null +++ b/deliverable_v1.1.0/modules/10_pipeline.md @@ -0,0 +1,178 @@ + + + + +## Pipeline + +### Dependency Graph + +```mermaid +flowchart TB + subgraph Processors["10 Processors"] + Cut[Cut] --> ASR[ASR] + ASR --> ASRX[ASRX] + ASRX --> Story[Story] + Cut --> Story + YOLO[YOLO] --> VisualChunk[VisualChunk] + VisualChunk --> Story + Face[Face] --> Story + Story --> FiveW1H[5W1H] + OCR[OCR] + Pose[Pose] + end + + subgraph Ingestion["入庫 (Post-Processing)"] + ASR --> Rule1[Rule 1 Sentence] + ASRX --> Rule1 + Rule1 --> Vectorize[Auto-Vectorize] + Rule1 --> Phase1[Phase 1 Pack] + + Cut --> Rule3[Rule 3 Scene] + ASR --> Rule3 + + Face --> Trace[Face Trace] + Trace --> Qdrant[Qdrant Sync] + Trace --> TraceChunks[Trace Chunks] + Trace --> TKG[TKG Builder] + + Face --> TMDbMatch[TMDb Match] + Face --> SceneMeta[Scene Metadata] + YOLO --> SceneMeta + Face --> IdentityAgent[Identity Agent] + ASRX --> IdentityAgent + + Cut --> Agent5W1H[5W1H Agent] + ASR --> Agent5W1H + Agent5W1H --> Phase2[Phase 2 Pack] + end + + style Processors fill:#1a1a2e,stroke:#e94560 + style Ingestion fill:#16213e,stroke:#0f3460 +``` + +### Pipeline Completion Flow + +The pipeline is **not complete** until both the 10 processors AND the 入庫 (ingestion) steps have finished. The worker polls every 3 seconds and only marks the job as `completed` when all ingestion steps verify OK. 
+ +``` +10 processors done + ↓ (job status stays "running") +Algorithm 1 Trigger: Rule 1 + Vectorize + Phase 1 Pack + ↓ (job runs in parallel) +Algorithm 2 Trigger: Face Trace → TKG, Scene Metadata, Identity Agent, 5W1H Agent + ↓ (poll checks every 3s) +Ingestion verification: rule1 ✓ vectorize ✓ rule3 ✓ face_trace ✓ tkg ✓ scene_meta ✓ 5w1h ✓ + ↓ +job status = "completed" +``` + +### 10 Processor Stages + +| # | Processor | Depends On | Description | +|---|-----------|------------|-------------| +| 1 | `Cut` | — | Scene boundary detection (PySceneDetect) | +| 2 | `ASR` | Cut | Automatic speech recognition (faster-whisper) | +| 3 | `ASRX` | ASR | Speaker diarization + ASR refinement | +| 4 | `YOLO` | — | Object detection (YOLOv8) | +| 5 | `OCR` | — | Optical character recognition | +| 6 | `Face` | — | Face detection + recognition (InsightFace + CoreML) | +| 7 | `Pose` | — | Pose estimation | +| 8 | `VisualChunk` | YOLO | Visual object chunking | +| 9 | `Story` | ASRX + Cut + YOLO + Face | Narrative scene summarization (LLM, with embedding) | +| 10 | `5W1H` | Story | Who/What/When/Where/Why/How extraction (LLM, with embedding) | + +### 入庫 (Post-Processing / Ingestion) + +These steps run after the 10 processors and are **required for pipeline completion**. The worker checks all of them before marking the job as done. 
+ +| # | Step | Triggers When | Verification | +|---|------|--------------|-------------| +| 1 | **Rule 1 Sentence Chunking** | ASR + ASRX done | `chunk` table has rows with `chunk_type = 'sentence'` | +| 2 | **Auto-Vectorize** | Rule 1 done | `chunk.embedding` IS NOT NULL for sentence chunks | +| 3 | **Phase 1 Pack** | Rule 1 done | `release_pack.py --phase 1` executed | +| 4 | **Rule 3 Scene Chunking** | All 10 processors done + Cut + ASR | `chunk` table has rows with `chunk_type = 'cut'` | +| 5 | **Face Trace** | All 10 processors done + Face | `face_detections.trace_id` IS NOT NULL | +| 6 | **Qdrant Face Sync** | Face Trace done | Qdrant face_embedding collection populated | +| 7 | **Trace Chunks** | Face Trace done | `chunk` table has rows with `chunk_type = 'trace'` | +| 8 | **TKG Builder** | Face Trace done | `tkg_nodes` + `tkg_edges` tables have rows | +| 9 | **TMDb Face Matching** | TMDb enabled + Face done | `face_detections.identity_id` IS NOT NULL | +| 10 | **Heuristic Scene Metadata** | Face + YOLO done | `{file_uuid}.scene_meta.json` exists on disk | +| 11 | **Identity Agent** | Face + ASRX done | `identities` with `source = 'identity_agent'` | +| 12 | **5W1H Agent** | Cut + ASR done | `chunk.summary_text` IS NOT NULL for cut chunks | +| 13 | **Release Pack** | 5W1H Agent done | `release_pack.py --phase 2` executed | + +### Ingestion Status + +Check real-time ingestion status for a file: + +```bash +curl "$API/api/v1/stats/ingestion-status/$FILE_UUID" +``` + +Returns per-step `done` / `pending` status with detail counts. 
+ +#### Example + +```bash +curl "http://localhost:3003/api/v1/stats/ingestion-status/bd80fec9c42afb0307eb28f22c64c76a" | jq '.steps[] | {name, status, detail}' +``` + +#### Response + +```json +{ + "file_uuid": "bd80fec9c42afb0307eb28f22c64c76a", + "steps": [ + { "name": "rule1_sentence", "status": "pending", "detail": "0 sentence chunks" }, + { "name": "auto_vectorize", "status": "pending", "detail": "0 embedded" }, + { "name": "rule3_scene", "status": "pending", "detail": "0 scene chunks" }, + { "name": "face_trace", "status": "pending", "detail": "0 traces" }, + { "name": "trace_chunks", "status": "pending", "detail": "0 trace chunks" }, + { "name": "tkg", "status": "pending", "detail": "0 nodes, 0 edges" }, + { "name": "identity_match", "status": "pending", "detail": "0 identities" }, + { "name": "scene_metadata", "status": "pending", "detail": null }, + { "name": "5w1h", "status": "pending", "detail": "0 scenes with 5W1H" } + ] +} +``` + +### Stats Endpoints + +| Method | Endpoint | Auth | Description | +|--------|----------|------|-------------| +| GET | `/api/v1/stats/sftpgo` | No | SFTPGo service status | +| GET | `/api/v1/stats/ingestion-status/:file_uuid` | No | Per-file ingestion checklist | + +### Configuration + +### `POST /api/v1/config/cache` + +**Auth**: Required +**Scope**: system-level + +Toggle the Redis cache on or off. 
+ +#### Request Parameters + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `enabled` | boolean | Yes | `true` to enable, `false` to disable | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/config/cache" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"enabled": false}' +``` + +### Unmounted Routes + +The following routes are defined in source code but are **NOT** currently mounted in the router: + +| Endpoint | Source file | +|----------|-------------| +| `/api/v1/search/persons` | `universal_search.rs` (not mounted) | +| `/api/v1/who` | `who.rs` | +| `/api/v1/who/candidates` | `who.rs` | diff --git a/deliverable_v1.1.0/modules/11_error_codes.md b/deliverable_v1.1.0/modules/11_error_codes.md new file mode 100644 index 0000000..15f2dad --- /dev/null +++ b/deliverable_v1.1.0/modules/11_error_codes.md @@ -0,0 +1,57 @@ + + + + +## Error Response Format + +All API errors follow this JSON structure: + +```json +{ + "success": false, + "error": { + "code": "E001_NOT_FOUND", + "message": "Resource not found", + "details": {"resource": "file_uuid", "value": "abc"} + } +} +``` + +## Error Code List + +### Generic Errors (E0xx) + +| Code | HTTP | Description | +|------|------|-------------| +| `E001_NOT_FOUND` | 404 | Resource not found (file, identity, chunk) | +| `E002_DUPLICATE` | 409 | Resource already exists | +| `E003_VALIDATION` | 400 | Request parameter validation failed | +| `E004_UNAUTHORIZED` | 401 | Invalid API key or token | +| `E005_INTERNAL` | 500 | Internal server error | + +### Processor Errors (E1xx) + +| Code | HTTP | Description | +|------|------|-------------| +| `E101_PROCESSOR_FAIL` | 500 | Python script execution failed | +| `E102_TIMEOUT` | 504 | Processing timeout | +| `E103_RESUME_FAIL` | 500 | Resume failed (checkpoint not found) | +| `E104_NO_VIDEO` | 400 | Video file path not found | + +### Identity Errors (E2xx) + +| Code | HTTP | Description | 
+|------|------|-------------| +| `E201_FACE_NOT_FOUND` | 404 | Face detection not found | +| `E202_MERGE_CONFLICT` | 409 | Identity merge conflict | +| `E203_CANDIDATE_EMPTY` | 404 | No candidates available for confirmation | + +### TMDb Errors (E3xx) + +| Code | HTTP | Description | +|------|------|-------------| +| `E301_TMDB_NO_KEY` | 400 | `TMDB_API_KEY` environment variable not set | +| `E302_TMDB_UNREACHABLE` | 502 | TMDb API unreachable or timed out | +| `E303_TMDB_CACHE_NOT_FOUND` | 200 | No local TMDb cache; run prefetch first | +| `E304_TMDB_PROBE_FAILED` | 500 | TMDb probe execution failed | +| `E305_TMDB_MOVIE_NOT_FOUND` | 404 | No matching TMDb movie found from filename | diff --git a/deliverable_v1.1.0/modules/12_agent.md b/deliverable_v1.1.0/modules/12_agent.md new file mode 100644 index 0000000..e17fbc3 --- /dev/null +++ b/deliverable_v1.1.0/modules/12_agent.md @@ -0,0 +1,118 @@ +# Agent Endpoints + +Agent endpoints provide AI-powered capabilities including translation, identity analysis, and 5W1H extraction. + +## POST /api/v1/agents/translate + +Translate text between languages using Gemma4 (llama.cpp, port 8082). + +### Request + +```json +{ + "text": "Hello, welcome to Momentry Core.", + "target_language": "Traditional Chinese", + "source_language": "English" +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `text` | string | ✅ | Text to translate | +| `target_language` | string | ✅ | Target language name (e.g. 
"Traditional Chinese", "Japanese") | +| `source_language` | string | ❌ | Source language (default: "auto") | + +### Response + +```json +{ + "success": true, + "translated_text": "您好,歡迎使用 Momentry Core。", + "source_language_detected": "English", + "model_used": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf" +} +``` + +### Supported Language Pairs (tested) + +| Source | Target | Quality | +|--------|--------|---------| +| English | Traditional Chinese | ✅ | +| English | Japanese | ✅ | +| Chinese | English | ✅ | +| English | French | ✅ | +| Chinese | Japanese | ✅ | + +### Model + +- **Model**: Gemma4 26B (Q5_K_M) +- **Engine**: llama.cpp at `localhost:8082` +- **Endpoint**: `/v1/chat/completions` (OpenAI-compatible) +- **Temperature**: 0.1 +- **Max tokens**: 1024 + +### Errors + +| Status | Condition | +|--------|-----------| +| 500 | LLM unreachable or response parse failure | +| 401 | Missing/invalid auth | + +--- + +## POST /api/v1/agents/5w1h/analyze + +Extract 5W1H (Who, What, When, Where, Why, How) from a scene. Uses Gemma4 LLM on port 8082. + +### Request + +```json +{ + "file_uuid": "3abeee81d94597629ed8cb943f182e94", + "scene_id": 42 +} +``` + +### Response + +```json +{ + "success": true, + "5w1h": { + "who": ["Cary Grant"], + "what": ["discussing plans"], + "when": ["1963"], + "where": ["Paris"], + "why": ["vacation"], + "how": ["in person"] + } +} +``` + +## POST /api/v1/agents/5w1h/batch + +Batch analyze all scenes in a file for 5W1H extraction. Uses the pipeline's `parent_chunk_5w1h.py --mode llm`. + +### Request + +```json +{ + "file_uuid": "3abeee81d94597629ed8cb943f182e94" +} +``` + +## GET /api/v1/agents/5w1h/status + +Get status of the 5W1H agent pipeline for a file. 
+ +--- + +## Embedding Model + +| Detail | Value | +|--------|-------| +| **Model** | EmbeddingGemma-300m | +| **Endpoint** | `POST /v1/embeddings` on port 11436 | +| **Dimension** | 768 | +| **Used by** | `parent_chunk_5w1h.py --embed`, story, 5W1H, search | + diff --git a/deliverable_v1.1.0/modules/_template.md b/deliverable_v1.1.0/modules/_template.md new file mode 100644 index 0000000..3a14aff --- /dev/null +++ b/deliverable_v1.1.0/modules/_template.md @@ -0,0 +1,63 @@ +# {Module Name} — API Workspace Module + +> Use this template when adding or editing API endpoint documentation modules. + +## Module Metadata + +Every module MUST start with: + +```markdown + + + +``` + +## Endpoint Template + +Each endpoint MUST use this structure: + +### `METHOD /path/to/endpoint` + +**Auth**: Required / Optional / Public + +**Scope**: file-level / identity-level / system-level + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `param1` | string | Yes | — | Description | + +#### Example + +```bash +# brief description of what this example demonstrates +curl -s -X METHOD "$API/path" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"param1": "value"}' +``` + +#### Response (200) + +```json +{ "success": true } +``` + +| Field | Type | Description | +|-------|------|-------------| +| `success` | boolean | Always true on 200 | + +#### Error Codes + +| Code | HTTP | When | +|------|------|------| +| E0xx | 4xx | Description | + +## Rules + +1. Each module file covers ONE topic group (e.g., `09_tmdb.md` = all TMDb endpoints) +2. Use `$API` and `$KEY` in all curl examples +3. Use `$FILE_UUID`, `$IDENTITY_UUID` variables for UUID examples +4. Module filename = `NN_topic.md` (NN = execution order, 01-99) +5. 
`depends` metadata = which modules must be assembled before this one diff --git a/deliverable_v1.1.0/scripts/build_docs.py b/deliverable_v1.1.0/scripts/build_docs.py new file mode 100644 index 0000000..ad3a3a6 --- /dev/null +++ b/deliverable_v1.1.0/scripts/build_docs.py @@ -0,0 +1,225 @@ +#!/opt/homebrew/bin/python3.11 +"""Build HTML documentation from module source files.""" +import os, markdown, re, glob, shutil + +MODULES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "API_WORKSPACE", "modules") +DOC_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "doc") +DOC_DEV_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "doc_developer") + +# User-facing modules (no developer content) +USER_MODULES = { + "01_auth", "02_health", "03_register", "04_lookup", "05_process", + "06_search", "07_identity", "08_identity_agent", "08_media", + "09_tmdb", "10_pipeline", "12_agent", +} + + +def md_to_html(md_text: str) -> str: + """Convert Markdown to HTML.""" + html = markdown.markdown(md_text, extensions=['fenced_code', 'tables', 'codehilite']) + # Wrap tables + html = re.sub(r'', '
', html) + return html + +def build_index(files, dev=False): + """Build index.html.""" + links = [] + for fname in sorted(files): + name = os.path.splitext(fname)[0] + label = MODULE_LABELS.get(name, name.replace("_", " ").title()) + if "|" in label: + cn, en = label.split("|", 1) + else: + cn, en = label, "" + html_name = fname.replace(".md", ".html") + links.append(f'') + + title = "Momentry API 開發者文件" if dev else "Momentry API 文件" + subtitle = "開發者專用" if dev else "API 參考手冊 — 登入後可瀏覽各模組文件" + + return f""" + + + +{title} + + + +
+

{title}

+

{subtitle}

+
{cn}{en}
{"".join(links)}
+ + +""" + +MODULE_LABELS = { + "01_auth": "安全認證|Authentication", + "02_health": "健康檢查|Health", + "03_register": "檔案註冊|File Registration", + "04_lookup": "檔案屬性查詢|File Lookup", + "05_process": "處理流程|Processing", + "06_search": "搜尋功能|Search", + "07_identity": "身份識別|Identity", + "08_identity_agent": "智能身份綁定|Smart Identity Binding", + "08_media": "串流與截圖|Streaming & Thumbnails", + "09_tmdb": "TMDb 整合|TMDb Integration", + "10_pipeline": "生產線|Pipeline", + "11_error_codes": "錯誤碼|Error Codes", + "12_agent": "智慧代理|AI Agents", +} + +def build_html(md_text: str, title: str) -> str: + """Wrap MD content in HTML page.""" + content = md_to_html(md_text) + return f""" + + + +{title} - Momentry API Docs + + + +
+← Back to index +{content} +
+ +""" + +def login_page() -> str: + return """ + + + +Login - Momentry Docs + + + +
+

Momentry Docs

+
+ + +
Invalid credentials
+ +
+
+ + +""" + +def main(): + # Clean and recreate doc dirs + for d in [DOC_DIR, DOC_DEV_DIR]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + + md_files = sorted(glob.glob(os.path.join(MODULES_DIR, "*.md"))) + if not md_files: + print(f"No MD files found in {MODULES_DIR}") + return + + user_html = [] + dev_html = [] + for md_path in md_files: + with open(md_path) as f: + md_text = f.read() + fname = os.path.basename(md_path) + stem = os.path.splitext(fname)[0] + + # Skip template + if stem == "_template": + continue + + # Skip error codes (developer-only) + if stem == "11_error_codes": + dev_only = True + else: + dev_only = stem not in USER_MODULES + + title = stem.replace("_", " ").title() + html = build_html(md_text, title) + + if dev_only: + out_path = os.path.join(DOC_DEV_DIR, fname.replace(".md", ".html")) + with open(out_path, "w") as f: + f.write(html) + dev_html.append(fname) + print(f" [dev] {fname}") + else: + out_path = os.path.join(DOC_DIR, fname.replace(".md", ".html")) + with open(out_path, "w") as f: + f.write(html) + user_html.append(fname) + print(f" [doc] {fname}") + + # Build indexes + login page + for d, files, label in [(DOC_DIR, user_html, "User"), (DOC_DEV_DIR, dev_html, "Dev")]: + index = build_index(files) + with open(os.path.join(d, "index.html"), "w") as f: + f.write(index) + with open(os.path.join(d, "login.html"), "w") as f: + f.write(login_page()) + print(f" {label}: {len(files)} pages -> {d}") + +if __name__ == "__main__": + main() diff --git a/deliverable_v1.1.0/scripts/sync_dev_to_public.sh b/deliverable_v1.1.0/scripts/sync_dev_to_public.sh new file mode 100755 index 0000000..16ebb14 --- /dev/null +++ b/deliverable_v1.1.0/scripts/sync_dev_to_public.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# sync_dev_to_public.sh — 比對 dev/public schema,同步 pipeline 資料 +# Usage: ./sync_dev_to_public.sh [check|sync] [file_uuid] + +PSQL="/opt/homebrew/opt/libpq/bin/psql" + +set -euo pipefail + +SCHEMA="${MOMENTRY_DB_SCHEMA:-dev}" 
+DB_URL="${DATABASE_URL:-postgres://accusys@localhost:5432/momentry}" +MODE="${1:-check}" +FILE_UUID="${2:-}" + +TABLES=("videos" "chunk" "face_detections" "processor_results" "monitor_jobs" + "identities" "identity_bindings" "tkg_nodes" "tkg_edges") + +TARGET="public" + +if [ -z "$FILE_UUID" ]; then + echo "Usage: $0 [check|sync] " + echo "" + echo "Examples:" + echo " $0 check bd80fec92b0b6963d177a2c55bf713e2" + echo " $0 sync bd80fec92b0b6963d177a2c55bf713e2" + exit 1 +fi + +echo "=== Schema Sync: $SCHEMA → $TARGET ===" +echo "File UUID: $FILE_UUID" +echo "Mode: $MODE" +echo "" + +check_table() { + local table=$1 + local col=$2 + local src_count dev_count pub_count + + dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "ERROR") + pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "ERROR") + + if [ "$dev_count" = "ERROR" ] || [ "$pub_count" = "ERROR" ]; then + echo " ⚠️ $table — query error (table may not exist in $TARGET)" + return 1 + fi + + if [ "$dev_count" -eq "$pub_count" ]; then + echo " ✅ $table — $dev_count rows (match)" + return 0 + else + echo " ❌ $table — dev=$dev_count pub=$pub_count (MISMATCH)" + return 1 + fi +} + +sync_table() { + local table=$1 + local col=$2 + local src_count dev_count pub_count + + dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0") + pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0") + + if [ "$dev_count" = "0" ]; then + echo " ⏭️ $table — dev has 0 rows, skipping" + return + fi + + if [ "$dev_count" -eq "$pub_count" ]; then + echo " ✅ $table — already synced ($dev_count rows)" + return + fi + + echo " 🔄 Syncing $table: dev=$dev_count → pub=$pub_count ..." 
+ + # Delete existing public rows, insert from dev + $PSQL "$DB_URL" -q -c "DELETE FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || true + + # Get columns list (excluding id for SERIAL) + COLS=$($PSQL -At "$DB_URL" -c " + SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) + FROM information_schema.columns + WHERE table_schema='${SCHEMA}' AND table_name='${table}' + AND column_name != 'id' + AND is_updatable='YES'; + ") + + $PSQL "$DB_URL" -q -c " + INSERT INTO ${TARGET}.${table} (${COLS}) + SELECT ${COLS} + FROM ${SCHEMA}.${table} + WHERE ${col} = '${FILE_UUID}'; + " 2>/dev/null && echo " ✅ $table synced" || echo " ❌ $table sync FAILED" +} + +echo "=== Checking Tables ===" +echo "" +MISMATCH=0 +for table in "${TABLES[@]}"; do + # Determine the UUID column name for each table + case "$table" in + videos) col="file_uuid" ;; + chunk) col="file_uuid" ;; + face_detections) col="file_uuid" ;; + processor_results) col="file_uuid" ;; + monitor_jobs) col="uuid" ;; + identities) col="uuid" ;; # identities.uuid is UUID type + identity_bindings) col="uuid" ;; + tkg_nodes) col="file_uuid" ;; + tkg_edges) col="file_uuid" ;; + *) col="file_uuid" ;; + esac + + if ! check_table "$table" "$col"; then + MISMATCH=$((MISMATCH + 1)) + fi +done + +echo "" +if [ "$MISMATCH" -eq 0 ]; then + echo "✅ All tables in sync" + exit 0 +fi + +if [ "$MODE" != "sync" ]; then + echo "⚠️ $MISMATCH table(s) have mismatches. Run '$0 sync $FILE_UUID' to fix." 
+ exit 1 +fi + +echo "=== Syncing Tables ===" +echo "" +for table in "${TABLES[@]}"; do + case "$table" in + videos) col="file_uuid" ;; + chunk) col="file_uuid" ;; + face_detections) col="file_uuid" ;; + processor_results) col="file_uuid" ;; + monitor_jobs) col="uuid" ;; + identities) col="uuid" ;; + identity_bindings) col="uuid" ;; + tkg_nodes) col="file_uuid" ;; + tkg_edges) col="file_uuid" ;; + *) col="file_uuid" ;; + esac + sync_table "$table" "$col" +done + +echo "" +echo "✅ Sync complete" diff --git a/deliverable_v1.1.0/scripts/update_qdrant_uuid.py b/deliverable_v1.1.0/scripts/update_qdrant_uuid.py new file mode 100644 index 0000000..0e72de7 --- /dev/null +++ b/deliverable_v1.1.0/scripts/update_qdrant_uuid.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +"""批量更新 Qdrant collection 中的 file_uuid (舊→新)""" + +import json +import subprocess +import sys + +QDRANT_URL = "http://localhost:6333" + +# UUID mapping: 舊 → 新 +UUID_MAP = { + "aeed71342a899fe4b4c57b7d41bcb692": [ + "bd80fec92b0b6963d177a2c55bf713e2", + ], +} + +# Collections to process +COLLECTIONS = [ + "momentry_dev_v1", + "momentry_dev_stories", + "momentry_dev_voice", + "momentry_dev_rule1_v2", + "momentry_dev_faces", + "sentence_story", + "sentence_summary", +] + + +def qdrant_get(path: str) -> dict: + res = subprocess.run( + ["curl", "-s", "-X", "GET", f"{QDRANT_URL}{path}"], + capture_output=True, text=True + ) + return json.loads(res.stdout) if res.stdout.strip() else {} + + +def qdrant_post(path: str, body: dict) -> dict: + tmp = "/tmp/qdrant_post.json" + with open(tmp, "w") as f: + json.dump(body, f) + res = subprocess.run( + ["curl", "-s", "-X", "POST", f"{QDRANT_URL}{path}", + "-H", "Content-Type: application/json", "-d", f"@{tmp}"], + capture_output=True, text=True + ) + return json.loads(res.stdout) if res.stdout.strip() else {} + + +def qdrant_put(path: str, body: dict) -> dict: + tmp = "/tmp/qdrant_update.json" + with open(tmp, "w") as f: + json.dump(body, f) + res = subprocess.run( + ["curl", 
"-s", "-X", "PUT", f"{QDRANT_URL}{path}", + "-H", "Content-Type: application/json", "-d", f"@{tmp}"], + capture_output=True, text=True + ) + return json.loads(res.stdout) if res.stdout.strip() else {} + + +def scroll_all(collection: str, filter_old: dict) -> list: + """Scroll all matching points from a collection""" + points = [] + offset = None + while True: + body = { + "limit": 1000, + "with_payload": True, + "with_vector": True, + "filter": filter_old, + } + if offset: + body["offset"] = offset + result = qdrant_post(f"/collections/{collection}/points/scroll", body) + batch = result.get("result", {}).get("points", []) + points.extend(batch) + next_offset = result.get("result", {}).get("next_page_offset") + if next_offset is None: + break + offset = next_offset + return points + + +def update_points(collection: str, points: list, old_uuid: str, new_uuid: str): + """Update file_uuid in payload for the given points""" + if not points: + return 0 + + updated = [] + for p in points: + pl = p.get("payload", {}) + # Check both 'uuid' and 'file_uuid' fields + changed = False + if pl.get("uuid") == old_uuid: + pl["uuid"] = new_uuid + changed = True + if pl.get("file_uuid") == old_uuid: + pl["file_uuid"] = new_uuid + changed = True + if changed: + updated.append({ + "id": p["id"], + "vector": p["vector"], + "payload": pl, + }) + + if not updated: + return 0 + + # Update in batches of 500 + total = len(updated) + for i in range(0, total, 500): + batch = updated[i:i+500] + result = qdrant_put( + f"/collections/{collection}/points?wait=true", + {"points": batch} + ) + if result.get("status") != "ok": + print(f" Error at {i}: {result}") + return i + return total + + +def main(): + for collection in COLLECTIONS: + # Check if collection exists + info = qdrant_get(f"/collections/{collection}") + if "result" not in info: + continue + + for old_uuid, new_uuids in UUID_MAP.items(): + for new_uuid in new_uuids: + # Scroll all points with this old UUID + filter_body = { + "must": [ 
+ {"should": [ + {"key": "uuid", "match": {"value": old_uuid}}, + {"key": "file_uuid", "match": {"value": old_uuid}}, + ]} + ] + } + points = scroll_all(collection, filter_body) + if not points: + continue + + print(f"{collection}: {len(points)} points with UUID {old_uuid[:8]}...") + updated = update_points(collection, points, old_uuid, new_uuid) + print(f" → {updated} points updated to {new_uuid[:8]}...") + + # Verify + print("\n=== Verification ===") + for collection in COLLECTIONS: + for old_uuid, new_uuids in UUID_MAP.items(): + for what, uuid in [("old", old_uuid), ("new", new_uuids[0])]: + filter_body = { + "must": [ + {"should": [ + {"key": "uuid", "match": {"value": uuid}}, + {"key": "file_uuid", "match": {"value": uuid}}, + ]} + ] + } + result = qdrant_post( + f"/collections/{collection}/points/count", + {"filter": filter_body} + ) + cnt = result.get("result", {}).get("count", 0) + if cnt > 0: + print(f" {collection}: {cnt} points with {what} UUID") + print("✅ Done") + + +if __name__ == "__main__": + main() diff --git a/docs/3002_3003_SEPARATION_STATUS.md b/docs/3002_3003_SEPARATION_STATUS.md new file mode 100644 index 0000000..ad5a08c --- /dev/null +++ b/docs/3002_3003_SEPARATION_STATUS.md @@ -0,0 +1,70 @@ +# 3002/3003 Schema Separation Status + +Date: 2026-05-17 +Status: ✅ Pipeline tables created in `public`; schema incompatibilities remain + +## Summary + +| Schema | Has pipeline tables | Has auth tables | Used by | +|--------|-------------------|-----------------|---------| +| `public` | ✅ (newly created) | ✅ (original) | 3002 (production) — currently using `dev` as workaround | +| `dev` | ✅ (full, working) | ✅ (synced) | 3003 (playground) | + +## What Was Done + +### Pipeline tables created in `public` schema (11 tables) +- `videos`, `chunk`, `chunk_vectors`, `cuts`, `frames` +- `monitor_jobs`, `processor_results`, `processor_versions` +- `parent_chunks`, `tkg_edges`, `tkg_nodes` + +All include proper sequences, indexes, and constraints matching the 
`dev` schema. + +## Remaining Blockers + +### Schema incompatibilities between `dev` and `public` + +| Table | dev cols | public cols | Status | +|-------|---------|------------|--------| +| identities | 17 | 16 | ⚠️ Different columns (e.g. `name` vs `real_name`/`actor_name`) | +| face_detections | 16 | 17 | ⚠️ Column count mismatch | +| identity_bindings | 7 | 8 | ⚠️ Column count mismatch | +| person_identities | 16 | 15 | ⚠️ Column count mismatch | +| pre_chunks | 19 | 10 | ⚠️ Significantly different | +| api_keys | 19 | 19 | ✅ Match | +| resources | 9 | 9 | ✅ Match | +| users | 8 | 8 | ✅ Match | + +### Identities table key differences +- `public.identities` uses `real_name` + `actor_name` (old schema) +- `dev.identities` uses `name` (new unified schema) +- `dev.identities` has `tmdb_poster`, `file_uuid`, `face_embedding`, `voice_embedding`, `identity_embedding` +- `public.identities` only has `face_embedding`, `voice_embedding` (no `identity_embedding`) + +## Options + +### Option A: Full data migration (recommended for later) +1. Dump data from old public tables +2. Drop old public tables +3. Recreate from dev schema DDL +4. Migrate data with column mapping +5. Switch 3002 to `DATABASE_SCHEMA=public` + +### Option B: Keep current workaround (simplest for now) +- 3002 continues with `DATABASE_SCHEMA=dev` +- 3003 uses `DATABASE_SCHEMA=dev` +- Both share the same schema, but have separate Redis key prefixes + ports + +### Option C: Rename dev → public (requires downtime) +1. Stop all services +2. Rename `dev` schema to something else +3. Rename `public` to `public_old` +4. Rename `dev` to `public` +5. 
Update references + +## Current Status + +✅ Pipeline tables exist in both schemas +✅ auth tables (users, sessions, jwt_blacklist) exist in both +✅ Redis key prefixes separate (`momentry:` vs `momentry_dev:`) +⚠️ 3002 still uses `DATABASE_SCHEMA=dev` workaround +⛔ Shared tables need migration before 3002 can use `public` schema diff --git a/docs_v1.0/API_WORKSPACE/.gitignore b/docs_v1.0/API_WORKSPACE/.gitignore new file mode 100644 index 0000000..6bf8a98 --- /dev/null +++ b/docs_v1.0/API_WORKSPACE/.gitignore @@ -0,0 +1,2 @@ +_build/ +.DS_Store diff --git a/docs_v1.0/API_WORKSPACE/README.md b/docs_v1.0/API_WORKSPACE/README.md new file mode 100644 index 0000000..39a98c2 --- /dev/null +++ b/docs_v1.0/API_WORKSPACE/README.md @@ -0,0 +1,60 @@ +# API Workspace + +## Purpose + +This directory is the **single source of truth** for all API documentation modules. +Generated outputs go to `../GUIDES/` as assembled deliverable documents. + +## Workflow + +```bash +# 1. Edit a module +vim modules/09_tmdb.md + +# 2. Preview the generated output +make _build/API_ENDPOINTS.md + +# 3. Check diff against current GUIDES/ content +make check + +# 4. Deploy to GUIDES/ +make deploy + +# 5. Regenerate all +make all +``` + +## Directory Structure + +``` +API_WORKSPACE/ +├── modules/ ← 11 module files (01_auth ... 
11_error_codes) +├── configs/ ← 7 assembly recipes (.toml) +├── narratives/ ← narrative intros for specific output files +├── _build/ ← generated output (gitignored) +├── Makefile ← build targets +├── assemble_docs.sh ← assembly engine +└── README.md +``` + +## Available `make` Targets + +| Target | Output | +|--------|--------| +| `make reference` | `_build/API_REFERENCE.md` | +| `make endpoints` | `_build/API_ENDPOINTS.md` | +| `make quickref` | `_build/API_QUICK_REFERENCE.md` | +| `make errors` | `_build/API_ERROR_CODES.md` | +| `make index` | `_build/API_INDEX.md` | +| `make marcom` | `_build/API_TRAINING_MARCOM.md` | +| `make tmdb` | `_build/TMDb_User_Guide.md` | +| `make all` | All of the above | +| `make deploy` | Copy `_build/*` → `../GUIDES/` | +| `make check` | `diff` against existing `../GUIDES/` files | + +## Adding a New Endpoint + +1. Add the endpoint to the appropriate module (e.g., `modules/XX_files.md`) +2. Follow the template in `modules/_template.md` +3. `make all && make check` +4. `make deploy` diff --git a/docs_v1.0/API_WORKSPACE/modules/06_search.md b/docs_v1.0/API_WORKSPACE/modules/06_search.md index 0fad9df..3d2d83a 100644 --- a/docs_v1.0/API_WORKSPACE/modules/06_search.md +++ b/docs_v1.0/API_WORKSPACE/modules/06_search.md @@ -7,7 +7,7 @@ ### `POST /api/v1/search/smart` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity. @@ -15,13 +15,22 @@ Semantic vector search using EmbeddingGemma-300m. Generates a query embedding vi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `file_uuid` | string | Yes | — | File UUID to search within | | `query` | string | Yes | — | Search text | +| `file_uuid` | string | No | — | File UUID to search within. 
If omitted, searches all files (global search) | | `limit` | integer | No | 5 | Max results to return | | `page` | integer | No | 1 | Page number | | `page_size` | integer | No | 5 | Items per page | -#### Example +#### Example (Global Search) + +```bash +curl -s -X POST "$API/api/v1/search/smart" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"query": "Audrey Hepburn"}' +``` + +#### Example (File-specific Search) ```bash curl -s -X POST "$API/api/v1/search/smart" \ @@ -37,6 +46,7 @@ curl -s -X POST "$API/api/v1/search/smart" \ "query": "Audrey Hepburn", "results": [ { + "file_uuid": "a6fb22eebefaef17e62af874997c5944", "parent_id": 1087822, "scene_order": 1087822, "start_frame": 104438, @@ -54,12 +64,16 @@ curl -s -X POST "$API/api/v1/search/smart" \ } ``` +| Field | Type | Description | +|-------|------|-------------| +| `results[].file_uuid` | string | File UUID where result was found | + --- ### `POST /api/v1/search/universal` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`. @@ -68,13 +82,22 @@ Multi-type BM25 full-text search across chunks, frames, and persons. Uses Postgr | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `query` | string | Yes | — | Search text | -| `file_uuid` | string | No | — | Restrict to specific file | +| `file_uuid` | string | No | — | Restrict to specific file. 
If omitted, searches all files (global search) | | `types` | string[] | No | `["chunk","frame","person"]` | Search types | | `limit` | integer | No | 10 | Max results per type | | `page` | integer | No | 1 | Page number | | `page_size` | integer | No | 20 | Items per page | -#### Example +#### Example (Global Search) + +```bash +curl -s -X POST "$API/api/v1/search/universal" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"query": "Cary Grant"}' +``` + +#### Example (File-specific Search) ```bash curl -s -X POST "$API/api/v1/search/universal" \ @@ -90,6 +113,7 @@ curl -s -X POST "$API/api/v1/search/universal" \ "results": [ { "type": "chunk", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2", "chunk_type": "story_child", "start_frame": 5103, @@ -98,6 +122,25 @@ curl -s -X POST "$API/api/v1/search/universal" \ "end_time": 213.64, "text": "[213s-214s] Cary Grant: \"Olá!\"", "score": 0.9 + }, + { + "type": "frame", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "frame_number": 5105, + "timestamp": 212.72, + "score": 0.7, + "objects": null, + "ocr_texts": null, + "faces": null + }, + { + "type": "person", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "identity_id": 12, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "Cary Grant", + "appearance_count": 542, + "score": 0.95 } ], "total": 20, @@ -105,23 +148,78 @@ curl -s -X POST "$API/api/v1/search/universal" \ } ``` +| Field | Type | Description | +|-------|------|-------------| +| `results[].type` | string | Result type: `chunk`, `frame`, or `person` | +| `results[].file_uuid` | string | File UUID where result was found (all types) | + --- ### `POST /api/v1/search/frames` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Search face detection frames by identity name or trace ID. 
--- -### `POST /api/v1/search/identity_text` +### `GET /api/v1/search/identity_text` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level -Search text chunks spoken by a specific identity. +Search text chunks → find associated identities. Returns chunks where face detections overlap with text content. + +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `q` | string | Yes | — | Search text (ILIKE match) | +| `file_uuid` | string | No | — | Restrict to specific file. If omitted, searches all files (global search) | +| `limit` | integer | No | 50 | Max results | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 50 | Items per page | + +#### Example (Global Search) + +```bash +curl -s "$API/api/v1/search/identity_text?q=love" -H "X-API-Key: $KEY" +``` + +#### Example (File-specific Search) + +```bash +curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=love" -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "results": [ + { + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "chunk_id": "llm_parent_..._256_270", + "start_time": 256.256, + "end_time": 270.228, + "text_content": "...lack of affection...", + "identity_id": 9, + "identity_name": "Audrey Hepburn", + "identity_source": "tmdb", + "trace_id": 94 + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `results[].file_uuid` | string | File UUID where chunk was found | +| `results[].identity_id` | integer | Identity ID if face was detected | +| `results[].trace_id` | integer | Face trace ID | --- @@ -145,4 +243,4 @@ Search text chunks spoken by a specific identity. 
| **Storage** | pgvector (`chunk.embedding` column) | --- -*Updated: 2026-05-19 12:49:24* +*Updated: 2026-05-27 — Added global search support for smart, universal, identity_text APIs* diff --git a/docs_v1.0/API_WORKSPACE/modules/07_identity.md b/docs_v1.0/API_WORKSPACE/modules/07_identity.md index f662b2f..3178ba9 100644 --- a/docs_v1.0/API_WORKSPACE/modules/07_identity.md +++ b/docs_v1.0/API_WORKSPACE/modules/07_identity.md @@ -70,7 +70,16 @@ curl -s "$API/api/v1/identity/$IDENTITY_UUID" -H "X-API-Key: $KEY" **Auth**: Required **Scope**: identity-level -Delete an identity permanently. +Delete an identity permanently. All face detections bound to this identity are unbound (`identity_id` set to `NULL`). The identity JSON file is deleted from disk. + +#### History & Undo/Redo + +Every DELETE records a full snapshot of the identity and its unbound faces. See [`14_identity_history.md`](14_identity_history.md#4-delete-history--undoredo) for: + +- Undo via `POST /api/v1/identity/:identity_uuid/undo` — recreates identity and re-binds faces +- Redo via `POST /api/v1/identity/:identity_uuid/redo` — re-deletes the identity + +**Note**: Delete undo/redo reuses the same endpoints as PATCH undo/redo. The endpoint automatically detects whether the identity was deleted (undo) or needs to be re-deleted (redo) based on the history record. --- @@ -129,124 +138,75 @@ curl -s -X PATCH "$API/api/v1/identity/$IDENTITY_UUID" \ | HTTP | When | |------|------| -| `400` | No fields to update or invalid UUID format | | `404` | Identity not found | +| `500` | Database error | + +#### History & Undo/Redo + +Every bind records a before/after snapshot. 
See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for: + +- `POST /api/v1/identity/:identity_uuid/bind/undo` — Revert a bind +- `POST /api/v1/identity/:identity_uuid/bind/redo` — Reapply an undone bind +- `GET /api/v1/identity/:identity_uuid/bind/history` — Query bind operations --- -### `GET /api/v1/identity/:identity_uuid/files` +## Metadata (Embedded JSON) -**Auth**: Required -**Scope**: identity-level +The `identities.metadata` column is a **JSONB** field that stores arbitrary structured data alongside the identity's core fields (name, status, identity_type). No schema is enforced — any valid JSON object is accepted. -Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range. +### Merge Behavior -#### Example +| Operation | Strategy | Example | +|-----------|----------|---------| +| **PATCH** | Shallow top-level merge: `COALESCE(metadata,'{}'::jsonb) \|\| $1::jsonb` | Sending `{"tmdb_rating": 8.5}` only adds/overwrites `tmdb_rating`; all other existing keys are preserved. | +| **mergeinto** | Recursive deep merge — nested sub-keys are merged individually, not replaced wholesale | Target has `{"tmdb": {"biography": "..."}}`, source has `{"tmdb": {"birthday": "1904-01-18"}}` → result is `{"tmdb": {"biography": "...", "birthday": "1904-01-18"}}`. | +| **Upload (`POST`)** | Direct overwrite — the entire `metadata` field is replaced with the request value. 
| | -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY" -``` +### Validation ---- +| Scenario | Result | +|----------|--------| +| PATCH with non-object metadata (`string`, `array`, `number`, `null`) | `400 Bad Request: "metadata must be a JSON object"` | +| mergeinto with non-object metadata | Accepted (mergeinto validates at application level) | +| Upload with non-object metadata | Accepted (upload replaces directly) | -### `GET /api/v1/identity/:identity_uuid/faces` +### Conventional Keys -**Auth**: Required -**Scope**: identity-level +| Key | Type | Writer | Purpose | +|-----|------|--------|---------| +| `aliases` | `[{locale, name}]` | PATCH, mergeinto | Multilingual display names (see [Alias System](#alias-system-bcp-47-locale-tags)) | +| `merged_into` | `{uuid, at}` | mergeinto | Marks an identity as merged (undo mechanism reads this) | +| `tmdb_*` | various | TMDb probe | Movie metadata (biography, birthday, known_for, etc.). Written only when `MOMENTRY_TMDB_PROBE_ENABLED=true`. | +| `source` | string | mergeinto | Tagged on aliases/metadata when added by merge (`"merge"` value) | -Get all face detection records associated with this identity. +Custom keys are fully supported — no registration required. -#### Example +### Search Coverage -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY" -``` +The identity search endpoint (`GET /api/v1/identity/search`) matches across three scopes: -| Field | Type | Description | -|-------|------|-------------| -| `file_uuid` | string | File where face was detected | -| `frame_number` | integer | Frame number of detection | -| `face_id` | string | Face ID (format: `face_{frame_number}`) | -| `confidence` | float | Detection confidence | +1. `i.name` — exact and ILIKE against display name +2. `jsonb_array_elements(i.metadata->'aliases')->>'name'` — locale-tagged alias names +3. 
`i.metadata::text ILIKE $1` — raw string search across the entire JSON blob (all keys, all values) ---- +This means searching for `"1904-01-18"` or `"biography"` will match identities whose metadata contains those strings anywhere. -### `GET /api/v1/identity/:identity_uuid/chunks` +### History Snapshots -**Auth**: Required -**Scope**: identity-level +Every `identity_history` record captures the **full metadata** in both `before_snapshot` and `after_snapshot` (as part of the complete identity JSONB dump). Undo restores the identity row — including metadata — to the `before_snapshot` state. -Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said. +For merge operations, the MongoDB merge history records `metadata_fields_added` and `metadata_fields_added_paths` (dot-separated paths like `"tmdb.biography"`). Merge undo removes only those specific paths, preserving subsequent manual edits to other metadata keys. -#### Example +### Best Practices -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY" -``` - -#### Response (200) - -```json -{ - "success": true, - "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", - "data": [ - { - "id": 0, - "file_uuid": "bd80fec92b0b6963d177a2c55bf713e2", - "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2", - "chunk_type": "sentence", - "start_frame": 5103, - "end_frame": 5127, - "fps": 24.0, - "start_time": 212.64, - "end_time": 213.64, - "text_content": "[213s-214s] Cary Grant: \"Olá!\"" - } - ] -} -``` - -| Field | Type | Description | -|-------|------|-------------| -| `file_uuid` | string | File identifier | -| `chunk_id` | string | Sentence chunk identifier | -| `start_frame` | integer | Frame-accurate start position | -| `end_frame` | integer | Frame-accurate end position | -| `fps` | float | Frames per second | -| `start_time` | float | Start time in seconds | -| `end_time` | float | End time in seconds | -| `text_content` | string | 
Spoken text content | - ---- - -### `POST /api/v1/identity/:identity_uuid/bind` - -**Auth**: Required -**Scope**: identity-level - -Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition. - -#### Request Parameters - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `file_uuid` | string | Yes | File where face is detected | -| `face_id` | string | Yes | Face ID (format: `{frame}_{idx}`) | - -#### Side Effects - -- 清除該 face detection row 的 `stranger_id`(設為 NULL) -- 不影響 `identities` 表中原有的 stranger auto-identity 記錄 - -#### Example - -```bash -curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \ - -H "X-API-Key: $KEY" \ - -H "Content-Type: application/json" \ - -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}' -``` +| Guideline | Reason | +|-----------|--------| +| Deep nesting is allowed in metadata | All metadata merge operations use `jsonb_deep_merge()` — nested sub-keys are merged recursively, not replaced wholesale | +| Use `aliases` for display names | Frontend has built-in locale fallback logic (see [Alias System](#alias-system-bcp-47-locale-tags)) | +| Avoid >1MB per identity | Metadata is included in search indexing (`metadata::text ILIKE`); large blobs degrade query performance | +| Don't rely on metadata ordering | JSONB preserves insertion order but PostgreSQL does not guarantee it across operations | +| No LLM/Gemma4 agent writes to metadata | Only API endpoints (PATCH, mergeinto, upload) and TMDb probe modify `identities.metadata` | --- @@ -295,6 +255,10 @@ curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/trace" \ | `404` | Identity not found | | `500` | Database error | +#### History & Undo/Redo + +Trace bind operations share the same history/undo/redo system as single-face binds. See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for endpoints. 
+ --- ### `GET /api/v1/identity/:identity_uuid/traces` @@ -382,6 +346,13 @@ Unbind a face detection from an identity. Removes the identity association from - 被 unbind 的 face 不會自動成為 stranger - 要重新標記為 stranger 需重新跑 Agent API(`identity/analyze`) +#### History & Undo/Redo + +Unbind records a before/after snapshot. See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for: + +- `POST /api/v1/identity/:identity_uuid/bind/undo` — Revert an unbind +- `POST /api/v1/identity/:identity_uuid/bind/redo` — Reapply an undone unbind + --- ### `POST /api/v1/identity/:identity_uuid/mergeinto` @@ -391,6 +362,13 @@ Unbind a face detection from an identity. Removes the identity association from Transfer all face bindings from this identity to another identity, then optionally delete or mark the source as merged. +#### Two Merge Cases + +| Case | Description | Undo/Redo Support | +|------|-------------|-------------------| +| **stranger → identity** | Merge an auto-generated stranger identity into a known identity (TMDb or user-defined) | ✅ 24hr undo/redo | +| **identity A → identity B** | Merge two known identities (e.g., duplicate entries) | ✅ 24hr undo/redo | + #### Request Parameters | Field | Type | Required | Default | Description | @@ -402,8 +380,12 @@ Transfer all face bindings from this identity to another identity, then optional - 轉移所有 `face_detections.identity_id` 到目標 identity - 同時清除所有被轉移 rows 的 `stranger_id` +- 將 source name 加入 target aliases (with `source: "merge"` tag) +- 將 source aliases 加入 target aliases (if not already present) +- 將 source metadata fields 加入 target metadata (if not already present) - `keep_history: true`(預設):source identity 設為 `status='merged'`,保留記錄 - `keep_history: false`:**刪除** source identity 及其 identity JSON 檔案 +- **記錄 merge history 到 MongoDB**(支援 undo/redo) #### Example @@ -411,7 +393,7 @@ Transfer all face bindings from this identity to another identity, then optional curl -s -X POST 
"$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ -H "X-API-Key: $KEY" \ -H "Content-Type: application/json" \ - -d '{"into_uuid": "'"$TARGET_UUID"'", "keep_history": false}' + -d '{"into_uuid": "'"$TARGET_UUID"'", "keep_history": true}' ``` #### Response (200) @@ -419,11 +401,23 @@ curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ ```json { "success": true, - "message": "Merged 'stranger_13894' into 'Louis Viret' (52 faces transferred, source deleted)", - "data": { "faces_transferred": 52 } + "message": "Merged 'stranger_13894' into 'Louis Viret' (52 faces transferred, history kept)", + "data": { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "faces_transferred": 52, + "aliases_added": 1, + "metadata_fields_added": 2 + } } ``` +| Field | Type | Description | +|-------|------|-------------| +| `merge_id` | string | Unique merge operation ID (for undo) | +| `faces_transferred` | integer | Number of face detections transferred | +| `aliases_added` | integer | Number of aliases added to target | +| `metadata_fields_added` | integer | Number of metadata fields added to target | + #### Error Responses | HTTP | When | @@ -433,25 +427,189 @@ curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ --- -### `GET /api/v1/identities/search` +### `POST /api/v1/identity/merge/:merge_id/undo` **Auth**: Required **Scope**: identity-level -Search identities by name (ILIKE search). Returns matching identity records. +Undo a merge operation within 24 hours. Restores the source identity and reverts face bindings. + +#### Undo Behavior + +| Action | Description | +|--------|-------------| +| Restore source identity | If `keep_history=true`: restore status to `confirmed`
If `keep_history=false`: recreate identity from MongoDB snapshot | +| Restore faces | Transfer faces back to source identity | +| Remove aliases from target | Remove aliases with `source: "merge"` tag | +| Remove metadata fields from target | Remove fields that were added from source | +| **Preserve manual changes** | Keep aliases/metadata manually added after merge | #### Example ```bash -curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY" +curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/undo" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Undo merge completed: 'stranger_13894' restored, 52 faces reverted", + "data": { + "source_identity_restored": { + "uuid": "a9a90105...", + "name": "stranger_13894", + "status": "confirmed" + }, + "faces_reverted": 52, + "aliases_removed_from_target": 1, + "metadata_fields_removed_from_target": 2 + } +} +``` + +#### Error Responses + +| HTTP | When | +|------|------| +| `400` | Undo deadline expired (>24hr) or already undone | +| `404` | Merge record not found | +| `500` | Database error | + +--- + +### `POST /api/v1/identity/merge/:merge_id/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo a previously undone merge operation. See [`14_identity_history.md`](14_identity_history.md#post-apiv1identitymergemerge_idredo) for full details. + +--- + +### `GET /api/v1/identity/merge/history` + +**Auth**: Required +**Scope**: identity-level + +Query merge history records from MongoDB. 
+ +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `source_uuid` | string | No | — | Filter by source identity UUID | +| `target_uuid` | string | No | — | Filter by target identity UUID | +| `merge_id` | string | No | — | Filter by specific merge ID | +| `undone` | bool | No | — | Filter by undone status | +| `page` | int | No | 1 | Page number | +| `page_size` | int | No | 20 | Items per page | + +#### Example + +```bash +curl -s "$API/api/v1/identity/merge/history?page=1&page_size=10" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "page": 1, + "page_size": 10, + "results": [ + { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "source_name": "stranger_13894", + "target_name": "Louis Viret", + "faces_transferred": 52, + "merged_at": "2026-05-27T10:00:00Z", + "undo_deadline": "2026-05-28T10:00:00Z", + "undone": false, + "undo_expired": false + } + ] +} ``` | Field | Type | Description | |-------|------|-------------| -| `name` | string | Identity name | -| `source` | string | Identity source | -| `tmdb_id` | integer | TMDb ID (if source = tmdb) | -| `file_uuid` | string | Associated file | +| `merge_id` | string | Unique merge operation ID | +| `source_name` | string | Source identity name | +| `target_name` | string | Target identity name | +| `faces_transferred` | integer | Number of faces transferred | +| `merged_at` | datetime | When merge occurred | +| `undo_deadline` | datetime | 24hr deadline for undo | +| `undone` | bool | Whether merge was undone | +| `undo_expired` | bool | Whether undo deadline passed | + +--- + +### `GET /api/v1/identities/search` + +**Auth**: Required +**Scope**: global / file-level + +Search identity name → find associated chunks. Searches identity name and aliases, returns identities with their associated text chunks. 
+ +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `q` | string | Yes | — | Search text (ILIKE match on name and aliases) | +| `file_uuid` | string | No | — | Restrict to specific file. If omitted, searches all files (global search) | +| `limit` | integer | No | 50 | Max results | + +#### Example (Global Search) + +```bash +curl -s "$API/api/v1/identities/search?q=Audrey" -H "X-API-Key: $KEY" +``` + +#### Example (File-specific Search) + +```bash +curl -s "$API/api/v1/identities/search?q=Audrey&file_uuid=$FILE_UUID" -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "results": [ + { + "identity_id": 9, + "name": "Audrey Hepburn", + "source": "tmdb", + "tmdb_id": 1932, + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "trace_id": 41, + "chunk_id": "llm_parent_..._204_207", + "start_time": 204.162, + "text_content": "...confrontation..." + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `results[].identity_id` | integer | Identity ID | +| `results[].name` | string | Identity name | +| `results[].source` | string | Identity source (`tmdb`, `user_defined`, etc.) | +| `results[].tmdb_id` | integer | TMDb person ID (if source = tmdb) | +| `results[].file_uuid` | string | File where identity appears | +| `results[].trace_id` | integer | Face trace ID | +| `results[].chunk_id` | string | Associated chunk ID | +| `results[].start_time` | float | Chunk start time | +| `results[].text_content` | string | Chunk text content | --- @@ -628,4 +786,4 @@ PATCH /api/v1/identity/:identity_uuid This **replaces** the entire `aliases` array. To add to existing aliases, include all existing entries in the request. 
--- -*Updated: 2026-05-25 +*Updated: 2026-05-25 — Added `GET /api/v1/file/:file_uuid/faces` with 4 binding states, filters, strangers table split diff --git a/docs_v1.0/API_WORKSPACE/modules/14_identity_history.md b/docs_v1.0/API_WORKSPACE/modules/14_identity_history.md new file mode 100644 index 0000000..d61df17 --- /dev/null +++ b/docs_v1.0/API_WORKSPACE/modules/14_identity_history.md @@ -0,0 +1,696 @@ + + + + +## Identity Operation History + +Every mutation on an identity automatically records a before/after snapshot. Use undo/redo to revert or reapply changes, and history to inspect the operation log. + +Four independent undo/redo systems exist: + +| System | Storage | Operations Covered | +|--------|---------|-------------------| +| **PATCH** | PostgreSQL `identity_history` | `update` | +| **Bind** | PostgreSQL `identity_history` | `bind`, `unbind`, `bind_trace` | +| **Merge** | MongoDB `identity_merge_history` | mergeinto | +| **Delete** | PostgreSQL `identity_history` | `delete` | + +--- + +### 1. PATCH History & Undo/Redo + +#### Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table | +| Snapshot | Full identity record (all fields) before and after each PATCH | +| Max records | 256 per identity (oldest auto-deleted when limit exceeded) | +| Undo steps | Unlimited (no expiry, no step limit) | +| Redo stack | Cleared on new PATCH (`is_undone=true` + `operation='update'` records are deleted) | + +##### Stack Model + +``` +PATCH 1 → PATCH 2 → PATCH 3 (undo stack, is_undone=false) + ↓ undo +PATCH 1 → PATCH 2 (undo stack) + PATCH 3 (redo stack, is_undone=true) + ↓ redo +PATCH 1 → PATCH 2 → PATCH 3 (undo stack) +``` + +A new PATCH after undo clears only the operation='update' redo stack (PATCH 3 is lost). Bind/merge redo stacks are not affected. + +--- + +#### `POST /api/v1/identity/:identity_uuid/undo` + +**Auth**: Required +**Scope**: identity-level + +Undo the most recent PATCH operations. 
Restores the identity's `before_snapshot` and marks the history records as undone. + +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of undo steps to apply (max records undone in one call) | + +##### Behavior + +- Queries `is_undone=false` records with `operation='update'`, ordered by `created_at DESC` +- Restores `name`, `identity_type`, `source`, `status`, `metadata`, `tmdb_id`, `tmdb_profile` from the last record's `before_snapshot` +- Marks the undone records as `is_undone=true` with `undone_at=NOW()` +- Syncs `identity.json` to disk +- Updates `_index.json` if name changed + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/undo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "undone_count": 1, + "current_state": { + "id": 9, + "uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "Cary Grant", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": {}, + "tmdb_id": 112, + "tmdb_profile": null + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `undone_count` | integer | Number of history records undone | +| `current_state` | object | Full identity state after undo | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No undo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `POST /api/v1/identity/:identity_uuid/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo previously undone PATCH operations. Restores the identity's `after_snapshot` and marks the history records as no longer undone. 
+ +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of redo steps to apply | + +##### Behavior + +- Queries `is_undone=true` records with `operation='update'`, ordered by `created_at DESC` +- Restores all identity fields from the last record's `after_snapshot` +- Marks records as `is_undone=false` with `undone_at=NULL` +- Syncs `identity.json` to disk +- Updates `_index.json` if name changed + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/redo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "redone_count": 1, + "current_state": { + "id": 9, + "uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "John Smith", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": { "aliases": [...] }, + "tmdb_id": 112, + "tmdb_profile": null + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `redone_count` | integer | Number of history records redone | +| `current_state` | object | Full identity state after redo | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No redo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `GET /api/v1/identity/:identity_uuid/history` + +**Auth**: Required +**Scope**: identity-level + +Query the PATCH operation history for an identity. Returns paginated records with undo/redo stack counts (filtered to `operation='update'`). 
+ +##### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `page` | integer | No | `1` | Page number (1-indexed) | +| `limit` | integer | No | `20` | Items per page (max 100) | + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "total": 5, + "undo_stack_count": 3, + "redo_stack_count": 2, + "results": [ + { + "history_id": 42, + "operation": "update", + "is_undone": false, + "created_at": "2026-05-27T12:00:00Z", + "undone_at": null + }, + { + "history_id": 41, + "operation": "update", + "is_undone": true, + "created_at": "2026-05-27T11:30:00Z", + "undone_at": "2026-05-27T13:00:00Z" + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `total` | integer | Total PATCH history records for this identity | +| `undo_stack_count` | integer | Records available for undo (`is_undone=false`) | +| `redo_stack_count` | integer | Records available for redo (`is_undone=true`) | +| `results[].history_id` | integer | History record ID | +| `results[].operation` | string | Operation type (`"update"` for PATCH) | +| `results[].is_undone` | boolean | Whether the operation has been undone | +| `results[].created_at` | string | When the PATCH was applied | +| `results[].undone_at` | string | When the undo occurred (null if not undone) | + +##### Example + +```bash +curl -s "$API/api/v1/identity/$IDENTITY_UUID/history?page=1&limit=10" \ + -H "X-API-Key: $KEY" +``` + +##### Error Responses + +| HTTP | When | +|------|------| +| `404` | Identity not found | +| `500` | Database error | + +--- + +### 2. Bind/Unbind/Trace History & Undo/Redo + +All three operations (`bind`, `unbind`, `bind_trace`) share a single history table and undo/redo stack. 
+ +#### Bind Operation Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table (same table as PATCH) | +| Snapshot | `{"file_uuid", "face_id" (or "trace_id"), "identity_id_before/after"}` | +| Max records | 256 per identity (shared limit across all operation types) | +| Undo steps | Unlimited (`steps` param) | +| Redo stack | Cleared on new bind/unbind/bind_trace (`operation IN ('bind','unbind','bind_trace')` + `is_undone=true` records deleted) | +| Stack isolation | Bind redo stack is **independent** from PATCH redo stack — clearing one does not affect the other | + +##### Stack Model + +``` +bind face_1 (to id=9) → unbind face_1 → bind trace 906 (to id=9) +(undo stack, is_undone=false) (undo stack) (undo stack) + ↓ undo (first undone: bind_trace) + bind trace 906 (is_undone=true) + (redo stack) + ↓ redo +bind face_1 → unbind face_1 → bind trace 906 +(undo stack) +``` + +A new bind/unbind/trace after undo clears only the bind redo stack (operations with `IN ('bind','unbind','bind_trace')`). + +##### Snapshot Format + +**Before (bind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_before": null +} +``` + +**After (bind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_after": 9 +} +``` + +**Before (unbind) — binding existed before:** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_before": 9 +} +``` + +**After (unbind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_after": null +} +``` + +For `bind_trace`, the snapshot uses `trace_id` instead of `face_id`, with `identity_id_before` capturing the first face's identity in that trace. + +--- + +#### `POST /api/v1/identity/:identity_uuid/bind/undo` + +**Auth**: Required +**Scope**: identity-level + +Undo the most recent bind/unbind/bind_trace operations. 
Restores `identity_id_before` from the snapshot and marks records as undone. + +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of undo steps to apply | + +##### Behavior + +- Queries `is_undone=false` records with `operation IN ('bind','unbind','bind_trace')`, ordered by `created_at DESC` +- Restores `identity_id_before` — for bind this is `null` (face was unbound), for unbind this is the original identity (face goes back), for bind_trace this is the trace's previous identity +- Marks the undone records as `is_undone=true` with `undone_at=NOW()` + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/undo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "operation": "bind", + "undone_count": 1, + "affected_rows": 53 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `operation` | string | The actual operation undone (`bind`, `unbind`, or `bind_trace`) | +| `undone_count` | integer | Number of history records undone | +| `affected_rows` | integer | Number of `face_detections` rows updated | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No bind undo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `POST /api/v1/identity/:identity_uuid/bind/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo previously undone bind/unbind/bind_trace operations. Restores `identity_id_after` from the snapshot. 
+ +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of redo steps to apply | + +##### Behavior + +- Queries `is_undone=true` records with `operation IN ('bind','unbind','bind_trace')`, ordered by `created_at DESC` +- Restores `identity_id_after` — for bind this is the identity the face was bound to, for unbind this is `null` +- Marks records as `is_undone=false` with `undone_at=NULL` + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/redo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "operation": "unbind", + "redone_count": 1, + "affected_rows": 1 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `operation` | string | The actual operation redone (`bind`, `unbind`, or `bind_trace`) | +| `redone_count` | integer | Number of history records redone | +| `affected_rows` | integer | Number of `face_detections` rows updated | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No bind redo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `GET /api/v1/identity/:identity_uuid/bind/history` + +**Auth**: Required +**Scope**: identity-level + +Query the bind/unbind/bind_trace operation history for an identity. Returns paginated records with undo/redo stack counts. 
+ +##### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `page` | integer | No | `1` | Page number (1-indexed) | +| `limit` | integer | No | `20` | Items per page (max 100) | + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "total": 3, + "undo_stack_count": 2, + "redo_stack_count": 1, + "results": [ + { + "history_id": 52, + "operation": "bind_trace", + "is_undone": false, + "created_at": "2026-05-27T14:00:00Z", + "undone_at": null + }, + { + "history_id": 51, + "operation": "unbind", + "is_undone": true, + "created_at": "2026-05-27T13:00:00Z", + "undone_at": "2026-05-27T14:30:00Z" + }, + { + "history_id": 50, + "operation": "bind", + "is_undone": false, + "created_at": "2026-05-27T12:00:00Z", + "undone_at": null + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `total` | integer | Total bind history records for this identity | +| `undo_stack_count` | integer | Records available for undo (`is_undone=false`) | +| `redo_stack_count` | integer | Records available for redo (`is_undone=true`) | +| `results[].history_id` | integer | History record ID | +| `results[].operation` | string | Operation type (`bind`, `unbind`, or `bind_trace`) | +| `results[].is_undone` | boolean | Whether the operation has been undone | +| `results[].created_at` | string | When the operation was applied | +| `results[].undone_at` | string | When the undo occurred (null if not undone) | + +##### Example + +```bash +curl -s "$API/api/v1/identity/$IDENTITY_UUID/bind/history?page=1&limit=10" \ + -H "X-API-Key: $KEY" +``` + +##### Error Responses + +| HTTP | When | +|------|------| +| `404` | Identity not found | +| `500` | Database error | + +--- + +### 3. Merge History & Undo/Redo + +Merge operations use MongoDB for richer record-keeping, with a 24-hour undo deadline. 
+ +#### Merge Operation Overview + +| Property | Value | +|----------|-------| +| Storage | MongoDB `identity_merge_history` collection | +| Snapshot | Full source identity state + target identity state + aliases/metadata diffs | +| Trigger | Every mergeinto with `keep_history=true` | +| Undo deadline | 24 hours (renewed on redo) | +| Redo support | Yes — restores undone merges with new 24hr deadline | +| Max records | Unlimited | + +--- + +#### `POST /api/v1/identity/merge/:merge_id/undo` + +Already documented in [`07_identity.md`](07_identity.md#post-apiv1identitymergemerge_idundo). See that document for full details. + +--- + +#### `POST /api/v1/identity/merge/:merge_id/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo a previously undone merge operation within the renewed 24-hour deadline. + +##### Request + +No body required. The merge ID is taken from the URL path. + +##### Behavior + +1. Validates the merge record exists and `undone=true` (not already active) +2. Checks the 24-hour undo deadline (if expired, the redo is rejected) +3. Re-applies face bindings: moves all faces from `source_identity` to `target_identity` again +4. Re-adds aliases that were removed by the undo (aliases with `source: "merge"` tag) +5. Re-adds metadata fields that were removed by the undo +6. If `keep_history=true`: sets `source_identity.status = 'merged'` again +7. If `keep_history=false`: recreates source identity from the `undone_snapshot` stored at undo time +8. Syncs both identity JSON files to disk +9. Sets `undone=false`, clears `undone_snapshot`, renews `undo_deadline = NOW() + 24h` +10. 
Records `redone_by` user for audit + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/redo" \ + -H "X-API-Key: $KEY" +``` + +##### Response (200) + +```json +{ + "success": true, + "message": "Redo merge completed: merged 'stranger_13894' into 'Louis Viret' (52 faces transferred)", + "data": { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "faces_transferred": 52, + "aliases_re_added": 1, + "metadata_fields_re_added": 2 + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `merge_id` | string | The merge operation ID | +| `faces_transferred` | integer | Number of faces transferred from source to target | +| `aliases_re_added` | integer | Number of aliases restored to target | +| `metadata_fields_re_added` | integer | Number of metadata fields restored to target | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | Merge not undone, deadline expired, or cannot redo | +| `404` | Merge record not found | +| `500` | Database error | + +--- + +### 4. 
Delete History & Undo/Redo + +#### Delete Operation Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table | +| Snapshot | `{"identity": {...full row...}, "unbound_faces": [{file_uuid, face_id, trace_id}, ...]}` | +| Max records | 1 active delete record per identity (redo stack cleared on new delete) | +| Undo support | Yes — recreates identity row, re-binds faces | +| Redo support | Yes — re-deletes the identity | +| Identity file | Deleted on delete, recreated on undo | + +#### Snapshot Format + +```json +{ + "identity": { + "id": 9, + "uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4", + "name": "Cary Grant", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": {}, + "tmdb_id": 112, + "tmdb_profile": null + }, + "unbound_faces": [ + { + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "trace_id": null + }, + { + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_6", + "trace_id": 906 + } + ] +} +``` + +#### Stack Model + +``` +DELETE identity (undo stack, is_undone=false) + ↓ undo +Identity recreated, faces re-bound + → delete history marked is_undone=true + ↓ redo (re-delete) +Identity deleted again, faces unbound + → delete history marked is_undone=false +``` + +A new delete after an undo clears the delete redo stack (no redo possible for the old delete). + +#### Undo Behavior (via existing `POST /api/v1/identity/:identity_uuid/undo`) + +1. Normal identity lookup fails (row was deleted) +2. Checks `identity_history` for `operation='delete' AND is_undone=false` matching the UUID in the snapshot +3. Recreates the identity row (new internal `id`, same UUID) +4. Re-binds all faces listed in `unbound_faces` to the new identity +5. Marks the `identity_history` delete record as `is_undone=true` with `undone_at=NOW()` +6. Syncs `identity.json` to disk +7. 
Updates `_index.json` + +#### Redo Behavior (via existing `POST /api/v1/identity/:identity_uuid/redo`) + +1. Identity lookup succeeds (identity was restored by prior undo) +2. Checks `identity_history` for `operation='delete' AND is_undone=true` matching the identity_id +3. Deletes `identity.json` from disk +4. Unbinds all faces (`identity_id = NULL`) +5. Deletes the identity row +6. Marks the delete history record as `is_undone=false` +7. Returns success + +#### Error Responses (delete undo/redo) + +| HTTP | Scenario | +|------|----------| +| `400` | No delete history available (either no delete or already undone/redone) | +| `404` | Identity not found (for redo — identity wasn't restored) | +| `500` | Database error | + +--- + +### Comparison: PATCH vs Bind vs Merge vs Delete Undo/Redo + +| Aspect | PATCH Undo/Redo | Bind Undo/Redo | Merge Undo/Redo | Delete Undo/Redo | +|--------|----------------|----------------|-----------------|------------------| +| Storage | PostgreSQL `identity_history` | PostgreSQL `identity_history` | MongoDB `identity_merge_history` | PostgreSQL `identity_history` | +| Operation filter | `operation='update'` | `operation IN ('bind','unbind','bind_trace')` | — | `operation='delete'` | +| Trigger | Every PATCH | Every bind/unbind/bind_trace | Every mergeinto with `keep_history=true` | Every DELETE | +| Undo deadline | None (unlimited) | None (unlimited) | 24 hours (renewed on redo) | None (unlimited) | +| Redo support | Yes | Yes | Yes | Yes | +| Step undo | Yes (`steps` param) | Yes (`steps` param) | No (full undo/redo only) | No (single record) | +| Max records | 256 per identity | 256 per identity (shared) | Unlimited | 256 per identity (shared) | +| User tracking | `user_id` + `user_source` | `user_id` + `user_source` | `performed_by_user` + `undone_by` / `redone_by` | `user_id` + `user_source` | + +--- + +*Updated: 2026-05-28* diff --git a/docs_v1.0/API_WORKSPACE/narratives/marcom_intro.md 
b/docs_v1.0/API_WORKSPACE/narratives/marcom_intro.md new file mode 100644 index 0000000..cf4fc12 --- /dev/null +++ b/docs_v1.0/API_WORKSPACE/narratives/marcom_intro.md @@ -0,0 +1,36 @@ + + + + +## About This Manual + +This training manual is designed for the Marcom team to understand and use the Momentry Core API. + +### Demo Credentials + +**API Key**: `muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69` + +**SFTPGo** (for video upload): + +| Item | Value | +|------|-------| +| SFTP Host | `sftpgo.momentry.ddns.net` | +| SFTP Port | `2022` | +| Username | `demo` | +| Password | `demopassword123` | +| Web UI | `https://sftpgo.momentry.ddns.net` | + +### Quick Examples + +**List all videos:** +```bash +curl -s -H "X-API-Key: $KEY" "$API/api/v1/files/scan" +``` + +**Search:** +```bash +curl -s -X POST "$API/api/v1/search" \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $KEY" \ + -d '{"query": "example", "limit": 5}' +``` diff --git a/docs_v1.0/DESIGN/ASRX_HYBRID_PIPELINE_V1.0.md b/docs_v1.0/DESIGN/ASRX_HYBRID_PIPELINE_V1.0.md new file mode 100644 index 0000000..c3d2105 --- /dev/null +++ b/docs_v1.0/DESIGN/ASRX_HYBRID_PIPELINE_V1.0.md @@ -0,0 +1,588 @@ +# ASRX Hybrid Pipeline v1.0 — 聲紋分離混合架構 + +| 項目 | 內容 | +|------|------| +| **範圍** | ASRX 處理器重構:whisperx → VAD-first hybrid pipeline | +| **狀態** | Draft | +| **適用版本** | Momentry Core V4.0+ | +| **作者** | OpenCode / Warren | +| **建立日期** | 2026-06-01 | + +--- + +## 1. 
問題 + +### 1.1 現有問題 + +| 問題 | 說明 | 影響 | +|------|------|------| +| **Whisper 合併短句** | `whisper small` 會將兩個人的對話錯認成一個連續段 (A+B → 一句) | ASR segment 內混兩人話語,speaker 無法分離 | +| **ASRX v2 speaker_id = null** | `asrx_processor_v2.py` 使用 `whisperx.DiarizationPipeline()` 但該 API 未在 whisperx `__init__.py` 暴露 | 所有 segment speaker 均為 null | +| **文字丟失** | `asrx_processor_custom.py` 的 `SelfASRXFixed.process_with_segments()` 只輸出 `text: ""` | Rule 1 合併時無文字可用 | +| **錯誤的聲紋後端** | `asrx_processor_v2.py` 依賴 whisperx 內建 diarization,但該功能不穩定 | 準確度 ~85%,需 HF token | +| **多版本混亂** | 7 個 root-level 變體、14 個 asrx_self 檔案,生產環境使用錯誤版本 | 維護困難,不知哪個是對的 | + +### 1.2 痛點場景 + +**兩個說話人短句來回切換**(訪談、對話): + +``` +Audio: A(2s) → B(1.5s) → A(3s) +Whisper: ───────[0-7s, "A+B+A 全部混在一起"]─────── +``` + +Whisper 在句間停頓處不切段,導致 ASR 時間邊界無法反映 speaker 切換。 + +--- + +## 2. 架構 + +### 2.1 核心原則 + +1. **VAD 先定邊界** — 用 VAD 在句間停頓處切段,取代 whisper 的邊界 +2. **ASR 後做** — 每段各自轉錄,保有獨立文字 +3. **聲紋聚類定 speaker** — ECAPA-TDNN + AgglomerativeClustering + +### 2.2 5 步 Pipeline + +``` +Audio + │ + ① whisper (一次, 粗略定位) + │ 找到說話段 + 初步文字 + 語種 + │ [0-7s, "今天天氣很好我覺得也不錯對啊", zh] + │ + ② VAD scan (在每段內細切) + │ 利用句間停頓切開 + │ 段1 [0-2s] 段2 [2-3.5s] 段3 [3.5-7s] + │ + ③ whisper per refined segment (各段轉錄) + │ 段1 → "今天天氣很好" (zh, 0.98) + │ 段2 → "我覺得也不錯" (zh, 0.97) + │ 段3 → "對啊" (zh, 0.96) + │ + ④ ECAPA-TDNN per refined segment (聲紋提取) + │ 段1 → emb[0] (192-dim) + │ 段2 → emb[1] (192-dim) + │ 段3 → emb[2] (192-dim) + │ + ⑤ AgglomerativeClustering (聚類定 speaker) + │ emb[0]=SPEAKER_0, emb[1]=SPEAKER_1, emb[2]=SPEAKER_0 + │ + 輸出: + start end text language speaker_id + 0.0 2.0 今天天氣很好 zh SPEAKER_0 + 2.0 3.5 我覺得也不錯 zh SPEAKER_1 + 3.5 7.0 對啊 zh SPEAKER_0 +``` + +### 2.3 流程圖 + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ asrx_processor.py │ +│ (wrapper) │ +│ │ +│ ① ffprobe → select best track → ffmpeg → 16kHz WAV │ +│ │ +│ ② SelfASRXFixed.process(audio_wav, file_uuid) │ +│ │ │ +│ ├─ Step 1: whisper.transcribe() → rough segments │ +│ ├─ Step 2: VAD 
scan each rough segment │ +│ ├─ Step 3: whisper per refined segment → text+language │ +│ ├─ Step 4: ECAPA-TDNN per segment → 192-dim embedding │ +│ ├─ Step 5: AgglomerativeClustering → speaker_labels │ +│ │ │ +│ ├─ Step 6: Store embeddings in Qdrant │ +│ │ └─ {file_uuid, speaker_id, text, language, start, end} │ +│ │ │ +│ └─ Step 7: Classify high-quality embeddings │ +│ ├─ quality > threshold → reference profile │ +│ ├─ 送入聲音分類模型推論性別/屬性 │ +│ └─ 寫入 Qdrant (type: speaker_reference) │ +│ │ +│ ③ 輸出 JSON 格式 (不含 embedding) │ +│ │ +│ Rust: rule1_ingest.rs │ +│ └─ pre_chunks(processor_type='asrx') → chunks │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## 3. 檔案組織 + +### 3.1 最終檔案結構 + +``` +scripts/ +├── asrx_processor.py ← production (cleaned custom.py) +│ +└── asrx_self/ ← 核心庫 + ├── __init__.py ← package marker + ├── vad.py ← Silero VAD (新增 scan_within_segment) + ├── whisper_local.py ← 🆕 封裝 whisper 載入+轉錄 + ├── speaker_encoder.py ← ECAPA-TDNN 192-dim + ├── speaker_cluster_fixed.py ← AgglomerativeClustering + └── main_fixed.py ← 🔧 重寫為 5 步 pipeline +``` + +### 3.2 刪除清單 + +**Root-level 變體**(全部刪除): + +| 檔案 | 原因 | +|------|------| +| `asrx_processor.py` | 原始 whisperx 版,diarization 壞的 | +| `asrx_processor_v2.py` | 同上,Rust 目前錯誤呼叫此檔 | +| `asrx_processor_v2_noalign.py` | 跳過對齊但 diarization 仍壞 | +| `asrx_processor_v2_transcribe.py` | 只轉錄不做 speaker | +| `asrx_processor_simplified.py` | 變體 | +| `asrx_processor_contract_v1.py` | 18KB,pyannote,需 HF token | + +**asrx_self 內被取代的舊版**: + +| 檔案 | 原因 | 取代者 | +|------|------|--------| +| `main.py` | 用 SpectralClustering,有 NaN 問題 | `main_fixed.py` | +| `speaker_cluster.py` | 用 SpectralClustering,不穩定 | `speaker_cluster_fixed.py` | + +### 3.3 搬離清單 + +非生產工具搬至 `tools/asrx/`: + +``` +tools/asrx/ +├── integrate_face_asrx_speaker.py +├── speaker_player_gui.py +├── speaker_player_gui_face.py +├── speaker_player_interactive.py +├── speaker_audio_player.py +├── test_long_movie.py +├── test_gui_face_player.py +└── 
docs/ + ├── FINAL_TEST_REPORT.md + ├── GUI_FACE_PLAYER_USAGE.md + ├── LONG_MOVIE_TEST_SUMMARY.md + └── SPEAKER_PLAYER_GUIDE.md +``` + +--- + +--- + +## 4. Qdrant 聲紋向量儲存 + +### 4.1 儲存流程 + +``` +Step 4 輸出: 每個 refined segment 有 {embedding: [192-dim], text, language, start, end} +Step 5 輸出: 每個 segment 被標上 speaker_id {SPEAKER_0, SPEAKER_1, ...} + +Step 6: Qdrant 儲存 + ┌─ 每個 segment → Qdrant point + │ point_id = hash(file_uuid + segment_index) ← 可重複查詢 + │ vector = embedding (192-dim) + │ payload = { + │ "file_uuid": str, ← 聚類後填入 + │ "speaker_id": str, ← 聚類後填入 + │ "text": str, ← ASR 轉錄結果 + │ "language": str, ← 語種 (zh/en/...) + │ "start_time": f64, ← 秒 + │ "end_time": f64, ← 秒 + │ "type": "speaker_embedding" ← 便於區分 + │ } + └─ +``` + +### 4.2 Qdrant Collection + +| 項目 | 內容 | +|------|------| +| Collection Name | `momentry_speaker` (或共用現有 collection) | +| Vector Dimension | 192 (ECAPA-TDNN 輸出) | +| Distance Metric | Cosine | +| Point ID | `hash(file_uuid + "_" + segment_index)` | + +### 4.3 Rust `upsert_speaker_embedding` + +```rust +impl QdrantDb { + pub async fn upsert_speaker_embedding( + &self, + point_id: u64, + vector: &[f32], + file_uuid: &str, + speaker_id: &str, + text: &str, + language: &str, + start_time: f64, + end_time: f64, + ) -> Result<()> { + // Qdrant PUT /collections/{collection}/points?wait=true + // payload: {file_uuid, speaker_id, text, language, start_time, end_time, type: "speaker_embedding"} + } +} +``` + +### 4.4 與現有 Face Embedding 的關係 + +| 類別 | Qdrant Collection | Dim | Payload | +|------|-------------------|-----|---------| +| Face | `momentry` (self.collection_name) | 512 (FaceNet) | `file_uuid, trace_id, frame_number` | +| **Speaker** | `momentry` 或獨立 collection | **192** (ECAPA-TDNN) | `file_uuid, speaker_id, text, language, start, end` | + +--- + +## 5. 
模組詳細設計 + +### 5.1 `vad.py` — 語音活動檢測 + +| 項目 | 內容 | +|------|------| +| 模型 | Silero VAD (torch.hub, snakers4/silero-vad) | +| 現有函數 | `load_vad_model()`, `extract_speech_segments()` | +| **新增函數** | **`scan_within_segment(wav, start_sec, end_sec, model, utils, min_speech_duration_ms=500)`** | + +`scan_within_segment` 作用: +- 在一個時間範圍 `[start_sec, end_sec]` 內執行 VAD 掃描 +- 只回傳該範圍內的語音子片段 `[(s1, e1), (s2, e2), ...]` +- 利用句間停頓切分,解決 whisper 合併問題 + +### 5.2 `whisper_local.py` 🆕 — Whisper 封裝 + +| 項目 | 內容 | +|------|------| +| 模型 | `whisper.load_model("base")` (可設定) | +| 函數 | `load_model()`, `transcribe_segment(audio, start, end)` | + +```python +def transcribe_segment(wav, sample_rate, start_sec, end_sec, model) -> dict: + """轉錄單一段落,回傳 {text, language, lang_prob, segments}""" +``` + +每段獨立轉錄,保留語言與信心度。 + +### 5.3 `speaker_encoder.py` — 聲紋編碼器 + +| 項目 | 內容 | +|------|------| +| 模型 | SpeechBrain ECAPA-TDNN (`spkrec-ecapa-voxceleb`) | +| 輸出維度 | 192-dim | +| EER | 0.80% (VoxCeleb1) | +| 授權 | MIT (不需要 HuggingFace token) | +| 函數 | `load_speaker_encoder()`, `extract_speaker_embedding()`, `extract_speaker_embeddings_batch()` | + +### 5.4 `speaker_cluster_fixed.py` — 說話人聚類 + +| 項目 | 內容 | +|------|------| +| 演算法 | AgglomerativeClustering (cosine + average linkage) | +| 取代 | `speaker_cluster.py` (SpectralClustering, NaN 問題) | +| 函數 | `robust_speaker_clustering(embeddings, n_speakers=None, max_speakers=10)` | + +### 5.5 `main_fixed.py` 🔧 — 核心調度器(7 步 Pipeline) + +```python +class SelfASRXFixed: + def process(self, audio_path, output_path=None, file_uuid=None): + """ + 7 步 speaker diarization pipeline + + Steps: + 1. whisper.transcribe(audio) → rough segments + text + language + 2. VAD scan each rough segment → refined segments + 3. whisper per refined segment → {text, language, lang_prob} + 4. ECAPA-TDNN per refined segment → 192-dim embeddings + 5. AgglomerativeClustering → speaker_labels + 6. 
Store all embeddings in Qdrant (if file_uuid provided) + payload: {file_uuid, speaker_id, text, language, start_time, end_time, type: "speaker_embedding"} + 7. High-quality embeddings (quality > threshold) → classify + store reference + payload: {type: "speaker_reference", file_uuid, speaker_id, n_segments, avg_quality, ...} + + Returns: + { + "segments": [ + { + "start": float, "end": float, + "text": str, "language": str, + "lang_prob": float, "speaker": str, + "speaker_id": str, "quality": float + }, + ... + ], + "speaker_stats": {...}, + "n_speakers": int, + "total_duration": float, + "references": [ + { + "speaker_id": str, + "n_segments": int, + "avg_quality": float, + "gender": str + } + ] + } + """ + + def _store_speaker_embeddings(self, segments, file_uuid): + """Step 6: 每個 segment 的 192-dim embedding 存入 Qdrant""" + + def _classify_high_quality_speakers(self, segments, embeddings, labels, file_uuid): + """Step 7: 高品質聲紋分級 + 分類 → Qdrant reference profile""" +``` +**移除**: + +| 舊方法 | 原因 | +|--------|------| +| `process_with_segments(audio, asr_segments)` | 外部 ASR 邊界來源不可靠,被 VAD 取代 | +| `process()` VAD-only fallback | 無文字輸出,被完整 pipeline 取代 | + +### 5.6 `speaker_classifier.py` 🆕 — 高品質聲紋分級與分類 + +#### 目的 + +聚類後,對每個 cluster 的 embedding 進行品質評估,高於閾值的獨立建檔,並用外部模型做自動分類。 + +#### 流程 + +``` +Step ⑤ 聚類後,每個 segment 有 {embedding, speaker_id} + │ + └─ Compute quality score per embedding + │ + ├─ 低於閾值 → 寫入 Qdrant (一般 speaker_embedding) + │ + └─ 高於閾值 (quality > 0.85) + ├─ 獨立建 reference profile + └─ 送入「支持聲音的模型」做分類 + ├─ 語者性別 (male/female) + ├─ 語種口音 (zh-CN / zh-TW / en-US) + └─ 或跨影片 speaker 匹配用 +``` + +#### Quality Score 計算 + +```python +def compute_embedding_quality(embeddings, labels, threshold=0.85): + """ + 每個 embedding 到所屬 cluster centroid 的餘弦相似度 + + Args: + embeddings: [n_segments, 192] + labels: [n_segments] 聚類標籤 + threshold: 高品質門檻 + + Returns: + qualities: [n_segments] 每個 embedding 的品質分數 + high_quality_mask: [n_segments] bool 陣列 + """ + from sklearn.metrics.pairwise import
cosine_similarity + + unique_labels = set(labels) + centroids = {} + for label in unique_labels: + mask = labels == label + centroid = np.mean(embeddings[mask], axis=0) + centroid = centroid / np.linalg.norm(centroid) + centroids[label] = centroid + + qualities = [] + for i, (emb, label) in enumerate(zip(embeddings, labels)): + sim = cosine_similarity([emb], [centroids[label]])[0][0] + qualities.append(sim) + + return np.array(qualities), np.array(qualities) >= threshold +``` + +#### Reference Profile 格式 + +```json +{ + "point_id": "hash(speaker_reference_" + file_uuid + "_" + speaker_id + "_" + cluster_index)", + "vector": "[192-dim centroid embedding]", + "payload": { + "type": "speaker_reference", + "file_uuid": "來源影片", + "speaker_id": "SPEAKER_0", + "n_segments": 25, + "avg_quality": 0.92, + "total_duration": 45.3, + "language": "zh", + "gender": "male", + "text_samples": ["今天天氣很好", "我覺得也不錯", "..."] + } +} +``` + +#### 支援的聲音分類模型(選項) + +| 模型 | 用途 | 優點 | 缺點 | +|------|------|------|------| +| **SpeechBrain gender classifier** | 性別分類 | 已整合 ECAPA-TDNN | 只分 male/female | +| **CLAP** (LAION) | 零樣本音頻分類 | 可自訂 label text | 需額外安裝 | +| **YAMNet** | 聲音事件分類 | Google 出品,521 classes | 不擅長語者屬性 | +| **Wav2Vec2-BERT** (speechbrain) | 情感/屬性 | 多維度分類 | 模型較大 | +| **自建 identity classifier** | 跨影片 speaker 匹配 | 與現有 identity 系統對接 | 需累積 reference data | + +> **待決定**: 選擇哪個分類模型,由後續 POC 決定。 + +#### `main_fixed.py` 新增方法 + +```python +class SelfASRXFixed: + # ... 既有 6 個步驟 ... + + def _classify_high_quality_speakers(self, segments, embeddings, labels, file_uuid): + """ + 步驟 7: 高品質聲紋分級與分類 + + 1. 計算 quality score + 2. 高於閾值者建立 reference profile + 3. 用分類模型推論性別/屬性 + 4. 
寫入 Qdrant (type: speaker_reference) + """ + qualities, mask = compute_embedding_quality(embeddings, labels) + + for i, (seg, emb, label, quality, is_high) in enumerate( + zip(segments, embeddings, labels, qualities, mask) + ): + seg["quality"] = float(quality) + if is_high: + profile = self._build_reference_profile( + emb, seg, file_uuid + ) + # 分類 (placeholder) + # gender = classify_gender(embedding) + self._store_speaker_reference(profile) +``` + +### 5.7 `asrx_processor.py` — 清理後的 wrapper + +清理項目: + +| 問題 | 位置 | 修法 | +|------|------|------| +| 硬編碼 UUID `dd61fda8...` | line 155 | 移除該 fallback path | +| `os.chdir(script_dir)` | line 112 | 改區域性 Path 操作 | +| ASR 文字丟棄 | line 258 | `text` 來自新 pipeline | +| `_debug` dict | line 222 | 移除 | +| `max_speakers=10` 寫死 | line 201 | 改 CLI 參數 `--max-speakers` | +| 載入外部 ASR segments | line 148-174 | 移除(不再需要) | + +--- + +## 6. 輸出格式 + +### 6.1 ASRX JSON Output (由 `asrx_processor.py` 寫入) + +> **注意**: 192-dim embedding 不在此 JSON 中。embedding 在 Python 端直接送入 Qdrant,JSON 只保留中繼資料。 + +```json +{ + "language": "zh", + "segments": [ + { + "start_time": 0.0, + "end_time": 2.0, + "start_frame": 0, + "end_frame": 60, + "text": "今天天氣很好", + "speaker_id": "SPEAKER_0", + "language": "zh", + "lang_prob": 0.98 + }, + { + "start_time": 2.0, + "end_time": 3.5, + "start_frame": 60, + "end_frame": 105, + "text": "我覺得也不錯", + "speaker_id": "SPEAKER_1", + "language": "zh", + "lang_prob": 0.97 + } + ], + "n_speakers": 2, + "speaker_stats": { + "SPEAKER_0": {"count": 1, "duration": 2.0}, + "SPEAKER_1": {"count": 1, "duration": 1.5} + } +} +``` + +### 6.2 Qdrant Point 格式 (由 Python `_store_speaker_embeddings` 寫入) + +> Embedding 不經過 Rust,直接在 Python 端完成 Qdrant HTTP PUT。 + +| Qdrant 欄位 | 值 | 說明 | +|-------------|-----|------| +| `id` | `hash(file_uuid + "_" + segment_index)` | 可重複查詢的 point ID | +| `vector` | `[f32; 192]` | ECAPA-TDNN 聲紋向量 | +| `payload.file_uuid` | `str` | 影片識別碼 | +| `payload.speaker_id` | `str` | 聚類後的 speaker 標籤 | +| `payload.text` | `str` | 
該段的轉錄文字 | +| `payload.language` | `str` | 語種 (`zh`/`en`) | +| `payload.start_time` | `f64` | 開始時間(秒) | +| `payload.end_time` | `f64` | 結束時間(秒) | +| `payload.type` | `"speaker_embedding"` | 便於與 face_embedding 區分 | + +### 6.3 Rust `AsrxResult` 對應 + +```rust +pub struct AsrxSegment { + pub start_time: f64, // serde(alias = "start") + pub end_time: f64, // serde(alias = "end") + pub start_frame: u64, // default 0 + pub end_frame: u64, // default 0 + pub text: String, + pub speaker_id: Option<String>, + pub language: Option<String>, // 🆕 新增 + pub lang_prob: Option<f64>, // 🆕 新增 +} +``` + +--- + +## 7. Rust 端變動 + +| 檔案 | 變動 | +|------|------| +| `src/core/processor/asrx.rs` | `asrx_processor_v2.py` → `asrx_processor.py` | +| `src/core/processor/asrx.rs` | `AsrxSegment` 新增 `language`, `lang_prob` 欄位 | +| `src/core/processor/asrx.rs` | 傳遞 `--file-uuid` 給 Python 腳本,讓 Python 端可直接寫入 Qdrant | +| `src/core/chunk/rule1_ingest.rs` | 若 `pre_chunks` data 含 `language` 則帶入 chunk metadata | +| `src/core/db/qdrant_db.rs` | 🆕 新增 `upsert_speaker_embedding()` 方法 (可選,若 Python 端直接寫 Qdrant 則不需) | + +--- + +## 8. 遷移計畫 + +### 實作順序 (依賴關係排序) + +| 步驟 | 內容 | 檔案 | 風險 | +|------|------|------|------| +| **S1** | `vad.py`: 新增 `scan_within_segment()` | `asrx_self/vad.py` | 低 | +| **S2** | 🆕 `whisper_local.py`: 封裝 whisper 載入 + 轉錄 | `asrx_self/whisper_local.py` | 低 | +| **S3** | 🔧 `main_fixed.py`: 重寫為 7 步 pipeline | `asrx_self/main_fixed.py` | 中 | +| **S4** | 🆕 `speaker_classifier.py`: 性別分類器 | `asrx_self/speaker_classifier.py` | 低 | +| **S5** | 🔧 `custom.py` cleanup + rename → `asrx_processor.py` | `asrx_processor_custom.py` | 低 | +| **S6** | 🔧 Rust `asrx.rs`: 改指向 + 傳 `--file-uuid` | `src/core/processor/asrx.rs` | 低 | +| **S7** | ✅ 驗證:build + playground 測試 | — | 中 | +| **S8** | 🧹 刪除變體 + 搬離工具 | — | 低 | + +### 驗證標準 + +1. `cargo build` 通過 +2. Playground 3003: 註冊影片 → ASRX processor 完成 +3. 輸出 JSON 中 `speaker_id` 非 `null` +4. Qdrant collection 有 `speaker_embedding` 點 +5. 性別正確標記 (male/female) + +--- + +## 9.
版本歷史 + +| 版本 | 日期 | 修改者 | 說明 | +|------|------|--------|------| +| V1.0 | 2026-06-01 | OpenCode | 初始版本:7 步 hybrid pipeline + Qdrant 聲紋儲存 + 高品質分類 | diff --git a/docs_v1.0/DESIGN/Modular_Doc_System_V1.0.md b/docs_v1.0/DESIGN/Modular_Doc_System_V1.0.md new file mode 100644 index 0000000..2fbe927 --- /dev/null +++ b/docs_v1.0/DESIGN/Modular_Doc_System_V1.0.md @@ -0,0 +1,385 @@ +--- +document_type: "design" +service: "MOMENTRY_CORE" +title: "模組生成式文件產出系統" +date: "2026-05-17" +version: "V1.0" +status: "active" +owner: "M5" +created_by: "OpenCode" +tags: + - "documentation" + - "modular" + - "generated-docs" + - "workspace" +ai_query_hints: + - "查詢模組生成式文件產出系統的設計理念" + - "如何使用 API_WORKSPACE" + - "如何新增 API endpoint 文檔" + - "make deploy 流程" + - "自定義交付文件" +related_documents: + - "STANDARDS/USER_DOCS_STANDARD.md" + - "STANDARDS/DOCS_STANDARD.md" + - "API_WORKSPACE/README.md" + - "API_WORKSPACE/modules/_template.md" +--- + +# 模組生成式文件產出系統 + +| 項目 | 內容 | +|------|------| +| 建立者 | OpenCode | +| 建立時間 | 2026-05-17 | +| 文件版本 | V1.0 | +| 目標讀者 | developer, documentation maintainer | + +--- + +## 版本歷史 + +| 版本 | 日期 | 目的 | 操作人 | +|------|------|------|--------| +| V1.0 | 2026-05-17 | 建立設計文件 | OpenCode | + +--- + +## 1. 
設計理念 + +### 1.1 痛點 + +傳統 API 文件維護有常見問題: + +| 問題 | 具體表現 | +|------|----------| +| **內容重複** | 同一個 endpoint 在快速參考、完整手冊、教育訓練文件中寫三次 | +| **更新遺漏** | 修改 curl 範例後,忘記同步到另一份文件 | +| **交付僵化** | 無法按對象產出不同版本的 API 文件 | +| **版本失靈** | YAML frontmatter 版本號與實際內容脫節 | + +### 1.2 核心原則 + +``` +單一真理源(modules/)→ 組裝引擎(assemble_docs.sh)→ 多種交付產品(GUIDES/) + + 編輯 ──→ 生成 ──→ 部署 + 1 處修改模組 make all make deploy +``` + +| 原則 | 說明 | +|------|------| +| **單一真理源** | 每個 endpoint 只在 `modules/` 中定義一次 | +| **組裝而非撰寫** | 交付文件是 modules 的組合,不是手寫 | +| **開發與交付分離** | `API_WORKSPACE/` 開發,`GUIDES/` 交付 | +| **模組為最小可測試單位** | 每個 module 可獨立驗證正確性 | +| **配置驅動** | `.toml` 配置定義哪些 module 以何種模式組裝成何種輸出 | + +### 1.3 檔案類型對照 + +| 類型 | 角色 | 可編輯 | 位置 | +|------|------|--------|------| +| Module (模組) | 不可再拆的內容最小單位 | ✅ 是 | `API_WORKSPACE/modules/` | +| Config (配方) | 定義組裝規則 | ✅ 是 | `API_WORKSPACE/configs/` | +| Narrative (敘事) | 非結構化的前言/背景 | ✅ 是 | `API_WORKSPACE/narratives/` | +| Assembled (產出) | 從模組組裝的交付文件 | ❌ 否(generated) | `API_WORKSPACE/_build/` → `GUIDES/` | + +--- + +## 2. 
目錄結構 + +``` +docs_v1.0/ +├── API_WORKSPACE/ ← 開發區 +│ ├── modules/ ← 端點模組(單一真理源) +│ │ ├── _template.md ← 模組撰寫規範 +│ │ ├── 01_auth.md ← 認證、Base URL +│ │ ├── 02_health.md ← 健康檢查 +│ │ ├── 03_register.md ← 註冊、掃描 +│ │ ├── 04_lookup.md ← 查詢、刪除 +│ │ ├── 05_process.md ← 處理、進度、任務 +│ │ ├── 06_search.md ← 搜尋(向量、n8n、視覺) +│ │ ├── 07_identity.md ← 身份 CRUD、bind/unbind +│ │ ├── 08_identity_agent.md ← Identity Agent +│ │ ├── 09_tmdb.md ← TMDb Enrichment +│ │ ├── 10_pipeline.md ← Stats、配置、未掛載端點 +│ │ └── 11_error_codes.md ← 錯誤碼對照表 +│ │ +│ ├── configs/ ← 組裝配方(每個輸出一份) +│ │ ├── reference.toml → API_REFERENCE.md +│ │ ├── endpoints.toml → API_ENDPOINTS.md +│ │ ├── quickref.toml → API_QUICK_REFERENCE.md +│ │ ├── errors.toml → API_ERROR_CODES.md +│ │ ├── index.toml → API_INDEX.md +│ │ ├── marcom.toml → API_TRAINING_MARCOM.md +│ │ └── tmdb.toml → TMDb_User_Guide.md +│ │ +│ ├── narratives/ ← 非端點敘事前言 +│ │ └── marcom_intro.md +│ │ +│ ├── _build/ ← 生成暫存區(gitignored) +│ ├── Makefile ← 組裝自動化入口 +│ ├── assemble_docs.sh ← 組裝引擎 +│ └── README.md ← 開發者速查 +│ +├── GUIDES/ ← 交付區 +│ ├── API_REFERENCE.md (generated) +│ ├── API_ENDPOINTS.md (generated) +│ ├── API_QUICK_REFERENCE.md (generated) +│ ├── API_ERROR_CODES.md (generated) +│ ├── API_INDEX.md (generated) +│ ├── API_TRAINING_MARCOM.md (generated) +│ ├── TMDb_User_Guide.md (generated) +│ ├── Demo_EndToEnd.md (手寫保留) +│ ├── Pipeline_API_Demo.md (手寫保留) +│ └── ... (其他手寫文件) +│ +├── DESIGN/ +├── REFERENCE/ +├── OPERATIONS/ +├── INTEGRATIONS/ +└── STANDARDS/ +``` + +--- + +## 3. 
模組規範 + +### 3.1 檔名規則 + +- 格式:`NN_<name>.md`(NN = 兩位數排序 01-99) +- 範例:`03_register.md`, `09_tmdb.md` +- 依賴序號決定組裝時的 endpoint 順序 + +### 3.2 Module Metadata 註解 + +每個 module 開頭必須有 metadata 註解: + +```markdown +<!-- module: tmdb --> +<!-- description: TMDb Enrichment endpoints --> +<!-- depends: auth --> +``` + +| 欄位 | 必填 | 說明 | +|------|------|------| +| `module` | Yes | 唯一名稱,無空格無數字開頭 | +| `description` | Yes | 一句話說明 | +| `depends` | No | 依賴的其他 module 名稱(逗號分隔) | + +### 3.3 Endpoint 結構 + +每個 endpoint 必須使用一致結構: + +```markdown +### `METHOD /path/to/endpoint` + +**Auth**: Required / Optional / Public +**Scope**: file-level / identity-level / system-level + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| + +#### Example + +```bash +curl -s -X METHOD "$API/path" \ + -H "X-API-Key: $KEY" \ + -d '{"field": "value"}' +``` + +#### Response (200) + +```json +{ ... } +``` + +#### Error Codes + +| Code | HTTP | When | +|------|------|------| +``` +``` + +### 3.4 變數規則 + +| 變數 | 用途 | 範例值 | +|------|------|--------| +| `$API` | Base URL | `http://localhost:3003` | +| `$KEY` | API Key | `your-api-key-here` | +| `$FILE_UUID` | File UUID | `3a6c1865...` | +| `$IDENTITY_UUID` | Identity UUID | `a9a90105...` | + +--- + +## 4. 組裝引擎 + +### 4.1 `assemble_docs.sh` + +Shell 腳本,接收三個參數: + +| 參數 | 說明 | 範例 | +|------|------|------| +| `--config` | TOML 配方路徑 | `configs/reference.toml` | +| `--modules` | Module 目錄 | `modules/` | +| `--build` | 輸出目錄 | `_build/` | + +### 4.2 三種組裝模式 + +| mode | 行為 | 適用 | +|------|------|------| +| `full` | 完整包含 module 全部內容(除 metadata) | API_REFERENCE, API_ENDPOINTS | +| `summary` | 僅擷取 endpoint 表格 + curl 範例 | API_QUICK_REFERENCE | +| `index` | 生成文件總覽(掃描 modules 目錄自動產生索引) | API_INDEX | + +### 4.3 組裝流程 + +``` +1. 讀取 config.toml → 解析 title, modules, mode, narrative +2. 生成 YAML frontmatter(含 document_type, date, version) +3. 生成 title heading + info block +4. (可選)摘自 TOC:從 modules ## headings 生成目錄 +5. (可選)插入 narrative intro +6.
遍歷 modules: + - full mode: 複製整份內容(跳過 `<!-- -->` metadata 註解) + - summary mode: 只提取 | table | + ```bash code block + - index mode: 自動掃描 modules 目錄生成清單 +7. 寫入 _build/ 輸出檔案 +``` + +--- + +## 5. 配方格式(config.toml) + +```toml +title = "輸出文件標題" +output = "_build/FILENAME.md" # 輸出路徑(相對於 API_WORKSPACE) +mode = "full" # full | summary | index +modules = ["01_auth", "03_register"] # 要包含的 module 名稱 +narrative = "narratives/xxx.md" # (可選)包含的敘事前言 +toc = true # (可選)是否生成目錄 + +[frontmatter] +document_type = "api_reference" # 用於 YAML frontmatter +service = "MOMENTRY_CORE" +version = "V1.0" +owner = "M5" +created_by = "OpenCode" +``` + +### 內建配方一覽 + +| 檔案 | 輸出 | Modules | Mode | +|------|------|---------|------| +| `reference.toml` | API_REFERENCE.md | 01-11 | full | +| `endpoints.toml` | API_ENDPOINTS.md | 01-10 | full | +| `quickref.toml` | API_QUICK_REFERENCE.md | 01-06,09 | summary | +| `errors.toml` | API_ERROR_CODES.md | 11 | full | +| `index.toml` | API_INDEX.md | (auto) | index | +| `marcom.toml` | API_TRAINING_MARCOM.md | 01,03,06 + narrative | full | +| `tmdb.toml` | TMDb_User_Guide.md | 01,03,09 | full | + +--- + +## 6. 工作流程 + +### 6.1 日常修改 + +```bash +# 1. 編輯模組 +cd API_WORKSPACE +vim modules/09_tmdb.md + +# 2. 重新生成單一文件 +make tmdb + +# 3. 預覽結果 +less _build/TMDb_User_Guide.md + +# 4. 部署 +make deploy +``` + +### 6.2 新增端點 + +```bash +# 1. 找到所屬模組 +ls modules/ +# 決定該 endpoint 屬於哪個模組(如 tmdb, identity, search) + +# 2. 在對應模組加入 endpoint 文檔 +vim modules/09_tmdb.md + +# 3. 重新生成所有文件 +make all + +# 4. 確認所有引用此端點的文件都有正確更新 +make check + +# 5.
部署 +make deploy +``` + +### 6.3 客製化交付 + +```bash +# 新增一個客製化配方 +cat > configs/integration_partner.toml << TOML +title = "Integration Partner API Guide" +output = "_build/PARTNER_GUIDE.md" +mode = "full" +modules = ["01_auth", "06_search", "09_tmdb", "11_error_codes"] +toc = true +[frontmatter] +document_type = "user_manual" +service = "MOMENTRY_CORE" +version = "V1.0" +owner = "M5" +created_by = "OpenCode" +TOML + +# 在 Makefile 中加入對應 target +echo "partner:" >> Makefile +echo ' @$$(SCRIPT) --config configs/integration_partner.toml --modules $$(MODULES) --build $$(BUILD)' >> Makefile + +# 生成 +make partner + +# 部署 +make deploy +``` + +--- + +## 7. 交付客製化對照表 + +| 對象 | 需要 modules | make target | 輸出 | +|------|-------------|-------------|------| +| API Developer | 01-11 (all) | `make reference` | API_REFERENCE.md | +| Quick Start User | 01-06,09 | `make quickref` | API_QUICK_REFERENCE.md | +| Marcom Team | 01,03,06 + narrative | `make marcom` | API_TRAINING_MARCOM.md | +| TMDb User | 01,03,09 | `make tmdb` | TMDb_User_Guide.md | +| Integration Partner | 01,06,09,11 | Custom config | PARTNER_GUIDE.md | + +--- + +## 8. 
GUIDES/ 文件類型說明 + +| 類型 | 來源 | 說明 | +|------|------|------| +| `API_*.md` (7 files) | Generated from API_WORKSPACE | API 功能文件,endpoint 列表 + curl 範例 | +| `Demo_*.md`, `M5API_*.md` | 手寫 | 敘事性指引,含完整 step-by-step 流程 | +| `PORTAL_*.md` | 手寫 | Portal 開發計畫與 Demo 指引 | +| `USER_MANUAL.md` | 手寫 | 系統操作使用手冊 | + +> **提醒**:不要直接修改 GUIDES/ 中的 generated files。修改應在 API_WORKSPACE/modules/ 中進行,然後執行 `make deploy`。 + +--- + +## 相關文件 + +- `API_WORKSPACE/README.md` — 開發者快速上手指南 +- `API_WORKSPACE/modules/_template.md` — 模組撰寫範本 +- `STANDARDS/DOCS_STANDARD.md` — 文件創建規範 +- `STANDARDS/USER_DOCS_STANDARD.md` — 使用者文件規範 diff --git a/docs_v1.0/DESIGN/REPRESENTATIVE_FRAME_API_V1.md b/docs_v1.0/DESIGN/REPRESENTATIVE_FRAME_API_V1.md new file mode 100644 index 0000000..d96cfab --- /dev/null +++ b/docs_v1.0/DESIGN/REPRESENTATIVE_FRAME_API_V1.md @@ -0,0 +1,128 @@ +# Representative Frame API V1.0 + +Portal 影片代表畫面 API — 沒有指定 frame_number 時自動偵測男女主角找到最佳互動 frame。 + +--- + +## 1. Overview + +### Purpose + +Portal 需要為每個影片顯示一張代表畫面(thumbnail),內容應為該影片最具代表性的 scene — 通常包含男女主角同框且互看的時刻。 + +### Principle + +**沒有指定 frame_number → auto-detect representative frame** + +既有端點不需改動,只需在 `frame` 參數為空時自動偵測。 + +--- + +## 2. Endpoint + +### `GET /api/v1/file/:file_uuid/thumbnail` + +**Query Parameters**: + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `frame` | i64 | ❌ | 指定 frame;不傳則 auto-detect | +| `x` | i32 | ❌ | bbox crop x | +| `y` | i32 | ❌ | bbox crop y | +| `w` | i32 | ❌ | bbox crop width | +| `h` | i32 | ❌ | bbox crop height | + +**Response**: Pure JPEG bytes (Content-Type: image/jpeg) + +**Examples**: +``` +GET /api/v1/file/:uuid/thumbnail → auto-detect +GET /api/v1/file/:uuid/thumbnail?frame=38165 → 指定 frame +GET /api/v1/file/:uuid/thumbnail?frame=38165&x=723&y=205&w=221&h=221 → 指定 crop +``` + +--- + +## 3. 
Internal Algorithm + +### Auto-detect Fallback Chain + +``` +Step 1: Auto-detect 主角 (top 2 by face count) + └─ face_detections JOIN identities + +Step 2: TKG Bridge — mutual_gaze? + ├── 有 mutual_gaze edge → first_frame ✅ + └── 無 → face_detections 第一次同框 frame ✅ + +Step 3: 只有一個主角? + └─ 該主角 face_quality (w×h×confidence) 最高 frame + +Step 4: 完全無 identity? + └─ 任一 face(不限 identity)的 face_quality 最高 frame + +Step 5: 完全無 face? + └─ 404 "No faces in this file" +``` + +### TKG Bridge Query + +```sql +-- 找兩主角各自的 main trace +SELECT trace_id FROM face_detections +WHERE file_uuid = $1 AND identity_id = $2 AND trace_id IS NOT NULL +GROUP BY trace_id ORDER BY COUNT(*) DESC LIMIT 1; + +-- TKG mutual_gaze 查詢 +SELECT (e.properties->>'first_frame')::bigint +FROM tkg_edges e +JOIN tkg_nodes a ON a.id = e.source_node_id +JOIN tkg_nodes b ON b.id = e.target_node_id +WHERE e.file_uuid = $1 + AND a.external_id = concat('trace_', $4) + AND b.external_id = concat('trace_', $5) + AND e.properties->>'mutual_gaze' = 'true' +LIMIT 1; + +-- Fallback: 第一次同框 +SELECT MIN(fd_a.frame_number)::bigint +FROM face_detections fd_a +JOIN face_detections fd_b ON fd_a.frame_number = fd_b.frame_number +WHERE fd_a.file_uuid = $1 AND fd_a.identity_id = $2 AND fd_b.identity_id = $3; +``` + +--- + +## 4. Implementation + +### Files Changed + +| File | Change | +|------|--------| +| `src/api/media_api.rs` | `ThumbQuery.frame` → `Option<i64>`; add auto-detect fallback | +| `src/core/processor/tkg.rs` | Add `query_auto_representative_frame()` + structs (已實作) | +| `src/core/processor/mod.rs` | Export new function + structs (已實作) | + +### Existing Trace-level Endpoints (不變) + +``` +GET /api/v1/file/:uuid/trace/:tid/representative-face → JSON (legacy) +GET /api/v1/file/:uuid/trace/:tid/thumbnail → JPEG (auto via select_rep_face) +``` + +### No Changes + +- ❌ No new DB tables / migrations +- ❌ No changes to `select_rep_face` / blurdetect +- ❌ No chunk / cut / pre_chunks dependency + +--- + +## 5.
Version History + +| Date | Version | Author | Change | +|------|---------|--------|--------| +| 2026-05-22 | 1.0 | OpenCode | Initial design | +| 2026-05-22 | 1.1 | OpenCode | 簡化為單一 endpoint: frame 為 None 時 auto-detect | + +*Updated: 2026-05-22* diff --git a/docs_v1.0/DESIGN/Redis_Progress_Reporting_V1.0.md b/docs_v1.0/DESIGN/Redis_Progress_Reporting_V1.0.md new file mode 100644 index 0000000..0d35b62 --- /dev/null +++ b/docs_v1.0/DESIGN/Redis_Progress_Reporting_V1.0.md @@ -0,0 +1,270 @@ +--- +document_type: "design_doc" +service: "MOMENTRY_CORE" +title: "Redis Progress Reporting V1.0" +version: "V1.0" +date: "2026-05-17" +author: "M5" +status: "draft" +--- + +# Redis Progress Reporting V1.0 + +| 項目 | 內容 | +|------|------| +| Service | `MOMENTRY_CORE` | +| Version | V1.0 | +| Date | 2026-05-17 | +| Author | M5 (OpenCode) | +| Status | Draft | + +## 1. Overview + +This document defines the standardized progress reporting architecture for Momentry Core processors. It replaces the inconsistent ad-hoc progress patterns found across `scripts/`, `src/worker/`, and `src/api/`. 
+ +### 1.1 Problems Addressed + +| # | Problem | Detail | +|---|---------|--------| +| 1 | Worker Redis key does not match `OPERATIONS/MOMENTRY_CORE_REDIS_KEYS.md` V1.0 spec | Worker writes `worker:job:{uuid}:processor:{name}` instead of spec `job:{uuid}:processor:{name}` | +| 2 | Progress API reads wrong key | `get_progress()` reads `worker:job:{uuid}:processor:{name}` — unresolved with Playground subscriber which writes `job:{uuid}:processor:{name}` | +| 3 | Swift processors (Face/OCR/Pose) lack RedisPublisher | Progress lost — only stdout text | +| 4 | ASRX/Story/Visual chunk have no incremental progress | Start + complete only, no `current/total` updates | +| 5 | `frames_processed` / `chunks_produced` never updated in real-time | Worker only writes processor hash at start and exit | +| 6 | No `output_count` / `output_type` fields | Impossible to know how many faces/objects/segments were produced | + +### 1.2 Key Design Decisions + +| Decision | Rationale | +|----------|-----------| +| Progress unit = frames for video processors | All media-level processors work frame by frame | +| Output count separate from progress | Processors may produce N outputs per frame (multiple faces, objects) | +| Pub/sub for real-time, Hash for final state | Pub/sub is transient; Hash persists for API queries | + +--- + +## 2. Redis Key Architecture + +### 2.1 Key Patterns + +All keys use the configured `REDIS_KEY_PREFIX` (default: `momentry:` for production, `momentry_dev:` for playground). 
+ +| Pattern | Type | TTL | Purpose | Owner | +|---------|------|-----|---------|-------| +| `{prefix}progress:{uuid}` | Pub/Sub | — | Real-time progress messages | Python scripts | +| `{prefix}job:{uuid}` | Hash | 24h | Per-video job state | Worker | +| `{prefix}job:{uuid}:processor:{name}` | Hash | 24h | Per-processor final state | Worker | +| `{prefix}job:{uuid}:processor:{name}:output_count` | String | 24h | Output count by type | Worker | + +### 2.2 Processor Hash Fields + +``` +{prefix}job:{uuid}:processor:{name} +├── status String running / completed / failed / pending +├── current u32 Units processed (frames for video processors) +├── total u32 Total units +├── output_count u32 Output items produced (faces, objects, segments) +├── output_type String Type name of output: faces / objects / segments / cuts / etc. +├── pid i32 OS process ID (0 if not running) +├── error String Error message if failed +└── updated_at String ISO 8601 timestamp +``` + +### 2.3 Migrated Keys + +The following key patterns from the original implementation are REMOVED: + +| Old Key | Reason | +|---------|--------| +| `{prefix}worker:job:{uuid}:processor:{name}` | Non-standard prefix — not in `MOMENTRY_CORE_REDIS_KEYS.md` spec | +| `{prefix}job:{uuid}:processor:{name}:status` (flat) | Redundant — status stored in Hash | +| `{prefix}job:{uuid}:processor:{name}:progress` (flat) | Replaced by `current` + `total` for percent calculation | +| `{prefix}job:{uuid}:processor:{name}:current` (flat) | Replaced by Hash fields | +| `{prefix}job:{uuid}:processor:{name}:total` (flat) | Replaced by Hash fields | +| `{prefix}job:{uuid}:processor:{name}:started_at` (flat) | Replaced by Hash `updated_at` | + +--- + +## 3. 
Pub/Sub Message Format + +### 3.1 Channel + +``` +{prefix}progress:{uuid} +``` + +### 3.2 Message JSON + +```json +{ + "processor": "face", + "current": 150, + "total": 162696, + "output_count": 423, + "output_type": "faces", + "message": "Processing frame 150", + "timestamp": 1700000000 +} +``` + +### 3.3 Field Definitions + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `processor` | String | ✅ | Processor name: asr / asrx / yolo / ocr / face / pose / cut / story / visual_chunk | +| `current` | u32 | ✅ | Units processed (frames for video processors) | +| `total` | u32 | ✅ | Total units | +| `output_count` | u32 | ❌ | Output items produced so far | +| `output_type` | String | ❌ | Type name: faces / objects / segments / cuts / text_regions / persons / speakers / stories / visual_chunks | +| `message` | String | ❌ | Human-readable progress description | +| `timestamp` | u64 | ✅ | Unix timestamp | + +--- + +## 4. Per-Processor Metrics + +| Processor | current/total Unit | output_type | When to Publish | +|-----------|-------------------|-------------|-----------------| +| ASR | frames | `segments` | Every 100 segments processed | +| ASRX | frames | `speakers` | Every processing stage | +| YOLO | frames | `objects` | Every 500 frames | +| OCR | frames | `text_regions` | Every 5% | +| Face | frames | `faces` | Every batch (5% of frames) | +| Pose | frames | `persons` | Every 10% | +| CUT | frames | `cuts` | Every scene detected | +| Story | chunks | `stories` | Every chunk processed | +| Visual chunk | frames | `visual_chunks` | Every chunk processed | + +### 4.1 Output Type Enum + +```rust +pub enum OutputType { + Segments, // ASR + Speakers, // ASRX + Objects, // YOLO + TextRegions, // OCR + Faces, // Face + Persons, // Pose + Cuts, // CUT + Stories, // Story + VisualChunks, // Visual chunk +} +``` + +--- + +## 5. 
Data Flow + +``` +┌──────────────────┐ Pub/Sub ┌──────────────────────┐ +│ Python Processor │ ───────── progress:{uuid} ──────────→│ Worker (subscriber) │ +│ (ASR/YOLO/Face) │ {current, total, │ │ +│ │ output_count, output_type} │ ──→ HSET │ +└──────────────────┘ │ job:{uuid}: │ + │ processor:{name} │ +┌──────────────────┐ │ │ +│ Swift Processor │ ──→ Python wrapper ──→ pub/sub │ (status, current, │ +│ (Face/OCR/Pose) │ (add RedisPublisher) │ total, output_count,│ +└──────────────────┘ │ output_type) │ + └──────────┬───────────┘ + │ HGETALL + ┌──────────▼───────────┐ + │ Progress API │ + │ GET /progress/:uuid │ + │ │ + │ ─→ compute % │ + │ ─→ return JSON │ + └─────────────────────┘ +``` + +--- + +## 6. Implementation Plan + +### Phase 1: Python Processor RedisPublisher + +| Task | Files | Effort | +|------|-------|--------| +| Add `RedisPublisher` to `face_processor.py` | `scripts/face_processor.py` | Medium | +| Add `RedisPublisher` to `ocr_processor.py` | `scripts/ocr_processor.py` | Medium | +| Add `RedisPublisher` to `pose_processor.py` | `scripts/pose_processor.py` | Medium | +| Add incremental `.progress()` to `asrx_processor_custom.py` | `scripts/asrx_processor_custom.py` | Low | +| Standardize pub/sub message to include `output_count`, `output_type` | All processor scripts | Low | + +### Phase 2: Worker + +| Task | Files | Effort | +|------|-------|--------| +| Fix Redis key from `worker:job:` to `job:` | `src/worker/processor.rs`, `src/core/db/redis_client.rs` | Low | +| Subscribe to `progress:{uuid}` channel in `run_processor()` | `src/worker/processor.rs` | Medium | +| HSET Processor Hash on each progress message | `src/worker/processor.rs` | Medium | +| Set `output_count` and `output_type` from pub/sub message | `src/worker/processor.rs` | Low | + +### Phase 3: Progress API + +| Task | Files | Effort | +|------|-------|--------| +| Read `output_count`, `output_type` from Redis Hash | `src/api/server.rs` | Low | +| Compute percentage from `current` / 
`total` | `src/api/server.rs` | Low | +| Return `output_count`, `output_type` in response JSON | `src/api/server.rs` | Low | +| Remove `worker:` fallback path | `src/api/server.rs` | Low | + +### Phase 4: Cleanup + +| Task | Files | Effort | +|------|-------|--------| +| Remove old `worker:job:` keys from Redis | Deployment script | Low | +| Remove `update_processor_progress()` DB path (stale `processing_status` JSONB) | `src/core/db/postgres_db.rs` | Medium | + +--- + +## 7. API Response Changes + +### ProgressResponse (new fields) + +```json +{ + "processors": [ + { + "name": "face", + "status": "running", + "current": 150, + "total": 162696, + "progress": 0, + "frames_processed": 150, + "output_count": 423, + "output_type": "faces" + } + ] +} +``` + +--- + +## 8. Dependencies + +| Component | Version | Role | +|-----------|---------|------| +| Redis | ≥ 6.0 | Pub/Sub + Hash storage | +| `redis_publisher.py` | Existing | Python → Redis pub/sub client | +| `redis_client.rs` | Existing | Rust Redis client for worker + API | + +--- + +## 9. 
References + +| Doc | Relation | +|-----|----------| +| `OPERATIONS/MOMENTRY_CORE_REDIS_KEYS.md` | Parent spec — this doc supersedes sections 4, 7, 8 | +| `DESIGN/VIDEO_PROCESSING_SPEC.md` §2.3 | Original progress design (ProcessProgress struct) | +| `src/worker/processor.rs` | Worker progress write implementation | +| `scripts/redis_publisher.py` | Python pub/sub client | +| `src/api/server.rs` (get_progress) | Progress API handler | + +--- + +## Version History + +| Version | Date | Author | Change | +|---------|------|--------|--------| +| V1.0 | 2026-05-17 | M5 (OpenCode) | Initial draft — replaces ad-hoc progress patterns | diff --git a/docs_v1.0/M4_workspace/2026-05-27_charade_pipeline_checklist.md b/docs_v1.0/M4_workspace/2026-05-27_charade_pipeline_checklist.md new file mode 100644 index 0000000..6e255bc --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-27_charade_pipeline_checklist.md @@ -0,0 +1,242 @@ +--- +title: Charade Full Movie Pipeline Checklist +version: 1.0 +date: 2026-05-27 +author: M5Max48 +status: in_progress +--- + +# Charade Full Movie Pipeline Checklist + +**File UUID**: `c3c635e3641da80dde10cc555ffcdda5` +**File Name**: Charade (1963) Cary Grant & Audrey Hepburn | Comedy Mystery Romance Thriller | Full Movie.mp4 +**Duration**: 6785 seconds (113 minutes) +**Total Frames**: 169,625 + +--- + +## P0: Processor Outputs + +### Purpose +原始處理器輸出檔案,存放在 `/Users/accusys/momentry/output_dev/`。這些是後續 ingestion 的資料來源。 + +### Processor Details + +| Processor | Expected Output | Size Estimate | Purpose | Status | +|-----------|-----------------|---------------|---------|--------| +| CUT | `c3c635e3641da80dde10cc555ffcdda5.cut.json` | ~170KB | Scene boundary detection,切割點用於 Rule 3 chunking | ✅ Done | +| YOLO | `c3c635e3641da80dde10cc555ffcdda5.yolo.json` | ~50-80MB | Object detection,每幀的物件類別與位置 | 🔄 Running | +| Face | `c3c635e3641da80dde10cc555ffcdda5.face.json` | ~1.5GB | Face detection + 512-dim embedding (FaceNet CoreML) | 🔄 44% | +| Face Traced | 
`c3c635e3641da80dde10cc555ffcdda5.face_traced.json` | ~1.2GB | Face tracking,同一人物的連續出現 → trace_id | ⏳ Pending (after Face) | +| OCR | `c3c635e3641da80dde10cc555ffcdda5.ocr.json` | ~50KB | Text recognition from frames | ❌ Skipped | +| Pose | `c3c635e3641da80dde10cc555ffcdda5.pose.json` | ~20MB | Body pose estimation | 🔄 Running | +| ASRX | `c3c635e3641da80dde10cc555ffcdda5.asrx.json` | ~8MB | Speaker diarization,語者分段 | ✅ Done (reuse from public) | +| Visual Chunk | `c3c635e3641da80dde10cc555ffcdda5.visual_chunk.json` | ~60KB | Visual scene chunk metadata | ✅ Done | +| Scene | `c3c635e3641da80dde10cc555ffcdda5.scene.json` | ~300B | Scene list from CUT | ✅ Done | +| Scene Meta | `c3c635e3641da80dde10cc555ffcdda5.scene_meta.json` | ~50KB | Heuristic scene metadata (人物 + 物件統計) | ⏳ Pending | +| Story LLM | `c3c635e3641da80dde10cc555ffcdda5.story_llm.json` | ~800KB | LLM-generated story summaries per chunk | ✅ Done | +| Story Story | `c3c635e3641da80dde10cc555ffcdda5.story_story.json` | ~800KB | Story parent-child relationships | ✅ Done | +| TMDb | `c3c635e3641da80dde10cc555ffcdda5.tmdb.json` | ~5KB | TMDb cast list with face embeddings | ⏳ Pending | +| 5W1H | `c3c635e3641da80dde10cc555ffcdda5.5w1h.json` | ~500KB | 5W1H agent output (who/when/where/what/why/how) | ✅ Done | + +### Key Dependencies +- Face Traced 需要 Face 完成後才能執行 (face_traced.json = face.json + tracking) +- Scene Meta 需要 Face + YOLO 完成 +- TMDb 需要 Face Traced 完成後執行 matching + +--- + +## P1: Database Records + +### Purpose +將 processor outputs 存入 PostgreSQL,供 API query 使用。 + +### Table Details + +| Table | Expected Records | Purpose | Verification Query | Status | +|-------|------------------|---------|-------------------|--------| +| `dev.videos` | 1 row | Video metadata (duration, fps, status) | `SELECT file_uuid, status FROM dev.videos WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ✅ Registered | +| `dev.monitor_jobs` | 1 row | Processing job state machine | `SELECT uuid, status, 
completed_processors FROM dev.monitor_jobs WHERE uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | 🔄 Running | +| `dev.pre_chunks` | ~7,000 rows | Raw processor outputs (ASR sentences, YOLO objects, etc.) | `SELECT COUNT(*) FROM dev.pre_chunks WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | +| `dev.face_detections` | ~70,000 rows | Face detection records (每幀每張臉) | `SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | +| `dev.face_detections.embedding` | ~70,000 non-NULL | 512-dim FaceNet embedding (用於 identity matching) | `SELECT COUNT(embedding) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | +| `dev.face_detections.trace_id` | ~70,000 non-NULL | Face tracking ID (同一人物跨幀連續出現) | `SELECT COUNT(trace_id) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | +| `dev.face_detections.identity_id` | ~50,000 non-NULL | TMDb identity binding (Audrey, Cary, etc.) 
| `SELECT COUNT(identity_id) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | + +### Key Points +- `embedding` 必須非 NULL 才能進行 TMDb matching (之前 store_traced_faces.py bug 修復) +- `trace_id` 由 `store_traced_faces.py` 從 face_traced.json 計算 +- `identity_id` 由 `match_faces_to_tmdb.py` 計算 (cosine similarity > 0.5) + +--- + +## P2: Chunk Ingestion + +### Purpose +將 raw processor outputs 轉換為 searchable chunks,用於 RAG query。 + +### Chunk Types + +| Chunk Type | Expected Count | Purpose | Source | Verification Query | Status | +|------------|----------------|---------|--------|-------------------|--------| +| sentence (Rule 1) | ~1,700 | Sentence-level chunks for text search | ASR output → sentence split | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'sentence'` | ⏳ Pending | +| llm_parent | ~800 | LLM-generated summary parent chunks | Story LLM output | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'llm_parent'` | ⏳ Pending | +| story_parent | ~800 | Story parent chunks (narrative segments) | Story processor | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'story_parent'` | ⏳ Pending | +| story_child | ~1,700 | Story child chunks (linked to sentence) | Story processor | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'story_child'` | ⏳ Pending | +| cut (Rule 3) | ~500 | Scene-level chunks for scene search | CUT output → scene boundaries | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'cut'` | ⏳ Pending | +| trace | ~3,600 | Face trace chunks (identity-centric) | Face Traced output | `SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND chunk_type = 'trace'` | ⏳ Pending | + +### Ingestion Pipeline +1. 
**Rule 1**: ASR → sentence split → chunk + embedding → Qdrant +2. **Rule 3**: CUT + ASR → scene chunks → chunk + embedding → Qdrant +3. **Trace**: Face Traced → trace chunks → TKG nodes → Qdrant + +### Key Points +- `start_frame` / `end_frame` 必須正確計算 (之前 bug: frame=0) +- Chunks 必須有 `embedding` 才能 search + +--- + +## P3: Vector Embeddings + +### Purpose +將 chunks 的 text 轉換為 768-dim embeddings,存入 PostgreSQL + Qdrant,用於 semantic search。 + +### Embedding Targets + +| Target | Expected Count | Model | Purpose | Verification | Status | +|--------|----------------|-------|---------|--------------|--------| +| PostgreSQL `dev.chunk.embedding` | ~5,000 | Gemma-2-9B (768-dim) | Text semantic search | `SELECT COUNT(embedding) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | ⏳ Pending | +| Qdrant `momentry_dev_rule1_v2` | ~5,000 points | Gemma-2-9B | Fast vector similarity search | `curl -H "api-key: Test3200Test3200Test3200" "http://localhost:6333/collections/momentry_dev_rule1_v2"` | ⏳ Pending | +| Qdrant `_face` collection | ~70,000 points | FaceNet-512 (512-dim) | Face identity search | Face embeddings sync via `sync_face_embeddings()` | ⏳ Pending | + +### Embedding Pipeline +1. **Text chunks**: `embeddinggemma_server.py` (port 11436) → 768-dim embedding +2. **Face embeddings**: FaceNet CoreML (from face.json) → 512-dim embedding (已在 P0 產生) +3. 
**Sync to Qdrant**: `sync_face_embeddings()` function in Rust + +### Key Points +- Text embeddings 使用 Gemma-2-9B (local LLM server) +- Face embeddings 使用 FaceNet-512 (CoreML ANE accelerated) +- Qdrant 提供 fast similarity search (cosine similarity) + +--- + +## P4: Identity Binding + +### Purpose +將 detected faces 綁定到 TMDb identities (Audrey Hepburn, Cary Grant, etc.),用於 identity_text search。 + +### Identity Matching Pipeline + +| Step | Expected Result | Method | Verification | Status | +|------|-----------------|--------|--------------|--------| +| TMDb seeds loaded | 23 identities | `tmdb_embed_extractor.py` → TMDb profile face embeddings | `SELECT COUNT(*) FROM dev.identities WHERE source = 'tmdb' AND face_embedding IS NOT NULL` | ✅ Done | +| Face matching | ~50,000 bindings | `match_faces_to_tmdb.py` → cosine similarity > 0.5 | `SELECT COUNT(identity_id) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND identity_id IS NOT NULL` | ⏳ Pending | +| Audrey Hepburn faces | ~16,000 | Highest similarity match | `SELECT COUNT(*) FROM dev.face_detections fd JOIN dev.identities i ON fd.identity_id = i.id WHERE fd.file_uuid = 'c3c635e3641da80dde10cc555ffcdda5' AND i.name = 'Audrey Hepburn'` | ⏳ Pending | +| Cary Grant faces | ~5,000 | Second highest match | Same query for Cary Grant | ⏳ Pending | + +### Matching Algorithm +```python +# match_faces_to_tmdb.py +for trace_id in traces: + for face_embedding in trace_faces: + for tmdb_identity in tmdb_identities: + similarity = cosine_similarity(face_embedding, tmdb_identity.face_embedding) + if similarity >= 0.5: + match trace_id → tmdb_identity +``` + +### Key Points +- TMDb seeds 需要 `face_embedding` (之前已驗證: 23 identities with embeddings) +- Face `embedding` 必須非 NULL (之前 store_traced_faces.py bug 修復) +- Threshold: 0.5 (可調整) + +--- + +## P5: API Endpoints + +### Purpose +驗證 API endpoints 可以正確返回 identity_text search results。 + +### API Tests + +| Endpoint | Purpose | Expected Response | Test 
Command | Status | +|----------|---------|-------------------|--------------|--------| +| `/api/v1/search/identity_text` | Search chunk text → identities | Results with `identity_name`, `trace_id`, `identity_source` | `curl "http://localhost:3003/api/v1/search/identity_text?file_uuid=c3c635e3641da80dde10cc555ffcdda5&q=Regina&limit=5"` | ⏳ Pending | +| `/api/v1/identities` | List identities with TMDb | Identity list with `tmdb_id`, `face_embedding` | `curl "http://localhost:3003/api/v1/identities?name=Audrey"` | ⏳ Pending | +| `/api/v1/progress/:file_uuid` | Check processing progress | JSON with `status`, `completed_processors` | `curl "http://localhost:3003/api/v1/progress/c3c635e3641da80dde10cc555ffcdda5"` | ⏳ Pending | + +### Expected API Response Example +```json +{ + "success": true, + "total": 5, + "results": [ + { + "chunk_id": "sentence_123", + "start_time": 355.0, + "text_content": "Oh, mine's Regina Lampert.", + "identity_id": 9, + "identity_name": "Audrey Hepburn", + "identity_source": "tmdb", + "trace_id": 169 + } + ] +} +``` + +### Key Points +- `identity_text` API 需要 `chunk.start_frame` / `chunk.end_frame` 正確 (之前 bug: frame=0) +- `identity_id` 必須非 NULL 才能返回 identity_name + +--- + +## P6: Completion Criteria + +### Purpose +驗證 pipeline 完整完成,所有 ingestion steps 成功。 + +### Final Verification Checklist + +| Criteria | Purpose | Check Command | Expected Result | Status | +|----------|---------|---------------|-----------------|--------| +| All processor outputs exist | 確認所有 processor JSON 檔案產生 | `ls -la output_dev/c3c635e3641da80dde10cc555ffcdda5.*` | 14+ files with size > 0 | ⏳ Pending | +| Job status = completed | 確認 worker 完成 job | `SELECT status FROM dev.monitor_jobs WHERE uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | `completed` | ⏳ Pending | +| Video status = completed | 確認 video state 更新 | `SELECT status FROM dev.videos WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | `completed` | ⏳ Pending | +| All chunks have embeddings | 確認 text embeddings 
完成 | `SELECT COUNT(*) = COUNT(embedding) FROM dev.chunk WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | `true` (all chunks have embedding) | ⏳ Pending | +| Face traces assigned | 確認 face tracking 完成 | `SELECT COUNT(*) = COUNT(trace_id) FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | `true` (all faces have trace_id) | ⏳ Pending | +| TMDb matching done | 確認 identity binding 完成 | `SELECT COUNT(identity_id) > 40000 FROM dev.face_detections WHERE file_uuid = 'c3c635e3641da80dde10cc555ffcdda5'` | `true` (> 40K identity bindings) | ⏳ Pending | +| Qdrant synced | 確認 vector search ready | Check Qdrant points count | Points increased by ~5,000 | ⏳ Pending | + +### Success Thresholds +- **Face detections**: ~70,000 (169K frames / 3 sample interval) +- **Identity bindings**: > 40,000 (60% match rate) +- **Chunks with embeddings**: > 4,000 (all chunk types) +- **Qdrant points**: > 90,000 (current) → > 95,000 (after Charade) + +--- + +## Verification Script + +```bash +# Run after completion +./scripts/verify_charade_pipeline.sh c3c635e3641da80dde10cc555ffcdda5 +``` + +--- + +## Notes + +- OCR processor failed, skipped +- Face detection using SwiftFace (ANE accelerated) +- TMDb matching using `scripts/match_faces_to_tmdb.py` +- Expected total processing time: ~2-3 hours + +--- + +## Version History + +| Version | Date | Author | Changes | +|---------|------|--------|---------| +| 1.0 | 2026-05-27 | M5Max48 | Initial checklist | \ No newline at end of file diff --git a/docs_v1.0/M4_workspace/2026-05-29_identity_sync_and_wp_fixes.md b/docs_v1.0/M4_workspace/2026-05-29_identity_sync_and_wp_fixes.md new file mode 100644 index 0000000..3d62daf --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-29_identity_sync_and_wp_fixes.md @@ -0,0 +1,49 @@ +# Session Summary: Identity Fixes + WP Proxy Fixes + Data Sync + +**Date**: 2026-05-29 +**Author**: OpenCode +**Status**: Completed (marcom team testing) + +## What Was Done (Chronological) + +### 1. 
Production Identity Fixes (3002) +- **James Coburn restored** (id=18738, confirmed) +- **Chantal Goya restored** (id=18737, confirmed) +- **Louis Viret name/status fixed** +- **Sequences fixed**: `identities_id_seq` (48→18734), `face_detections_id_seq` (141383→932413), `identity_history_id_seq`, `identity_bindings_id_seq`, `pre_chunks_id_seq`, `file_identities_id_seq` +- **COALESCE fix** for `reference_data` NULL crash (`postgres_db.rs:3198`, `storage.rs:196`) + +### 2. Bug Fixes +- **DELETE identity**: Fixed binding order bug + removed `identity_confidence` column reference +- **PATCH identity**: `jsonb_deep_merge` Nested JSON metadata +- **mergeinto UNDO/REDO**: MongoDB deserialization fix (`Collection`) + +### 3. Library Page Infinite Load Fix +- **Root cause**: WP scan proxy (snippet 48) didn't forward query params → infinite pagination loop +- **Fix**: Added `$request->get_query_params()` forwarding in scan proxy +- **Safety**: Added `maxPages = 10` limit in JS pagination + +### 4. Identity Data Sync (Dev → Production) +- **Full replacement** of `public.identities`, `public.identity_bindings`, `public.identity_history` with dev data +- James Coburn id: 18738 → 11 +- Bindings: 11,892 → 12,834 (+942) +- **Verification**: 0 differences between schemas + +### 5. 
Snippet 55 Filter +- Added `.filter(f => f.is_registered)` to show only registered files on library page +- Changed `status:'unregistered'` → `status: f.status || 'unregistered'` + +## Key Decisions +- Library page filter: default show registered files only +- Identity sync: full DELETE + INSERT (not UPDATE) to ensure consistency +- No user-defined metadata fields (starred/notes/role) preserved — matches dev exactly + +## Handoff to Marcom +- `/people/` page should show correct identity state +- `/library/` page should show only registered files (4 currently) +- Login required for `/library/` — redirects to `/login/` if not authenticated + +## Files Modified +- `snippet 48` (/scan WP proxy — query param forwarding) +- `snippet 55` (library page JS — registered-only filter, maxPages safety) +- `docs_v1.0/M4_workspace/2026-05-29_identity_sync_prod.md` (sync record) diff --git a/docs_v1.0/M4_workspace/2026-05-29_identity_sync_prod.md b/docs_v1.0/M4_workspace/2026-05-29_identity_sync_prod.md new file mode 100644 index 0000000..fc7bdc8 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-29_identity_sync_prod.md @@ -0,0 +1,45 @@ +# Identity Data Sync: Dev (3003) → Production (3002) + +**Date**: 2026-05-29 +**Author**: OpenCode +**Status**: Completed + +## Summary + +Fully synced all identity-related tables from dev schema to public schema on PostgreSQL `momentry` database. + +## What Was Done + +1. **Identities table** (`public.identities`): Replaced with `dev.identities` (69 records, original ids preserved) +2. **Identity_bindings** (`public.identity_bindings`): Replaced with `dev.identity_bindings` (12,834 records) +3. **Identity_history** (`public.identity_history`): Replaced with `dev.identity_history` (10 records) +4. 
**Sequences**: Updated `identities_id_seq`, `identity_bindings_id_seq`, `identity_history_id_seq` to match + +### Key Changes +- **James Coburn**: Changed from id=18738 → id=11 (dev's original id) +- **Chantal Goya**: Changed from id=18737 → id=18736 (dev's id) +- **Metadata**: Now matches dev schema — TMDB fields only, no user-defined fields (starred, notes, role, aliases, user_confirmed are removed as expected) +- **Bindings**: Increased from 11,892 → 12,834 (+942 bindings) + +### Not Changed +- `face_detections` — identical in both schemas (135,521 records) +- `pre_chunks` — large difference (public: 1.3M vs dev: 3.3M) but NOT related to identity +- All other non-identity tables unchanged + +## Verification + +```sql +-- Counts match +identities: 69 = 69 ✅ +identity_bindings: 12,834 = 12,834 ✅ +identity_history: 10 = 10 ✅ + +-- No differences +id/uuid mismatch: 0 +metadata/status/name diffs: 0 +``` + +## Files Referenced + +- `AGENTS.md` — Development isolation rules +- `/Users/accusys/momentry_core/docs_v1.0/M4_workspace/2026-05-29_wp_api_url_update.md` — Previous session handoff diff --git a/docs_v1.0/M4_workspace/2026-05-29_mergeinto_null_faceid_fix.md b/docs_v1.0/M4_workspace/2026-05-29_mergeinto_null_faceid_fix.md new file mode 100644 index 0000000..c606677 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-29_mergeinto_null_faceid_fix.md @@ -0,0 +1,27 @@ +# 2026-05-29: Mergeinto NULL face_id Fix + +## Problem +Production server (3002) returned `"error":"error occurred while decoding column 0: unexpected null; try decoding as an 'Option'"` when using mergeinto after clicking undo on a merge. + +## Root Cause +`src/api/identity_binding.rs:428` decodes `face_id` from `face_detections` as `String` (non-Option), but **135,521 records** in the production `face_detections` table have NULL `face_id`. When merging an identity whose face_detections include NULL face_ids, the SQLx decode panics. 
+ +## Fix +- Changed `(String, Option<_>)` → `(Option<String>, Option<_>)` at line 428 +- Changed `face_id_list` to use `filter_map` instead of `map` to skip NULL face_ids +- Changed `faces_count` to use `face_id_list.len()` instead of `face_ids.len()` (matching the actual transferred count) + +## Files Changed +- `momentry_core/src/api/identity_binding.rs` — 3 lines changed + +## Verification +- 234 library tests pass +- `cargo fmt` passes +- Production binary rebuilt (`target/release/momentry`) +- Production server restarted on port 3002 (PID 92043) + +## Identities with NULL face_id (20 identities, ~135k records) +Audrey Hepburn (36k), Cary Grant (15k), Bernard Musson, Walter Matthau, Jacques Marin, George Kennedy, Michel Thomass, Antonio Passalia, etc. — all `type=people, status=confirmed`. These identities were likely imported from bulk face detection data without face_id generation. + +## Data Note +The NULL face_ids are a pre-existing data quality issue. The fix prevents crashes but doesn't clean up the NULL data. Faces with NULL face_id won't be tracked in undo history (they stay with the target after undo), but the bulk transfer (`WHERE identity_id = $1`) still works correctly. diff --git a/docs_v1.0/OPERATIONS/TMDb_Pipeline_Test_2026-05-17.md b/docs_v1.0/OPERATIONS/TMDb_Pipeline_Test_2026-05-17.md new file mode 100644 index 0000000..2162b21 --- /dev/null +++ b/docs_v1.0/OPERATIONS/TMDb_Pipeline_Test_2026-05-17.md @@ -0,0 +1,68 @@ +# TMDb Pipeline Test 2026-05-17 + +## Purpose +Verify full TMDb enrichment pipeline: register → process → TMDb prefetch → probe → identity files → downloads. 
+ +## Environment +- **Server**: playground (port 3003) +- **Schema**: `dev` +- **TMDB_API_KEY**: `e9cde52197f6f8df4d9db99da93db1fb` +- **Build**: `momentry_playground` (debug, 0 errors) + +## Pre-cleanup +Unregistered old files + deleted output files: +```bash +POST /api/v1/unregister {"file_uuid": "3abeee81..."} +POST /api/v1/unregister {"file_uuid": "23b1c872..."} +``` + +## Step 1: Register + +| File | UUID | Result | +|------|------|--------| +| Charade main | `bd80fec92b0b6963d177a2c55bf713e2` | ✅ Registered (already_exists due to content_hash match) | +| Charade YouTube | `a6fb22eebefaef17e62af874997c5944` | ✅ Fresh registration | + +Register phase completed: probe → CUT → scene classification. + +## Step 2: Trigger Processing + +```bash +POST /api/v1/file/:uuid/process {} +``` + +Jobs created: +- Main: job_id=167, status=PENDING +- YouTube: job_id=168, status=PENDING + +Worker blocked by schema issue: `processor_results` missing `retry_count` column + `jsonb_set(text, text, jsonb)` signature mismatch. Fixed `retry_count` via ALTER TABLE. + +## Step 3: TMDb Prefetch (requires pipeline completion first) + +```bash +POST /api/v1/agents/tmdb/prefetch +``` + +## Step 4: TMDb Probe + +```bash +POST /api/v1/file/:uuid/tmdb-probe +``` + +## Known Issues +1. `jsonb_set(jsonb, text, jsonb)` → should be `jsonb_set(jsonb, text[], jsonb)` — pre-existing worker bug +2. `processor_results.retry_count` column missing — fixed via ALTER TABLE +3. 
Worker requires running as separate process: `./target/debug/momentry_playground worker` + +## Endpoint Changes in This Test +| Endpoint | Status | +|----------|--------| +| `GET /api/v1/stats/ingest` | ❌ Removed (stats moved to files/scan + identities) | +| `GET /api/v1/files/scan` | ➕ Added `total_chunks`, `searchable_chunks`, `pending_videos` | +| `GET /api/v1/identities` | ➕ Added `total_identities`, `tmdb_identities`, `auto_identities` | +| `POST /api/v1/agents/tmdb/prefetch` | ✅ Writes identity files directly | +| `POST /api/v1/file/:uuid/tmdb-probe` | ✅ Upserts from disk identity files | +| `GET /api/v1/identity/:uuid/json` | ✅ Download identity JSON | +| `GET /api/v1/file/:uuid/json/:processor` | ✅ Download processor JSON | +| `POST /api/v1/agents/identity/match-from-photo` | 🆕 New | +| `POST /api/v1/agents/identity/match-from-trace` | 🆕 New | diff --git a/docs_v1.0/REFERENCE/FACE_BINDING_STATES.md b/docs_v1.0/REFERENCE/FACE_BINDING_STATES.md new file mode 100644 index 0000000..f8bee1e --- /dev/null +++ b/docs_v1.0/REFERENCE/FACE_BINDING_STATES.md @@ -0,0 +1,375 @@ +# Face Binding States — Data Model Reference + +**Version**: 1.0.0 +**Date**: 2026-05-25 +**Related**: `GET /api/v1/file/:file_uuid/faces`, `identities`, `strangers`, `face_detections` + +--- + +## Glossary + +| Term | Definition | +|------|------------| +| **face detection** | A single face bounding box detected in one video frame. Stored in `face_detections` table. | +| **trace** | A sequence of face detections belonging to the same person across consecutive frames. Assigned by the face tracker. `trace_id` groups multiple face detections. | +| **identity** | A known person with a name. Sources: TMDb (movie stars), user-defined (manual entry). Stored in `identities` table with `source='tmdb'` or `source='user_defined'`. | +| **stranger** | An unknown person detected but not matched to any known identity. Created automatically for unmatched traces. Stored in `strangers` table. 
| +| **binding** | The association between a face detection and either an identity or a stranger. Represented by `identity_id` or `stranger_id` FK in `face_detections`. | +| **TMDb** | The Movie Database. Source of celebrity identity seeds with `face_embedding` for matching. | +| **auto identity** | Legacy term for identities created from `face_clustered.json` analysis. Now migrated to `strangers` table as reference records. | +| **dangling** | A face detection whose `identity_id` points to a deleted identity (e.g., auto identities removed during migration). | +| **unbound** | A face detection with no binding at all — `identity_id IS NULL AND stranger_id IS NULL`. | +| **PK** | Primary Key. A unique identifier for each row in a table. Example: `identities.id`, `strangers.id`, `face_detections.id`. | +| **FK** | Foreign Key. A column that references the PK of another table, creating a relationship. Example: `face_detections.identity_id` → `identities.id`, `face_detections.stranger_id` → `strangers.id`. FK ensures referential integrity — a face cannot point to a non-existent identity. | + +--- + +## Three Core Tables + +### ER Diagram + +``` +┌─────────────────────┐ ┌─────────────────────┐ +│ identities │ │ strangers │ +│─────────────────────│ │─────────────────────│ +│ id (PK) │ │ id (PK) │ +│ uuid │ │ file_uuid │ +│ name │ │ trace_id │ +│ source │ │ metadata │ +│ tmdb_id │ │ created_at │ +│ face_embedding │ │ │ +│ metadata │ │ UNIQUE(file_uuid, │ +│ status │ │ trace_id) │ +│ ... 
│ │ │ +└─────────┬───────────┘ └─────────┬───────────┘ + │ │ + │ FK │ FK + │ (ON DELETE SET NULL) │ (ON DELETE SET NULL) + │ │ + ▼ ▼ +┌─────────────────────────────────────────────────────┐ +│ face_detections │ +│─────────────────────────────────────────────────────│ +│ id (PK) │ +│ file_uuid — Video file identifier │ +│ frame_number — Frame where face was detected│ +│ timestamp_secs — Frame number / fps │ +│ trace_id — Face tracking ID │ +│ face_id — Format: `{frame}_{idx}` │ +│ identity_id (FK) — → identities.id │ +│ stranger_id (FK) — → strangers.id │ +│ x, y, width, height — Bounding box │ +│ confidence — Detection confidence (0–1) │ +│ embedding — Face embedding vector │ +│ metadata — JSON metadata │ +└─────────────────────────────────────────────────────┘ +``` + +### Table Summary + +| Table | Role | Record Count (public) | Primary Key | +|-------|------|----------------------|-------------| +| `identities` | Known persons (TMDb, user-defined) | 70 | `id`, `uuid` | +| `strangers` | Unknown persons (unmatched traces) | 0–N per file | `id`, `(file_uuid, trace_id)` | +| `face_detections` | Individual face detections | 70691 per file | `id` | + +### Key Columns in `face_detections` + +| Column | Type | Purpose | +|--------|------|---------| +| `identity_id` | INTEGER FK | Points to `identities.id` if matched to known person | +| `stranger_id` | INTEGER FK | Points to `strangers.id` if unmatched trace | +| `trace_id` | INTEGER | Groups faces belonging to same person across frames | + +**Design Rule**: `identity_id` and `stranger_id` are mutually exclusive in normal operation. A face should have only one binding. 
+ +--- + +## Four Binding States + +### State Definitions + +| # | State | `binding` JSON | SQL Condition | Meaning | +|---|-------|----------------|---------------|---------| +| 1 | **identity** | `{"identity_id": 9, "identity_uuid": "...", "identity_name": "Audrey Hepburn"}` | `identity_id IN (SELECT id FROM identities)` | Face matched to a known TMDb or user-defined identity | +| 2 | **stranger** | `{"stranger_id": 845, "metadata": {}}` | `stranger_id IS NOT NULL` | Face belongs to an unmatched trace (unknown person) | +| 3 | **dangling** | `{"old_identity_id": 18052}` | `identity_id IS NOT NULL AND NOT EXISTS (SELECT 1 FROM identities WHERE id = face_detections.identity_id)` | Face was bound to an identity that has been deleted (orphaned reference) | +| 4 | **unbound** | `null` | `identity_id IS NULL AND stranger_id IS NULL` | Face has no binding at all | + +### State Detection Logic (Rust) + +```rust +let binding = if let (Some(iid), Some(iuuid), Some(iname)) = + (identity_id, identity_uuid, identity_name) +{ + FaceBinding::Identity { identity_id: iid, identity_uuid: iuuid, identity_name: iname } +} else if let Some(sid) = stranger_id { + FaceBinding::Stranger { stranger_id: sid, metadata: stranger_metadata } +} else if let Some(iid) = identity_id { + FaceBinding::Dangling { old_identity_id: iid } +} else { + FaceBinding::Unbound +}; +``` + +--- + +## Lifecycle Flow + +### Processing Pipeline + +``` + Video Registration + │ + ▼ + Face Detection + (face_detections created) + │ + ▼ + Face Tracking + (trace_id assigned) + │ + ▼ + ┌────────────────┐ + │ Identity Agent │ + │ Face Matching │ + └────────────────┘ + │ + ┌─────────┴─────────┐ + │ │ + ▼ ▼ + ┌──────────┐ ┌──────────┐ + │ MATCHED │ │ UNMATCHED│ + │ to TMDb │ │ trace │ + └─────┬────┘ └────┬─────┘ + │ │ + │ │ + ▼ ▼ + identity_id=X stranger_id=S + │ │ + │ │ + ▼ ▼ + ┌─────────┐ ┌─────────┐ + │ IDENTITY│ │ STRANGER│ + │ state │ │ state │ + └─────────┘ └─────────┘ +``` + +### User Operations + +``` +┌─────────┐ 
bind ┌─────────┐ +│ STRANGER│──────────────▶│ IDENTITY│ +└────┬────┘ └────┬────┘ + │ │ + │ unbind │ + │ (if stranger_id │ + │ preserved) │ + │ │ + ▼ ▼ +┌─────────┐ ┌─────────┐ +│ STRANGER│◀─────────────│ UNBOUND │ +│ (rollback) │ (if no │ +└─────────┘ │ stranger)│ + └─────────┘ +``` + +### Migration Effect + +``` +┌─────────────────────┐ +│ auto identities │ +│ (source='auto') │ +│ 943 records │ +└─────────┬───────────┘ + │ + │ DELETE + │ + ▼ +┌─────────────────────┐ +│ face_detections │ +│ identity_id=18052 │ +│ (points to deleted) │ +└─────────┬───────────┘ + │ + │ Cleanup SQL + │ SET identity_id=NULL + │ + ▼ +┌─────────────────────┐ +│ DANGLING → UNBOUND │ +│ 18641 faces cleaned │ +└─────────────────────┘ +``` + +--- + +## SQL Query Examples + +### Count by State + +```sql +SELECT + COUNT(*) FILTER (WHERE identity_id IN (SELECT id FROM identities)) AS identity, + COUNT(*) FILTER (WHERE stranger_id IS NOT NULL) AS stranger, + COUNT(*) FILTER (WHERE identity_id IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM identities WHERE id = face_detections.identity_id)) AS dangling, + COUNT(*) FILTER (WHERE identity_id IS NULL AND stranger_id IS NULL) AS unbound +FROM face_detections +WHERE file_uuid = 'aeed71342a899fe4b4c57b7d41bcb692'; +``` + +### Filter by State + +```sql +-- Identity +SELECT * FROM face_detections fd +WHERE fd.identity_id IN (SELECT id FROM identities); + +-- Stranger +SELECT * FROM face_detections WHERE stranger_id IS NOT NULL; + +-- Dangling +SELECT * FROM face_detections fd +WHERE fd.identity_id IS NOT NULL + AND NOT EXISTS (SELECT 1 FROM identities WHERE id = fd.identity_id); + +-- Unbound +SELECT * FROM face_detections +WHERE identity_id IS NULL AND stranger_id IS NULL; +``` + +--- + +## bind/unbind Behavior + +### Current Implementation (stranger_id cleared on bind) + +| Operation | SQL Effect | Result | +|-----------|------------|--------| +| `bind_face_to_identity` | `SET identity_id=X, stranger_id=NULL` | Stranger info lost | +| 
`bind_trace_to_identity` | `SET identity_id=X, stranger_id=NULL` | Stranger info lost | +| `merge_identity` | `SET identity_id=X, stranger_id=NULL` | Stranger info lost | +| `unbind_face` | `SET identity_id=NULL` | Becomes unbound (cannot rollback) | + +**Problem**: After bind → unbind, face becomes unbound instead of returning to stranger. + +### Proposed Fix (preserve stranger_id on bind) + +| Operation | SQL Effect | Result | +|-----------|------------|--------| +| `bind_face_to_identity` | `SET identity_id=X` (keep stranger_id) | Stranger info preserved | +| `bind_trace_to_identity` | `SET identity_id=X` (keep stranger_id) | Stranger info preserved | +| `merge_identity` | `SET identity_id=X` (keep stranger_id) | Stranger info preserved | +| `unbind_face` | `SET identity_id=NULL` | Returns to stranger (if stranger_id exists) | + +**Change Required**: Remove `, stranger_id = NULL` from three UPDATE queries in `identity_binding.rs`. + +--- + +## Why Dangling Happens + +Dangling occurs when `face_detections.identity_id` points to a deleted row in `identities` table. + +### Root Cause + +At the time of migration, `face_detections.identity_id` **had no FK constraint** to `identities.id`. This allowed: + +1. `DELETE FROM identities WHERE source='auto'` succeeded without error +2. `face_detections.identity_id` values remained unchanged (pointing to deleted IDs) +3. No `ON DELETE SET NULL` triggered because no FK existed + +### Prevention + +With FK constraint in place: +```sql +ALTER TABLE face_detections +ADD CONSTRAINT fk_face_detections_identity +FOREIGN KEY (identity_id) REFERENCES identities(id) ON DELETE SET NULL; +``` + +Deleting an identity would automatically set `face_detections.identity_id = NULL` (no dangling). 
+ +### Current Status + +After migration cleanup: +- Public schema: FK `fk_face_detections_stranger` exists (on `stranger_id`) +- Public schema: FK `fk_face_detections_identity` **does not exist** (historical reason) +- Dev schema: Same state as public + +--- + +## API Endpoint + +### `GET /api/v1/file/:file_uuid/faces` + +**Purpose**: List all face detections in a file with binding state. + +**Query Parameters**: + +| Param | Type | Default | Description | +|-------|------|---------|-------------| +| `page` | int | 1 | Page number | +| `page_size` | int | 50 | Items per page | +| `binding` | string | — | Filter: `identity`, `stranger`, `dangling`, `unbound` | +| `trace_id` | int | — | Filter by trace ID | +| `min_confidence` | float | — | Minimum confidence (0.0–1.0) | +| `start_frame` | int | — | Start frame (inclusive) | +| `end_frame` | int | — | End frame (inclusive) | + +**Response Example**: + +```json +{ + "success": true, + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "total": 52244, + "page": 1, + "page_size": 2, + "data": [ + { + "id": 661508, + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "frame_number": 21297, + "timestamp_secs": 851.88, + "face_id": "21297_0", + "trace_id": 485, + "bbox": { "x": 1072, "y": 390, "width": 56, "height": 56 }, + "confidence": 0.6114, + "binding": { + "identity_id": 9, + "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce", + "identity_name": "Audrey Hepburn" + } + } + ] +} +``` + +--- + +## Migration Reference + +### `migrate_strangers_table.sql` (Summary) + +1. `CREATE TABLE strangers` +2. Insert unmatched traces → strangers +3. Preserve auto identity metadata → strangers (NULL file_uuid/trace_id) +4. Update `face_detections.stranger_id` → FK +5. Add FK constraint +6. Delete legacy `identity_bindings` for auto identities +7. Delete `identities` where `source='auto'` +8. 
Cleanup dangling `identity_id` (set to NULL) + +### Cleanup SQL (Dangling) + +```sql +UPDATE face_detections fd +SET identity_id = NULL +WHERE NOT EXISTS (SELECT 1 FROM identities i WHERE i.id = fd.identity_id) + AND fd.identity_id IS NOT NULL; +``` + +--- + +*Updated: 2026-05-25* \ No newline at end of file diff --git a/docs_v1.0/doc-wasm b/docs_v1.0/doc-wasm new file mode 120000 index 0000000..f633f2c --- /dev/null +++ b/docs_v1.0/doc-wasm @@ -0,0 +1 @@ +doc_wasm \ No newline at end of file diff --git a/docs_v1.0/doc/06_search.html b/docs_v1.0/doc/06_search.html index fbd9cf7..86f5f8b 100644 --- a/docs_v1.0/doc/06_search.html +++ b/docs_v1.0/doc/06_search.html @@ -38,7 +38,7 @@ a { color: #0066cc; }

Search APIs

POST /api/v1/search/smart

Auth: Required -Scope: file-level

+Scope: global / file-level

Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector story_parent and llm_parent chunks by cosine similarity.

Request Parameters

@@ -53,13 +53,6 @@ a { color: #0066cc; } - - - - - - - @@ -67,6 +60,13 @@ a { color: #0066cc; } + + + + + + + @@ -89,7 +89,14 @@ a { color: #0066cc; }
file_uuidstringYesFile UUID to search within
query string YesSearch text
file_uuidstringNoFile UUID to search within. If omitted, searches all files (global search)
limit integer No
-

Example

+

Example (Global Search)

+
curl -s -X POST "$API/api/v1/search/smart" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $JWT" \
+  -d '{"query": "Audrey Hepburn"}'
+
+ +

Example (File-specific Search)

curl -s -X POST "$API/api/v1/search/smart" \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $JWT" \
@@ -101,6 +108,7 @@ a { color: #0066cc; }
   "query": "Audrey Hepburn",
   "results": [
     {
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
       "parent_id": 1087822,
       "scene_order": 1087822,
       "start_frame": 104438,
@@ -118,10 +126,26 @@ a { color: #0066cc; }
 }
 
+ + + + + + + + + + + + + + + +
FieldTypeDescription
results[].file_uuidstringFile UUID where result was found

POST /api/v1/search/universal

Auth: Required -Scope: file-level

+Scope: global / file-level

Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL tsvector.

Request Parameters

@@ -147,7 +171,7 @@ a { color: #0066cc; } - + @@ -179,7 +203,14 @@ a { color: #0066cc; }
string No Restrict to specific fileRestrict to specific file. If omitted, searches all files (global search)
types
-

Example

+

Example (Global Search)

+
curl -s -X POST "$API/api/v1/search/universal" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $JWT" \
+  -d '{"query": "Cary Grant"}'
+
+ +

Example (File-specific Search)

curl -s -X POST "$API/api/v1/search/universal" \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer $JWT" \
@@ -191,6 +222,7 @@ a { color: #0066cc; }
   "results": [
     {
       "type": "chunk",
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
       "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2",
       "chunk_type": "story_child",
       "start_frame": 5103,
@@ -199,6 +231,25 @@ a { color: #0066cc; }
       "end_time": 213.64,
       "text": "[213s-214s] Cary Grant: \"Olá!\"",
       "score": 0.9
+    },
+    {
+      "type": "frame",
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
+      "frame_number": 5105,
+      "timestamp": 212.72,
+      "score": 0.7,
+      "objects": null,
+      "ocr_texts": null,
+      "faces": null
+    },
+    {
+      "type": "person",
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
+      "identity_id": 12,
+      "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+      "name": "Cary Grant",
+      "appearance_count": 542,
+      "score": 0.95
     }
   ],
   "total": 20,
@@ -206,16 +257,140 @@ a { color: #0066cc; }
 }
 
+ + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].typestringResult type: chunk, frame, or person
results[].file_uuidstringFile UUID where result was found (all types)

POST /api/v1/search/frames

Auth: Required -Scope: file-level

+Scope: global / file-level

Search face detection frames by identity name or trace ID.


-

POST /api/v1/search/identity_text

+

GET /api/v1/search/identity_text

Auth: Required -Scope: file-level

-

Search text chunks spoken by a specific identity.

+Scope: global / file-level

+

Search text chunks → find associated identities. Returns chunks where face detections overlap with text content.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
qstringYesSearch text (ILIKE match)
file_uuidstringNoRestrict to specific file. If omitted, searches all files (global search)
limitintegerNo50Max results
pageintegerNo1Page number
page_sizeintegerNo50Items per page
+

Example (Global Search)

+
curl -s "$API/api/v1/search/identity_text?q=love" -H "X-API-Key: $KEY"
+
+ +

Example (File-specific Search)

+
curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=love" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "total": 5,
+  "results": [
+    {
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
+      "chunk_id": "llm_parent_..._256_270",
+      "start_time": 256.256,
+      "end_time": 270.228,
+      "text_content": "...lack of affection...",
+      "identity_id": 9,
+      "identity_name": "Audrey Hepburn",
+      "identity_source": "tmdb",
+      "trace_id": 94
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].file_uuidstringFile UUID where chunk was found
results[].identity_idintegerIdentity ID if face was detected
results[].trace_idintegerFace trace ID

Visual Search

@@ -282,7 +457,7 @@ a { color: #0066cc; }

-

Updated: 2026-05-19 12:49:24

+

Updated: 2026-05-27 — Added global search support for smart, universal, identity_text APIs

\ No newline at end of file diff --git a/docs_v1.0/doc/07_identity.html b/docs_v1.0/doc/07_identity.html index 2bedf0f..2180a55 100644 --- a/docs_v1.0/doc/07_identity.html +++ b/docs_v1.0/doc/07_identity.html @@ -218,6 +218,13 @@ curl -s -X +

History & Undo/Redo

+

Every PATCH records a before/after snapshot in the operation history. Up to 256 records per identity are kept (oldest auto-deleted). See 14_identity_history.md for:

+
    +
  • POST /api/v1/identity/:identity_uuid/undo — Revert PATCH changes
  • +
  • POST /api/v1/identity/:identity_uuid/redo — Reapply undone changes
  • +
  • GET /api/v1/identity/:identity_uuid/history — Query operation log
  • +

GET /api/v1/identity/:identity_uuid/files

Auth: Required @@ -227,13 +234,28 @@ curl -s -X

curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY"
 
-
-

GET /api/v1/identity/:identity_uuid/faces

-

Auth: Required -Scope: identity-level

-

Get all face detection records associated with this identity.

-

Example

-
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
+

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "total": 3,
+  "page": 1,
+  "page_size": 20,
+  "data": [
+    {
+      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+      "file_name": "charade.mp4",
+      "file_path": "/path/to/charade.mp4",
+      "status": "done",
+      "face_count": 16335,
+      "speaker_count": 0,
+      "first_appearance": 206.76,
+      "last_appearance": 6756.68,
+      "confidence": 0.8088
+    }
+  ]
+}
 
@@ -246,28 +268,268 @@ curl -s -X - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
file_uuidnamestringIdentity display name
data[].file_uuidstringFile identifier
data[].file_namestringFile name
data[].face_countintegerNumber of face detections in this file
data[].first_appearancefloatFirst appearance time in seconds
data[].last_appearancefloatLast appearance time in seconds
data[].confidencefloatAverage confidence (0.0–1.0)
+
+

GET /api/v1/identity/:identity_uuid/faces

+

Auth: Required +Scope: identity-level

+

Get all face detection records associated with this identity.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "total": 963,
+  "page": 1,
+  "page_size": 50,
+  "data": [
+    {
+      "id": 3902,
+      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+      "frame_number": 37974,
+      "timestamp_secs": 1518.96,
+      "face_id": "37974_1",
+      "confidence": 0.8197,
+      "bbox": { "x": 1097, "y": 310, "width": 177, "height": 177 }
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + - + - + - + - +
FieldTypeDescription
namestringIdentity display name
data[].file_uuid string File where face was detected
frame_numberdata[].frame_number integer Frame number of detection
face_iddata[].face_id stringFace ID (format: face_{frame_number})Face ID (format: {frame}_{idx})
confidencedata[].confidence float Detection confidence

+

GET /api/v1/file/:file_uuid/faces

+

Auth: Required +Scope: file-level

+

List all face detections in a file with binding status. Each face is in one of four binding states:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Statebinding responseMeaning
identity{"identity_id": 9, "identity_uuid": "...", "identity_name": "Audrey Hepburn"}Face matched to a known TMDb or user-defined identity
stranger{"stranger_id": 845, "metadata": {}}Face matched to an unknown person (trace not matched to any known identity)
dangling{"old_identity_id": 18052}Face was previously bound to an auto-generated identity that has been deleted (orphaned reference)
unboundnullFace has no binding at all (identity_id and stranger_id are both NULL)
+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParamTypeDefaultDescription
pageint1Page number
page_sizeint50Items per page
bindingstringFilter by state: identity, stranger, dangling, or unbound
trace_idintFilter by trace ID
min_confidencefloatMinimum detection confidence (0.0–1.0)
start_frameintStarting frame number (inclusive)
end_frameintEnding frame number (inclusive)
+

Example

+
curl -s "$API/api/v1/file/aeed71342a899fe4b4c57b7d41bcb692/faces?page=1&page_size=2&binding=identity" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+  "total": 52244,
+  "page": 1,
+  "page_size": 2,
+  "data": [
+    {
+      "id": 661508,
+      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+      "frame_number": 21297,
+      "timestamp_secs": 851.88,
+      "face_id": "21297_0",
+      "trace_id": 485,
+      "bbox": { "x": 1072, "y": 390, "width": 56, "height": 56 },
+      "confidence": 0.6114,
+      "binding": {
+        "identity_id": 9,
+        "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce",
+        "identity_name": "Audrey Hepburn"
+      }
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
totalintNumber of faces matching the filter (not total in file)
data[].trace_idintFace tracking trace ID
data[].timestamp_secsfloatTimestamp in seconds (frame_number / fps)
data[].bboxobjectBounding box {x, y, width, height}
data[].bindingobject/nullOne of four binding states (see table above)
+

GET /api/v1/identity/:identity_uuid/chunks

Auth: Required Scope: identity-level

@@ -280,6 +542,10 @@ curl -s -X
{
   "success": true,
   "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "total": 20,
+  "page": 1,
+  "page_size": 20,
   "data": [
     {
       "id": 0,
@@ -307,42 +573,47 @@ curl -s -X 
 
 
 
-file_uuid
+name
+string
+Identity display name
+
+
+data[].file_uuid
 string
 File identifier
 
 
-chunk_id
+data[].chunk_id
 string
 Sentence chunk identifier
 
 
-start_frame
+data[].start_frame
 integer
 Frame-accurate start position
 
 
-end_frame
+data[].end_frame
 integer
 Frame-accurate end position
 
 
-fps
+data[].fps
 float
 Frames per second
 
 
-start_time
+data[].start_time
 float
 Start time in seconds
 
 
-end_time
+data[].end_time
 float
 End time in seconds
 
 
-text_content
+data[].text_content
 string
 Spoken text content
 
@@ -378,6 +649,11 @@ curl -s -X 
 
 
 
+

Side Effects

+
    +
  • 清除該 face detection row 的 stranger_id(設為 NULL)
  • +
  • 不影響 identities 表中原有的 stranger auto-identity 記錄
  • +

Example

curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \
   -H "X-API-Key: $KEY" \
@@ -386,17 +662,134 @@ curl -s -X 
 

-

POST /api/v1/identity/:identity_uuid/unbind

+

POST /api/v1/identity/:identity_uuid/bind/trace

Auth: Required Scope: identity-level

-

Unbind a face detection from an identity. Removes the identity association from the face record.

-
-

GET /api/v1/identities/search

-

Auth: Required -Scope: identity-level

-

Search identities by name (ILIKE search). Returns matching identity records.

+

Bind all face detections of a trace to an identity. Updates all rows in face_detections with the matching file_uuid and trace_id.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile where trace exists
trace_idintegerYesTrace ID (from face_detections.trace_id)
+

Side Effects

+
    +
  • 清除該 trace 所有 face detection rows 的 stranger_id(設為 NULL)
  • +
  • 不影響 identities 表中原有的 stranger auto-identity 記錄
  • +

Example

-
curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY"
+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/trace" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "trace_id": 919}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Bound trace 919 of aeed71342... to Cary Grant",
+  "data": { "rows_affected": 53 }
+}
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404Identity not found
500Database error
+
+

GET /api/v1/identity/:identity_uuid/traces

+

Auth: Required +Scope: identity-level

+

Get paginated face traces (continuous tracking segments) associated with this identity across all files.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number
page_sizeintegerNo20Items per page
+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/traces?page=1&page_size=3" \
+  -H "X-API-Key: $KEY" | jq '{total, total_faces, traces}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "total": 18,
+  "page": 1,
+  "page_size": 3,
+  "total_faces": 542,
+  "traces": [
+    {
+      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+      "trace_id": 906,
+      "frame_count": 52,
+      "first_frame": 37974,
+      "last_frame": 38127,
+      "first_sec": 1519.0,
+      "last_sec": 1525.1,
+      "avg_confidence": 0.8254
+    }
+  ]
+}
 
@@ -409,24 +802,578 @@ curl -s -X + + + + + + + + + + - + - - - - - - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
successboolAlways true
identity_uuidstringIdentity UUID
name stringIdentity nameIdentity display name
sourcestringIdentity source
tmdb_idtotal integerTMDb ID (if source = tmdb)Total number of traces (across all pages)
total_facesintegerSum of all face detections in returned traces
traces[].file_uuidstringFile where trace exists
traces[].trace_idintegerTrace tracking ID
traces[].frame_countintegerNumber of frames in this trace
traces[].first_frameintegerStart frame number
traces[].last_frameintegerEnd frame number
traces[].first_secfloatStart time in seconds
traces[].last_secfloatEnd time in seconds
traces[].avg_confidencefloatAverage detection confidence (0.0–1.0)
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404Identity not found
500Database error
+
+

POST /api/v1/identity/:identity_uuid/unbind

+

Auth: Required +Scope: identity-level

+

Unbind a face detection from an identity. Removes the identity association from the face record.

+

Side Effects

+
    +
  • 只清除 identity_id(設為 NULL),不會恢復 stranger_id
  • +
  • 被 unbind 的 face 不會自動成為 stranger
  • +
  • 要重新標記為 stranger 需重新跑 Agent API(identity/analyze)
  • +
+
+

POST /api/v1/identity/:identity_uuid/mergeinto

+

Auth: Required +Scope: identity-level

+

Transfer all face bindings from this identity to another identity, then optionally delete or mark the source as merged.

+

Two Merge Cases

+ + + + + + + + + + + + + + + + + + + + +
CaseDescriptionUndo Support
stranger → identityMerge an auto-generated stranger identity into a known identity (TMDb or user-defined)✅ 24hr undo
identity A → identity BMerge two known identities (e.g., duplicate entries)✅ 24hr undo
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
into_uuidstringYesTarget identity UUID to merge into
keep_historyboolNotrueKeep source identity record with status='merged' (true) or delete it (false)
+

Side Effects

+
    +
  • 轉移所有 face_detections.identity_id 到目標 identity
  • +
  • 同時清除所有被轉移 rows 的 stranger_id
  • +
  • 將 source name 加入 target aliases (with source: "merge" tag)
  • +
  • 將 source aliases 加入 target aliases (if not already present)
  • +
  • 將 source metadata fields 加入 target metadata (if not already present)
  • +
  • keep_history: true(預設):source identity 設為 status='merged',保留記錄
  • +
  • keep_history: false刪除 source identity 及其 identity JSON 檔案
  • +
  • 記錄 merge history 到 MongoDB(支援 undo)
  • +
+

Example

+
curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"into_uuid": "'"$TARGET_UUID"'", "keep_history": true}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Merged 'stranger_13894' into 'Louis Viret' (52 faces transferred, history kept)",
+  "data": {
+    "merge_id": "550e8400-e29b-41d4-a716-446655440000",
+    "faces_transferred": 52,
+    "aliases_added": 1,
+    "metadata_fields_added": 2
+  }
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
merge_idstringUnique merge operation ID (for undo)
faces_transferredintegerNumber of face detections transferred
aliases_addedintegerNumber of aliases added to target
metadata_fields_addedintegerNumber of metadata fields added to target
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404Source or target identity not found
500Database error
+
+

POST /api/v1/identity/merge/:merge_id/undo

+

Auth: Required +Scope: identity-level

+

Undo a merge operation within 24 hours. Restores the source identity and reverts face bindings.

+

Undo Behavior

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ActionDescription
Restore source identityIf keep_history=true: restore status to confirmed
If keep_history=false: recreate identity from MongoDB snapshot
Restore facesTransfer faces back to source identity
Remove aliases from targetRemove aliases with source: "merge" tag
Remove metadata fields from targetRemove fields that were added from source
Preserve manual changesKeep aliases/metadata manually added after merge
+

Example

+
curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/undo" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Undo merge completed: 'stranger_13894' restored, 52 faces reverted",
+  "data": {
+    "source_identity_restored": {
+      "uuid": "a9a90105...",
+      "name": "stranger_13894",
+      "status": "confirmed"
+    },
+    "faces_reverted": 52,
+    "aliases_removed_from_target": 1,
+    "metadata_fields_removed_from_target": 2
+  }
+}
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Undo deadline expired (>24hr) or already undone
404Merge record not found
500Database error
+
+

GET /api/v1/identity/merge/history

+

Auth: Required +Scope: identity-level

+

Query merge history records from MongoDB.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
source_uuidstringNoFilter by source identity UUID
target_uuidstringNoFilter by target identity UUID
merge_idstringNoFilter by specific merge ID
undoneboolNoFilter by undone status
pageintNo1Page number
page_sizeintNo20Items per page
+

Example

+
curl -s "$API/api/v1/identity/merge/history?page=1&page_size=10" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "total": 5,
+  "page": 1,
+  "page_size": 10,
+  "results": [
+    {
+      "merge_id": "550e8400-e29b-41d4-a716-446655440000",
+      "source_name": "stranger_13894",
+      "target_name": "Louis Viret",
+      "faces_transferred": 52,
+      "merged_at": "2026-05-27T10:00:00Z",
+      "undo_deadline": "2026-05-28T10:00:00Z",
+      "undone": false,
+      "undo_expired": false
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
merge_idstringUnique merge operation ID
source_namestringSource identity name
target_namestringTarget identity name
faces_transferredintegerNumber of faces transferred
merged_atdatetimeWhen merge occurred
undo_deadlinedatetime24hr deadline for undo
undoneboolWhether merge was undone
undo_expiredboolWhether undo deadline passed
+
+

GET /api/v1/identities/search

+

Auth: Required +Scope: global / file-level

+

Search identity name → find associated chunks. Searches identity name and aliases, returns identities with their associated text chunks.

+

Query Parameters

+ + + + + + + + + + + + + + + + + - + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
qstringYesSearch text (ILIKE match on name and aliases)
file_uuid stringAssociated fileNoRestrict to specific file. If omitted, searches all files (global search)
limitintegerNo50Max results
+

Example (Global Search)

+
curl -s "$API/api/v1/identities/search?q=Audrey" -H "X-API-Key: $KEY"
+
+ +

Example (File-specific Search)

+
curl -s "$API/api/v1/identities/search?q=Audrey&file_uuid=$FILE_UUID" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "total": 5,
+  "results": [
+    {
+      "identity_id": 9,
+      "name": "Audrey Hepburn",
+      "source": "tmdb",
+      "tmdb_id": 1932,
+      "file_uuid": "a6fb22eebefaef17e62af874997c5944",
+      "trace_id": 41,
+      "chunk_id": "llm_parent_..._204_207",
+      "start_time": 204.162,
+      "text_content": "...confrontation..."
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].identity_idintegerIdentity ID
results[].namestringIdentity name
results[].sourcestringIdentity source (tmdb, user_defined, etc.)
results[].tmdb_idintegerTMDb person ID (if source = tmdb)
results[].file_uuidstringFile where identity appears
results[].trace_idintegerFace trace ID
results[].chunk_idstringAssociated chunk ID
results[].start_timefloatChunk start time
results[].text_contentstringChunk text content
@@ -699,7 +1646,7 @@ curl -s -X

This replaces the entire aliases array. To add to existing aliases, include all existing entries in the request.


-

*Updated: 2026-05-22

+

Updated: 2026-05-25 — Added GET /api/v1/file/:file_uuid/faces with 4 binding states, filters, strangers table split

\ No newline at end of file diff --git a/docs_v1.0/doc/08_media.html b/docs_v1.0/doc/08_media.html index fe451e0..99543ef 100644 --- a/docs_v1.0/doc/08_media.html +++ b/docs_v1.0/doc/08_media.html @@ -294,6 +294,7 @@ curl -s "

GET /api/v1/file/:file_uuid/thumbnail

Extract a single frame from a video as JPEG image. Uses FFmpeg select filter.

+

When frame is omitted, the system automatically selects the best representative frame using the TKG bridge (see algorithm below).

Auth: Required Scope: file-level

Query Parameters

@@ -311,9 +312,9 @@ curl -s " frame integer -Yes -— -Zero-based frame number to extract +No +auto-detect +Zero-based frame number to extract. Omit for auto-detect. x @@ -346,8 +347,23 @@ curl -s "

All four crop params (x, y, w, h) must be provided together or omitted.

-

Example

-
# Extract frame 1000 (full frame)
+

Auto-detect Algorithm

+

When frame is not provided, the endpoint finds the best frame using this fallback chain:

+
    +
  1. Main characters: find the two identities with the most face detections (TMDb source)
  2. +
  3. Mutual gaze: if their face traces have a TKG CO_OCCURS_WITH edge with mutual_gaze=true, take first_frame
  4. +
  5. Co-occurrence: fallback to the first frame where both identities appear together
  6. +
  7. Single identity: if only one main identity exists, take its highest-quality face frame
  8. +
  9. Any identity: fallback to the best-quality face frame across all identities
  10. +
  11. Error: if no face exists, returns 404
  12. +
+

The selected frame is constrained to the first half of the video (total_frames / 2).

+

Examples

+
# Auto-detect best representative frame
+curl -s "$API/api/v1/file/$FILE_UUID/thumbnail" \
+  -H "X-API-Key: $KEY" -o representative.jpg
+
+# Extract frame 1000 (full frame)
 curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000" \
   -H "Authorization: Bearer $JWT" -o frame_1000.jpg
 
@@ -359,10 +375,185 @@ curl -s "Response
 
  • 200: image/jpeg binary data
  • -
  • 404: File not found
  • +
  • 404: File not found / No faces in file (auto-detect)
  • 500: FFmpeg error (e.g., frame number exceeds video duration)
-

GET /api/v1/file/:file_uuid/clip

+

Technical Details

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
BackendFFmpeg (ffmpeg-full)
Filterselect=eq(n\,FRAME) to select frame, optional crop=W:H:X:Y
OutputSingle JPEG via pipe (image2pipe, mjpeg codec)
CacheCache-Control: public, max-age=86400 (24h)
Frame numberZero-based (frame=0 = first frame of video)
+
+

GET /api/v1/file/:file_uuid/representative-frame

+

Return JSON metadata about the best representative frame for the video. Uses the same auto-detect algorithm as GET /thumbnail (without crop support).

+

Auth: Required +Scope: file-level

+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/representative-frame" \
+  -H "X-API-Key: $KEY" | jq '.'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
+  "frame_number": 38165,
+  "timestamp_secs": 1526.6,
+  "face_quality": 37292.97,
+  "main_identities": [
+    {
+      "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce",
+      "name": "Audrey Hepburn",
+      "face_count": 16456
+    },
+    {
+      "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5",
+      "name": "Cary Grant",
+      "face_count": 10643
+    }
+  ],
+  "traces": [
+    {
+      "trace_id": 919,
+      "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5",
+      "name": "Cary Grant",
+      "x": 764,
+      "y": 237,
+      "width": 199,
+      "height": 199,
+      "confidence": 0.8426
+    },
+    {
+      "trace_id": 920,
+      "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce",
+      "name": "Audrey Hepburn",
+      "x": 1143,
+      "y": 312,
+      "width": 215,
+      "height": 215,
+      "confidence": 0.8068
+    }
+  ]
+}
+
+ +

Response Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
frame_numberintegerSelected representative frame number (primary coordinate)
timestamp_secsfloatTime in seconds (derived from frame_number / fps)
face_qualityfloatQuality score (bounding-box area × confidence) of the best face at this frame
main_identitiesarrayTop 2 most frequent TMDb identities in the file
main_identities[].namestringIdentity display name
main_identities[].face_countintegerTotal face detections count
tracesarrayAll face traces present at the selected frame
traces[].trace_idintegerFace trace ID
traces[].identity_uuidstring or nullMatched identity UUID
traces[].namestring or nullIdentity name
traces[].x, y, width, heightintegerBounding box coordinates
traces[].confidencefloatDetection confidence (0.0–1.0)
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404File not found / No faces in file
500Database error

GET /api/v1/file/:file_uuid/clip

Extract a video clip (time range) as MPEG-TS stream. Uses FFmpeg -ss fast seek.

Auth: Required Scope: file-level

diff --git a/docs_v1.0/doc/12_agent.html b/docs_v1.0/doc/12_agent.html index 6532a47..166ef43 100644 --- a/docs_v1.0/doc/12_agent.html +++ b/docs_v1.0/doc/12_agent.html @@ -209,7 +209,191 @@ a { color: #0066cc; }
-

Updated: 2026-05-19 12:49:24

+

POST /api/v1/agents/search

+

Conversational search assistant. Uses Gemma4 function calling to automatically decide which tools to call based on the user's natural language query. Supports multi-turn conversation.

+

Request

+
{
+  "query": "Audrey Hepburn 和 Cary Grant 第一次同框在哪個 frame?",
+  "conversation_id": null,
+  "file_uuid": null
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
querystring自然語言查詢
conversation_idstring延續對話時傳入;新對話不傳
file_uuidstringPortal 有選中檔案時可指定
+

Response

+
{
+  "success": true,
+  "conversation_id": "conv_abc123",
+  "answer": "在 Charade (1963) 中,Audrey Hepburn 與 Cary Grant 第一次同框在第 38619 幀(約 1544.76 秒)。",
+  "need_input": false,
+  "sources": [
+    {
+      "tool": "tkg_query",
+      "result": "{\"first_cooccurrence\":{\"frame\":38619,\"timestamp_secs\":1544.76}}"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
conversation_idstring後續對話需要傳入此 ID
answerstringAgent 的自然語言回答(或反問)
need_inputbooleantrue 表示 agent 需要更多資訊才能回答
suggestionsstring[]建議用戶提供的線索(當 need_input=true
sourcesarray引用的工具執行結果
+

Conversation Flow

+
Round 1: POST /agents/search { query: "我想看男女主角同框" }
+         → need_input: true, suggestions: ["片名", "演員", "年代"]
+         → answer: "請問是哪部電影?請提供更多線索"
+
+Round 2: POST /agents/search { query: "奧黛麗赫本", conversation_id: "..." }
+         → need_input: false
+         → answer: "找到 Charade (1963),Audrey Hepburn 和 Cary Grant..."
+
+ +

Available Tools

+

Agent 內部使用 Gemma4 function calling 自動調用以下工具:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ToolDescription
find_file透過片名/演員/年份關鍵字搜尋影片,回傳 file_uuid + has_data 狀態
list_files列出近期註冊的影片
tkg_query查詢人物互動資料(7 種子類型:top_identities、first_cooccurrence、identity_details、mutual_gaze、interaction_network、identity_traces、file_info)
smart_search文字內容 ILIKE 搜尋 chunk(可指定 file_uuid 限制範圍)
get_identity_detail查詢單一身份的詳細資料(角色、TMDb 資訊)
get_file_info查詢影片基本資訊(片長、解析度)
get_representative_frame查詢影片最具代表性的 frame 資訊
+

Design Principles

+
    +
  • 用戶不需要知道 file_uuid — Agent 會自動用 find_file 搜尋或反問
  • +
  • 不推薦無資料的影片has_data=false 的影片不會被推薦給用戶
  • +
  • 多輪對話 — 透過 conversation_id 延續上下文,agent 會記得之前的交流
  • +
  • 並行工具呼叫 — Gemma4 可以一次呼叫多個工具再綜合回答
  • +
+

Model

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DetailValue
LLMGemma4 26B (Q5_K_M)
Enginellama.cpp at localhost:8082
Endpoint/v1/chat/completions (OpenAI-compatible)
Temperature0.1
Max rounds5 (tool call iterations)
Conversation TTL30 minutes
+
+

Updated: 2026-05-22

\ No newline at end of file diff --git a/docs_v1.0/doc_developer/14_identity_history.html b/docs_v1.0/doc_developer/14_identity_history.html new file mode 100644 index 0000000..568e9a9 --- /dev/null +++ b/docs_v1.0/doc_developer/14_identity_history.html @@ -0,0 +1,470 @@ + + + + +14 Identity History - Momentry API Docs + + + +
+ + + + + +

Identity Operation History

+

Every PATCH /api/v1/identity/:identity_uuid automatically records a before/after snapshot in the identity_history table. Use undo/redo to revert or reapply changes, and history to inspect the operation log.

+

History System Overview

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PropertyValue
StoragePostgreSQL identity_history table
SnapshotFull identity record (all fields) before and after each PATCH
Max records256 per identity (oldest auto-deleted when limit exceeded)
Undo stepsUnlimited (no expiry, no step limit)
Redo stackCleared on new PATCH (is_undone=true records are deleted)
+

Stack Model

+
PATCH 1 → PATCH 2 → PATCH 3         (undo stack, is_undone=false)
+                           ↓ undo
+PATCH 1 → PATCH 2                   (undo stack)
+           PATCH 3                   (redo stack, is_undone=true)
+                           ↓ redo
+PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
+
+ +

A new PATCH after undo clears the redo stack (PATCH 3 is lost).

+
+

POST /api/v1/identity/:identity_uuid/undo

+

Auth: Required +Scope: identity-level

+

Undo the most recent PATCH operations. Restores the identity's before_snapshot and marks the history records as undone.

+

Request (JSON)

+ + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
stepsintegerNo1Number of undo steps to apply (max records undone in one call)
+

Behavior

+
    +
  • Queries is_undone=false records, ordered by created_at DESC
  • +
  • Restores name, identity_type, source, status, metadata, tmdb_id, tmdb_profile from the last record's before_snapshot
  • +
  • Marks the undone records as is_undone=true with undone_at=NOW()
  • +
  • Syncs identity.json to disk
  • +
  • Updates _index.json if name changed
  • +
+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/undo" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"steps": 1}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "undone_count": 1,
+  "current_state": {
+    "id": 9,
+    "uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+    "name": "Cary Grant",
+    "identity_type": "people",
+    "source": "tmdb",
+    "status": "confirmed",
+    "metadata": {},
+    "tmdb_id": 112,
+    "tmdb_profile": null
+  }
+}
+
+ + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
undone_countintegerNumber of history records undone
current_stateobjectFull identity state after undo
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400No undo operations available
404Identity not found
500Database error
+
+

POST /api/v1/identity/:identity_uuid/redo

+

Auth: Required +Scope: identity-level

+

Redo previously undone PATCH operations. Restores the identity's after_snapshot and marks the history records as no longer undone.

+

Request (JSON)

+ + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
stepsintegerNo1Number of redo steps to apply
+

Behavior

+
    +
  • Queries is_undone=true records, ordered by created_at DESC
  • +
  • Restores all identity fields from the last record's after_snapshot
  • +
  • Marks records as is_undone=false with undone_at=NULL
  • +
  • Syncs identity.json to disk
  • +
  • Updates _index.json if name changed
  • +
+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/redo" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"steps": 1}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "redone_count": 1,
+  "current_state": {
+    "id": 9,
+    "uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+    "name": "John Smith",
+    "identity_type": "people",
+    "source": "tmdb",
+    "status": "confirmed",
+    "metadata": { "aliases": [...] },
+    "tmdb_id": 112,
+    "tmdb_profile": null
+  }
+}
+
+ + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
redone_countintegerNumber of history records redone
current_stateobjectFull identity state after redo
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400No redo operations available
404Identity not found
500Database error
+
+

GET /api/v1/identity/:identity_uuid/history

+

Auth: Required +Scope: identity-level

+

Query the operation history for an identity. Returns paginated records with undo/redo stack counts.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-indexed)
limitintegerNo20Items per page (max 100)
+

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "total": 5,
+  "undo_stack_count": 3,
+  "redo_stack_count": 2,
+  "results": [
+    {
+      "history_id": 42,
+      "operation": "update",
+      "is_undone": false,
+      "created_at": "2026-05-27T12:00:00Z",
+      "undone_at": null
+    },
+    {
+      "history_id": 41,
+      "operation": "update",
+      "is_undone": true,
+      "created_at": "2026-05-27T11:30:00Z",
+      "undone_at": "2026-05-27T13:00:00Z"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
totalintegerTotal history records for this identity
undo_stack_countintegerRecords available for undo (is_undone=false)
redo_stack_countintegerRecords available for redo (is_undone=true)
results[].history_idintegerHistory record ID
results[].operationstringOperation type ("update" for PATCH)
results[].is_undonebooleanWhether the operation has been undone
results[].created_atstringWhen the PATCH was applied
results[].undone_atstringWhen the undo occurred (null if not undone)
+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/history?page=1&limit=10" \
+  -H "X-API-Key: $KEY"
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404Identity not found
500Database error
+
+

Comparison: PATCH Undo vs Merge Undo

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AspectPATCH Undo/RedoMerge Undo
StoragePostgreSQL identity_historyMongoDB identity_merge_history
TriggerEvery PATCHEvery mergeinto with keep_history=true
Undo deadlineNone (unlimited)24 hours
Redo supportYesNo
Step undoYes (steps param)No (full undo only)
Max records256 per identityUnlimited
+
+

Updated: 2026-05-28

+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_developer/index.html b/docs_v1.0/doc_developer/index.html index 033e6da..d87cd1a 100644 --- a/docs_v1.0/doc_developer/index.html +++ b/docs_v1.0/doc_developer/index.html @@ -29,7 +29,7 @@ a:hover td { background: #f8f8f8; border-radius: 4px; } Logout

API 參考手冊 — 登入後可瀏覽各模組文件

-
錯誤碼Error Codes
+
錯誤碼Error Codes
14 Identity History
\ No newline at end of file diff --git a/docs_v1.0/doc_user/API_ACCESS.html b/docs_v1.0/doc_user/API_ACCESS.html new file mode 100644 index 0000000..257ca11 --- /dev/null +++ b/docs_v1.0/doc_user/API_ACCESS.html @@ -0,0 +1,358 @@ + + + + +Api Access - Momentry API Docs + + + +
+← Back to index +

Momentry Core API 存取指南

+ + + + + + + + + + + + + + + + + + + + + +
項目內容
版本V1.3
日期2026-03-25
用途API 存取方式、端點與整合指南
+
+

版本歷史

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
版本日期目的操作人工具/模型
V1.32026-03-25更新: n8n 搜尋回傳 file_path 取代 media_url,新增 API Key 驗證說明OpenCodedeepseek-reasoner
V1.22026-03-24更新網址與服務列表WarrenOpenCode / MiniMax M2.5
V1.12026-03-23初始版本WarrenOpenCode / MiniMax M2.5
+
+

基本網址

+ + + + + + + + + + + + + + + + + + + + +
環境URL說明
本地開發http://localhost:3002直接訪問 API,繞過反向代理
外部訪問https://m5api.momentry.ddns.net通過 Caddy 反向代理訪問,需網路可達
+

何時使用哪個 URL

+

使用 localhost:3002 +- 開發/測試環境 +- 直接在伺服器上操作 +- 當反向代理有問題時

+

使用 m5api.momentry.ddns.net +- n8n workflow 中呼叫 API +- 外部系統整合 +- 生產環境

+

認證

+

所有 /api/v1/* 端點(除了健康檢查 /health 和 /health/detailed)都需要 API Key 認證。

+

請在請求標頭中加入:

+
X-API-Key: YOUR_API_KEY
+
+ +

目前示範使用的 API Key: demo_api_key_12345

+
+

注意: 正式環境請使用安全的 API Key 管理機制,避免在客戶端暴露 API Key。

+
+
+

影片搜尋 API

+

語意搜尋

+

端點: POST /api/v1/search

+

請求:

+
{
+  "query": "charade",
+  "limit": 5,
+  "uuid": "a1b10138a6bbb0cd"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
欄位類型必填說明
query字串搜尋文字
limit整數最大回傳結果數(預設 10)
uuid字串依影片 UUID 過濾
+

回應:

+
{
+  "results": [
+    {
+      "uuid": "a1b10138a6bbb0cd",
+      "chunk_id": "sentence_0006",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.44,
+      "text": "fun plot twists, Woody Dialog and charming performances...",
+      "score": 0.526
+    }
+  ],
+  "query": "charade"
+}
+
+ +
+

n8n 整合搜尋

+

端點: POST /api/v1/n8n/search

+

請求:

+
{
+  "query": "charade",
+  "limit": 5
+}
+
+ +

回應:

+
{
+  "query": "charade",
+  "count": 5,
+  "hits": [
+    {
+      "id": "sentence_0006",
+      "vid": "a1b10138a6bbb0cd",
+      "start": 48.8,
+      "end": 55.44,
+      "title": "Chunk sentence_0006",
+      "text": "fun plot twists...",
+      "score": 0.526,
+      "file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/Old_Time_Movie_Show_-_Charade_1963.HD.mov"
+    }
+  ]
+}
+
+ +
+

注意: API 現在返回 file_path(檔案系統路徑)而非 media_url(網頁 URL)。如需在網頁中播放影片,請將檔案路徑轉換為可訪問的 URL(例如透過 SFTPGo 分享連結)。

+
+
+

影片管理 API

+

列出所有影片

+

端點: GET /api/v1/videos

+

查詢影片資訊

+

端點: GET /api/v1/lookup?uuid={uuid} 或 GET /api/v1/lookup?path={path}

+

取得處理進度

+

端點: GET /api/v1/progress/{uuid}

+
+

區塊資料結構

+

每個搜尋結果包含影片播放的時間資訊:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
欄位說明
uuid影片識別碼
chunk_id區塊唯一識別碼
chunk_type類型:sentence、cut、time_based
start_time開始時間(秒)
end_time結束時間(秒)
text語音轉文字內容
score相關性分數(0-1)
+
+

整合範例

+

JavaScript/fetch

+
const response = await fetch('http://localhost:3002/api/v1/search', {
+  method: 'POST',
+  headers: { 
+    'Content-Type': 'application/json',
+    'X-API-Key': 'YOUR_API_KEY'  // 替換為實際的 API Key
+  },
+  body: JSON.stringify({ query: 'charade', limit: 5 })
+});
+const data = await response.json();
+console.log(data.results);
+
+ +

PHP/cURL

+
$ch = curl_init('http://localhost:3002/api/v1/search');
+curl_setopt($ch, CURLOPT_POST, true);
+curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode([
+  'query' => 'charade',
+  'limit' => 5
+]));
+curl_setopt($ch, CURLOPT_HTTPHEADER, [
+  'Content-Type: application/json',
+  'X-API-Key: YOUR_API_KEY'  // 替換為實際的 API Key
+]);
+$response = curl_exec($ch);
+$data = json_decode($response, true);
+
+ +
+

影片嵌入網址

+
+

重要: API 現在返回 file_path(檔案系統路徑),而非直接可訪問的網址。您需要將檔案路徑轉換為 SFTPGo 分享連結才能嵌入影片。

+
+

檔案路徑轉換為網址: +- API 返回的 file_path 範例:/Users/accusys/momentry/var/sftpgo/data/demo/video.mp4 +- 對應的 SFTPGo 分享連結:https://wp.momentry.ddns.net/demo/video.mp4 +- 轉換方式:移除 /Users/accusys/momentry/var/sftpgo/data/ 前綴,將剩餘路徑附加到 https://wp.momentry.ddns.net/

+

手動建立分享連結: +1. 開啟 SFTPGo Web UI:http://localhost:8080 +2. 使用帳號 demo / 密碼 demopassword123 登入 +3. 導航至 Files → 選擇影片檔案 +4. 點擊 Share → Create Link +5. 複製產生的分享連結

+

使用搜尋結果中的 start_time 和 end_time 來嵌入影片片段。

+
+

服務列表

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
服務網址用途
Momentry APIhttp://localhost:3002核心 API
SFTPGohttp://localhost:8080檔案儲存
Qdranthttp://localhost:6333向量搜尋
PostgreSQLlocalhost:5432資料庫
+
+

示範影片

+
    +
  • 檔案: Old_Time_Movie_Show_-_Charade_1963.HD.mov
  • +
  • UUID: a1b10138a6bbb0cd
  • +
  • 長度: 約 6879 秒(約 1.9 小時)
  • +
  • 區塊數: 3886 個(句子 + 場景 + 時間)
  • +
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_ENDPOINTS.html b/docs_v1.0/doc_user/API_ENDPOINTS.html new file mode 100644 index 0000000..5d9c247 --- /dev/null +++ b/docs_v1.0/doc_user/API_ENDPOINTS.html @@ -0,0 +1,3537 @@ + + + + +Api Endpoints - Momentry API Docs + + + +
+← Back to index +
+

document_type: "api_reference" +service: "MOMENTRY_CORE" +title: "Momentry Core API 端點總覽" +date: "2026-05-17" +version: "V1.4" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

Momentry Core API 端點總覽

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+

Base URL

+ + + + + + + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Playground (Dev)http://localhost:3003Development and testing
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+

All examples in this documentation use these environment variables:

+
API="http://localhost:3003"
+KEY="your-api-key-here"
+
+ +

Authentication

+

All endpoints under /api/v1/* require authentication. +The following endpoints are public (no auth needed):

+
    +
  • GET /health
  • +
  • POST /api/v1/auth/login
  • +
  • POST /api/v1/auth/logout
  • +
+

Three Authentication Modes

+

The system supports three authentication methods, checked in priority order by the middleware:

+
Middleware priority:
+  1. Session Cookie (Portal/browser)
+  2. JWT Bearer (API clients: n8n, CLI)
+  3. API Key Header (legacy compatibility)
+  4. API Key Query Param (?api_key=)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<uuid>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients (n8n, CLI, scripts)
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+
+

Login

+

Default accounts & API keys:

+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+

The demo API key is set via MOMENTRY_DEMO_API_KEY env var and can be used in place of JWT for marcom integrations:

+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ +

Success Response

+
{
+  "success": true,
+  "jwt": "eyJhbGciOiJIUzI1NiIs...",
+  "api_key": "muser_...",
+  "user": {
+    "username": "admin",
+    "role": "admin"
+  },
+  "expires_at": "2026-05-18T13:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+

The login endpoint also sets a Set-Cookie header for browser-based clients:

+
Set-Cookie: session_id=<uuid>; Path=/api; HttpOnly; SameSite=Strict; Max-Age=86400
+
+ +

Error Response (401)

+
{
+  "success": false,
+  "message": "Invalid username or password"
+}
+
+ +
+

Using JWT

+

JWT is preferred for API clients (n8n, CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless).

+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +

JWT is short-lived (1 hour). When it expires, request a new one via login.

+
+

Using Session Cookie (Browser)

+

Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed.

+
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<uuid>
+# No manual header needed for subsequent requests
+
+ +

The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF).

+
+

Using Legacy API Key

+
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +

API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation.

+

Obtaining an API Key (CLI)

+
momentry api-key create "My API Key" --key-type user
+
+ +
+

Logout

+
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<uuid>"
+
+ +

What logout does

+ + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+

Example: full session lifecycle

+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +
+

Authentication Flow Summary

+
Login Request
+     │
+     ▼
+┌──────────────────┐
+│  1. Check users  │ ← users table (argon2 password verify)
+│     table        │
+└──────┬───────────┘
+       │
+   ┌───┴───┐
+   │ match │
+   └───┬───┘
+       │
+       ▼
+┌──────────────────┐
+│  2. Create JWT   │ ← 1h expiry, signed with JWT_SECRET
+├──────────────────┤
+│  3. Create       │ ← 24h expiry, stored in sessions table
+│     session      │
+├──────────────────┤
+│  4. Set-Cookie   │ ← HttpOnly, SameSite=Strict, Path=/api
+├──────────────────┤
+│  5. Return       │ ← JWT + api_key + user info to client
+└──────────────────┘
+
+ +
Protected Request
+     │
+     ▼
+┌──────────────────────┐
+│  Middleware checks:  │
+│                      │
+│  1. Cookie session?  │ → DB lookup session → get api_key → verify
+│                      │
+│  2. JWT Bearer?      │ → verify JWT signature → decode claims
+│                      │
+│  3. X-API-Key?       │ → SHA256 hash → DB lookup → verify
+│                      │
+│  4. ?api_key=?       │ → same as #3
+│                      │
+│  5. None → 401       │
+└──────────────────────┘
+
+ +
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+

Related

+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+

Health Check

+

GET /health

+

Auth: Public +Scope: system-level

+

Returns basic server health status — used by load balancers and monitoring.

+

Example

+
curl "$API/health" | jq '{status, version}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "build_git_hash": "3a6c1865",
+  "build_timestamp": "2026-05-16T13:38:15Z",
+  "uptime_ms": 3015
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok or degraded
versionstringSemver version
build_git_hashstringGit commit hash
build_timestampstringBinary build time
uptime_msintegerMilliseconds since server start
+
+

GET /health/detailed

+

Auth: Required +Scope: system-level

+

Returns full system health including each service status, resource utilization, pipeline readiness, schema migration status, identity file sync status, and external integrations.

+
+

Requires authentication (JWT, session cookie, or API key). The basic /health endpoint remains public for load balancer checks.

+
+

Example

+
curl -s "$API/health/detailed" -H "X-API-Key: $KEY" | jq '{status, services, resources: {cpu: .resources.cpu_used_percent, memory: .resources.memory_used_percent}}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "services": {
+    "postgres": {"status": "ok", "latency_ms": 3},
+    "redis": {"status": "ok", "latency_ms": 1},
+    "qdrant": {"status": "ok", "latency_ms": 5}
+  },
+  "resources": {
+    "cpu_used_percent": 12.5,
+    "memory_available_mb": 32768,
+    "memory_used_percent": 31.7
+  },
+  "pipeline": {
+    "scripts_ready": true,
+    "scripts_count": 345,
+    "processors": {
+      "asr": true,
+      "yolo": true,
+      "face": true,
+      "pose": true,
+      "ocr": true,
+      "cut": true,
+      "scene": true,
+      "asrx": true,
+      "visual_chunk": true
+    },
+    "models_ready": true,
+    "models_count": 42,
+    "scripts_integrity": {"matched": 332, "total": 345, "ok": false},
+    "ffmpeg": true
+  },
+  "schema": {
+    "table_exists": true,
+    "applied": [{"filename": "migrate_add_users_table.sql"}],
+    "required": [],
+    "ok": true
+  },
+  "identities": {
+    "directory_exists": true,
+    "files_count": 3481,
+    "index_ok": true,
+    "db_count": 3481,
+    "synced": true
+  },
+  "integrations": {
+    "tmdb": {
+      "api_key_configured": false,
+      "enabled": false,
+      "api_reachable": null
+    }
+  }
+}
+
+ +

Response Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok if all essential services healthy
servicesobjectPer-service status (postgres, redis, qdrant)
services.*.statusstringok, error, or degraded
services.*.latency_msintResponse time in milliseconds
resourcesobjectCPU, memory usage
pipeline.scripts_readybooleanScripts directory accessible
pipeline.scripts_countintNumber of Python processor scripts
pipeline.processorsobjectPer-processor availability
pipeline.models_readybooleanModels directory accessible
pipeline.scripts_integrityobjectSHA256 checksum verification results
schema.okbooleanAll required migrations applied
identities.syncedbooleanIdentity file count matches DB count
integrations.tmdbobjectTMDB API key config and reachability
+

Health status rules

+ + + + + + + + + + + + + + + + + + + + + +
Conditionstatus
All services okok
Any service errordegraded
Postgres or Redis errordegraded (server still responds)
+
+

Stats Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/ingestNoIngest pipeline statistics
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/inferenceNoInference service (LLM) health
+
curl "$API/api/v1/stats/ingest"
+
+ +
+

File Registration

+

POST /api/v1/files/register

+

Auth: Required +Scope: file-level

+

Register a video file for processing. Returns the file's metadata and UUID.

+

New in v0.1.2: Registration now automatically triggers the processing pipeline — no need to call POST /api/v1/file/:uuid/process separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H)

+

If the file already exists (same content hash), returns the existing record with already_exists: true.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+

Example

+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "file_type": "video",
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "already_exists": false,
+  "message": "File registered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+
+

GET /api/v1/files/scan

+

Auth: Required +Scope: file-level

+

Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+

Example

+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ +

Response (200)

+
{
+  "files": [
+    {
+      "file_name": "video.mp4",
+      "file_size": 12345678,
+      "is_registered": true,
+      "file_uuid": "3a6c1865...",
+      "status": "completed",
+      "registration_time": "2026-05-16T12:00:00Z",
+      "job_id": 42
+    }
+  ],
+  "total": 107,
+  "filtered_total": 80,
+  "page": 1,
+  "page_size": 20,
+  "total_pages": 4,
+  "registered_count": 26,
+  "unregistered_count": 81
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+

Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+

File Lookup

+

GET /api/v1/files/lookup

+

Auth: Required +Scope: file-level

+

Search registered files by file name. Performs a case-insensitive LIKE search on the file name column. Returns basic info about matching files.

+

Query Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_namestringYesFile name to search for (partial matches supported)
+

Example

+
# Look up a specific file
+curl -s "$API/api/v1/files/lookup?file_name=video.mp4" \
+  -H "X-API-Key: $KEY"
+
+# Partial name search
+curl -s "$API/api/v1/files/lookup?file_name=charade" \
+  -H "X-API-Key: $KEY" | jq '.matches[].file_name'
+
+ +

Response (200)

+
{
+  "file_name": "video.mp4",
+  "exists": true,
+  "matches": [
+    {
+      "file_uuid": "a03485a40b2df2d3",
+      "file_name": "video.mp4",
+      "file_type": "video",
+      "status": "completed"
+    }
+  ],
+  "next_name": "video (2).mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_namestringSearched name
existsbooleanExact name match exists
matchesarrayArray of matching registered files
matches[].file_uuidstring32-char hex UUID
matches[].file_namestringRegistered file name
matches[].file_typestring"video", "audio", or null
matches[].statusstringRegistration/processing status
next_namestringSuggested name for avoiding conflicts
+
+

Unregister

+

POST /api/v1/unregister

+

Auth: Required +Scope: file-level

+

Delete a registered file from the system. Supports single file by UUID, or batch by directory + regex pattern.

+

What gets deleted

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Removed (default)Not removed
Database records (videos, chunks, embeddings, processor_results, pre_chunks)The original source video file on disk
Processor output JSON files ({uuid}.*.json) — unless delete_output_files: falseTemp/working directories
In-memory cache entries
MongoDB cached lists
+
+

⚠️ Database deletion is irreversible. To keep output files, set "delete_output_files": false.

+
+

Request Parameters

+

At least one mode must be specified: either file_uuid alone, or file_path + pattern together.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstring*Single file UUID to delete
file_pathstring*Directory path (for batch delete)
patternstring*Regex pattern (requires file_path)
delete_output_filesbooleanNotrueIf true, also delete processor output JSON files ({uuid}.*.json). Set to false to keep them.
+

Example

+
# Delete a single file by UUID (default: also deletes output JSON files)
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# Keep output JSON files, only delete DB records
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "delete_output_files": false}'
+
+# Batch delete all mp4 files in a directory
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "a03485a40b2df2d3",
+  "message": "Video unregistered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanTrue if deletion succeeded
file_uuidstringUUID of the deleted file (single mode)
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Neither file_uuid nor file_path+pattern provided
404File UUID not found
401Missing or invalid API key
+
+

Processing Pipeline

+

POST /api/v1/file/:file_uuid/process

+

Auth: Required +Scope: file-level

+

Trigger the processing pipeline for a registered file. Creates a monitor job that the worker picks up and processes sequentially. Returns immediately with the job info—processing runs asynchronously in the background.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
processorsstring[]NoallSpecific processors to run: ["asr","cut","yolo","ocr","face","pose","asrx","visual_chunk"]
rulesstring[]NoallRule names to apply (currently unused)
+

Example

+
# Run all processors
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" -d '{}'
+
+# Run specific processors only
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"processors": ["asr", "face", "yolo"]}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "job_id": 42,
+  "file_uuid": "3a6c1865...",
+  "status": "processing",
+  "pids": [12345, 12346],
+  "message": "Processing triggered for video.mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
job_idintegerMonitor job ID (for job tracking)
file_uuidstring32-char hex UUID of the file
statusstring"processing"
pidsinteger[]Process IDs of started processors
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404File UUID not found
401Missing or invalid API key
+
+

GET /api/v1/file/:file_uuid/probe

+

Auth: Required +Scope: file-level

+

Get ffprobe metadata for a registered file. Returns video/audio stream info, codec details, duration, resolution, and frame rate.

+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_size": 794863677,
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "cached": true,
+  "format": {
+    "filename": "/path/to/video.mp4",
+    "format_name": "mov,mp4,m4a,3gp",
+    "duration": "120.5",
+    "size": "12345678",
+    "bit_rate": "819200"
+  },
+  "streams": [
+    {
+      "index": 0,
+      "codec_name": "h264",
+      "codec_type": "video",
+      "width": 1920,
+      "height": 1080,
+      "r_frame_rate": "24/1",
+      "duration": "120.5"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
file_namestringFile name
file_sizeintegerFile size in bytes (from filesystem)
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerEstimated total frames
cachedbooleanTrue if result was from cached probe JSON
formatobjectContainer format info (ffprobe format section)
streamsarrayArray of stream info objects
+
+

GET /api/v1/progress/:file_uuid

+

Auth: Required +Scope: file-level

+

Get real-time processing progress for a file. Queries Redis for per-processor status and the database for file metadata. Also includes system resource stats.

+

Example

+
curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}'
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "overall_progress": 71,
+  "cpu_percent": 45.2,
+  "gpu_percent": 30.1,
+  "memory_percent": 62.4,
+  "processors": [
+    {"processor_type": "asr", "status": "complete", "progress": 100},
+    {"processor_type": "yolo", "status": "running", "progress": 65},
+    {"processor_type": "face", "status": "pending", "progress": 0}
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
overall_progressintegerOverall progress percentage (0–100)
processorsarrayPer-processor status list
processors[].processor_typestringProcessor name (asr, cut, yolo, etc.)
processors[].statusstring"pending", "running", "complete", or "failed"
processors[].progressintegerPer-processor progress (0–100)
cpu_percentfloatCurrent CPU usage
gpu_percentfloatCurrent GPU utilization
memory_percentfloatCurrent memory usage
+
+

GET /api/v1/jobs

+

Auth: Required +Scope: system-level

+

List all processing jobs (monitor jobs) in the system. Shows job status, which file each job is processing, and current processor info.

+

Example

+
curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | {uuid, status}]}'
+
+ +

Response (200)

+
{
+  "jobs": [
+    {
+      "id": 42,
+      "uuid": "3a6c1865...",
+      "status": "running",
+      "current_processor": "yolo",
+      "created_at": "2026-05-16T12:00:00Z",
+      "started_at": "2026-05-16T12:01:00Z"
+    }
+  ],
+  "count": 15,
+  "page": 1,
+  "page_size": 20
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jobsarrayArray of job info objects
jobs[].idintegerJob ID
jobs[].uuidstringFile UUID being processed
jobs[].statusstring"pending", "running", "completed", "failed"
jobs[].current_processorstringCurrently active processor, or null
countintegerTotal job count
pageintegerCurrent page number
page_sizeintegerJobs per page
+
+

Search APIs

+

Standard Search

+

POST /api/v1/search

+

Auth: Required +Scope: file-level

+

Semantic/vector search across indexed video chunks. Returns matching chunks with scores.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict search to a specific file
limitintegerNo10Max results
pageintegerNo1Page number (1-based)
page_sizeintegerNolimitItems per page (defaults to limit when omitted)
modestringNosmartSearch mode: "vector" or "smart"
+

Example

+
# Search across all files
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+# Search within a specific file
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "file_uuid": "'"$FILE_UUID"'"}'
+
+# Paginated search
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "page": 1, "page_size": 3}'
+
+ +

Response (200)

+
{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "query": "charade",
+  "total": 15,
+  "page": 1,
+  "page_size": 3
+}
+
+> Results are deduplicated by `chunk_id` (highest score wins) before pagination.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `results` | array | Array of matched chunks (deduplicated) |
+| `results[].uuid` | string | File UUID |
+| `results[].chunk_id` | string | Chunk identifier |
+| `results[].chunk_type` | string | `"sentence"`, `"cut"`, `"trace"`, `"visual"` |
+| `results[].start_time` | float | Start time in seconds |
+| `results[].end_time` | float | End time in seconds |
+| `results[].text` | string | Chunk text content |
+| `results[].score` | float | Similarity score (0.0–1.0) |
+| `query` | string | Original search query |
+| `total` | integer | Total matching results |
+| `page` | integer | Current page number |
+| `page_size` | integer | Items per page |
+
+#### Error Responses
+
+| HTTP | When |
+|------|------|
+| `401` | Missing or invalid API key |
+
+---
+
+### `POST /api/v1/search/hybrid`
+
+**Auth**: Required
+**Scope**: file-level
+
+Hybrid search combining vector similarity score and BM25 text score into a combined score.
+
+#### Request Parameters
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `query` | string | Yes |  | Search text |
+| `file_uuid` | string | No |  | Restrict search to a specific file |
+| `limit` | integer | No | 10 | Max results |
+| `page` | integer | No | 1 | Page number (1-based) |
+| `page_size` | integer | No | `limit` | Items per page |
+| `vector_weight` | float | No | 0.5 | Weight for vector score (0.0–1.0) |
+| `bm25_weight` | float | No | 0.5 | Weight for BM25 score (0.0–1.0) |
+
+#### Response
+
+```json
+{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "vector_score": 0.85,
+      "bm25_score": 0.72,
+      "combined_score": 0.79
+    }
+  ],
+  "query": "charade"
+}
+```
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].vector_scorefloatVector similarity score
results[].bm25_scorefloatBM25 text score
results[].combined_scorefloatWeighted combination of both scores
+
+

POST /api/v1/search/bm25

+

Auth: Required +Scope: file-level

+

BM25 full-text keyword search. Good for exact term matching. Returns results with BM25 score only.

+

Request Parameters

+

Same as standard search: query, file_uuid, limit.

+

Response

+

Returns the same structure as standard search, with score representing the BM25 relevance score.

+
+

N8N Search

+

N8N-format search endpoints. Response format is optimized for n8n workflow consumption.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/n8n/searchN8N-format vector search
POST/api/v1/n8n/search/bm25N8N-format BM25 search
POST/api/v1/n8n/search/hybridN8N-format hybrid search
POST/api/v1/n8n/search/smartN8N-format smart search
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict to specific file
typesstring[]No["chunk"]Search types: chunk, frame, person
filtersobjectNoFilter criteria (confidence, object class, speaker, etc.)
pageintegerNo1Page number
page_sizeintegerNo20Items per page
time_rangefloat[2]NoTime range [start, end] in seconds
+

Example

+
curl -s -X POST "$API/api/v1/n8n/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "page_size": 5}'
+
+ +

Response (200)

+
{
+  "query": "charade",
+  "results": [
+    {
+      "type": "chunk",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "file_uuid": "3a6c1865...",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "total": 15,
+  "page": 1,
+  "page_size": 20,
+  "took_ms": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
resultsarrayArray of search results
results[].typestringResult type: "chunk", "frame", or "person"
results[].chunk_idstringChunk identifier
results[].chunk_typestringChunk type
results[].file_uuidstringFile UUID
results[].start_timefloatStart time in seconds
results[].end_timefloatEnd time in seconds
results[].textstringContent text
results[].scorefloatRelevance score
totalintegerTotal matching results
pageintegerCurrent page
page_sizeintegerItems per page
took_msintegerQuery execution time in milliseconds
+
+

Identity Text Search

+

Two paths for searching identities by text.

+

Path A: GET /api/v1/search/identity_text

+

Search chunk text content and return associated identities (if any).

+

Auth: Required +Scope: file-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to search within
qstringYesText search query
limitintegerNoMax results (default 50)
pageintegerNoPage number (default 1)
page_sizeintegerNoItems per page (defaults to limit)
+
curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=charade&page=1&page_size=5" \
+  -H "X-API-Key: $KEY"
+
+ +
Response
+
{
+  "success": true,
+  "total": 0,
+  "page": 1,
+  "page_size": 5,
+  "limit": 50,
+  "results": []
+}
+
+ +

Each result (IdentityTextHit):

+
{
+  "file_uuid": "3a6c1865...",
+  "chunk_id": "sentence_0012",
+  "start_time": 48.8,
+  "end_time": 55.4,
+  "text_content": "charade is a classic film...",
+  "identity_id": 42,
+  "identity_name": "Cary Grant",
+  "identity_source": "tmdb",
+  "trace_id": 10
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringChunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
text_contentstringChunk text content
identity_idintegerIdentity ID (null if no identity matched)
identity_namestringIdentity name (null if no identity matched)
identity_sourcestringIdentity origin. .json = established from identity.json files on disk (unified format for all sources). tmdb = from .json via TMDb enrichment probe, then matched against faces. auto = pipeline face matching result; only stranger identities are auto-created. user_defined = manual. merged = merged identities.
trace_idintegerFace trace ID (null if no trace matched)
+

Path B: POST /api/v1/identities/search

+

Search identity names and return associated face detection text.

+

Auth: Required +Scope: identity-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
qstringYesIdentity name search
file_uuidstringNoRestrict to specific file
limitintegerNoMax results (default 50)
+
curl -s -X POST "$API/api/v1/identities/search" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"q": "Cary Grant"}'
+
+ +
+

Visual Chunk Search

+

Search video frames by visual content (object detection results).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/search/visualSearch visual chunks by criteria (object classes, density)
POST/api/v1/search/visual/classSearch by specific object class
POST/api/v1/search/visual/densitySearch by spatial density range
POST/api/v1/search/visual/statsGet visual detection statistics
POST/api/v1/search/visual/combinationSearch by object class combination
+

Request Parameters (Visual Search)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID
criteria.required_classesstring[]NoRequired object classes (e.g., ["person", "car"])
criteria.min_confidencefloatNoMinimum confidence threshold
criteria.min_spatial_densityfloatNoMinimum spatial density
criteria.max_spatial_densityfloatNoMaximum spatial density
+

Example

+
curl -s -X POST "$API/api/v1/search/visual" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "criteria": {"required_classes": ["person", "car"]}}'
+
+ +

Response

+
{
+  "chunks": [
+    {
+      "file_uuid": "3a6c1865...",
+      "chunk_id": "visual_001",
+      "chunk_type": "visual",
+      "detections": [
+        {"class": "person", "confidence": 0.95, "bbox": [100, 200, 150, 350]}
+      ],
+      "start_time": 120.5,
+      "end_time": 125.3
+    }
+  ],
+  "total": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
chunksarrayArray of matching visual chunks
chunks[].file_uuidstringFile UUID
chunks[].chunk_idstringChunk identifier
chunks[].detectionsarrayObject detections in this chunk
totalintegerTotal matching chunks
+
+

Global Identities

+

GET /api/v1/identities

+

Auth: Required +Scope: identity-level

+

List all registered identities with pagination.

+

Example

+
curl -s "$API/api/v1/identities?page=1&page_size=20" -H "X-API-Key: $KEY" | jq '{count, identities: [.identities[] | {name}]}'
+
+ +
+

GET /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Get detailed information for a specific identity, including metadata and TMDb references.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "identity_type": "people",
+  "source": "tmdb",
+  "status": "confirmed",
+  "tmdb_id": 112,
+  "tmdb_profile": "https://image.tmdb.org/t/p/w185/abc.jpg",
+  "metadata": {},
+  "reference_data": {},
+  "created_at": "2026-05-16T12:00:00Z",
+  "updated_at": null
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
uuidstring32-char hex identity UUID
namestringIdentity name
identity_typestring"people" or null
sourcestring.json, auto, tmdb, user_defined, or merged
statusstring"confirmed", "pending", or "inactive"
tmdb_idintegerTMDb person ID (only if source = tmdb)
tmdb_profilestringTMDb profile image URL
metadataobjectMetadata JSON (tmdb_character, cast_order, etc.)
created_atstringCreation timestamp
+
+

DELETE /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Delete an identity permanently.

+
+

GET /api/v1/identity/:identity_uuid/files

+

Auth: Required +Scope: identity-level

+

Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY"
+
+ +
+

GET /api/v1/identity/:identity_uuid/faces

+

Auth: Required +Scope: identity-level

+

Get all face detection records associated with this identity.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID where face was detected
frame_numberintegerFrame number of detection
face_idstringFace ID (format: face_{frame_number})
confidencefloatDetection confidence
+
+

GET /api/v1/identity/:identity_uuid/chunks

+

Auth: Required +Scope: identity-level

+

Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringSentence chunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
textstringSpoken text content
+
+

POST /api/v1/identity/:identity_uuid/bind

+

Auth: Required +Scope: identity-level

+

Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID where face is detected
face_idstringYesFace ID (format: {frame}_{idx})
+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}'
+
+ +
+

POST /api/v1/identity/:identity_uuid/unbind

+

Auth: Required +Scope: identity-level

+

Unbind a face detection from an identity. Removes the identity association from the face record.

+
+

GET /api/v1/identities/search

+

Auth: Required +Scope: identity-level

+

Search identities by name (ILIKE search). Returns matching identity records.

+

Example

+
curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
namestringIdentity name
sourcestringIdentity source
tmdb_idintegerTMDb ID (if source = tmdb)
file_uuidstringAssociated file UUID
+
+
+

POST /api/v1/identity/upload

+

Auth: Required +Scope: identity-level

+

Upload an identity.json file to create or update an identity. Accepts the same format as the identity.json files stored on disk.

+

If an identity with the same name already exists, it will be updated with the new values.

+

Request

+

The request body is an IdentityFile object:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
identity_uuidstringYes32-char hex UUID (hyphens allowed, will be stripped)
namestringYesIdentity display name
identity_typestringNo"people" or null
sourcestringNo.json, auto, tmdb, user_defined, or merged
statusstringNo"confirmed", "pending", or "inactive"
tmdb_idintegerNoTMDb person ID
tmdb_profilestringNoTMDb profile image URL
metadataobjectNoArbitrary metadata JSON
file_bindingsarrayNoArray of { file_uuid, trace_ids, face_count } (informational)
+

Example

+
curl -s -X POST "$API/api/v1/identity/upload" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "version": 1,
+    "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+    "name": "Cary Grant",
+    "identity_type": "people",
+    "source": ".json",
+    "status": "confirmed",
+    "metadata": {},
+    "file_bindings": []
+  }'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "message": "Identity uploaded successfully"
+}
+
+ +
+
+

POST /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Upload a profile image (JPEG or PNG) for an identity. The image is saved to {output}/identities/{uuid}/profile.{ext}.

+

Uses multipart/form-data with field name image.

+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" \
+  -F "image=@/path/to/photo.jpg"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "path": "/path/to/output/identities/.../profile.jpg",
+  "message": "Profile image saved: profile.jpg"
+}
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Missing image field or unsupported format
404Identity not found
415Unsupported image type (use JPEG or PNG)
+
+

GET /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Retrieve the profile image for an identity. Returns the raw image data with appropriate Content-Type header.

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" -o profile.jpg
+
+ + + + + + + + + + + + + + +
Response HeaderValue
content-typeimage/jpeg or image/png
+
+

GET /api/v1/signals/unbound

+

Auth: Required +Scope: identity-level

+

List unbound face signals — face detections that have not yet been assigned to any identity.

+

Example

+
curl -s "$API/api/v1/signals/unbound" -H "X-API-Key: $KEY"
+
+ +
+

Identity Agent

+

POST /api/v1/agents/identity/analyze

+

Auth: Required +Scope: file-level

+

Run identity matching on a processed file: matches face detection traces against known identities (TMDb, .json, auto) and creates bindings. Optionally uses LLM for enhanced analysis.

+
+

Requires the file's face processor to have completed. Will return an error if face traces are not available.

+
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringYesFile UUID to analyze
use_llmbooleanNofalseEnable LLM-assisted identity analysis
modelstringNoLLM model name (e.g., "gemma4")
auto_merge_thresholdfloatNoConfidence threshold (0.0–1.0) for auto-merging
+

Example

+
# Basic analysis
+curl -s -X POST "$API/api/v1/agents/identity/analyze" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# With LLM enhancement
+curl -s -X POST "$API/api/v1/agents/identity/analyze" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "use_llm": true, "model": "gemma4"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "identities": [
+    {
+      "identity_id": "a9a901056d6b46ff92da0c3c1a57dff4",
+      "person_ids": ["trace_10", "trace_23"],
+      "confidence": 0.87,
+      "matched": true,
+      "name": "Cary Grant",
+      "source": "tmdb",
+      "stranger": false
+    }
+  ],
+  "processing_status": {
+    "phase": "COMPLETED",
+    "progress": 100
+  }
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstringAnalyzed file UUID
identitiesarrayArray of matched identity results
identities[].identity_idstring32-char identity UUID
identities[].person_idsstring[]Matched trace/person IDs
identities[].confidencefloatMatching confidence (0.0–1.0)
identities[].matchedbooleanWhether this identity was matched to a known entity
identities[].namestringIdentity display name
identities[].sourcestringIdentity source (.json, auto, tmdb, etc.)
identities[].strangerbooleanWhether this is an unmatched stranger trace
processing_statusobjectIdentity agent processing progress
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400File UUID not provided or invalid
404File not found or face processor not completed
500Analysis failed (LLM error, DB error)
+
+

POST /api/v1/agents/identity/suggest

+

Auth: Required +Scope: identity-level

+

Suggest identity merges based on face embedding similarity analysis. Returns pairs of identities that are similar enough to potentially be the same person.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringYesFile UUID to analyze for merge suggestions
auto_merge_thresholdfloatNoConfidence threshold for auto-suggest
+
curl -s -X POST "$API/api/v1/agents/identity/suggest" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +
+

GET /api/v1/agents/identity/status

+

Auth: Required +Scope: system-level

+

Get the identity agent processing status for a file. Shows current phase and progress.

+
curl -s "$API/api/v1/agents/identity/status?file_uuid=$FILE_UUID" \
+  -H "X-API-Key: $KEY"
+
+ +
+

POST /api/v1/agents/suggest/merge

+

Auth: Required +Scope: identity-level

+

Execute a suggested identity merge. Combines two identities into one, consolidating their face bindings.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
source_uuidstringYesIdentity UUID to merge FROM (will be removed)
target_uuidstringYesIdentity UUID to merge INTO (will be kept)
+
curl -s -X POST "$API/api/v1/agents/suggest/merge" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"source_uuid": "uuid_to_discard", "target_uuid": "uuid_to_keep"}'
+
+ +
+

POST /api/v1/agents/suggest/clustering

+

Auth: Required +Scope: identity-level

+

Suggest face clustering results. Analyzes all face embeddings in a file and groups similar faces into candidate identity clusters.

+
curl -s -X POST "$API/api/v1/agents/suggest/clustering" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +
+

TMDb Enrichment

+
+

⚠️ External resource: TMDb requires internet access, violating Momentry's local-only principle. +All core processing (ASR, YOLO, Face, OCR, Pose, embeddings) runs fully offline. +TMDb enrichment is optional and gated behind TMDB_API_KEY + MOMENTRY_TMDB_PROBE_ENABLED.

+
+

Overview

+

TMDb enrichment is an optional identity enrichment step that can be run after Pipeline face detection completes. The workflow is:

+
    +
  1. Prefetch (requires internet): Download movie cast data from TMDb API → cache to {file_uuid}.tmdb.json
  2. +
  3. Probe: Read local cache → create identities for all cast members (source='tmdb') + save identity.json + download profile image to {OUTPUT}/identities/{uuid}/profile.jpg
  4. +
  5. Match: The worker automatically matches video faces against TMDb identities when MOMENTRY_TMDB_PROBE_ENABLED=true
  6. +
+

POST /api/v1/agents/tmdb/prefetch

+

Auth: Required +Scope: file-level

+

Fetch TMDb cast data for a registered file and cache it locally. This is the only step requiring internet access.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to enrich
+

Example

+
curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +

Response (200)

+
{"success": true, "file_uuid": "...", "cache_path": "/output/...tmdb.json"}
+
+ +

POST /api/v1/file/:file_uuid/tmdb-probe

+

Auth: Required +Scope: file-level

+

Read local TMDb cache and create/update identities. Requires prefetch to have been run first.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \
+  -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}'
+
+ +

Response (200 — identities created)

+
{"success": true, "identities_created": 15, "movie_title": "Charade"}
+
+ +

Response (200 — no cache)

+
{"success": false, "message": "No TMDb cache found. Run tmdb-prefetch first."}
+
+ +

GET /api/v1/resource/tmdb

+

Auth: Required +Scope: system-level

+

View TMDb resource status including configuration, identity counts, and cache file count.

+

Example

+
curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \
+  | jq '{identities_seeded, cache_files}'
+
+ +

POST /api/v1/resource/tmdb/check

+

Auth: Required +Scope: system-level

+

Ping the TMDb API to verify connectivity and measure latency.

+

Example

+
curl -s -X POST "$API/api/v1/resource/tmdb/check" \
+  -H "X-API-Key: $KEY" | jq '{api_reachable, api_latency_ms}'
+
+ +

Response

+
{
+  "api_key_configured": true,
+  "enabled": false,
+  "api_reachable": true,
+  "api_latency_ms": 120
+}
+
+ +
+

Stats & Pipeline

+

Stats Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/ingestNoIngest statistics
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/inferenceNoInference service health
+

Configuration

+

POST /api/v1/config/cache

+

Auth: Required +Scope: system-level

+

Toggle the Redis cache on or off.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
enabledbooleanYestrue to enable, false to disable
+

Example

+
curl -s -X POST "$API/api/v1/config/cache" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"enabled": false}'
+
+ +

Unmounted Routes

+

The following routes are defined in source code but are NOT currently mounted in the router:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointSource file
/api/v1/search/universaluniversal_search.rs
/api/v1/search/framesuniversal_search.rs
/api/v1/search/personsuniversal_search.rs
/api/v1/whowho.rs
/api/v1/who/candidateswho.rs
+
+

Agent Endpoints

+

Agent endpoints provide AI-powered capabilities including translation, identity analysis, and 5W1H extraction.

+

POST /api/v1/agents/translate

+

Translate text between languages using Gemma4 (llama.cpp, port 8082).

+

Request

+
{
+  "text": "Hello, welcome to Momentry Core.",
+  "target_language": "Traditional Chinese",
+  "source_language": "English"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
textstringText to translate
target_languagestringTarget language name (e.g. "Traditional Chinese", "Japanese")
source_languagestringSource language (default: "auto")
+

Response

+
{
+  "success": true,
+  "translated_text": "您好,歡迎使用 Momentry Core。",
+  "source_language_detected": "English",
+  "model_used": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf"
+}
+
+ +

Supported Language Pairs (tested)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SourceTargetQuality
EnglishTraditional Chinese
EnglishJapanese
ChineseEnglish
EnglishFrench
ChineseJapanese
+

Model

+
    +
  • Model: Gemma4 26B (Q5_K_M)
  • +
  • Engine: llama.cpp at localhost:8082
  • +
  • Endpoint: /v1/chat/completions (OpenAI-compatible)
  • +
  • Temperature: 0.1
  • +
  • Max tokens: 1024
  • +
+

Errors

+ + + + + + + + + + + + + + + + + +
StatusCondition
500LLM unreachable or response parse failure
401Missing/invalid auth
+

GET /api/v1/agents/identity/status

+

Get status of the identity agent pipeline.

+

Response

+
{
+  "status": "idle",
+  "last_analysis": "2026-05-17T12:00:00Z",
+  "identities_processed": 27
+}
+
+ +

POST /api/v1/agents/suggest/clustering

+

Run face clustering to suggest new identity groupings.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94"
+}
+
+ +

POST /api/v1/agents/suggest/merge

+

Merge two identities into one.

+

Request

+
{
+  "from_uuid": "...",
+  "into_uuid": "..."
+}
+
+ +

POST /api/v1/agents/5w1h/analyze

+

Extract 5W1H (Who, What, When, Where, Why, How) from video chunk text.

+

Request

+
{
+  "chunk_id": "chunk_42",
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94"
+}
+
+ +

Response

+
{
+  "success": true,
+  "5w1h": {
+    "who": ["Cary Grant"],
+    "what": ["discussing plans"],
+    "when": ["1963"],
+    "where": ["Paris"],
+    "why": ["vacation"],
+    "how": ["in person"]
+  }
+}
+
+ +

POST /api/v1/agents/5w1h/batch

+

Batch analyze multiple chunks for 5W1H extraction.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94",
+  "chunk_ids": ["chunk_1", "chunk_2", "chunk_3"]
+}
+
+ +

GET /api/v1/agents/5w1h/status

+

Get status of the 5W1H agent pipeline.

+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_ERROR_CODES.html b/docs_v1.0/doc_user/API_ERROR_CODES.html new file mode 100644 index 0000000..04ccbb3 --- /dev/null +++ b/docs_v1.0/doc_user/API_ERROR_CODES.html @@ -0,0 +1,207 @@ + + + + +Api Error Codes - Momentry API Docs + + + +
+← Back to index +
+

document_type: "api_reference" +service: "MOMENTRY_CORE" +title: "API Error Codes (API 標準錯誤碼)" +date: "2026-05-17" +version: "V1.1" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

API Error Codes (API 標準錯誤碼)

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+

Error Response Format

+

All API errors follow this JSON structure:

+
{
+  "success": false,
+  "error": {
+    "code": "E001_NOT_FOUND",
+    "message": "Resource not found",
+    "details": {"resource": "file_uuid", "value": "abc"}
+  }
+}
+
+ +

Error Code List

+

Generic Errors (E0xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E001_NOT_FOUND404Resource not found (file, identity, chunk)
E002_DUPLICATE409Resource already exists
E003_VALIDATION400Request parameter validation failed
E004_UNAUTHORIZED401Invalid API key or token
E005_INTERNAL500Internal server error
+

Processor Errors (E1xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E101_PROCESSOR_FAIL500Python script execution failed
E102_TIMEOUT504Processing timeout
E103_RESUME_FAIL500Resume failed (checkpoint not found)
E104_NO_VIDEO400Video file path not found
+

Identity Errors (E2xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E201_FACE_NOT_FOUND404Face detection not found
E202_MERGE_CONFLICT409Identity merge conflict
E203_CANDIDATE_EMPTY404No candidates available for confirmation
+

TMDb Errors (E3xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E301_TMDB_NO_KEY400TMDB_API_KEY environment variable not set
E302_TMDB_UNREACHABLE502TMDb API unreachable or timed out
E303_TMDB_CACHE_NOT_FOUND200No local TMDb cache; run prefetch first
E304_TMDB_PROBE_FAILED500TMDb probe execution failed
E305_TMDB_MOVIE_NOT_FOUND404No matching TMDb movie found from filename
+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_INDEX.html b/docs_v1.0/doc_user/API_INDEX.html new file mode 100644 index 0000000..a345701 --- /dev/null +++ b/docs_v1.0/doc_user/API_INDEX.html @@ -0,0 +1,125 @@ + + + + +Api Index - Momentry API Docs + + + +
+← Back to index +
+

document_type: "api_reference" +service: "MOMENTRY_CORE" +title: "Momentry Core API 文件總覽" +date: "2026-05-17" +version: "V1.0" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

Momentry Core API 文件總覽

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+

📁 文件結構

+
API_WORKSPACE/
+└── modules/
+   ├── _template.md   One-line description of what this module covers
+   ├── 01_auth.md   Authentication  login, logout, JWT, session cookie, API key
+   ├── 02_health.md   Health check endpoints
+   ├── 03_register.md   File registration  register, scan
+   ├── 04_lookup.md   File lookup by name and unregistration
+   ├── 05_process.md   Processing pipeline  trigger, probe, progress, jobs
+   ├── 06_search.md   Vector search, hybrid search, BM25, n8n, visual, identity text search
+   ├── 07_identity.md   Global identities  CRUD, detail, files, faces, bind, unbind, search
+   ├── 08_identity_agent.md   Identity agent  analyze, suggest, merge, clustering
+   ├── 08_media.md   Video streaming & frame extraction
+   ├── 09_tmdb.md   TMDb enrichment endpoints  prefetch, probe, resource, check
+   ├── 10_pipeline.md   Stats endpoints, inference health, sftpgo status
+   ├── 11_error_codes.md   Standard API error codes
+   ├── 12_agent.md   
+└── (generated files  GUIDES/)
+
+ +

快速選擇指南

+ + + + + + + + + + + + + + + + + + + + + + + + + +
需求閱讀文件
查看所有 API 端點(curl 範例版)GUIDES/API_ENDPOINTS.md
查看快速端點摘要GUIDES/API_QUICK_REFERENCE.md
執行 TMDb EnrichmentGUIDES/TMDb_User_Guide.md
查看錯誤碼GUIDES/API_ERROR_CODES.md
+

文件模組清單

+
    +
  • _template — One-line description of what this module covers
  • +
  • 01_auth — Authentication — login, logout, JWT, session cookie, API key
  • +
  • 02_health — Health check endpoints
  • +
  • 03_register — File registration — register, scan
  • +
  • 04_lookup — File lookup by name and unregistration
  • +
  • 05_process — Processing pipeline — trigger, probe, progress, jobs
  • +
  • 06_search — Vector search, hybrid search, BM25, n8n, visual, identity text search
  • +
  • 07_identity — Global identities — CRUD, detail, files, faces, bind, unbind, search
  • +
  • 08_identity_agent — Identity agent — analyze, suggest, merge, clustering
  • +
  • 08_media — Video streaming & frame extraction
  • +
  • 09_tmdb — TMDb enrichment endpoints — prefetch, probe, resource, check
  • +
  • 10_pipeline — Stats endpoints, inference health, sftpgo status
  • +
  • 11_error_codes — Standard API error codes
  • +
  • 12_agent
  • +
+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_QUICK_REFERENCE.html b/docs_v1.0/doc_user/API_QUICK_REFERENCE.html new file mode 100644 index 0000000..70bd218 --- /dev/null +++ b/docs_v1.0/doc_user/API_QUICK_REFERENCE.html @@ -0,0 +1,2105 @@ + + + + +Api Quick Reference - Momentry API Docs + + + +
+← Back to index +
+

document_type: "api_reference" +service: "MOMENTRY_CORE" +title: "Momentry Core API 快速查詢表" +date: "2026-05-17" +version: "V1.1" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

Momentry Core API 快速查詢表

+

Base URL

+ + + + + + + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Playground (Dev)http://localhost:3003Development and testing
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+
API="http://localhost:3003"
+KEY="your-api-key-here"
+
+ +

Authentication

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<uuid>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients (n8n, CLI, scripts)
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<uuid>
+# No manual header needed for subsequent requests
+
+ +
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +
momentry api-key create "My API Key" --key-type user
+
+ +
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<uuid>"
+
+ + + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+

Health Check

+
curl "$API/health" | jq '{status, version}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok or degraded
versionstringSemver version
build_git_hashstringGit commit hash
build_timestampstringBinary build time
uptime_msintegerMilliseconds since server start
+
curl "$API/health/detailed" | jq '{status, services, resources: {cpu: .resources.cpu_used_percent, memory: .resources.memory_used_percent}}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok if all essential services healthy
servicesobjectPer-service status (postgres, redis, qdrant)
services.*.statusstringok, error, or degraded
services.*.latency_msintResponse time in milliseconds
resourcesobjectCPU, memory usage
pipeline.scripts_readybooleanScripts directory accessible
pipeline.scripts_countintNumber of Python processor scripts
pipeline.processorsobjectPer-processor availability
pipeline.models_readybooleanModels directory accessible
pipeline.scripts_integrityobjectSHA256 checksum verification results
schema.okbooleanAll required migrations applied
identities.syncedbooleanIdentity file count matches DB count
integrations.tmdbobjectTMDB API key config and reachability
+ + + + + + + + + + + + + + + + + + + + + +
Conditionstatus
All services okok
Any service errordegraded
Postgres or Redis errordegraded (server still responds)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/ingestNoIngest pipeline statistics
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/inferenceNoInference service (LLM) health
+
curl "$API/api/v1/stats/ingest"
+
+ +
+

File Registration

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+

File Lookup

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_namestringYesFile name to search for (partial matches supported)
+
# Look up a specific file
+curl -s "$API/api/v1/files/lookup?file_name=video.mp4" \
+  -H "X-API-Key: $KEY"
+
+# Partial name search
+curl -s "$API/api/v1/files/lookup?file_name=charade" \
+  -H "X-API-Key: $KEY" | jq '.matches[].file_name'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_namestringSearched name
existsbooleanExact name match exists
matchesarrayArray of matching registered files
matches[].file_uuidstring32-char hex UUID
matches[].file_namestringRegistered file name
matches[].file_typestring"video", "audio", or null
matches[].statusstringRegistration/processing status
next_namestringSuggested name for avoiding conflicts
+

Unregister

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Removed (default)Not removed
Database records (videos, chunks, embeddings, processor_results, pre_chunks)The original source video file on disk
Processor output JSON files ({uuid}.*.json) — unless delete_output_files: falseTemp/working directories
In-memory cache entries
MongoDB cached lists
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstring*Single file UUID to delete
file_pathstring*Directory path (for batch delete)
patternstring*Regex pattern (requires file_path)
delete_output_filesbooleanNotrueIf true, also delete processor output JSON files ({uuid}.*.json). Set to false to keep them.
+
# Delete a single file by UUID (default: also deletes output JSON files)
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# Keep output JSON files, only delete DB records
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "delete_output_files": false}'
+
+# Batch delete all mp4 files in a directory
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanTrue if deletion succeeded
file_uuidstringUUID of the deleted file (single mode)
messagestringHuman-readable status
+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Neither file_uuid nor file_path+pattern provided
404File UUID not found
401Missing or invalid API key
+
+

Processing Pipeline

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
processorsstring[]NoallSpecific processors to run: ["asr","cut","yolo","ocr","face","pose","asrx","visual_chunk"]
rulesstring[]NoallRule names to apply (currently unused)
+
# Run all processors
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" -d '{}'
+
+# Run specific processors only
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"processors": ["asr", "face", "yolo"]}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
job_idintegerMonitor job ID (for job tracking)
file_uuidstring32-char hex UUID of the file
statusstring"processing"
pidsinteger[]Process IDs of started processors
messagestringHuman-readable status
+ + + + + + + + + + + + + + + + + +
HTTPWhen
404File UUID not found
401Missing or invalid API key
+
curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
file_namestringFile name
file_sizeintegerFile size in bytes (from filesystem)
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerEstimated total frames
cachedbooleanTrue if result was from cached probe JSON
formatobjectContainer format info (ffprobe format section)
streamsarrayArray of stream info objects
+
curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
overall_progressintegerOverall progress percentage (0–100)
processorsarrayPer-processor status list
processors[].processor_typestringProcessor name (asr, cut, yolo, etc.)
processors[].statusstring"pending", "running", "complete", or "failed"
processors[].progressintegerPer-processor progress (0–100)
cpu_percentfloatCurrent CPU usage
gpu_percentfloatCurrent GPU utilization
memory_percentfloatCurrent memory usage
+
curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | {uuid, status}]}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jobsarrayArray of job info objects
jobs[].idintegerJob ID
jobs[].uuidstringFile UUID being processed
jobs[].statusstring"pending", "running", "completed", "failed"
jobs[].current_processorstringCurrently active processor, or null
countintegerTotal job count
pageintegerCurrent page number
page_sizeintegerJobs per page
+
+

Search APIs

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict search to a specific file
limitintegerNo10Max results
pageintegerNo1Page number (1-based)
page_sizeintegerNolimitItems per page (alias: limit)
modestringNosmartSearch mode: "vector" or "smart"
+
# Search across all files
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+# Search within a specific file
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "file_uuid": "'"$FILE_UUID"'"}'
+
+# Paginated search
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "page": 1, "page_size": 3}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
resultsarrayArray of matched chunks (deduplicated)
results[].uuidstringFile UUID
results[].chunk_idstringChunk identifier
results[].chunk_typestring"sentence", "cut", "trace", "visual"
results[].start_timefloatStart time in seconds
results[].end_timefloatEnd time in seconds
results[].textstringChunk text content
results[].scorefloatSimilarity score (0.0–1.0)
querystringOriginal search query
totalintegerTotal matching results
pageintegerCurrent page number
page_sizeintegerItems per page
+ + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict search to a specific file
limitintegerNo10Max results
pageintegerNo1Page number (1-based)
page_sizeintegerNolimitItems per page
vector_weightfloatNo0.5Weight for vector score (0.0–1.0)
bm25_weightfloatNo0.5Weight for BM25 score (0.0–1.0)
+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].vector_scorefloatVector similarity score
results[].bm25_scorefloatBM25 text score
results[].combined_scorefloatWeighted combination of both scores
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/n8n/searchN8N-format vector search
POST/api/v1/n8n/search/bm25N8N-format BM25 search
POST/api/v1/n8n/search/hybridN8N-format hybrid search
POST/api/v1/n8n/search/smartN8N-format smart search
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict to specific file
typesstring[]No["chunk"]Search types: chunk, frame, person
filtersobjectNoFilter criteria (confidence, object class, speaker, etc.)
pageintegerNo1Page number
page_sizeintegerNo20Items per page
time_rangefloat[2]NoTime range [start, end] in seconds
+
curl -s -X POST "$API/api/v1/n8n/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
resultsarrayArray of search results
results[].typestringResult type: "chunk", "frame", or "person"
results[].chunk_idstringChunk identifier
results[].chunk_typestringChunk type
results[].file_uuidstringFile UUID
results[].start_timefloatStart time in seconds
results[].end_timefloatEnd time in seconds
results[].textstringContent text
results[].scorefloatRelevance score
totalintegerTotal matching results
pageintegerCurrent page
page_sizeintegerItems per page
took_msintegerQuery execution time in milliseconds
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to search within
qstringYesText search query
limitintegerNoMax results (default 50)
pageintegerNo1
page_sizeintegerNolimit
+
curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=charade&page=1&page_size=5" \
+  -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringChunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
text_contentstringChunk text content
identity_idintegerIdentity ID (null if no identity matched)
identity_namestringIdentity name (null if no identity matched)
identity_sourcestringIdentity origin. .json = established from identity.json files on disk (unified format for all sources). tmdb = from .json via TMDb enrichment probe, then matched against faces. auto = pipeline face matching result; only stranger identities are auto-created. user_defined = manual. merged = merged identities.
trace_idintegerFace trace ID (null if no trace matched)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
qstringYesIdentity name search
file_uuidstringNoRestrict to specific file
limitintegerNoMax results (default 50)
+
curl -s -X POST "$API/api/v1/identities/search" \
+  -H "X-API-Key: $KEY" \
+  -d '{"q": "Cary Grant"}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/search/visualSearch visual chunks by criteria (object classes, density)
POST/api/v1/search/visual/classSearch by specific object class
POST/api/v1/search/visual/densitySearch by spatial density range
POST/api/v1/search/visual/statsGet visual detection statistics
POST/api/v1/search/visual/combinationSearch by object class combination
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID
criteria.required_classesstring[]NoRequired object classes (e.g., ["person", "car"])
criteria.min_confidencefloatNoMinimum confidence threshold
criteria.min_spatial_densityfloatNoMinimum spatial density
criteria.max_spatial_densityfloatNoMaximum spatial density
+
curl -s -X POST "$API/api/v1/search/visual" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "criteria": {"required_classes": ["person", "car"]}}'
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
chunksarrayArray of matching visual chunks
chunks[].file_uuidstringFile UUID
chunks[].chunk_idstringChunk identifier
chunks[].detectionsarrayObject detections in this chunk
totalintegerTotal matching chunks
+
+

TMDb Enrichment

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to enrich
+
curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +
curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \
+  -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}'
+
+ +
curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \
+  | jq '{identities_seeded, cache_files}'
+
+ +
curl -s -X POST "$API/api/v1/resource/tmdb/check" \
+  -H "X-API-Key: $KEY" | jq '.status'
+
+ +
+

POST /api/v1/agents/translate

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
textstringText to translate
target_languagestringTarget language name (e.g. "Traditional Chinese", "Japanese")
source_languagestringSource language (default: "auto")
+

Response

+

```json +| Source | Target | Quality | +|--------|--------|---------| +| English | Traditional Chinese | ✅ | +| English | Japanese | ✅ | +| Chinese | English | ✅ | +| English | French | ✅ | +| Chinese | Japanese | ✅ |

+

Errors

+ + + + + + + + + + + + + + + + + +
StatusCondition
500LLM unreachable or response parse failure
401Missing/invalid auth
+

GET /api/v1/agents/identity/status

+

Get status of the identity agent pipeline.

+

Response

+

```json

+

POST /api/v1/agents/suggest/clustering

+

POST /api/v1/agents/suggest/merge

+

POST /api/v1/agents/5w1h/analyze

+

Response

+

```json

+

POST /api/v1/agents/5w1h/batch

+

GET /api/v1/agents/5w1h/status

+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_REFERENCE.html b/docs_v1.0/doc_user/API_REFERENCE.html new file mode 100644 index 0000000..50d2d1d --- /dev/null +++ b/docs_v1.0/doc_user/API_REFERENCE.html @@ -0,0 +1,3684 @@ + + + + +Api Reference - Momentry API Docs + + + +
+← Back to index +
+

document_type: "api_reference" +service: "MOMENTRY_CORE" +title: "Momentry Core API Reference" +date: "2026-05-17" +version: "V1.1" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

Momentry Core API Reference

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+

Base URL

+ + + + + + + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Playground (Dev)http://localhost:3003Development and testing
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+

All examples in this documentation use these environment variables:

+
API="http://localhost:3003"
+KEY="your-api-key-here"
+FILE_UUID="your-file-uuid-here"   # used by the file-level examples below
+
+ +

Authentication

+

All endpoints under /api/v1/* require authentication. +The following endpoints are public (no auth needed):

+
    +
  • GET /health
  • +
  • POST /api/v1/auth/login
  • +
  • POST /api/v1/auth/logout
  • +
+

Three Authentication Modes

+

The system supports three authentication methods, checked in priority order by the middleware:

+
Middleware priority:
+  1. Session Cookie (Portal/browser)
+  2. JWT Bearer (API clients: n8n, CLI)
+  3. API Key Header (legacy compatibility)
+  4. API Key Query Param (?api_key=)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<uuid>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients (n8n, CLI, scripts)
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+
+

Login

+

Default accounts & API keys:

+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+

The demo API key is set via MOMENTRY_DEMO_API_KEY env var and can be used in place of JWT for marcom integrations:

+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ +

Success Response

+
{
+  "success": true,
+  "jwt": "eyJhbGciOiJIUzI1NiIs...",
+  "api_key": "muser_...",
+  "user": {
+    "username": "admin",
+    "role": "admin"
+  },
+  "expires_at": "2026-05-18T13:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+

The login endpoint also sets a Set-Cookie header for browser-based clients:

+
Set-Cookie: session_id=<uuid>; Path=/api; HttpOnly; SameSite=Strict; Max-Age=86400
+
+ +

Error Response (401)

+
{
+  "success": false,
+  "message": "Invalid username or password"
+}
+
+ +
+

Using JWT

+

JWT is preferred for API clients (n8n, CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless).

+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +

JWT is short-lived (1 hour). When it expires, request a new one via login.

+
+

Using Session Cookie (Browser)

+

Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed.

+
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<uuid>
+# No manual header needed for subsequent requests
+
+ +

The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF).

+
+

Using Legacy API Key

+
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +

API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation.

+

Obtaining an API Key (CLI)

+
momentry api-key create "My API Key" --key-type user
+
+ +
+

Logout

+
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<uuid>"
+
+ +

What logout does

+ + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+

Example: full session lifecycle

+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +
+

Authentication Flow Summary

+
Login Request
+     │
+     ▼
+┌──────────────────┐
+│  1. Check users  │ ← users table (argon2 password verify)
+│     table        │
+└──────┬───────────┘
+       │
+   ┌───┴───┐
+   │ match │
+   └───┬───┘
+       │
+       ▼
+┌──────────────────┐
+│  2. Create JWT   │ ← 1h expiry, signed with JWT_SECRET
+├──────────────────┤
+│  3. Create       │ ← 24h expiry, stored in sessions table
+│     session      │
+├──────────────────┤
+│  4. Set-Cookie   │ ← HttpOnly, SameSite=Strict, Path=/api
+├──────────────────┤
+│  5. Return       │ ← JWT + api_key + user info to client
+└──────────────────┘
+
+ +
Protected Request
+     │
+     ▼
+┌──────────────────────┐
+│  Middleware checks:  │
+│                      │
+│  1. Cookie session?  │ → DB lookup session → get api_key → verify
+│                      │
+│  2. JWT Bearer?      │ → verify JWT signature → decode claims
+│                      │
+│  3. X-API-Key?       │ → SHA256 hash → DB lookup → verify
+│                      │
+│  4. ?api_key=?       │ → same as #3
+│                      │
+│  5. None → 401       │
+└──────────────────────┘
+
+ +
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+

Related

+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+

Health Check

+

GET /health

+

Auth: Public +Scope: system-level

+

Returns basic server health status — used by load balancers and monitoring.

+

Example

+
curl "$API/health" | jq '{status, version}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "build_git_hash": "3a6c1865",
+  "build_timestamp": "2026-05-16T13:38:15Z",
+  "uptime_ms": 3015
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok or degraded
versionstringSemver version
build_git_hashstringGit commit hash
build_timestampstringBinary build time
uptime_msintegerMilliseconds since server start
+
+

GET /health/detailed

+

Auth: Required +Scope: system-level

+

Returns full system health including each service status, resource utilization, pipeline readiness, schema migration status, identity file sync status, and external integrations.

+
+

Requires authentication (JWT, session cookie, or API key). The basic /health endpoint remains public for load balancer checks.

+
+

Example

+
curl -s "$API/health/detailed" -H "X-API-Key: $KEY" | jq '{status, services, resources: {cpu: .resources.cpu_used_percent, memory: .resources.memory_used_percent}}'
+
+ +

Response (200)

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "services": {
+    "postgres": {"status": "ok", "latency_ms": 3},
+    "redis": {"status": "ok", "latency_ms": 1},
+    "qdrant": {"status": "ok", "latency_ms": 5}
+  },
+  "resources": {
+    "cpu_used_percent": 12.5,
+    "memory_available_mb": 32768,
+    "memory_used_percent": 31.7
+  },
+  "pipeline": {
+    "scripts_ready": true,
+    "scripts_count": 345,
+    "processors": {
+      "asr": true,
+      "yolo": true,
+      "face": true,
+      "pose": true,
+      "ocr": true,
+      "cut": true,
+      "scene": true,
+      "asrx": true,
+      "visual_chunk": true
+    },
+    "models_ready": true,
+    "models_count": 42,
+    "scripts_integrity": {"matched": 332, "total": 345, "ok": false},
+    "ffmpeg": true
+  },
+  "schema": {
+    "table_exists": true,
+    "applied": [{"filename": "migrate_add_users_table.sql"}],
+    "required": [],
+    "ok": true
+  },
+  "identities": {
+    "directory_exists": true,
+    "files_count": 3481,
+    "index_ok": true,
+    "db_count": 3481,
+    "synced": true
+  },
+  "integrations": {
+    "tmdb": {
+      "api_key_configured": false,
+      "enabled": false,
+      "api_reachable": null
+    }
+  }
+}
+
+ +

Response Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
statusstringok if all essential services healthy
servicesobjectPer-service status (postgres, redis, qdrant)
services.*.statusstringok, error, or degraded
services.*.latency_msintResponse time in milliseconds
resourcesobjectCPU, memory usage
pipeline.scripts_readybooleanScripts directory accessible
pipeline.scripts_countintNumber of Python processor scripts
pipeline.processorsobjectPer-processor availability
pipeline.models_readybooleanModels directory accessible
pipeline.scripts_integrityobjectSHA256 checksum verification results
schema.okbooleanAll required migrations applied
identities.syncedbooleanIdentity file count matches DB count
integrations.tmdbobjectTMDB API key config and reachability
+

Health status rules

+ + + + + + + + + + + + + + + + + + + + + +
Conditionstatus
All services okok
Any service errordegraded
Postgres or Redis errordegraded (server still responds)
+
+

Stats Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/ingestNoIngest pipeline statistics
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/inferenceNoInference service (LLM) health
+
curl "$API/api/v1/stats/ingest"
+
+ +
+

File Registration

+

POST /api/v1/files/register

+

Auth: Required +Scope: file-level

+

Register a video file for processing. Returns the file's metadata and UUID.

+

New in v0.1.2: Registration now automatically triggers the processing pipeline — no need to call POST /api/v1/file/:uuid/process separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H)

+

If the file already exists (same content hash), returns the existing record with already_exists: true.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+

Example

+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "file_type": "video",
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "already_exists": false,
+  "message": "File registered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+
+

GET /api/v1/files/scan

+

Auth: Required +Scope: file-level

+

Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+

Example

+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ +

Response (200)

+
{
+  "files": [
+    {
+      "file_name": "video.mp4",
+      "file_size": 12345678,
+      "is_registered": true,
+      "file_uuid": "3a6c1865...",
+      "status": "completed",
+      "registration_time": "2026-05-16T12:00:00Z",
+      "job_id": 42
+    }
+  ],
+  "total": 107,
+  "filtered_total": 80,
+  "page": 1,
+  "page_size": 20,
+  "total_pages": 4,
+  "registered_count": 26,
+  "unregistered_count": 81
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+

Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+

File Lookup

+

GET /api/v1/files/lookup

+

Auth: Required +Scope: file-level

+

Search registered files by file name. Performs a case-insensitive LIKE search on the file name column. Returns basic info about matching files.

+

Query Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_namestringYesFile name to search for (partial matches supported)
+

Example

+
# Look up a specific file
+curl -s "$API/api/v1/files/lookup?file_name=video.mp4" \
+  -H "X-API-Key: $KEY"
+
+# Partial name search
+curl -s "$API/api/v1/files/lookup?file_name=charade" \
+  -H "X-API-Key: $KEY" | jq '.matches[].file_name'
+
+ +

Response (200)

+
{
+  "file_name": "video.mp4",
+  "exists": true,
+  "matches": [
+    {
+      "file_uuid": "a03485a40b2df2d3",
+      "file_name": "video.mp4",
+      "file_type": "video",
+      "status": "completed"
+    }
+  ],
+  "next_name": "video (2).mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_namestringSearched name
existsbooleanExact name match exists
matchesarrayArray of matching registered files
matches[].file_uuidstring32-char hex UUID
matches[].file_namestringRegistered file name
matches[].file_typestring"video", "audio", or null
matches[].statusstringRegistration/processing status
next_namestringSuggested name for avoiding conflicts
+
+

Unregister

+

POST /api/v1/unregister

+

Auth: Required +Scope: file-level

+

Delete a registered file from the system. Supports single file by UUID, or batch by directory + regex pattern.

+

What gets deleted

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Removed (default)Not removed
Database records (videos, chunks, embeddings, processor_results, pre_chunks)The original source video file on disk
Processor output JSON files ({uuid}.*.json) — unless delete_output_files: falseTemp/working directories
In-memory cache entries
MongoDB cached lists
+
+

⚠️ Database deletion is irreversible. To keep output files, set "delete_output_files": false.

+
+

Request Parameters

+

At least one mode must be specified: either file_uuid alone, or file_path + pattern together.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstring*Single file UUID to delete
file_pathstring*Directory path (for batch delete)
patternstring*Regex pattern (requires file_path)
delete_output_filesbooleanNotrueIf true, also delete processor output JSON files ({uuid}.*.json). Set to false to keep them.
+

Example

+
# Delete a single file by UUID (default: also deletes output JSON files)
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# Keep output JSON files, only delete DB records
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "delete_output_files": false}'
+
+# Batch delete all mp4 files in a directory
+curl -s -X POST "$API/api/v1/unregister" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "a03485a40b2df2d3",
+  "message": "Video unregistered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanTrue if deletion succeeded
file_uuidstringUUID of the deleted file (single mode)
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Neither file_uuid nor file_path+pattern provided
404File UUID not found
401Missing or invalid API key
+
+

Processing Pipeline

+

POST /api/v1/file/:file_uuid/process

+

Auth: Required +Scope: file-level

+

Trigger the processing pipeline for a registered file. Creates a monitor job that the worker picks up and processes sequentially. Returns immediately with the job info—processing runs asynchronously in the background.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
processorsstring[]NoallSpecific processors to run: ["asr","cut","yolo","ocr","face","pose","asrx","visual_chunk"]
rulesstring[]NoallRule names to apply (currently unused)
+

Example

+
# Run all processors
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" -d '{}'
+
+# Run specific processors only
+curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"processors": ["asr", "face", "yolo"]}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "job_id": 42,
+  "file_uuid": "3a6c1865...",
+  "status": "processing",
+  "pids": [12345, 12346],
+  "message": "Processing triggered for video.mp4"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
job_idintegerMonitor job ID (for job tracking)
file_uuidstring32-char hex UUID of the file
statusstring"processing"
pidsinteger[]Process IDs of started processors
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404File UUID not found
401Missing or invalid API key
+
+

GET /api/v1/file/:file_uuid/probe

+

Auth: Required +Scope: file-level

+

Get ffprobe metadata for a registered file. Returns video/audio stream info, codec details, duration, resolution, and frame rate.

+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_size": 794863677,
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "cached": true,
+  "format": {
+    "filename": "/path/to/video.mp4",
+    "format_name": "mov,mp4,m4a,3gp",
+    "duration": "120.5",
+    "size": "12345678",
+    "bit_rate": "819200"
+  },
+  "streams": [
+    {
+      "index": 0,
+      "codec_name": "h264",
+      "codec_type": "video",
+      "width": 1920,
+      "height": 1080,
+      "r_frame_rate": "24/1",
+      "duration": "120.5"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
file_namestringFile name
file_sizeintegerFile size in bytes (from filesystem)
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerEstimated total frames
cachedbooleanTrue if result was from cached probe JSON
formatobjectContainer format info (ffprobe format section)
streamsarrayArray of stream info objects
+
+

GET /api/v1/progress/:file_uuid

+

Auth: Required +Scope: file-level

+

Get real-time processing progress for a file. Queries Redis for per-processor status and the database for file metadata. Also includes system resource stats.

+

Example

+
curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}'
+
+ +

Response (200)

+
{
+  "file_uuid": "3a6c1865...",
+  "overall_progress": 71,
+  "cpu_percent": 45.2,
+  "gpu_percent": 30.1,
+  "memory_percent": 62.4,
+  "processors": [
+    {"processor_type": "asr", "status": "complete", "progress": 100},
+    {"processor_type": "yolo", "status": "running", "progress": 65},
+    {"processor_type": "face", "status": "pending", "progress": 0}
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstring32-char hex UUID
overall_progressintegerOverall progress percentage (0–100)
processorsarrayPer-processor status list
processors[].processor_typestringProcessor name (asr, cut, yolo, etc.)
processors[].statusstring"pending", "running", "complete", or "failed"
processors[].progressintegerPer-processor progress (0–100)
cpu_percentfloatCurrent CPU usage
gpu_percentfloatCurrent GPU utilization
memory_percentfloatCurrent memory usage
+
+

GET /api/v1/jobs

+

Auth: Required +Scope: system-level

+

List all processing jobs (monitor jobs) in the system. Shows job status, which file each job is processing, and current processor info.

+

Example

+
curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | {uuid, status}]}'
+
+ +

Response (200)

+
{
+  "jobs": [
+    {
+      "id": 42,
+      "uuid": "3a6c1865...",
+      "status": "running",
+      "current_processor": "yolo",
+      "created_at": "2026-05-16T12:00:00Z",
+      "started_at": "2026-05-16T12:01:00Z"
+    }
+  ],
+  "count": 15,
+  "page": 1,
+  "page_size": 20
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jobsarrayArray of job info objects
jobs[].idintegerJob ID
jobs[].uuidstringFile UUID being processed
jobs[].statusstring"pending", "running", "completed", "failed"
jobs[].current_processorstringCurrently active processor, or null
countintegerTotal job count
pageintegerCurrent page number
page_sizeintegerJobs per page
+
+

Search APIs

+

Standard Search

+

POST /api/v1/search

+

Auth: Required +Scope: file-level

+

Semantic/vector search across indexed video chunks. Returns matching chunks with scores.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict search to a specific file
limitintegerNo10Max results
pageintegerNo1Page number (1-based)
page_sizeintegerNolimitItems per page (alias: limit)
modestringNosmartSearch mode: "vector" or "smart"
+

Example

+
# Search across all files
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+# Search within a specific file
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "file_uuid": "'"$FILE_UUID"'"}'
+
+# Paginated search
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "page": 1, "page_size": 3}'
+
+ +

Response (200)

+
{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "query": "charade",
+  "total": 15,
+  "page": 1,
+  "page_size": 3
+}
+
+> Results are deduplicated by `chunk_id` (highest score wins) before pagination.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `results` | array | Array of matched chunks (deduplicated) |
+| `results[].uuid` | string | File UUID |
+| `results[].chunk_id` | string | Chunk identifier |
+| `results[].chunk_type` | string | `"sentence"`, `"cut"`, `"trace"`, `"visual"` |
+| `results[].start_time` | float | Start time in seconds |
+| `results[].end_time` | float | End time in seconds |
+| `results[].text` | string | Chunk text content |
+| `results[].score` | float | Similarity score (0.0–1.0) |
+| `query` | string | Original search query |
+| `total` | integer | Total matching results |
+| `page` | integer | Current page number |
+| `page_size` | integer | Items per page |
+
+#### Error Responses
+
+| HTTP | When |
+|------|------|
+| `401` | Missing or invalid API key |
+
+---
+
+### `POST /api/v1/search/hybrid`
+
+**Auth**: Required
+**Scope**: file-level
+
+Hybrid search combining vector similarity score and BM25 text score into a combined score.
+
+#### Request Parameters
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `query` | string | Yes |  | Search text |
+| `file_uuid` | string | No |  | Restrict search to a specific file |
+| `limit` | integer | No | 10 | Max results |
+| `page` | integer | No | 1 | Page number (1-based) |
+| `page_size` | integer | No | `limit` | Items per page |
+| `vector_weight` | float | No | 0.5 | Weight for vector score (0.0–1.0) |
+| `bm25_weight` | float | No | 0.5 | Weight for BM25 score (0.0–1.0) |
+
+#### Response
+
+```json
+{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "vector_score": 0.85,
+      "bm25_score": 0.72,
+      "combined_score": 0.79
+    }
+  ],
+  "query": "charade"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].vector_scorefloatVector similarity score
results[].bm25_scorefloatBM25 text score
results[].combined_scorefloatWeighted combination of both scores
+
+

POST /api/v1/search/bm25

+

Auth: Required +Scope: file-level

+

BM25 full-text keyword search. Good for exact term matching. Returns results with BM25 score only.

+

Request Parameters

+

Same as standard search: query, file_uuid, limit.

+

Response

+

Returns the same structure as standard search, with score representing the BM25 relevance score.

+
+

N8N Search

+

N8N-format search endpoints. Response format is optimized for n8n workflow consumption.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/n8n/searchN8N-format vector search
POST/api/v1/n8n/search/bm25N8N-format BM25 search
POST/api/v1/n8n/search/hybridN8N-format hybrid search
POST/api/v1/n8n/search/smartN8N-format smart search
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict to specific file
typesstring[]No["chunk"]Search types: chunk, frame, person
filtersobjectNoFilter criteria (confidence, object class, speaker, etc.)
pageintegerNo1Page number
page_sizeintegerNo20Items per page
time_rangefloat[2]NoTime range [start, end] in seconds
+

Example

+
curl -s -X POST "$API/api/v1/n8n/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+ +

Response (200)

+
{
+  "query": "charade",
+  "results": [
+    {
+      "type": "chunk",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "file_uuid": "3a6c1865...",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "total": 15,
+  "page": 1,
+  "page_size": 20,
+  "took_ms": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
resultsarrayArray of search results
results[].typestringResult type: "chunk", "frame", or "person"
results[].chunk_idstringChunk identifier
results[].chunk_typestringChunk type
results[].file_uuidstringFile UUID
results[].start_timefloatStart time in seconds
results[].end_timefloatEnd time in seconds
results[].textstringContent text
results[].scorefloatRelevance score
totalintegerTotal matching results
pageintegerCurrent page
page_sizeintegerItems per page
took_msintegerQuery execution time in milliseconds
+
+

Identity Text Search

+

Two paths for searching identities by text.

+

Path A: GET /api/v1/search/identity_text

+

Search chunk text content and return associated identities (if any).

+

Auth: Required +Scope: file-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to search within
qstringYesText search query
limitintegerNoMax results (default 50)
pageintegerNo1
page_sizeintegerNolimit
+
curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=charade&page=1&page_size=5" \
+  -H "X-API-Key: $KEY"
+
+ +
Response
+
{
+  "success": true,
+  "total": 0,
+  "page": 1,
+  "page_size": 5,
+  "limit": 50,
+  "results": []
+}
+
+ +

Each result (IdentityTextHit):

+
{
+  "file_uuid": "3a6c1865...",
+  "chunk_id": "sentence_0012",
+  "start_time": 48.8,
+  "end_time": 55.4,
+  "text_content": "charade is a classic film...",
+  "identity_id": 42,
+  "identity_name": "Cary Grant",
+  "identity_source": "tmdb",
+  "trace_id": 10
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringChunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
text_contentstringChunk text content
identity_idintegerIdentity ID (null if no identity matched)
identity_namestringIdentity name (null if no identity matched)
identity_sourcestringIdentity origin. .json = established from identity.json files on disk (unified format for all sources). tmdb = from .json via TMDb enrichment probe, then matched against faces. auto = pipeline face matching result; only stranger identities are auto-created. user_defined = manual. merged = merged identities.
trace_idintegerFace trace ID (null if no trace matched)
+

Path B: POST /api/v1/identities/search

+

Search identity names and return associated face detection text.

+

Auth: Required +Scope: identity-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
qstringYesIdentity name search
file_uuidstringNoRestrict to specific file
limitintegerNoMax results (default 50)
+
curl -s -X POST "$API/api/v1/identities/search" \
+  -H "X-API-Key: $KEY" \
+  -d '{"q": "Cary Grant"}'
+
+ +
+

Visual Chunk Search

+

Search video frames by visual content (object detection results).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/search/visualSearch visual chunks by criteria (object classes, density)
POST/api/v1/search/visual/classSearch by specific object class
POST/api/v1/search/visual/densitySearch by spatial density range
POST/api/v1/search/visual/statsGet visual detection statistics
POST/api/v1/search/visual/combinationSearch by object class combination
+

Request Parameters (Visual Search)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID
criteria.required_classesstring[]NoRequired object classes (e.g., ["person", "car"])
criteria.min_confidencefloatNoMinimum confidence threshold
criteria.min_spatial_densityfloatNoMinimum spatial density
criteria.max_spatial_densityfloatNoMaximum spatial density
+

Example

+
curl -s -X POST "$API/api/v1/search/visual" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "criteria": {"required_classes": ["person", "car"]}}'
+
+ +

Response

+
{
+  "chunks": [
+    {
+      "file_uuid": "3a6c1865...",
+      "chunk_id": "visual_001",
+      "chunk_type": "visual",
+      "detections": [
+        {"class": "person", "confidence": 0.95, "bbox": [100, 200, 150, 350]}
+      ],
+      "start_time": 120.5,
+      "end_time": 125.3
+    }
+  ],
+  "total": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
chunksarrayArray of matching visual chunks
chunks[].file_uuidstringFile UUID
chunks[].chunk_idstringChunk identifier
chunks[].detectionsarrayObject detections in this chunk
totalintegerTotal matching chunks
+
+

Global Identities

+

GET /api/v1/identities

+

Auth: Required +Scope: identity-level

+

List all registered identities with pagination.

+

Example

+
curl -s "$API/api/v1/identities?page=1&page_size=20" -H "X-API-Key: $KEY" | jq '{count, identities: [.identities[] | {name}]}'
+
+ +
+

GET /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Get detailed information for a specific identity, including metadata and TMDb references.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID" -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "identity_type": "people",
+  "source": "tmdb",
+  "status": "confirmed",
+  "tmdb_id": 112,
+  "tmdb_profile": "https://image.tmdb.org/t/p/w185/abc.jpg",
+  "metadata": {},
+  "reference_data": {},
+  "created_at": "2026-05-16T12:00:00Z",
+  "updated_at": null
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
uuidstring32-char hex identity UUID
namestringIdentity name
identity_typestring"people" or null
sourcestring.json, auto, tmdb, user_defined, or merged
statusstring"confirmed", "pending", or "inactive"
tmdb_idintegerTMDb person ID (only if source = tmdb)
tmdb_profilestringTMDb profile image URL
metadataobjectMetadata JSON (tmdb_character, cast_order, etc.)
created_atstringCreation timestamp
+
+

DELETE /api/v1/identity/:identity_uuid

+

Auth: Required +Scope: identity-level

+

Delete an identity permanently.

+
+

GET /api/v1/identity/:identity_uuid/files

+

Auth: Required +Scope: identity-level

+

Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY"
+
+ +
+

GET /api/v1/identity/:identity_uuid/faces

+

Auth: Required +Scope: identity-level

+

Get all face detection records associated with this identity.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID where face was detected
frame_numberintegerFrame number of detection
face_idstringFace ID (format: face_{frame_number})
confidencefloatDetection confidence
+
+

GET /api/v1/identity/:identity_uuid/chunks

+

Auth: Required +Scope: identity-level

+

Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said.

+

Example

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringSentence chunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
textstringSpoken text content
+
+

POST /api/v1/identity/:identity_uuid/bind

+

Auth: Required +Scope: identity-level

+

Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID where face is detected
face_idstringYesFace ID (format: {frame}_{idx})
+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}'
+
+ +
+

POST /api/v1/identity/:identity_uuid/unbind

+

Auth: Required +Scope: identity-level

+

Unbind a face detection from an identity. Removes the identity association from the face record.

+
+

GET /api/v1/identities/search

+

Auth: Required +Scope: identity-level

+

Search identities by name (ILIKE search). Returns matching identity records.

+

Example

+
curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
namestringIdentity name
sourcestringIdentity source
tmdb_idintegerTMDb ID (if source = tmdb)
file_uuidstringAssociated file UUID
+
+
+

POST /api/v1/identity/upload

+

Auth: Required +Scope: identity-level

+

Upload an identity.json file to create or update an identity. Accepts the same format as the identity.json files stored on disk.

+

If an identity with the same name already exists, it will be updated with the new values.

+

Request

+

The request body is an IdentityFile object:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
identity_uuidstringYes32-char hex UUID (hyphens allowed, will be stripped)
namestringYesIdentity display name
identity_typestringNo"people" or null
sourcestringNo.json, auto, tmdb, user_defined, or merged
statusstringNo"confirmed", "pending", or "inactive"
tmdb_idintegerNoTMDb person ID
tmdb_profilestringNoTMDb profile image URL
metadataobjectNoArbitrary metadata JSON
file_bindingsarrayNoArray of { file_uuid, trace_ids, face_count } (informational)
+

Example

+
curl -s -X POST "$API/api/v1/identity/upload" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "version": 1,
+    "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+    "name": "Cary Grant",
+    "identity_type": "people",
+    "source": ".json",
+    "status": "confirmed",
+    "metadata": {},
+    "file_bindings": []
+  }'
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "name": "Cary Grant",
+  "message": "Identity uploaded successfully"
+}
+
+ +
+
+

POST /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Upload a profile image (JPEG or PNG) for an identity. The image is saved to {output}/identities/{uuid}/profile.{ext}.

+

Uses multipart/form-data with field name image.

+

Example

+
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" \
+  -F "image=@/path/to/photo.jpg"
+
+ +

Response (200)

+
{
+  "success": true,
+  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
+  "path": "/path/to/output/identities/.../profile.jpg",
+  "message": "Profile image saved: profile.jpg"
+}
+
+ +

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400Missing image field or unsupported format
404Identity not found
415Unsupported image type (use JPEG or PNG)
+
+

GET /api/v1/identity/:identity_uuid/profile-image

+

Auth: Required +Scope: identity-level

+

Retrieve the profile image for an identity. Returns the raw image data with appropriate Content-Type header.

+
curl -s "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \
+  -H "X-API-Key: $KEY" -o profile.jpg
+
+ + + + + + + + + + + + + + +
Response HeaderValue
content-typeimage/jpeg or image/png
+
+

GET /api/v1/signals/unbound

+

Auth: Required +Scope: identity-level

+

List unbound face signals — face detections that have not yet been assigned to any identity.

+

Example

+
curl -s "$API/api/v1/signals/unbound" -H "X-API-Key: $KEY"
+
+ +
+

Identity Agent

+

POST /api/v1/agents/identity/analyze

+

Auth: Required +Scope: file-level

+

Run identity matching on a processed file: matches face detection traces against known identities (TMDb, .json, auto) and creates bindings. Optionally uses LLM for enhanced analysis.

+
+

Requires the file's face processor to have completed. Will return an error if face traces are not available.

+
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringYesFile UUID to analyze
use_llmbooleanNofalseEnable LLM-assisted identity analysis
modelstringNoLLM model name (e.g., "gemma4")
auto_merge_thresholdfloatNoConfidence threshold (0.0–1.0) for auto-merging
+

Example

+
# Basic analysis
+curl -s -X POST "$API/api/v1/agents/identity/analyze" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+# With LLM enhancement
+curl -s -X POST "$API/api/v1/agents/identity/analyze" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "use_llm": true, "model": "gemma4"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "identities": [
+    {
+      "identity_id": "a9a901056d6b46ff92da0c3c1a57dff4",
+      "person_ids": ["trace_10", "trace_23"],
+      "confidence": 0.87,
+      "matched": true,
+      "name": "Cary Grant",
+      "source": "tmdb",
+      "stranger": false
+    }
+  ],
+  "processing_status": {
+    "phase": "COMPLETED",
+    "progress": 100
+  }
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstringAnalyzed file UUID
identitiesarrayArray of matched identity results
identities[].identity_idstring32-char identity UUID
identities[].person_idsstring[]Matched trace/person IDs
identities[].confidencefloatMatching confidence (0.0–1.0)
identities[].matchedbooleanWhether this identity was matched to a known entity
identities[].namestringIdentity display name
identities[].sourcestringIdentity source (.json, auto, tmdb, etc.)
identities[].strangerbooleanWhether this is an unmatched stranger trace
processing_statusobjectIdentity agent processing progress
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
400File UUID not provided or invalid
404File not found or face processor not completed
500Analysis failed (LLM error, DB error)
+
+

POST /api/v1/agents/identity/suggest

+

Auth: Required +Scope: identity-level

+

Suggest identity merges based on face embedding similarity analysis. Returns pairs of identities that are similar enough to potentially be the same person.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringYesFile UUID to analyze for merge suggestions
auto_merge_thresholdfloatNoConfidence threshold for auto-suggest
+
curl -s -X POST "$API/api/v1/agents/identity/suggest" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +
+

GET /api/v1/agents/identity/status

+

Auth: Required +Scope: system-level

+

Get the identity agent processing status for a file. Shows current phase and progress.

+
curl -s "$API/api/v1/agents/identity/status?file_uuid=$FILE_UUID" \
+  -H "X-API-Key: $KEY"
+
+ +
+

POST /api/v1/agents/suggest/merge

+

Auth: Required +Scope: identity-level

+

Execute a suggested identity merge. Combines two identities into one, consolidating their face bindings.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
source_uuidstringYesIdentity UUID to merge FROM (will be removed)
target_uuidstringYesIdentity UUID to merge INTO (will be kept)
+
curl -s -X POST "$API/api/v1/agents/suggest/merge" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"source_uuid": "uuid_to_discard", "target_uuid": "uuid_to_keep"}'
+
+ +
+

POST /api/v1/agents/suggest/clustering

+

Auth: Required +Scope: identity-level

+

Suggest face clustering results. Analyzes all face embeddings in a file and groups similar faces into candidate identity clusters.

+
curl -s -X POST "$API/api/v1/agents/suggest/clustering" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +
+

TMDb Enrichment

+
+

⚠️ External resource: TMDb requires internet access, violating Momentry's local-only principle. +All core processing (ASR, YOLO, Face, OCR, Pose, embeddings) runs fully offline. +TMDb enrichment is optional and gated behind TMDB_API_KEY + MOMENTRY_TMDB_PROBE_ENABLED.

+
+

Overview

+

TMDb enrichment is an optional identity enrichment step that can be run after Pipeline face detection completes. The workflow is:

+
    +
  1. Prefetch (requires internet): Download movie cast data from TMDb API → cache to {file_uuid}.tmdb.json
  2. +
  3. Probe: Read local cache → create identities for all cast members (source='tmdb') + save identity.json + download profile image to {OUTPUT}/identities/{uuid}/profile.jpg
  4. +
  5. Match: The worker automatically matches video faces against TMDb identities when MOMENTRY_TMDB_PROBE_ENABLED=true
  6. +
+

POST /api/v1/agents/tmdb/prefetch

+

Auth: Required +Scope: file-level

+

Fetch TMDb cast data for a registered file and cache it locally. This is the only step requiring internet access.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to enrich
+

Example

+
curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +

Response (200)

+
{"success": true, "file_uuid": "...", "cache_path": "/output/...tmdb.json"}
+
+ +

POST /api/v1/file/:file_uuid/tmdb-probe

+

Auth: Required +Scope: file-level

+

Read local TMDb cache and create/update identities. Requires prefetch to have been run first.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \
+  -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}'
+
+ +

Response (200 — identities created)

+
{"success": true, "identities_created": 15, "movie_title": "Charade"}
+
+ +

Response (200 — no cache)

+
{"success": false, "message": "No TMDb cache found. Run tmdb-prefetch first."}
+
+ +

GET /api/v1/resource/tmdb

+

Auth: Required +Scope: system-level

+

View TMDb resource status including configuration, identity counts, and cache file count.

+

Example

+
curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \
+  | jq '{identities_seeded, cache_files}'
+
+ +

POST /api/v1/resource/tmdb/check

+

Auth: Required +Scope: system-level

+

Ping the TMDb API to verify connectivity and measure latency.

+

Example

+
curl -s -X POST "$API/api/v1/resource/tmdb/check" \
+  -H "X-API-Key: $KEY" | jq '.status'
+
+ +

Response

+
{
+  "api_key_configured": true,
+  "enabled": false,
+  "api_reachable": true,
+  "api_latency_ms": 120
+}
+
+ +
+

Stats & Pipeline

+

Stats Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointAuthDescription
GET/api/v1/stats/ingestNoIngest statistics
GET/api/v1/stats/sftpgoNoSFTPGo service status
GET/api/v1/stats/inferenceNoInference service health
+

Configuration

+

POST /api/v1/config/cache

+

Auth: Required +Scope: system-level

+

Toggle the Redis cache on or off.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
enabledbooleanYestrue to enable, false to disable
+

Example

+
curl -s -X POST "$API/api/v1/config/cache" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"enabled": false}'
+
+ +

Unmounted Routes

+

The following routes are defined in source code but are NOT currently mounted in the router:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointSource file
/api/v1/search/universaluniversal_search.rs
/api/v1/search/framesuniversal_search.rs
/api/v1/search/personsuniversal_search.rs
/api/v1/whowho.rs
/api/v1/who/candidateswho.rs
+
+

Error Response Format

+

All API errors follow this JSON structure:

+
{
+  "success": false,
+  "error": {
+    "code": "E001_NOT_FOUND",
+    "message": "Resource not found",
+    "details": {"resource": "file_uuid", "value": "abc"}
+  }
+}
+
+ +

Error Code List

+

Generic Errors (E0xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E001_NOT_FOUND404Resource not found (file, identity, chunk)
E002_DUPLICATE409Resource already exists
E003_VALIDATION400Request parameter validation failed
E004_UNAUTHORIZED401Invalid API key or token
E005_INTERNAL500Internal server error
+

Processor Errors (E1xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E101_PROCESSOR_FAIL500Python script execution failed
E102_TIMEOUT504Processing timeout
E103_RESUME_FAIL500Resume failed (checkpoint not found)
E104_NO_VIDEO400Video file path not found
+

Identity Errors (E2xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E201_FACE_NOT_FOUND404Face detection not found
E202_MERGE_CONFLICT409Identity merge conflict
E203_CANDIDATE_EMPTY404No candidates available for confirmation
+

TMDb Errors (E3xx)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeHTTPDescription
E301_TMDB_NO_KEY400TMDB_API_KEY environment variable not set
E302_TMDB_UNREACHABLE502TMDb API unreachable or timed out
E303_TMDB_CACHE_NOT_FOUND200No local TMDb cache; run prefetch first
E304_TMDB_PROBE_FAILED500TMDb probe execution failed
E305_TMDB_MOVIE_NOT_FOUND404No matching TMDb movie found from filename
+
+

Agent Endpoints

+

Agent endpoints provide AI-powered capabilities including translation, identity analysis, and 5W1H extraction.

+

POST /api/v1/agents/translate

+

Translate text between languages using Gemma4 (llama.cpp, port 8082).

+

Request

+
{
+  "text": "Hello, welcome to Momentry Core.",
+  "target_language": "Traditional Chinese",
+  "source_language": "English"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
textstringText to translate
target_languagestringTarget language name (e.g. "Traditional Chinese", "Japanese")
source_languagestringSource language (default: "auto")
+

Response

+
{
+  "success": true,
+  "translated_text": "您好,歡迎使用 Momentry Core。",
+  "source_language_detected": "English",
+  "model_used": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf"
+}
+
+ +

Supported Language Pairs (tested)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SourceTargetQuality
EnglishTraditional Chinese
EnglishJapanese
ChineseEnglish
EnglishFrench
ChineseJapanese
+

Model

+
    +
  • Model: Gemma4 26B (Q5_K_M)
  • +
  • Engine: llama.cpp at localhost:8082
  • +
  • Endpoint: /v1/chat/completions (OpenAI-compatible)
  • +
  • Temperature: 0.1
  • +
  • Max tokens: 1024
  • +
+

Errors

+ + + + + + + + + + + + + + + + + +
StatusCondition
500LLM unreachable or response parse failure
401Missing/invalid auth
+

GET /api/v1/agents/identity/status

+

Get status of the identity agent pipeline.

+

Response

+
{
+  "status": "idle",
+  "last_analysis": "2026-05-17T12:00:00Z",
+  "identities_processed": 27
+}
+
+ +

POST /api/v1/agents/suggest/clustering

+

Run face clustering to suggest new identity groupings.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94"
+}
+
+ +

POST /api/v1/agents/suggest/merge

+

Merge two identities into one.

+

Request

+
{
+  "from_uuid": "...",
+  "into_uuid": "..."
+}
+
+ +

POST /api/v1/agents/5w1h/analyze

+

Extract 5W1H (Who, What, When, Where, Why, How) from video chunk text.

+

Request

+
{
+  "chunk_id": "chunk_42",
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94"
+}
+
+ +

Response

+
{
+  "success": true,
+  "5w1h": {
+    "who": ["Cary Grant"],
+    "what": ["discussing plans"],
+    "when": ["1963"],
+    "where": ["Paris"],
+    "why": ["vacation"],
+    "how": ["in person"]
+  }
+}
+
+ +

POST /api/v1/agents/5w1h/batch

+

Batch analyze multiple chunks for 5W1H extraction.

+

Request

+
{
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94",
+  "chunk_ids": ["chunk_1", "chunk_2", "chunk_3"]
+}
+
+ +

GET /api/v1/agents/5w1h/status

+

Get status of the 5W1H agent pipeline.

+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/API_TRAINING_MARCOM.html b/docs_v1.0/doc_user/API_TRAINING_MARCOM.html new file mode 100644 index 0000000..e0f78d0 --- /dev/null +++ b/docs_v1.0/doc_user/API_TRAINING_MARCOM.html @@ -0,0 +1,1603 @@ + + + + +Api Training Marcom - Momentry API Docs + + + +
+← Back to index +
+

document_type: "user_manual" +service: "MOMENTRY_CORE" +title: "Momentry Core API 教育訓練手冊" +date: "2026-05-17" +version: "V1.5" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

Momentry Core API 教育訓練手冊

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+ + + + +

About This Manual

+

This training manual is designed for the Marcom team to understand and use the Momentry Core API.

+

Demo Credentials

+

API Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69

+

SFTPGo (for video upload):

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ItemValue
SFTP Hostsftpgo.momentry.ddns.net
SFTP Port2022
Usernamedemo
Passworddemopassword123
Web UIhttps://sftpgo.momentry.ddns.net
+

Quick Examples

+

List all videos:

+
curl -s -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+ +

Search:

+
curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "example", "limit": 5}'
+
+ +
+

Base URL

+ + + + + + + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Playground (Dev)http://localhost:3003Development and testing
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+

All examples in this documentation use these environment variables:

+
API="http://localhost:3003"
+KEY="your-api-key-here"
+
+ +

Authentication

+

All endpoints under /api/v1/* require authentication unless listed below. +The following endpoints are public (no auth needed):

+
    +
  • GET /health
  • +
  • POST /api/v1/auth/login
  • +
  • POST /api/v1/auth/logout
  • +
+

Three Authentication Modes

+

The system supports three authentication methods, checked in priority order by the middleware:

+
Middleware priority:
+  1. Session Cookie (Portal/browser)
+  2. JWT Bearer (API clients: n8n, CLI)
+  3. API Key Header (legacy compatibility)
+  4. API Key Query Param (?api_key=)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<uuid>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients (n8n, CLI, scripts)
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+
+

Login

+

Default accounts & API keys:

+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+

The demo API key is set via MOMENTRY_DEMO_API_KEY env var and can be used in place of JWT for marcom integrations:

+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ +

Success Response

+
{
+  "success": true,
+  "jwt": "eyJhbGciOiJIUzI1NiIs...",
+  "api_key": "muser_...",
+  "user": {
+    "username": "admin",
+    "role": "admin"
+  },
+  "expires_at": "2026-05-18T13:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+

The login endpoint also sets a Set-Cookie header for browser-based clients:

+
Set-Cookie: session_id=<uuid>; Path=/api; HttpOnly; SameSite=Strict; Max-Age=86400
+
+ +

Error Response (401)

+
{
+  "success": false,
+  "message": "Invalid username or password"
+}
+
+ +
+

Using JWT

+

JWT is preferred for API clients (n8n, CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless).

+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +

JWT is short-lived (1 hour). When it expires, request a new one via login.

+
+

Using Session Cookie (Browser)

+

Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed.

+
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<uuid>
+# No manual header needed for subsequent requests
+
+ +

The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF).

+
+

Using Legacy API Key

+
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +

API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation.

+

Obtaining an API Key (CLI)

+
momentry api-key create "My API Key" --key-type user
+
+ +
+

Logout

+
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<uuid>"
+
+ +

What logout does

+ + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+

Example: full session lifecycle

+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +
+

Authentication Flow Summary

+
Login Request
+     │
+     ▼
+┌──────────────────┐
+│  1. Check users  │ ← users table (argon2 password verify)
+│     table        │
+└──────┬───────────┘
+       │
+   ┌───┴───┐
+   │ match │
+   └───┬───┘
+       │
+       ▼
+┌──────────────────┐
+│  2. Create JWT   │ ← 1h expiry, signed with JWT_SECRET
+├──────────────────┤
+│  3. Create       │ ← 24h expiry, stored in sessions table
+│     session      │
+├──────────────────┤
+│  4. Set-Cookie   │ ← HttpOnly, SameSite=Strict, Path=/api
+├──────────────────┤
+│  5. Return       │ ← JWT + api_key + user info to client
+└──────────────────┘
+
+ +
Protected Request
+     │
+     ▼
+┌──────────────────────┐
+│  Middleware checks:  │
+│                      │
+│  1. Cookie session?  │ → DB lookup session → get api_key → verify
+│                      │
+│  2. JWT Bearer?      │ → verify JWT signature → decode claims
+│                      │
+│  3. X-API-Key?       │ → SHA256 hash → DB lookup → verify
+│                      │
+│  4. ?api_key=?       │ → same as #3
+│                      │
+│  5. None → 401       │
+└──────────────────────┘
+
+ +
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+

Related

+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+

File Registration

+

POST /api/v1/files/register

+

Auth: Required +Scope: file-level

+

Register a video file for processing. Returns the file's metadata and UUID.

+

New in v0.1.2: Registration now automatically triggers the processing pipeline — no need to call POST /api/v1/file/:uuid/process separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H)

+

If the file already exists (same content hash), returns the existing record with already_exists: true.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+

Example

+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "file_type": "video",
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "already_exists": false,
+  "message": "File registered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+
+

GET /api/v1/files/scan

+

Auth: Required +Scope: file-level

+

Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+

Example

+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ +

Response (200)

+
{
+  "files": [
+    {
+      "file_name": "video.mp4",
+      "file_size": 12345678,
+      "is_registered": true,
+      "file_uuid": "3a6c1865...",
+      "status": "completed",
+      "registration_time": "2026-05-16T12:00:00Z",
+      "job_id": 42
+    }
+  ],
+  "total": 107,
+  "filtered_total": 80,
+  "page": 1,
+  "page_size": 20,
+  "total_pages": 4,
+  "registered_count": 26,
+  "unregistered_count": 81
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+

Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+

Search APIs

+

Standard Search

+

POST /api/v1/search

+

Auth: Required +Scope: file-level

+

Semantic/vector search across indexed video chunks. Returns matching chunks with scores.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict search to a specific file
limitintegerNo10Max results
pageintegerNo1Page number (1-based)
page_sizeintegerNolimitItems per page (defaults to the value of limit)
modestringNosmartSearch mode: "vector" or "smart"
+

Example

+
# Search across all files
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+# Search within a specific file
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "file_uuid": "'"$FILE_UUID"'"}'
+
+# Paginated search
+curl -s -X POST "$API/api/v1/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "page": 1, "page_size": 3}'
+
+ +

Response (200)

+
{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "query": "charade",
+  "total": 15,
+  "page": 1,
+  "page_size": 3
+}
+
+> Results are deduplicated by `chunk_id` (highest score wins) before pagination.
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `results` | array | Array of matched chunks (deduplicated) |
+| `results[].uuid` | string | File UUID |
+| `results[].chunk_id` | string | Chunk identifier |
+| `results[].chunk_type` | string | `"sentence"`, `"cut"`, `"trace"`, `"visual"` |
+| `results[].start_time` | float | Start time in seconds |
+| `results[].end_time` | float | End time in seconds |
+| `results[].text` | string | Chunk text content |
+| `results[].score` | float | Similarity score (0.0–1.0) |
+| `query` | string | Original search query |
+| `total` | integer | Total matching results |
+| `page` | integer | Current page number |
+| `page_size` | integer | Items per page |
+
+#### Error Responses
+
+| HTTP | When |
+|------|------|
+| `401` | Missing or invalid API key |
+
+---
+
+### `POST /api/v1/search/hybrid`
+
+**Auth**: Required
+**Scope**: file-level
+
+Hybrid search combining vector similarity score and BM25 text score into a combined score.
+
+#### Request Parameters
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `query` | string | Yes |  | Search text |
+| `file_uuid` | string | No |  | Restrict search to a specific file |
+| `limit` | integer | No | 10 | Max results |
+| `page` | integer | No | 1 | Page number (1-based) |
+| `page_size` | integer | No | `limit` | Items per page |
+| `vector_weight` | float | No | 0.5 | Weight for vector score (0.0–1.0) |
+| `bm25_weight` | float | No | 0.5 | Weight for BM25 score (0.0–1.0) |
+
+#### Response
+
+```json
+{
+  "results": [
+    {
+      "uuid": "3a6c1865...",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "vector_score": 0.85,
+      "bm25_score": 0.72,
+      "combined_score": 0.79
+    }
+  ],
+  "query": "charade"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
results[].vector_scorefloatVector similarity score
results[].bm25_scorefloatBM25 text score
results[].combined_scorefloatWeighted combination of both scores
+
+

POST /api/v1/search/bm25

+

Auth: Required +Scope: file-level

+

BM25 full-text keyword search. Good for exact term matching. Returns results with BM25 score only.

+

Request Parameters

+

Same as standard search: query, file_uuid, limit.

+

Response

+

Returns the same structure as standard search, with score representing the BM25 relevance score.

+
+

N8N Search

+

N8N-format search endpoints. Response format is optimized for n8n workflow consumption.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/n8n/searchN8N-format vector search
POST/api/v1/n8n/search/bm25N8N-format BM25 search
POST/api/v1/n8n/search/hybridN8N-format hybrid search
POST/api/v1/n8n/search/smartN8N-format smart search
+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoRestrict to specific file
typesstring[]No["chunk"]Search types: chunk, frame, person
filtersobjectNoFilter criteria (confidence, object class, speaker, etc.)
pageintegerNo1Page number
page_sizeintegerNo20Items per page
time_rangefloat[2]NoTime range [start, end] in seconds
+

Example

+
curl -s -X POST "$API/api/v1/n8n/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "charade", "limit": 5}'
+
+ +

Response (200)

+
{
+  "query": "charade",
+  "results": [
+    {
+      "type": "chunk",
+      "chunk_id": "sentence_0012",
+      "chunk_type": "sentence",
+      "file_uuid": "3a6c1865...",
+      "start_time": 48.8,
+      "end_time": 55.4,
+      "text": "charade is a classic film...",
+      "score": 0.92
+    }
+  ],
+  "total": 15,
+  "page": 1,
+  "page_size": 20,
+  "took_ms": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
resultsarrayArray of search results
results[].typestringResult type: "chunk", "frame", or "person"
results[].chunk_idstringChunk identifier
results[].chunk_typestringChunk type
results[].file_uuidstringFile UUID
results[].start_timefloatStart time in seconds
results[].end_timefloatEnd time in seconds
results[].textstringContent text
results[].scorefloatRelevance score
totalintegerTotal matching results
pageintegerCurrent page
page_sizeintegerItems per page
took_msintegerQuery execution time in milliseconds
+
+

Identity Text Search

+

Two paths for searching identities by text.

+

Path A: GET /api/v1/search/identity_text

+

Search chunk text content and return associated identities (if any).

+

Auth: Required +Scope: file-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to search within
qstringYesText search query
limitintegerNoMax results (default 50)
pageintegerNoPage number, 1-based (default 1)
page_sizeintegerNoItems per page (defaults to limit)
+
curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=charade&page=1&page_size=5" \
+  -H "X-API-Key: $KEY"
+
+ +
Response
+
{
+  "success": true,
+  "total": 0,
+  "page": 1,
+  "page_size": 5,
+  "limit": 50,
+  "results": []
+}
+
+ +

Each result (IdentityTextHit):

+
{
+  "file_uuid": "3a6c1865...",
+  "chunk_id": "sentence_0012",
+  "start_time": 48.8,
+  "end_time": 55.4,
+  "text_content": "charade is a classic film...",
+  "identity_id": 42,
+  "identity_name": "Cary Grant",
+  "identity_source": "tmdb",
+  "trace_id": 10
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
file_uuidstringFile UUID
chunk_idstringChunk identifier
start_timefloatStart time in seconds
end_timefloatEnd time in seconds
text_contentstringChunk text content
identity_idintegerIdentity ID (null if no identity matched)
identity_namestringIdentity name (null if no identity matched)
identity_sourcestringIdentity origin. .json = established from identity.json files on disk (unified format for all sources). tmdb = from .json via TMDb enrichment probe, then matched against faces. auto = pipeline face matching result; only stranger identities are auto-created. user_defined = manual. merged = merged identities.
trace_idintegerFace trace ID (null if no trace matched)
+

Path B: POST /api/v1/identities/search

+

Search identity names and return associated face detection text.

+

Auth: Required +Scope: identity-level

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
qstringYesIdentity name search
file_uuidstringNoRestrict to specific file
limitintegerNoMax results (default 50)
+
curl -s -X POST "$API/api/v1/identities/search" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"q": "Cary Grant"}'
+
+ +
+

Visual Chunk Search

+

Search video frames by visual content (object detection results).

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodEndpointDescription
POST/api/v1/search/visualSearch visual chunks by criteria (object classes, density)
POST/api/v1/search/visual/classSearch by specific object class
POST/api/v1/search/visual/densitySearch by spatial density range
POST/api/v1/search/visual/statsGet visual detection statistics
POST/api/v1/search/visual/combinationSearch by object class combination
+

Request Parameters (Visual Search)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID
criteria.required_classesstring[]NoRequired object classes (e.g., ["person", "car"])
criteria.min_confidencefloatNoMinimum confidence threshold
criteria.min_spatial_densityfloatNoMinimum spatial density
criteria.max_spatial_densityfloatNoMaximum spatial density
+

Example

+
curl -s -X POST "$API/api/v1/search/visual" \
+  -H "X-API-Key: $KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "criteria": {"required_classes": ["person", "car"]}}'
+
+ +

Response

+
{
+  "chunks": [
+    {
+      "file_uuid": "3a6c1865...",
+      "chunk_id": "visual_001",
+      "chunk_type": "visual",
+      "detections": [
+        {"class": "person", "confidence": 0.95, "bbox": [100, 200, 150, 350]}
+      ],
+      "start_time": 120.5,
+      "end_time": 125.3
+    }
+  ],
+  "total": 42
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
chunksarrayArray of matching visual chunks
chunks[].file_uuidstringFile UUID
chunks[].chunk_idstringChunk identifier
chunks[].detectionsarrayObject detections in this chunk
totalintegerTotal matching chunks
+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/Demo_EndToEnd.html b/docs_v1.0/doc_user/Demo_EndToEnd.html new file mode 100644 index 0000000..527ce46 --- /dev/null +++ b/docs_v1.0/doc_user/Demo_EndToEnd.html @@ -0,0 +1,1084 @@ + + + + +Demo Endtoend - Momentry API Docs + + + +
+← Back to index +
+

document_type: "demo_guide" +service: "MOMENTRY_CORE" +title: "Pipeline Demo End-to-End" +date: "2026-05-15" +version: "V1.0" +status: "active" +owner: "M5" +created_by: "OpenCode" +tags: + - "demo" + - "pipeline" + - "end-to-end" + - "api" +ai_query_hints: + - "如何執行端到端 Pipeline demo" + - "Pipeline 處理流程" + - "註冊影片並觸發處理的完整流程" +related_documents: + - "GUIDES/API_ENDPOINTS.md" + - "GUIDES/Pipeline_API_Demo.md"

+
+

Momentry Core — Pipeline Demo End-to-End

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
項目內容
建立者OpenCode
建立時間2026-05-15
文件版本V1.0
目標讀者developer
預備知識需有 API Key、Pipeline 基本概念
+
+

Table of Contents

+

Pipeline Phases

+ + + + + + + + + + + + + + + + + + + + + + + + + +
PhaseStepWhat happens
處理前1–4System check, scan, register, probe
處理中5–6Submit job → Worker picks up → Each processor runs (pending→running→completed)
處理後7–9All results → Search → Identities → Schema verification
+
+

1. 檢查系統狀況

+
API="http://m5api.momentry.ddns.net"
+KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
+
+# Basic health
+curl -sf "$API/health" | jq '{status, version, build_git_hash, uptime_ms}'
+
+# Detailed health
+curl -sf "$API/health/detailed" | jq '{
+  services, 
+  schema: .schema.ok, 
+  scripts: .pipeline.scripts_count, 
+  integrity: .pipeline.scripts_integrity,
+  procs: [.pipeline.processors | to_entries[] | select(.value == true and .key != "total_py_files") | .key]
+}'
+
+ +

Output:

+
{
+  "status": "ok",
+  "version": "1.0.0",
+  "build_git_hash": "c41f7e0c",
+  "uptime_ms": 2756192
+}
+{
+  "services": {"postgres": "ok", "redis": "ok", "qdrant": "ok"},
+  "schema": false,
+  "scripts": 291,
+  "integrity": {"matched": 332, "total": 345, "ok": false},
+  "procs": ["asr","yolo","face","pose","ocr","cut","caption","scene","story","asrx","probe","visual_chunk"]
+}
+
+ +
+

2. 掃描檔案

+

掃描伺服器上所有與 exasan 相關的檔案(支援正規表達式):

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/files/scan?pattern=exasan" | \
+  jq '[.files[] | {uuid: .file_uuid, name: .file_name, size: .file_size}]'
+
+ +

輸出(節錄):

+
[
+  {"uuid": "dd61fda85fee441f...", "name": "ExaSAN PCIe series - Director Ou Yu-Zhi Shares His Experience.mp4", "size": 6827600},
+  {"uuid": "8e2e98c49355935f...", "name": "ExaSAN Webinar by Blake Jones, Vision2see.mp4", "size": 38635889},
+  {"uuid": "477d8fa7bc0e1a7...", "name": "Thunderbolt ExaSAN at CCBN.mp4", "size": 13126748}
+]
+
+ +

Note: files/scan 也可以掃所有檔案,或用於批次註冊。若不指定 pattern,回傳伺服器 sftpgo/data/demo/ 目錄下所有檔案。

+
+

3. 註冊或確認

+

若檔案尚未註冊,使用 register API。若已存在(如本次示範),直接確認狀態:

+
UUID="dd61fda85fee441fdd00ab5528213ff7"
+
+# 確認檔案狀態
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}" | jq '{uuid: .file_uuid[0:16], name: .file_name, status, duration, fps}'
+
+# 若檔案不存在,使用註冊 API:
+# curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+#   -d '{"file_path": "/path/to/video.mp4"}' \
+#   "$API/api/v1/files/register" | jq '.'
+
+ +

註冊流程

+
POST /files/register
+  ├─ SHA256 content_hash (dedup 檢查)
+  ├─ file_name 衝突檢查 (自動 rename)
+  ├─ Pre-process (SHA256 + ffprobe + UUID → .pre.json)
+  ├─ UUID = f(mac, mtime, path, filename)
+  ├─ Unified probe (video → ffprobe, doc → Python)
+  └─ INSERT INTO videos
+
+ +
+

4. Probe 確認

+

The probe endpoint returns ffprobe metadata about the registered file.

+
# Substitute the actual file_uuid from step 3
+FILE_UUID="e1111111111111111111111111111111"
+
+curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/probe" | python3 -m json.tool
+
+ +

Output (abbreviated):

+
{
+    "file_uuid": "e1111111111111111111111111111111",
+    "file_name": "demo_test_video.mp4",
+    "duration": 5.005,
+    "width": 640,
+    "height": 480,
+    "fps": 24.0,
+    "total_frames": 120,
+    "cached": true,
+    "format": {
+        "filename": "/tmp/demo_test_video.mp4",
+        "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
+        "duration": "5.005000",
+        "size": "98304",
+        "bit_rate": "157184"
+    },
+    "streams": [
+        {"index": 0, "codec_type": "video", "codec_name": "h264", "width": 640, "height": 480, ...},
+        {"index": 1, "codec_type": "audio", "codec_name": "aac", ...}
+    ]
+}
+
+ +

Error handling (Bug #3 fix): +- Non-existent UUID → {"error":"Video not found"} + HTTP 404 +- File deleted from disk → {"error":"File does not exist at registered path"} + HTTP 404 +- ffprobe failure → {"error":"ffprobe failed: ..."} + HTTP 500

+

⚡ Intermediate Check — Bug #3: Probe Error Verification

+

Test both error cases return proper JSON + HTTP code instead of bare 500:

+
echo "=== Non-existent UUID → expect 404 ==="
+curl -s -w "\nHTTP: %{http_code}\n" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/bad_uuid_12345/probe"
+# Expect: {"error":"Video not found","file_uuid":"bad_uuid_12345"}  HTTP 404
+
+echo ""
+echo "=== Non-existent file path → expect 404 ==="
+# Temporarily change file_path to a non-existent location
+"$PG_BIN/psql" -U accusys -d momentry -c \
+    "UPDATE dev.videos SET file_path = '/tmp/NONEXISTENT_FILE' WHERE file_uuid = '${FILE_UUID}'"
+curl -s -w "\nHTTP: %{http_code}\n" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/probe"
+# Expect: {"error":"File does not exist at registered path",...}  HTTP 404
+# Restore path
+"$PG_BIN/psql" -U accusys -d momentry -c \
+    "UPDATE dev.videos SET file_path = '/tmp/demo_test_video.mp4' WHERE file_uuid = '${FILE_UUID}'"
+
+ +

Output:

+
=== Non-existent UUID → expect 404 ===
+{"error":"Video not found","file_uuid":"bad_uuid_12345"}
+HTTP: 404
+
+=== Non-existent file path → expect 404 ===
+{"error":"File does not exist at registered path","file_uuid":"e1111111111111111111111111111111","file_path":"/tmp/NONEXISTENT_FILE"}
+HTTP: 404
+
+ +
+

5. Process Video

+

Trigger pipeline processing for specific processors. The available processors are:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ProcessorFunctionScript
asrSpeech-to-text (faster-whisper)asr_processor.py
cutScene detection (PySceneDetect)cut_processor.py
yoloObject detection (YOLOv8)yolo_processor.py
faceFace detection (InsightFace)face_processor.py
posePose estimation (MediaPipe)pose_processor.py
ocrText detection (PaddleOCR)ocr_processor.py
asrxSpeaker diarizationasrx_processor.py
visual_chunkVisual content analysisvisual_chunk_processor.py
sceneScene classificationscene_classifier.py
storyStory generation (LLM)story_processor.py
captionCaption generationcaption_processor.py
+
# Trigger only ASR + CUT for quick test
+curl -s -X POST "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/process" \
+    -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    -H "Content-Type: application/json" \
+    -d '{"processors": ["asr", "cut"]}' | python3 -m json.tool
+
+ +

Output:

+
{
+    "job_id": 161,
+    "file_uuid": "e1111111111111111111111111111111",
+    "status": "PENDING",
+    "pids": [],
+    "message": "Processing triggered for demo_test_video.mp4"
+}
+
+ +

Processing flow:

+
POST /process → trigger_processing()
+  ├─ Validate file exists (DB lookup)
+  ├─ Create monitor_job (status: PENDING)
+  ├─ Create processor_result rows for each requested processor (status: pending)
+  └─ Response { job_id, status: "PENDING" }
+
+ +

Note: If no processors are specified, the following default set of processors is used:

+
{"processors": ["asr", "cut", "yolo", "ocr", "face", "pose", "asrx", "visual_chunk"]}
+
+ +

⚡ Intermediate Check — Verify Job + Processor Results after Trigger

+
PG_BIN="/Users/accusys/pgsql/18.3/bin"
+
+# Check monitor_jobs table
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, uuid, status, current_processor,
+       to_char(created_at, 'HH24:MI:SS') AS created
+FROM dev.monitor_jobs
+WHERE uuid = '${FILE_UUID}'
+ORDER BY id DESC LIMIT 1
+\gx
+"
+
+# Check processor_results table
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, processor, status
+FROM dev.processor_results
+WHERE file_uuid = '${FILE_UUID}'
+ORDER BY id
+"
+
+ +

Output:

+
-[ RECORD 1 ]------+-----------------------------
+id                 | 161
+uuid               | e1111111111111111111111111111111
+status             | PENDING
+current_processor  | (null)
+created            | 19:00:30
+
+ id | processor | status
+----+-----------+---------
+  1 | asr       | pending
+  2 | cut       | pending
+
+ +

Checklist after trigger: +- [ ] monitor_jobs.status = 'PENDING' — job created, awaiting worker +- [ ] processor_results rows match requested processors (2 rows for asr, cut) +- [ ] Each processor.status = 'pending' — not yet executed

+
+

6. Worker Execution

+

The worker polls for pending jobs and executes them one by one.

+
DATABASE_SCHEMA=dev cargo run --bin momentry_playground -- worker \
+    --max-concurrent 2 --poll-interval 5
+
+ +

Or in background:

+
DATABASE_SCHEMA=dev nohup target/debug/momentry_playground worker \
+    --max-concurrent 2 --poll-interval 5 > /tmp/worker_demo.log 2>&1 &
+
+ +

Worker flow:

+
Worker loop (every 5 seconds):
+  ├─ Poll: SELECT * FROM monitor_jobs WHERE status = 'PENDING'
+  ├─ Set job status → RUNNING
+  ├─ For each pending processor:
+  │    ├─ SHA256 integrity check (verify_script_integrity)
+  │    │    └─ checksums.sha256 manifest lookup
+  │    ├─ Execute script via PythonExecutor
+  │    │    └─ Command: {MOMENTRY_PYTHON_PATH} scripts/<processor>.py <args>
+  │    ├─ Verify output (file exists, content valid)
+  │    └─ Update processor_result (completed/failed)
+  ├─ Check completion: all processors done?
+  ├─ Yes → Set job + video status → COMPLETED
+  └─ No → Wait for next poll cycle
+
+ +

Worker log output:

+
[CHECKSUMS] Loaded 345 entries from checksums.sha256
+[INTEGRITY] asr_processor.py checksum OK
+[ASR] Starting asr_processor.py
+[INTEGRITY] cut_processor.py checksum OK  
+[CUT] Starting cut_processor.py
+[ASR] Completed successfully
+[CUT] Completed successfully
+check_and_complete_job: results=2/2 → Job COMPLETED
+
+ +

⚡ Intermediate Check — Poll Progress During Worker Execution

+

While the worker is running, poll the progress endpoint to watch state transitions:

+
# Poll every 5 seconds until completed
+FILE_UUID="e1111111111111111111111111111111"
+for i in $(seq 1 12); do
+    sleep 5
+    STATUS=$(curl -sf -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+        "http://m5api.momentry.ddns.net/api/v1/progress/${FILE_UUID}" \
+        | python3 -c "import json,sys;d=json.load(sys.stdin);print(d.get('status','?'))" 2>/dev/null || echo "pending")
+    echo "Poll $i: status=$STATUS"
+    [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ] && break
+done
+
+ +

Output (typical):

+
Poll 1: status=registered          worker hasn't picked it up yet
+Poll 2: status=pending             worker picked up, job status changed
+Poll 3: status=processing          worker running ASR
+Poll 4: status=processing          worker running CUT
+Poll 5: status=completed           all done
+
+ +

Check status transitions in DB:

+
"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, processor, status,
+       to_char(started_at, 'HH24:MI:SS') AS started,
+       to_char(completed_at, 'HH24:MI:SS') AS completed
+FROM dev.processor_results
+WHERE file_uuid = '${FILE_UUID}'
+ORDER BY id
+"
+
+ +

Output:

+
 id | processor |  status    | started   | completed
+----+-----------+------------+-----------+-----------
+  1 | asr       | completed  | 19:01:02  | 19:01:25
+  2 | cut       | completed  | 19:01:02  | 19:01:08
+
+ +

⚡ Processing Checklist — Step-by-Step Verification

+

This checklist covers every stage of the pipeline processing flow:

+
# ──────────────────────────────────────────────────────
+# Stage A: Before Worker Starts
+# ──────────────────────────────────────────────────────
+PG_BIN="/Users/accusys/pgsql/18.3/bin"
+FILE_UUID="e1111111111111111111111111111111"
+KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
+
+echo "=== A1. Job status = PENDING ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, status, current_processor, created_at FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}'
+"
+
+echo "=== A2. Processor results = pending ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, processor, status FROM dev.processor_results WHERE file_uuid = '${FILE_UUID}' ORDER BY id
+"
+
+# ──────────────────────────────────────────────────────
+# Stage B: Worker Running
+# ──────────────────────────────────────────────────────
+echo "=== Start worker ==="
+DATABASE_SCHEMA=dev nohup target/debug/momentry_playground worker \
+    --max-concurrent 1 --poll-interval 3 > /tmp/worker_check.log 2>&1 &
+WPID=$!
+
+echo "=== B1. Worker picks up job (within 3-10s) ==="
+for i in $(seq 1 10); do
+    sleep 3
+    JOB_STATUS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c \
+        "SELECT status FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}'" 2>/dev/null)
+    VIDEO_STATUS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c \
+        "SELECT status FROM dev.videos WHERE file_uuid = '${FILE_UUID}'" 2>/dev/null)
+    echo "  Poll $i: job=$JOB_STATUS video=$VIDEO_STATUS"
+    echo "  $(grep '\[INTEGRITY\]\|\[SCHEMA\]\|Starting:\|Completed\|failed\|Job ' /tmp/worker_check.log 2>/dev/null | tail -3)"
+
+    # Check alive
+    kill -0 $WPID 2>/dev/null || { echo "  Worker died unexpectedly"; break; }
+
+    if [ "$VIDEO_STATUS" = "completed" ] || [ "$VIDEO_STATUS" = "failed" ]; then break; fi
+done
+
+echo "=== B2. Each processor status ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, processor, status,
+       to_char(started_at, 'HH24:MI:SS') AS started,
+       to_char(completed_at, 'HH24:MI:SS') AS completed,
+       COALESCE(chunks_produced, 0) AS chunks,
+       COALESCE(frames_processed, 0) AS frames,
+       COALESCE(error_message, '') AS error
+FROM dev.processor_results
+WHERE file_uuid = '${FILE_UUID}'
+ORDER BY id
+"
+
+kill $WPID 2>/dev/null || true
+
+# ──────────────────────────────────────────────────────
+# Stage C: After Completion
+# ──────────────────────────────────────────────────────
+echo "=== C1. Video final status ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT file_uuid, file_name, status, duration, fps, total_frames FROM dev.videos WHERE file_uuid = '${FILE_UUID}'
+"
+
+echo "=== C2. Chunks produced ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT chunk_type, count(*) FROM dev.chunk WHERE file_uuid = '${FILE_UUID}' GROUP BY chunk_type ORDER BY chunk_type
+"
+
+echo "=== C3. Job final status ==="
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT id, status, current_processor FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}'
+"
+
+ +

Expected output (all green):

+
=== A1. Job status = PENDING ===
+ id | status  | current_processor | created_at
+----+---------+-------------------+-------------------
+ 161| PENDING |                   | 2026-05-15 19:00:30
+
+=== A2. Processor results = pending ===
+ id | processor | status
+----+-----------+---------
+  1 | asr       | pending
+  2 | cut       | pending
+
+=== Start worker ===
+=== B1. Worker picks up job (within 3-10s) ===
+  Poll 1: job=PENDING video=registered
+  Poll 2: job=RUNNING video=processing
+  [INTEGRITY] asr_processor.py checksum OK
+  Poll 3: job=RUNNING video=processing
+  [ASR] Starting: asr_processor.py
+  Poll 4: job=RUNNING video=processing
+  [ASR] Completed successfully
+  Poll 5: job=RUNNING video=processing
+  [CUT] Completed successfully
+  Poll 6: job=COMPLETED video=completed
+
+=== B2. Each processor status ===
+ id | processor |  status   | started   | completed | chunks | frames | error
+----+-----------+-----------+-----------+-----------+--------+--------+-------
+  1 | asr       | completed | 19:01:02  | 19:01:25 |      3 |    120 |
+  2 | cut       | completed | 19:01:02  | 19:01:08 |      1 |    120 |
+
+=== C1. Video final status ===
+  file_uuid   |      file_name      |  status   | duration | fps | total_frames
+--------------+---------------------+-----------+----------+-----+--------------
+ e11111111... | demo_test_video.mp4 | completed |    5.005 |  24 |          120
+
+=== C2. Chunks produced ===
+ chunk_type | count
+------------+-------
+ cut        |     1
+ sentence   |     3
+
+=== C3. Job final status ===
+ id |  status   | current_processor
+----+-----------+-------------------
+ 161| COMPLETED | (null)
+
+ +

Checklist during execution:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Stage#CheckExpectedPass
A. Pre-workerA1monitor_jobs.statusPENDING
A2processor_results rows= requested processor count
A3Each processor_results.statuspending
B. RunningB1Job picked up (within poll interval)status → RUNNING
B2SHA256 integrity check in logs[INTEGRITY] *.py checksum OK
B3Each processor transitionspending → running → completed
B4started_at populatedNOT NULL per processor
B5Processors complete without errorerror_message is NULL
B6Max concurrent respected--max-concurrent running at once
C. Post-completionC1videos.statuscompleted (not failed)
C2chunks_produced > 0ASR has sentence chunks
C3monitor_jobs.statusCOMPLETED
C4chunk table has datarows with this file_uuid
C5Chunk IDs formatted correctly{uuid}_{start}_{end}
+
+

7. Check Results

+

Monitor job progress:

+
# Check job status
+curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/jobs?page=1&page_size=5&status=pending,running,completed,failed" \
+    | python3 -c "import json,sys;d=json.load(sys.stdin);[print(f'{j[\"uuid\"]}: {j[\"status\"]}') for j in d.get('jobs',[])]"
+
+ +

Output:

+
9eca53f422f668dd59a9995d29dc9388: completed
+e1111111111111111111111111111111: completed
+
+ +

⚡ Intermediate Check — Bug #2: Chunk Fallback Verification

+

Verify that both new and old chunk_id formats resolve correctly:

+
# Pick a chunk_id from the DB
+CHUNK_INFO=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c "
+SELECT chunk_id, id FROM dev.chunk WHERE file_uuid = '${FILE_UUID}' LIMIT 1
+")
+NEW_ID=$(echo "$CHUNK_INFO" | cut -d'|' -f1)
+DB_ID=$(echo "$CHUNK_INFO" | cut -d'|' -f2)
+
+echo "=== New format: $NEW_ID ==="
+curl -s -w " HTTP %{http_code}" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${NEW_ID}" \
+    | python3 -c "import json,sys;d=json.load(sys.stdin);print(f'chunk_id={d.get(\"chunk_id\")}')" 2>/dev/null
+
+echo ""
+echo "=== Old integer fallback (id=$DB_ID) ==="
+curl -s -w " HTTP %{http_code}" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${DB_ID}" \
+    | python3 -c "import json,sys;d=json.load(sys.stdin);print(f'chunk_id={d.get(\"chunk_id\")}')" 2>/dev/null
+
+ +

Output:

+
=== New format: e1111111111111111111111111111111_0_5 ===
+chunk_id=e1111111111111111111111111111111_0_5 HTTP 200
+
+=== Old integer fallback (id=1075655) ===
+chunk_id=e1111111111111111111111111111111_0_5 HTTP 200
+
+ +

Both return chunk_id=e1111111111111111111111111111111_0_5 — the fallback correctly resolves id=1075655 to the same chunk.

+

⚡ Intermediate Check — Verify Chunks after Processing

+
PG_BIN="/Users/accusys/pgsql/18.3/bin"
+
+# Count chunks produced
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT chunk_type, count(*) AS count
+FROM dev.chunk
+WHERE file_uuid = '${FILE_UUID}'
+GROUP BY chunk_type
+ORDER BY chunk_type
+"
+
+# Sample chunk content
+"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT chunk_id, chunk_type, start_frame, end_frame,
+       substring(text_content, 1, 60) AS text_preview
+FROM dev.chunk
+WHERE file_uuid = '${FILE_UUID}'
+ORDER BY start_frame
+LIMIT 5
+"
+
+ +

Output:

+
 chunk_type | count
+------------+-------
+ cut        |     1
+ sentence   |     3
+
+                     chunk_id                     | chunk_type | start_frame | end_frame |                    text_preview
+--------------------------------------------------+------------+-------------+-----------+-----------------------------------------------------
+ e1111111111111111111111111111111_0_5              | cut        |           0 |       120 | demo_test_video_auto_demo.mp4
+ e1111111111111111111111111111111_0_0              | sentence   |           0 |       120 | test pattern test pattern color bars test pattern ...
+
+ +

Check per-processor results in DB:

+
"$PG_BIN/psql" -U accusys -d momentry -c "
+SELECT processor, status, error_message,
+       to_char(started_at, 'HH24:MI:SS') AS started,
+       to_char(completed_at, 'HH24:MI:SS') AS completed,
+       COALESCE(chunks_produced, 0) AS chunks
+FROM dev.processor_results
+WHERE file_uuid='${FILE_UUID}'
+ORDER BY id;
+"
+
+ +

Output:

+
 processor |  status   | error_message | started   | completed | chunks
+-----------+-----------+---------------+-----------+-----------+--------
+ asr       | completed |               | 19:01:02  | 19:01:25 |      3
+ cut       | completed |               | 19:01:02  | 19:01:08 |      1
+
+ +

Checklist after processing: +- [ ] video.status = 'completed' — pipeline finished +- [ ] processor_results all show status = 'completed' +- [ ] chunks_produced > 0 — each processor produced output +- [ ] chunk table has rows with correct chunk_type (cut, sentence) +- [ ] chunk_id format is {file_uuid}_{start}_{end} (Bug #2 fix verified)

+
+

8. Search Chunks

+

After processing, search the generated chunks:

+
# Text search (ASR output)
+curl -s -X POST "http://m5api.momentry.ddns.net/api/v1/search/universal" \
+    -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    -H "Content-Type: application/json" \
+    -d "{\"query\": \"test\", \"uuid\": \"${FILE_UUID}\", \"limit\": 5}" \
+    | python3 -c "
+import json,sys;d=json.load(sys.stdin)
+print(f'Total hits: {d[\"total\"]}')
+for r in d['results']:
+    if r.get('chunk_id'):
+        print(f'  {r[\"chunk_id\"]}: \"{r.get(\"text\",\"\")[:60]}\" score={r.get(\"score\",0):.3f}')
+"
+
+ +

Output:

+
Total hits: 3
+  e1111111111111111111111111111111_0_5: "test pattern test pattern..." score=0.423
+  e1111111111111111111111111111111_5_10: "silence" score=0.215
+
+ +

Get a specific chunk by ID:

+
# Single chunk detail  
+curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
+    "http://m5api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${FILE_UUID}_0_5" \
+    | python3 -c "
+import json,sys;d=json.load(sys.stdin)
+print(f'Type: {d[\"chunk_type\"]}  Rule: {d[\"rule\"]}')
+print(f'Frame: {d[\"start_frame\"]}–{d[\"end_frame\"]}  FPS: {d[\"fps\"]}')
+print(f'Text: {d[\"text_content\"][:100]}')
+"
+
+ +
+

9. Health Check

+
# Basic health
+curl -sf http://m5api.momentry.ddns.net/health | python3 -m json.tool
+
+# Detailed health (services + pipeline + schema + resources)
+curl -sf http://m5api.momentry.ddns.net/health/detailed | python3 -c "
+import json,sys;d=json.load(sys.stdin)
+p=d['pipeline'];s=d['schema']
+print(f'Status:   {d[\"status\"]}')
+print(f'Build:    {d[\"build_git_hash\"]}')
+print(f'Services: postgres={d[\"services\"][\"postgres\"][\"status\"]} redis={d[\"services\"][\"redis\"][\"status\"]}')
+print(f'Schema:   {s[\"applied\"][-1][\"filename\"] if s[\"applied\"] else \"none\"} ({len(s[\"applied\"])}/{len(s[\"required\"])} applied, ok={s[\"ok\"]})')
+print(f'Scripts:  {p[\"scripts_count\"]} files, integrity={p[\"scripts_integrity\"][\"matched\"]}/{p[\"scripts_integrity\"][\"total\"]}')
+print(f'Procs:    ' + ' '.join([k for k,v in p['processors'].items() if v and k != 'total_py_files']))
+"
+
+ +

Output:

+
Status:   ok
+Build:    0e73d2a
+Services: postgres=ok redis=ok
+Schema:   migrate_fix_chunk_id_format.sql (8/8 applied, ok=True)
+Scripts:  286 files, integrity=345/345
+Procs:    asr yolo face pose ocr cut caption scene story asrx probe visual_chunk
+
+ +
+

10. Schema Version

+

Each binary embeds a list of required migrations. At startup and via /health/detailed, the server verifies all migrations are applied.

+
# Check schema version via API
+curl -sf http://m5api.momentry.ddns.net/health/detailed | python3 -c "
+import json,sys;d=json.load(sys.stdin)['schema']
+print(f'Table exists: {d[\"table_exists\"]}')
+print(f'All OK:       {d[\"ok\"]}')
+for m in d['required']:
+    match = '✓' if any(a['filename']==m['filename'] and a['checksum']==m['checksum'] for a in d['applied']) else '✗'
+    print(f'  {match} {m[\"filename\"]}  {m[\"checksum\"][:16]}')
+"
+
+ +

Output:

+
Table exists: True
+All OK:       True
+  ✓ migrate_add_content_hash.sql  42b81554248c4bec
+  ✓ migrate_add_registered_status.sql  566fdfcdc624f6fa
+  ✓ migrate_add_schema_version.sql  585b31df6056a937
+  ✓ migrate_cleanup_inactive_identities.sql  daa52a0827b24a77
+  ✓ migrate_fix_chunk_id_format.sql  a1b2c3d4e5f6a7b8
+  ✓ migrate_public_schema_v4.sql  973908076c614363
+  ✓ migrate_public_schema_v4_tables.sql  1d62dc42e4dec8f4
+  ✓ migrate_public_v4_complete.sql  2a6fda7d2c5660e4
+
+ +

If a migration is missing at startup:

+
[SCHEMA] 7/8 migrations applied. Missing: migrate_fix_chunk_id_format.sql
+
+ +
+
+

Summary Checklist

+

After completing a pipeline run, verify all items:

+

Registration

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#CheckExpectedPass
1videos.statusregistered
2file_uuid consistencyAPI response uuid = DB uuid
3Probe returns metadataduration > 0, fps > 0
4Probe error (Bug #3)Bad UUID → JSON error + 404
+

Processing

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#CheckExpectedPass
5Job createdmonitor_jobs.status = PENDING
6Processors queuedprocessor_results rows = requested count
7Worker picks up jobmonitor_jobs.status → RUNNING
8SHA256 integrity (Bug #2)[INTEGRITY] *.py checksum OK
9Each processor completesprocessor_results.status = completed
10No processor errorserror_message all NULL
11Pipeline completesvideos.status = completed
+

Results

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
#CheckExpectedPass
12Chunks producedchunk table has > 0 rows
13Chunk ID formatchunk_id = {uuid}_{start}_{end}
14Chunk fallback (Bug #2)Old integer ID → 200 via handler fallback
15Search worksPOST /search/universal returns hits
16Schema versionschema.ok = true in /health/detailed
+
+

Full Automation Script

+

Save as demo_full_cycle.sh:

+
#!/bin/bash
+set -euo pipefail
+
+API="http://m5api.momentry.ddns.net"
+KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
+PG="/Users/accusys/pgsql/18.3/bin"
+
+# Generate test video
+ffmpeg -y -f lavfi -i "testsrc=duration=5:size=640x480:rate=24" \
+    -f lavfi -i "anullsrc=r=44100:cl=mono" \
+    -c:v libx264 -preset ultrafast -crf 28 -c:a aac -shortest \
+    /tmp/auto_demo.mp4 2>/dev/null
+
+# Register
+UUID=$(curl -sf -X POST "$API/api/v1/files/register" \
+    -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+    -d '{"file_path": "/tmp/auto_demo.mp4"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['file_uuid'])")
+echo "Registered: $UUID"
+
+# Process
+curl -sf -X POST "$API/api/v1/file/$UUID/process" \
+    -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+    -d '{"processors":["asr","cut"]}' > /dev/null
+echo "Processing triggered"
+
+# Run worker
+DATABASE_SCHEMA=dev target/debug/momentry_playground worker \
+    --max-concurrent 1 --poll-interval 3 &
+WPID=$!
+sleep 30
+kill $WPID 2>/dev/null || true
+
+# Results
+"$PG/psql" -U accusys -d momentry -c "
+SELECT processor, status FROM dev.processor_results WHERE file_uuid='$UUID' ORDER BY id"
+echo "Done: $UUID"
+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/M5API_Pipeline_Demo.html b/docs_v1.0/doc_user/M5API_Pipeline_Demo.html new file mode 100644 index 0000000..141ef3f --- /dev/null +++ b/docs_v1.0/doc_user/M5API_Pipeline_Demo.html @@ -0,0 +1,472 @@ + + + + +M5Api Pipeline Demo - Momentry API Docs + + + +
+← Back to index +
+

document_type: "demo_guide" +service: "MOMENTRY_CORE" +title: "M5API Pipeline Demo" +date: "2026-05-16" +version: "V1.0" +status: "active" +owner: "M5" +created_by: "OpenCode" +tags: + - "demo" + - "pipeline" + - "api" + - "m5api" +ai_query_hints: + - "M5API Pipeline demo" + - "如何透過 M5 的 API 執行 Pipeline" +related_documents: + - "GUIDES/Demo_EndToEnd.md" + - "GUIDES/API_ENDPOINTS.md"

+
+

Momentry Core — M5API Pipeline Demo

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
項目內容
建立者OpenCode
建立時間2026-05-16
文件版本V1.0
目標讀者developer
預備知識需有 API Key、M5 服務已啟動
+
+

Prerequisites

+
API="https://m5api.momentry.ddns.net"
+KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
+
+ +
+

Step 1: System Health Check

+
curl -sf "$API/health" | jq '{ip, port, status, version, build_git_hash}'
+
+ +

Response:

+
{
+  "ip": "192.168.110.201",
+  "port": 3002,
+  "status": "ok",
+  "version": "1.0.0",
+  "build_git_hash": "c41f7e0c"
+}
+
+ +

All core services verified:

+
curl -sf "$API/health/detailed" | jq '{
+  services, schema: .schema.ok,
+  scripts: .pipeline.scripts_count,
+  integrity: .pipeline.scripts_integrity,
+  procs: [.pipeline.processors | to_entries[] | select(.value==true and .key!="total_py_files") | .key]
+}'
+
+ +

Response:

+
{
+  "services": {
+    "postgres": {"status": "ok"},
+    "redis": {"status": "ok"},
+    "qdrant": {"status": "ok"},
+    "mongodb": {"status": "ok"}
+  },
+  "schema": true,
+  "scripts": 286,
+  "integrity": {"matched": 345, "total": 345, "ok": true},
+  "procs": ["asr","yolo","face","pose","ocr","cut","caption","scene","story","asrx","probe","visual_chunk"]
+}
+
+ +
+

Step 2: List Registered Files

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/files?page=1&page_size=5" | \
+  jq '{total, files: [.data[]? | {name: .file_name[0:50], status}]}'
+
+ +

Response:

+
{
+  "total": 56,
+  "files": [
+    {"name": "Charade (1963) Cary Grant & Audrey Hepburn ...", "status": "completed"},
+    {"name": "ExaSAN PCIe series - Director Ou Yu-Zhi ...", "status": "completed"},
+    {"name": "Old_Time_Movie_Show_-_Charade_1963.HD.mov", "status": "completed"},
+    {"name": "Old Felix the Cat Cartoon.mp4", "status": "unregistered"},
+    {"name": "short_clip.mov", "status": "completed"}
+  ]
+}
+
+ +
+

Step 3: Register a New File

+
# POST with file_path (must exist on server filesystem)
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d '{"file_path": "/path/to/video.mp4"}' \
+  "$API/api/v1/files/register" | jq '{success, file_uuid, file_name, file_type, duration, fps, already_exists}'
+
+ +

Response (new registration):

+
{
+  "success": true,
+  "file_uuid": "3abeee81d94597629ed8cb943f182e94",
+  "file_name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4",
+  "file_type": "video",
+  "duration": 6785.014,
+  "fps": 23.976,
+  "already_exists": false
+}
+
+ +

Response (duplicate content — SHA256 dedup):

+
{
+  "success": true,
+  "already_exists": true,
+  "message": "Content already registered (identical file)"
+}
+
+ +
+

Step 4: Probe (ffprobe Metadata)

+
UUID="3abeee81d94597629ed8cb943f182e94"
+
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \
+  jq '{name: .file_name, video: "\(.width)x\(.height)", fps, duration, cached, streams: [.streams[] | {type: .codec_type, codec: .codec_name}]}'
+
+ +

Response:

+
{
+  "name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4",
+  "video": "720x304",
+  "fps": 23.976,
+  "duration": 6785.014,
+  "cached": true,
+  "streams": [
+    {"type": "video", "codec": "h264"},
+    {"type": "audio", "codec": "aac"}
+  ]
+}
+
+ +

Error cases:

+
# Non-existent UUID
+curl -s -H "X-API-Key: $KEY" -w "  HTTP %{http_code}\n" "$API/api/v1/file/bad_uuid/probe"
+# → {"error":"Video not found","file_uuid":"bad_uuid"}  HTTP 404
+
+# File deleted from disk
+# → {"error":"File does not exist at registered path","file_uuid":"...","file_path":"..."}  HTTP 404
+
+ +
+

Step 5: Submit Processing Job

+
# Specific processors
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d '{"processors":["asr","cut","yolo","face","pose","ocr"]}' \
+  "$API/api/v1/file/${UUID}/process" | jq '{job_id, file_uuid: .file_uuid[0:16], status}'
+
+ +

Response:

+
{
+  "job_id": 167,
+  "file_uuid": "3abeee81d9459762",
+  "status": "PENDING"
+}
+
+ +
+

All processors: Send {} (an empty JSON object) as the request body to run all 12 processors. +Available: asr, cut, yolo, face, pose, ocr, asrx, visual_chunk, scene, story, caption, probe

+
+
+

Step 6: Monitor Progress

+
while true; do
+  PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}")
+  STATUS=$(echo "$PROGRESS" | jq -r '.status // "?"')
+  PROCS=$(echo "$PROGRESS" | jq -r '[.processors[]? | "\(.name)=\(.status)(\(.frames_processed))"] | join("  ")')
+  echo "$(date +%H:%M:%S): $PROCS"
+  [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ] && break
+  sleep 10
+done
+
+ +

Typical output:

+
12:30:01: asr=pending(0)  cut=pending(0)  yolo=pending(0)  face=pending(0)  pose=pending(0)  ocr=pending(0)
+12:30:11: asr=running(0)  cut=running(0)  yolo=pending(0)  face=pending(0)  pose=pending(0)  ocr=pending(0)
+12:30:21: asr=running(0)  cut=completed(8951)  yolo=running(0)  face=pending(0)  pose=pending(0)  ocr=pending(0)
+12:30:31: asr=running(0)  cut=completed(8951)  yolo=completed(8951)  face=running(0)  pose=pending(0)  ocr=pending(0)
+12:30:41: asr=running(0)  cut=completed(8951)  yolo=completed(8951)  face=completed(8951)  pose=running(0)  ocr=pending(0)
+12:30:51: asr=completed(8951)  cut=completed(8951)  yolo=completed(8951)  face=completed(8951)  pose=completed(8951)  ocr=running(0)
+12:31:01: asr=completed(8951)  cut=completed(8951)  yolo=completed(8951)  face=completed(8951)  pose=completed(8951)  ocr=completed(8951)
+
+ +

Status transition chain: pending → running → completed

+

Check job state:

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/jobs?uuid=${UUID}" | \
+  jq '[.jobs[]? | {id, status}]'
+
+ +
+

Step 7: Verify Results

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}" | \
+  jq '{processors: [.processors[] | {name, status, frames: .frames_processed}]}'
+
+ +

Response:

+
{
+  "processors": [
+    {"name": "asr", "status": "completed", "frames": 162568},
+    {"name": "cut", "status": "completed", "frames": 162568},
+    {"name": "yolo", "status": "completed", "frames": 162568},
+    {"name": "face", "status": "completed", "frames": 162568},
+    {"name": "pose", "status": "completed", "frames": 162568},
+    {"name": "ocr", "status": "completed", "frames": 162568}
+  ]
+}
+
+ +
+

Step 8: Universal Search

+
# Search for a person name
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d "{\"query\":\"Audrey\",\"uuid\":\"${UUID}\",\"limit\":3}" \
+  "$API/api/v1/search/universal" | \
+  jq '{total, hits: [.results[]? | {chunk_id: .chunk_id[0:40], text: .text[0:80], score}]}'
+
+ +

Response:

+
{
+  "total": 2,
+  "hits": [
+    {
+      "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192",
+      "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran",
+      "score": 0.9
+    },
+    {
+      "chunk_id": "3abeee81d94597629ed8cb943f182e94_998193",
+      "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran",
+      "score": 0.9
+    }
+  ]
+}
+
+ +
# Search Chinese text
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d "{\"query\":\"導演\",\"uuid\":\"${UUID}\",\"limit\":3}" \
+  "$API/api/v1/search/universal" | jq '{total}'
+
+ +

Search modes: The universal search endpoint supports: +- Text match (ILIKE on text_content and content columns) +- Time range filtering (time_range: [start, end]) +- Speaker/person ID filtering +- Chunk type filtering +- Visual content filtering (objects, density, classes)

+
+

Step 9: Get Chunk Detail

+
CHUNK_ID="3abeee81d94597629ed8cb943f182e94_998192"
+
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/${CHUNK_ID}" | \
+  jq '{chunk_id, chunk_type, text: .text_content, fps, start_frame, end_frame}'
+
+ +

Response:

+
{
+  "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192",
+  "chunk_type": "sentence",
+  "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran",
+  "fps": 23.976,
+  "start_frame": 2395281,
+  "end_frame": 2395341
+}
+
+ +
+

Step 10: Chunk Fallback (Stale Qdrant Compatibility)

+

Old integer-format chunk_ids from stale Qdrant payloads are automatically resolved via WHERE id = int(chunk_id):

+
# Integer format (old Qdrant payload)
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/998192" | \
+  jq '{chunk_id, text: .text_content}'
+
+ +

Response (same chunk as above):

+
{
+  "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192",
+  "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran"
+}
+
+ +

Both formats work: +- chunk/{uuid}_{id} → exact chunk_id match +- chunk/{id} → fallback by primary key id

+
+

Step 11: File Detail

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}" | \
+  jq '{file_name, status, file_type, file_path}'
+
+ +

Response:

+
{
+  "file_name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4",
+  "status": "completed",
+  "file_type": "video",
+  "file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/Charade..."
+}
+
+ +
+

Step 12: File Identities

+
curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/identities" | \
+  jq '{total, identities: [.data[]? | {name, face_count, confidence}]}'
+
+ +

Response:

+
{
+  "total": 2,
+  "identities": [
+    {"name": "Audrey Hepburn", "face_count": 22082, "confidence": 0.93},
+    {"name": "Cary Grant", "face_count": 15334, "confidence": 0.91}
+  ]
+}
+
+ +
+

Step 13: Identity Detail

+
# List all global identities
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/identities?page=1&page_size=3" | \
+  jq '{total, identities: [.data[]? | {name, type: .identity_type, source}]}'
+
+ +
# Get identity files (cross-file faces)
+IDENTITY_UUID="c3545906-c82d-4b66-aa1d-150bc02decce"
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/identity/${IDENTITY_UUID}/files" | \
+  jq '{total, files: [.data[]? | {file_uuid: .file_uuid[0:16], face_count}]}'
+
+ +
+

Step 14: Schema & Integrity Verification

+
curl -sf "$API/health/detailed" | jq '{
+  ip, port,
+  schema: .schema.ok,
+  migrations: [.schema.applied[]?.filename],
+  integrity: .pipeline.scripts_integrity
+}'
+
+ +

Response:

+
{
+  "ip": "192.168.110.201",
+  "port": 3002,
+  "schema": true,
+  "migrations": [
+    "migrate_add_content_hash.sql",
+    "migrate_add_registered_status.sql",
+    "migrate_add_schema_version.sql",
+    "migrate_cleanup_inactive_identities.sql",
+    "migrate_public_schema_v4_tables.sql",
+    "migrate_public_schema_v4.sql",
+    "migrate_public_v4_complete.sql",
+    "migrate_fix_chunk_id_format.sql",
+    "migrate_add_identity_indexes.sql"
+  ],
+  "integrity": {"matched": 345, "total": 345, "ok": true}
+}
+
+ +
+

Full Automation Script

+
#!/bin/bash
+set -euo pipefail
+
+API="${API:-https://m5api.momentry.ddns.net}"
+KEY="${KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}"
+
+# 1. Health
+echo "=== Health ==="
+curl -sf "$API/health" | jq '{status, version, build_git_hash}'
+
+# 2. Register file (argument: file path)
+FILE_PATH="${1:?Usage: $0 <file_path>}"
+echo "=== Register ==="
+REG=$(curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d "{\"file_path\":\"$FILE_PATH\"}" "$API/api/v1/files/register")
+echo "$REG" | jq '{success, file_uuid, file_name}'
+UUID=$(echo "$REG" | jq -r '.file_uuid // empty')
+[ -z "$UUID" ] && { echo "Registration failed"; exit 1; }
+
+# 3. Probe
+echo "=== Probe ==="
+curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \
+  jq '{name: .file_name, fps, duration}'
+
+# 4. Submit job
+echo "=== Process ==="
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d '{}' "$API/api/v1/file/${UUID}/process" | jq '{job_id, status}'
+
+# 5. Poll progress
+echo "=== Waiting for pipeline... ==="
+while true; do
+  PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}")
+  STATUS=$(echo "$PROGRESS" | jq -r '.status // "?"')
+  echo "$(date +%H:%M:%S): $(echo "$PROGRESS" | jq -r '[.processors[]? | "\(.name)=\(.status)(\(.frames_processed))"] | join("  ")')"
+  echo "$PROGRESS" | jq -e '[.processors[]? | select(.status == "pending" or .status == "running")] | length == 0' >/dev/null && break
+  sleep 10
+done
+
+# 6. Search
+echo "=== Search ==="
+curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \
+  -d "{\"query\":\"test\",\"uuid\":\"${UUID}\",\"limit\":3}" \
+  "$API/api/v1/search/universal" | jq '{total, hits: [.results[]? | {chunk_id: .chunk_id[0:30], text: .text[0:60]}]}'
+
+echo ""
+echo "✅ Done: $UUID"
+
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/TMDb_User_Guide.html b/docs_v1.0/doc_user/TMDb_User_Guide.html new file mode 100644 index 0000000..9c1e1d1 --- /dev/null +++ b/docs_v1.0/doc_user/TMDb_User_Guide.html @@ -0,0 +1,923 @@ + + + + +Tmdb User Guide - Momentry API Docs + + + +
+← Back to index +
+

document_type: "user_manual" +service: "MOMENTRY_CORE" +title: "TMDb Enrichment 使用指南" +date: "2026-05-17" +version: "V1.0" +status: "active" +owner: "M5" +created_by: "OpenCode"

+
+

TMDb Enrichment 使用指南

+ + + + + + + + + + + + + + + + + +
項目內容
目標讀者developer
預備知識需有 API Key
+
+

Base URL

+ + + + + + + + + + + + + + + + + + + + + + + + + +
EnvironmentURLPurpose
Playground (Dev)http://localhost:3003Development and testing
Productionhttp://localhost:3002Production deployment
External (M5)https://m5api.momentry.ddns.netRemote access
+

Variables

+

All examples in this documentation use these environment variables:

+
API="http://localhost:3003"
+KEY="your-api-key-here"
+
+ +

Authentication

+

All endpoints under /api/v1/* require authentication, except the login and logout endpoints. +The following endpoints are public (no auth needed):

+
    +
  • GET /health
  • +
  • POST /api/v1/auth/login
  • +
  • POST /api/v1/auth/logout
  • +
+

Three Authentication Modes

+

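The system supports three authentication methods, checked in priority order by the middleware (the API key is accepted either as a header or as a query parameter, so the list below has four entries):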

+
Middleware priority:
+  1. Session Cookie (Portal/browser)
+  2. JWT Bearer (API clients: n8n, CLI)
+  3. API Key Header (legacy compatibility)
+  4. API Key Query Param (?api_key=)
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModeTransportExpiryScopeBest for
Session CookieCookie: session_id=<uuid>24hper-browser sessionPortal (browser)
JWTAuthorization: Bearer <token>1hper-login tokenAPI clients (n8n, CLI, scripts)
API KeyX-API-Key: <key>90dfixed key for automationLegacy scripts, WordPress
+
+

Login

+

Default accounts & API keys:

+ + + + + + + + + + + + + + + + + + + + + + + +
UsernamePasswordAPI KeyRole
adminadminadmin
demodemomuser_demo_key_32chars_abcdef1234567890user
+

The demo API key is set via MOMENTRY_DEMO_API_KEY env var and can be used in place of JWT for marcom integrations:

+
# Using API key instead of JWT
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: muser_demo_key_32chars_abcdef1234567890"
+
+ +
# Login as admin
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "admin", "password": "admin"}'
+
+# Login as demo user
+curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username": "demo", "password": "demo"}'
+
+ +

Success Response

+
{
+  "success": true,
+  "jwt": "eyJhbGciOiJIUzI1NiIs...",
+  "api_key": "muser_...",
+  "user": {
+    "username": "admin",
+    "role": "admin"
+  },
+  "expires_at": "2026-05-18T13:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
jwtstringJWT access token. Use as Authorization: Bearer <jwt>. Expires in 1 hour.
api_keystringLegacy API key. Use as X-API-Key: <key>. Good for 90 days.
user.usernamestringUsername
user.rolestringRole: admin, user, or readonly
expires_atstringISO8601 timestamp of JWT expiration
+

The login endpoint also sets a Set-Cookie header for browser-based clients:

+
Set-Cookie: session_id=<uuid>; Path=/api; HttpOnly; SameSite=Strict; Max-Age=86400
+
+ +

Error Response (401)

+
{
+  "success": false,
+  "message": "Invalid username or password"
+}
+
+ +
+

Using JWT

+

JWT is preferred for API clients (n8n, CLI scripts, WordPress). It is validated by the middleware without a database lookup (stateless).

+
# Login and capture JWT
+JWT=$(curl -s -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['jwt'])")
+
+# Use JWT for all subsequent requests
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/files/scan"
+curl -H "Authorization: Bearer $JWT" "$API/api/v1/resource/tmdb"
+
+ +

JWT is short-lived (1 hour). When it expires, request a new one via login.

+
+

Using Session Cookie (Browser)

+

Browser-based clients (Portal) get a session cookie automatically after login. The browser sends the cookie with every request—no manual header needed.

+
# Login captures the session cookie from Set-Cookie header
+curl -v -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' 2>&1 | grep "Set-Cookie"
+
+# Browser automatically sends: Cookie: session_id=<uuid>
+# No manual header needed for subsequent requests
+
+ +

The session cookie is HttpOnly (not accessible from JavaScript) and SameSite=Strict (protected against CSRF).

+
+

Using Legacy API Key

+
curl -H "X-API-Key: $KEY" "$API/api/v1/files/scan"
+
+# Also accepted via Bearer header (non-JWT format) or query parameter:
+curl -H "Authorization: Bearer $KEY" "$API/api/v1/files/scan"
+curl "$API/api/v1/files/scan?api_key=$KEY"
+
+ +

API keys are validated via SHA256 hash lookup in the database. They are long-lived (90 days) and intended for automation.

+

Obtaining an API Key (CLI)

+
momentry api-key create "My API Key" --key-type user
+
+ +
+

Logout

+
# Logout using the session cookie (browser)
+curl -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=<uuid>"
+
+ +

What logout does

+ + + + + + + + + + + + + + + + + + + + + +
Auth modeEffect
Session CookieSession deleted from database. Same cookie returns 401 on subsequent requests.
JWTJWT remains valid until expiry. (JWT is stateless — logout adds JWT to a blacklist only if API key mode is used.)
API KeyAPI key remains valid. (Legacy keys are shared across sessions — revoking would break other clients.)
+

Example: full session lifecycle

+
# 1. Login
+SESSION_ID=$(curl -s -D - -X POST "$API/api/v1/auth/login" \
+  -H "Content-Type: application/json" \
+  -d '{"username":"admin","password":"admin"}' | grep "Set-Cookie" | sed 's/.*session_id=\([^;]*\).*/\1/')
+
+# 2. Use session (works)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 200
+
+# 3. Logout
+curl -s -X POST "$API/api/v1/auth/logout" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → {"success": true}
+
+# 4. Use session again (rejected)
+curl -s -o /dev/null -w "HTTP %{http_code}\n" "$API/api/v1/resource/tmdb" \
+  -H "Cookie: session_id=$SESSION_ID"
+# → HTTP 401
+
+ +
+

Authentication Flow Summary

+
Login Request
+     │
+     ▼
+┌──────────────────┐
+│  1. Check users  │ ← users table (argon2 password verify)
+│     table        │
+└──────┬───────────┘
+       │
+   ┌───┴───┐
+   │ match │
+   └───┬───┘
+       │
+       ▼
+┌──────────────────┐
+│  2. Create JWT   │ ← 1h expiry, signed with JWT_SECRET
+├──────────────────┤
+│  3. Create       │ ← 24h expiry, stored in sessions table
+│     session      │
+├──────────────────┤
+│  4. Set-Cookie   │ ← HttpOnly, SameSite=Strict, Path=/api
+├──────────────────┤
+│  5. Return       │ ← JWT + api_key + user info to client
+└──────────────────┘
+
+ +
Protected Request
+     │
+     ▼
+┌──────────────────────┐
+│  Middleware checks:  │
+│                      │
+│  1. Cookie session?  │ → DB lookup session → get api_key → verify
+│                      │
+│  2. JWT Bearer?      │ → verify JWT signature → decode claims
+│                      │
+│  3. X-API-Key?       │ → SHA256 hash → DB lookup → verify
+│                      │
+│  4. ?api_key=?       │ → same as #3
+│                      │
+│  5. None → 401       │
+└──────────────────────┘
+
+ +
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid authentication
401Session expired or logged out
401JWT expired
401API key revoked or inactive
+
+

Related

+
    +
  • POST /api/v1/resource/tmdb/check — test authentication + TMDb API connectivity
  • +
  • GET /health/detailed — view auth status (integrations section)
  • +
+
+

File Registration

+

POST /api/v1/files/register

+

Auth: Required +Scope: file-level

+

Register a video file for processing. Returns the file's metadata and UUID.

+

New in v0.1.2: Registration now automatically triggers the processing pipeline — no need to call POST /api/v1/file/:uuid/process separately. The system will: +1. Register the file and run ffprobe +2. Auto-run offline TMDb probe (reads local identity files, no API calls) +3. Create a monitor job for the worker +4. Worker starts all 10 processors (Cut → ASR → ASRX → YOLO → OCR → Face → Pose → VisualChunk → Story → 5W1H)

+

If the file already exists (same content hash), returns the existing record with already_exists: true.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_pathstringYesPath to video file on disk
patternstringNoRegex pattern for batch register (requires file_path to be a directory)
user_idintegerNoUser ID to associate with registration
content_hashstringNoPre-computed SHA-256 hash (skips computation)
+

Example

+
# Register a single file
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/video.mp4"}'
+
+# Batch register files matching a pattern in a directory
+curl -s -X POST "$API/api/v1/files/register" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_path": "/path/to/dir", "pattern": ".*\\.mp4$"}'
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "3a6c1865...",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "file_type": "video",
+  "duration": 120.5,
+  "width": 1920,
+  "height": 1080,
+  "fps": 24.0,
+  "total_frames": 2892,
+  "already_exists": false,
+  "message": "File registered successfully"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID of the registered file
file_namestringFile name (auto-renamed if name conflict)
file_pathstringCanonical path on disk
file_typestring"video", "audio", or "unknown"
durationfloatDuration in seconds
widthintegerVideo width in pixels
heightintegerVideo height in pixels
fpsfloatFrames per second
total_framesintegerTotal frame count
already_existsbooleanTrue if same content was already registered
messagestringHuman-readable status
+

Error Responses

+ + + + + + + + + + + + + + + + + + + + + +
HTTPWhen
401Missing or invalid API key
400Invalid request body
404File path does not exist
+
+

GET /api/v1/files/scan

+

Auth: Required +Scope: file-level

+

Scan the filesystem directory and list all media files, showing which are registered, processing, or unregistered.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number (1-based)
page_sizeintegerNoallItems per page (alias: limit)
limitintegerNoallMax items (alias for page_size)
patternstringNoRegex filter on file name (e.g., .*\\.mp4$)
sort_bystringNonameSort field: name, size, modified, status
sort_orderstringNoascSort direction: asc or desc
+

Example

+
# Full scan
+curl -s "$API/api/v1/files/scan" -H "X-API-Key: $KEY" | jq '{total, registered_count, unregistered_count}'
+
+# Paginated (page 1, 5 per page)
+curl -s "$API/api/v1/files/scan?page=1&page_size=5" -H "X-API-Key: $KEY" | jq '{page, total_pages, files: [.files[].file_name]}'
+
+# Regex filter: only mp4 files
+curl -s "$API/api/v1/files/scan?pattern=.*\\.mp4$" -H "X-API-Key: $KEY" | jq '{filtered_total, files: [.files[].file_name]}'
+
+# Sort by file size (largest first)
+curl -s "$API/api/v1/files/scan?sort_by=size&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, file_size}]'
+
+# Sort by modified time (most recent first)
+curl -s "$API/api/v1/files/scan?sort_by=modified&sort_order=desc&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, modified_time}]'
+
+# Sort by status
+curl -s "$API/api/v1/files/scan?sort_by=status&page_size=5" -H "X-API-Key: $KEY" | jq '[.files[] | {file_name, status}]'
+
+ +

Response (200)

+
{
+  "files": [
+    {
+      "file_name": "video.mp4",
+      "file_size": 12345678,
+      "is_registered": true,
+      "file_uuid": "3a6c1865...",
+      "status": "completed",
+      "registration_time": "2026-05-16T12:00:00Z",
+      "job_id": 42
+    }
+  ],
+  "total": 107,
+  "filtered_total": 80,
+  "page": 1,
+  "page_size": 20,
+  "total_pages": 4,
+  "registered_count": 26,
+  "unregistered_count": 81
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
filesarrayArray of file info objects (paginated)
files[].file_namestringFile name
files[].relative_pathstringPath relative to scan root
files[].file_pathstringAbsolute path on disk
files[].file_sizeintegerFile size in bytes
files[].modified_timestringLast modified timestamp (ISO8601)
files[].is_registeredbooleanWhether file is registered in DB
files[].file_uuidstring32-char hex UUID (only if registered)
files[].statusstring"completed", "processing", "registered", "unregistered", or null
files[].registration_timestringDB registration timestamp (only if registered)
files[].job_idintegerProcessing job ID (only if a job exists)
totalintegerTotal files found on disk (unfiltered)
filtered_totalintegerFiles matching regex filter
pageintegerCurrent page number
page_sizeintegerItems per page
total_pagesintegerTotal pages
registered_countintegerFiles registered in DB
unregistered_countintegerFiles not yet registered
+

Notes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureBehavior
RegexCase-insensitive ((?i) prefix auto-applied). Applied to file_name.
Sort orderDefault (sort_by=name): registered files first, then alphabetically. sort_by=status: alphabetical by status string.
Paginationpage_size and limit are aliases. Default: show all results.
Processing orderpattern regex filter → sort_by/sort_orderpage/page_size slice.
+
+

TMDb Enrichment

+
+

⚠️ External resource: TMDb requires internet access, violating Momentry's local-only principle. +All core processing (ASR, YOLO, Face, OCR, Pose, embeddings) runs fully offline. +TMDb enrichment is optional and gated behind TMDB_API_KEY + MOMENTRY_TMDB_PROBE_ENABLED.

+
+

Overview

+

TMDb enrichment is an optional identity enrichment step that can be run after Pipeline face detection completes. The workflow is:

+
    +
  1. Prefetch (requires internet): Download movie cast data from TMDb API → cache to {file_uuid}.tmdb.json
  2. +
  3. Probe: Read local cache → create identities for all cast members (source='tmdb') + save identity.json + download profile image to {OUTPUT}/identities/{uuid}/profile.jpg
  4. +
  5. Match: The worker automatically matches video faces against TMDb identities when MOMENTRY_TMDB_PROBE_ENABLED=true
  6. +
+

POST /api/v1/agents/tmdb/prefetch

+

Auth: Required +Scope: file-level

+

Fetch TMDb cast data for a registered file and cache it locally. This is the only step requiring internet access.

+

Request Parameters

+ + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID to enrich
+

Example

+
curl -s -X POST "$API/api/v1/agents/tmdb/prefetch" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'"}'
+
+ +

Response (200)

+
{"success": true, "file_uuid": "...", "cache_path": "/output/...tmdb.json"}
+
+ +

POST /api/v1/file/:file_uuid/tmdb-probe

+

Auth: Required +Scope: file-level

+

Read local TMDb cache and create/update identities. Requires prefetch to have been run first.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/tmdb-probe" \
+  -H "X-API-Key: $KEY" | jq '{identities_created, movie_title}'
+
+ +

Response (200 — identities created)

+
{"success": true, "identities_created": 15, "movie_title": "Charade"}
+
+ +

Response (200 — no cache)

+
{"success": false, "message": "No TMDb cache found. Run tmdb-prefetch first."}
+
+ +

GET /api/v1/resource/tmdb

+

Auth: Required +Scope: system-level

+

View TMDb resource status including configuration, identity counts, and cache file count.

+

Example

+
curl -s "$API/api/v1/resource/tmdb" -H "X-API-Key: $KEY" \
+  | jq '{identities_seeded, cache_files}'
+
+ +

POST /api/v1/resource/tmdb/check

+

Auth: Required +Scope: system-level

+

Ping the TMDb API to verify connectivity and measure latency.

+

Example

+
curl -s -X POST "$API/api/v1/resource/tmdb/check" \
+  -H "X-API-Key: $KEY" | jq '{api_reachable, api_latency_ms}'
+
+ +

Response

+
{
+  "api_key_configured": true,
+  "enabled": false,
+  "api_reachable": true,
+  "api_latency_ms": 120
+}
+
+ +
+
+ + \ No newline at end of file diff --git a/docs_v1.0/doc_user/index.html b/docs_v1.0/doc_user/index.html new file mode 100644 index 0000000..a1019c2 --- /dev/null +++ b/docs_v1.0/doc_user/index.html @@ -0,0 +1,26 @@ + + + + +Momentry API Docs + + + + + + \ No newline at end of file diff --git a/docs_v1.0/doc_user/login.html b/docs_v1.0/doc_user/login.html new file mode 100644 index 0000000..c199107 --- /dev/null +++ b/docs_v1.0/doc_user/login.html @@ -0,0 +1,46 @@ + + + + +Login - Momentry Docs + + + +
+

Momentry Docs

+
+ + +
Invalid credentials
+ +
+
+ + + \ No newline at end of file diff --git a/docs_v1.0/doc_wasm/index.html b/docs_v1.0/doc_wasm/index.html index 9b0f3f9..3e1f5f0 100644 --- a/docs_v1.0/doc_wasm/index.html +++ b/docs_v1.0/doc_wasm/index.html @@ -66,6 +66,7 @@ const MODULES = [ ["10_pipeline","生產線","Pipeline"], ["12_agent","智慧代理","AI Agents"], ["13_config","系統設定","System Config"], + ["14_identity_history","操作歷史","Operation History (Undo/Redo)"], ]; const el = document.getElementById('content'); diff --git a/docs_v1.0/doc_wasm/modules/06_search.md b/docs_v1.0/doc_wasm/modules/06_search.md index 0fad9df..3d2d83a 100644 --- a/docs_v1.0/doc_wasm/modules/06_search.md +++ b/docs_v1.0/doc_wasm/modules/06_search.md @@ -7,7 +7,7 @@ ### `POST /api/v1/search/smart` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity. @@ -15,13 +15,22 @@ Semantic vector search using EmbeddingGemma-300m. Generates a query embedding vi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `file_uuid` | string | Yes | — | File UUID to search within | | `query` | string | Yes | — | Search text | +| `file_uuid` | string | No | — | File UUID to search within. 
If omitted, searches all files (global search) | | `limit` | integer | No | 5 | Max results to return | | `page` | integer | No | 1 | Page number | | `page_size` | integer | No | 5 | Items per page | -#### Example +#### Example (Global Search) + +```bash +curl -s -X POST "$API/api/v1/search/smart" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"query": "Audrey Hepburn"}' +``` + +#### Example (File-specific Search) ```bash curl -s -X POST "$API/api/v1/search/smart" \ @@ -37,6 +46,7 @@ curl -s -X POST "$API/api/v1/search/smart" \ "query": "Audrey Hepburn", "results": [ { + "file_uuid": "a6fb22eebefaef17e62af874997c5944", "parent_id": 1087822, "scene_order": 1087822, "start_frame": 104438, @@ -54,12 +64,16 @@ curl -s -X POST "$API/api/v1/search/smart" \ } ``` +| Field | Type | Description | +|-------|------|-------------| +| `results[].file_uuid` | string | File UUID where result was found | + --- ### `POST /api/v1/search/universal` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`. @@ -68,13 +82,22 @@ Multi-type BM25 full-text search across chunks, frames, and persons. Uses Postgr | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `query` | string | Yes | — | Search text | -| `file_uuid` | string | No | — | Restrict to specific file | +| `file_uuid` | string | No | — | Restrict to specific file. 
If omitted, searches all files (global search) | | `types` | string[] | No | `["chunk","frame","person"]` | Search types | | `limit` | integer | No | 10 | Max results per type | | `page` | integer | No | 1 | Page number | | `page_size` | integer | No | 20 | Items per page | -#### Example +#### Example (Global Search) + +```bash +curl -s -X POST "$API/api/v1/search/universal" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"query": "Cary Grant"}' +``` + +#### Example (File-specific Search) ```bash curl -s -X POST "$API/api/v1/search/universal" \ @@ -90,6 +113,7 @@ curl -s -X POST "$API/api/v1/search/universal" \ "results": [ { "type": "chunk", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2", "chunk_type": "story_child", "start_frame": 5103, @@ -98,6 +122,25 @@ curl -s -X POST "$API/api/v1/search/universal" \ "end_time": 213.64, "text": "[213s-214s] Cary Grant: \"Olá!\"", "score": 0.9 + }, + { + "type": "frame", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "frame_number": 5105, + "timestamp": 212.72, + "score": 0.7, + "objects": null, + "ocr_texts": null, + "faces": null + }, + { + "type": "person", + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "identity_id": 12, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "Cary Grant", + "appearance_count": 542, + "score": 0.95 } ], "total": 20, @@ -105,23 +148,78 @@ curl -s -X POST "$API/api/v1/search/universal" \ } ``` +| Field | Type | Description | +|-------|------|-------------| +| `results[].type` | string | Result type: `chunk`, `frame`, or `person` | +| `results[].file_uuid` | string | File UUID where result was found (all types) | + --- ### `POST /api/v1/search/frames` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level Search face detection frames by identity name or trace ID. 
--- -### `POST /api/v1/search/identity_text` +### `GET /api/v1/search/identity_text` **Auth**: Required -**Scope**: file-level +**Scope**: global / file-level -Search text chunks spoken by a specific identity. +Search text chunks → find associated identities. Returns chunks where face detections overlap with text content. + +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `q` | string | Yes | — | Search text (ILIKE match) | +| `file_uuid` | string | No | — | Restrict to specific file. If omitted, searches all files (global search) | +| `limit` | integer | No | 50 | Max results | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 50 | Items per page | + +#### Example (Global Search) + +```bash +curl -s "$API/api/v1/search/identity_text?q=love" -H "X-API-Key: $KEY" +``` + +#### Example (File-specific Search) + +```bash +curl -s "$API/api/v1/search/identity_text?file_uuid=$FILE_UUID&q=love" -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "results": [ + { + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "chunk_id": "llm_parent_..._256_270", + "start_time": 256.256, + "end_time": 270.228, + "text_content": "...lack of affection...", + "identity_id": 9, + "identity_name": "Audrey Hepburn", + "identity_source": "tmdb", + "trace_id": 94 + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `results[].file_uuid` | string | File UUID where chunk was found | +| `results[].identity_id` | integer | Identity ID if face was detected | +| `results[].trace_id` | integer | Face trace ID | --- @@ -145,4 +243,4 @@ Search text chunks spoken by a specific identity. 
| **Storage** | pgvector (`chunk.embedding` column) | --- -*Updated: 2026-05-19 12:49:24* +*Updated: 2026-05-27 — Added global search support for smart, universal, identity_text APIs* diff --git a/docs_v1.0/doc_wasm/modules/07_identity.md b/docs_v1.0/doc_wasm/modules/07_identity.md index f662b2f..3178ba9 100644 --- a/docs_v1.0/doc_wasm/modules/07_identity.md +++ b/docs_v1.0/doc_wasm/modules/07_identity.md @@ -70,7 +70,16 @@ curl -s "$API/api/v1/identity/$IDENTITY_UUID" -H "X-API-Key: $KEY" **Auth**: Required **Scope**: identity-level -Delete an identity permanently. +Delete an identity permanently. All face detections bound to this identity are unbound (`identity_id` set to `NULL`). The identity JSON file is deleted from disk. + +#### History & Undo/Redo + +Every DELETE records a full snapshot of the identity and its unbound faces. See [`14_identity_history.md`](14_identity_history.md#4-delete-history--undoredo) for: + +- Undo via `POST /api/v1/identity/:identity_uuid/undo` — recreates identity and re-binds faces +- Redo via `POST /api/v1/identity/:identity_uuid/redo` — re-deletes the identity + +**Note**: Delete undo/redo reuses the same endpoints as PATCH undo/redo. The endpoint automatically detects whether the identity was deleted (undo) or needs to be re-deleted (redo) based on the history record. --- @@ -129,124 +138,75 @@ curl -s -X PATCH "$API/api/v1/identity/$IDENTITY_UUID" \ | HTTP | When | |------|------| -| `400` | No fields to update or invalid UUID format | | `404` | Identity not found | +| `500` | Database error | + +#### History & Undo/Redo + +Every bind records a before/after snapshot. 
See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for: + +- `POST /api/v1/identity/:identity_uuid/bind/undo` — Revert a bind +- `POST /api/v1/identity/:identity_uuid/bind/redo` — Reapply an undone bind +- `GET /api/v1/identity/:identity_uuid/bind/history` — Query bind operations --- -### `GET /api/v1/identity/:identity_uuid/files` +## Metadata (Embedded JSON) -**Auth**: Required -**Scope**: identity-level +The `identities.metadata` column is a **JSONB** field that stores arbitrary structured data alongside the identity's core fields (name, status, identity_type). No schema is enforced — any valid JSON object is accepted. -Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range. +### Merge Behavior -#### Example +| Operation | Strategy | Example | +|-----------|----------|---------| +| **PATCH** | Shallow top-level merge: `COALESCE(metadata,'{}'::jsonb) \|\| $1::jsonb` | Sending `{"tmdb_rating": 8.5}` only adds/overwrites `tmdb_rating`; all other existing keys are preserved. | +| **mergeinto** | Recursive deep merge — nested sub-keys are merged individually, not replaced wholesale | Target has `{"tmdb": {"biography": "..."}}`, source has `{"tmdb": {"birthday": "1904-01-18"}}` → result is `{"tmdb": {"biography": "...", "birthday": "1904-01-18"}}`. | +| **Upload (`POST`)** | Direct overwrite — the entire `metadata` field is replaced with the request value. 
| | -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY" -``` +### Validation ---- +| Scenario | Result | +|----------|--------| +| PATCH with non-object metadata (`string`, `array`, `number`, `null`) | `400 Bad Request: "metadata must be a JSON object"` | +| mergeinto with non-object metadata | Accepted (mergeinto validates at application level) | +| Upload with non-object metadata | Accepted (upload replaces directly) | -### `GET /api/v1/identity/:identity_uuid/faces` +### Conventional Keys -**Auth**: Required -**Scope**: identity-level +| Key | Type | Writer | Purpose | +|-----|------|--------|---------| +| `aliases` | `[{locale, name}]` | PATCH, mergeinto | Multilingual display names (see [Alias System](#alias-system-bcp-47-locale-tags)) | +| `merged_into` | `{uuid, at}` | mergeinto | Marks an identity as merged (undo mechanism reads this) | +| `tmdb_*` | various | TMDb probe | Movie metadata (biography, birthday, known_for, etc.). Written only when `MOMENTRY_TMDB_PROBE_ENABLED=true`. | +| `source` | string | mergeinto | Tagged on aliases/metadata when added by merge (`"merge"` value) | -Get all face detection records associated with this identity. +Custom keys are fully supported — no registration required. -#### Example +### Search Coverage -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY" -``` +The identity search endpoint (`GET /api/v1/identity/search`) matches across three scopes: -| Field | Type | Description | -|-------|------|-------------| -| `file_uuid` | string | File where face was detected | -| `frame_number` | integer | Frame number of detection | -| `face_id` | string | Face ID (format: `face_{frame_number}`) | -| `confidence` | float | Detection confidence | +1. `i.name` — exact and ILIKE against display name +2. `jsonb_array_elements(i.metadata->'aliases')->>'name'` — locale-tagged alias names +3. 
`i.metadata::text ILIKE $1` — raw string search across the entire JSON blob (all keys, all values) ---- +This means searching for `"1904-01-18"` or `"biography"` will match identities whose metadata contains those strings anywhere. -### `GET /api/v1/identity/:identity_uuid/chunks` +### History Snapshots -**Auth**: Required -**Scope**: identity-level +Every `identity_history` record captures the **full metadata** in both `before_snapshot` and `after_snapshot` (as part of the complete identity JSONB dump). Undo restores the identity row — including metadata — to the `before_snapshot` state. -Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said. +For merge operations, the MongoDB merge history records `metadata_fields_added` and `metadata_fields_added_paths` (dot-separated paths like `"tmdb.biography"`). Merge undo removes only those specific paths, preserving subsequent manual edits to other metadata keys. -#### Example +### Best Practices -```bash -curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY" -``` - -#### Response (200) - -```json -{ - "success": true, - "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", - "data": [ - { - "id": 0, - "file_uuid": "bd80fec92b0b6963d177a2c55bf713e2", - "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2", - "chunk_type": "sentence", - "start_frame": 5103, - "end_frame": 5127, - "fps": 24.0, - "start_time": 212.64, - "end_time": 213.64, - "text_content": "[213s-214s] Cary Grant: \"Olá!\"" - } - ] -} -``` - -| Field | Type | Description | -|-------|------|-------------| -| `file_uuid` | string | File identifier | -| `chunk_id` | string | Sentence chunk identifier | -| `start_frame` | integer | Frame-accurate start position | -| `end_frame` | integer | Frame-accurate end position | -| `fps` | float | Frames per second | -| `start_time` | float | Start time in seconds | -| `end_time` | float | End time in seconds | -| `text_content` | string | 
Spoken text content | - ---- - -### `POST /api/v1/identity/:identity_uuid/bind` - -**Auth**: Required -**Scope**: identity-level - -Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition. - -#### Request Parameters - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `file_uuid` | string | Yes | File where face is detected | -| `face_id` | string | Yes | Face ID (format: `{frame}_{idx}`) | - -#### Side Effects - -- 清除該 face detection row 的 `stranger_id`(設為 NULL) -- 不影響 `identities` 表中原有的 stranger auto-identity 記錄 - -#### Example - -```bash -curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \ - -H "X-API-Key: $KEY" \ - -H "Content-Type: application/json" \ - -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}' -``` +| Guideline | Reason | +|-----------|--------| +| Deep nesting is allowed in metadata | All metadata merge operations use `jsonb_deep_merge()` — nested sub-keys are merged recursively, not replaced wholesale | +| Use `aliases` for display names | Frontend has built-in locale fallback logic (see [Alias System](#alias-system-bcp-47-locale-tags)) | +| Avoid >1MB per identity | Metadata is included in search indexing (`metadata::text ILIKE`); large blobs degrade query performance | +| Don't rely on metadata ordering | JSONB does not preserve the insertion order of object keys; PostgreSQL stores them in its own internal order | +| No LLM/Gemma4 agent writes to metadata | Only API endpoints (PATCH, mergeinto, upload) and TMDb probe modify `identities.metadata` | --- @@ -295,6 +255,10 @@ curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/trace" \ | `404` | Identity not found | | `500` | Database error | +#### History & Undo/Redo + +Trace bind operations share the same history/undo/redo system as single-face binds. See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for endpoints.
+ --- ### `GET /api/v1/identity/:identity_uuid/traces` @@ -382,6 +346,13 @@ Unbind a face detection from an identity. Removes the identity association from - 被 unbind 的 face 不會自動成為 stranger - 要重新標記為 stranger 需重新跑 Agent API(`identity/analyze`) +#### History & Undo/Redo + +Unbind records a before/after snapshot. See [`14_identity_history.md`](14_identity_history.md#2-bindunbindtrace-history--undoredo) for: + +- `POST /api/v1/identity/:identity_uuid/bind/undo` — Revert an unbind +- `POST /api/v1/identity/:identity_uuid/bind/redo` — Reapply an undone unbind + --- ### `POST /api/v1/identity/:identity_uuid/mergeinto` @@ -391,6 +362,13 @@ Unbind a face detection from an identity. Removes the identity association from Transfer all face bindings from this identity to another identity, then optionally delete or mark the source as merged. +#### Two Merge Cases + +| Case | Description | Undo/Redo Support | +|------|-------------|-------------------| +| **stranger → identity** | Merge an auto-generated stranger identity into a known identity (TMDb or user-defined) | ✅ 24hr undo/redo | +| **identity A → identity B** | Merge two known identities (e.g., duplicate entries) | ✅ 24hr undo/redo | + #### Request Parameters | Field | Type | Required | Default | Description | @@ -402,8 +380,12 @@ Transfer all face bindings from this identity to another identity, then optional - 轉移所有 `face_detections.identity_id` 到目標 identity - 同時清除所有被轉移 rows 的 `stranger_id` +- 將 source name 加入 target aliases (with `source: "merge"` tag) +- 將 source aliases 加入 target aliases (if not already present) +- 將 source metadata fields 加入 target metadata (if not already present) - `keep_history: true`(預設):source identity 設為 `status='merged'`,保留記錄 - `keep_history: false`:**刪除** source identity 及其 identity JSON 檔案 +- **記錄 merge history 到 MongoDB**(支援 undo/redo) #### Example @@ -411,7 +393,7 @@ Transfer all face bindings from this identity to another identity, then optional curl -s -X POST 
"$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ -H "X-API-Key: $KEY" \ -H "Content-Type: application/json" \ - -d '{"into_uuid": "'"$TARGET_UUID"'", "keep_history": false}' + -d '{"into_uuid": "'"$TARGET_UUID"'", "keep_history": true}' ``` #### Response (200) @@ -419,11 +401,23 @@ curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ ```json { "success": true, - "message": "Merged 'stranger_13894' into 'Louis Viret' (52 faces transferred, source deleted)", - "data": { "faces_transferred": 52 } + "message": "Merged 'stranger_13894' into 'Louis Viret' (52 faces transferred, history kept)", + "data": { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "faces_transferred": 52, + "aliases_added": 1, + "metadata_fields_added": 2 + } } ``` +| Field | Type | Description | +|-------|------|-------------| +| `merge_id` | string | Unique merge operation ID (for undo) | +| `faces_transferred` | integer | Number of face detections transferred | +| `aliases_added` | integer | Number of aliases added to target | +| `metadata_fields_added` | integer | Number of metadata fields added to target | + #### Error Responses | HTTP | When | @@ -433,25 +427,189 @@ curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \ --- -### `GET /api/v1/identities/search` +### `POST /api/v1/identity/merge/:merge_id/undo` **Auth**: Required **Scope**: identity-level -Search identities by name (ILIKE search). Returns matching identity records. +Undo a merge operation within 24 hours. Restores the source identity and reverts face bindings. + +#### Undo Behavior + +| Action | Description | +|--------|-------------| +| Restore source identity | If `keep_history=true`: restore status to `confirmed`
If `keep_history=false`: recreate identity from MongoDB snapshot | +| Restore faces | Transfer faces back to source identity | +| Remove aliases from target | Remove aliases with `source: "merge"` tag | +| Remove metadata fields from target | Remove fields that were added from source | +| **Preserve manual changes** | Keep aliases/metadata manually added after merge | #### Example ```bash -curl -s "$API/api/v1/identities/search?q=Cary" -H "X-API-Key: $KEY" +curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/undo" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Undo merge completed: 'stranger_13894' restored, 52 faces reverted", + "data": { + "source_identity_restored": { + "uuid": "a9a90105...", + "name": "stranger_13894", + "status": "confirmed" + }, + "faces_reverted": 52, + "aliases_removed_from_target": 1, + "metadata_fields_removed_from_target": 2 + } +} +``` + +#### Error Responses + +| HTTP | When | +|------|------| +| `400` | Undo deadline expired (>24hr) or already undone | +| `404` | Merge record not found | +| `500` | Database error | + +--- + +### `POST /api/v1/identity/merge/:merge_id/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo a previously undone merge operation. See [`14_identity_history.md`](14_identity_history.md#post-apiv1identitymergemerge_idredo) for full details. + +--- + +### `GET /api/v1/identity/merge/history` + +**Auth**: Required +**Scope**: identity-level + +Query merge history records from MongoDB. 
+ +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `source_uuid` | string | No | — | Filter by source identity UUID | +| `target_uuid` | string | No | — | Filter by target identity UUID | +| `merge_id` | string | No | — | Filter by specific merge ID | +| `undone` | bool | No | — | Filter by undone status | +| `page` | int | No | 1 | Page number | +| `page_size` | int | No | 20 | Items per page | + +#### Example + +```bash +curl -s "$API/api/v1/identity/merge/history?page=1&page_size=10" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "page": 1, + "page_size": 10, + "results": [ + { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "source_name": "stranger_13894", + "target_name": "Louis Viret", + "faces_transferred": 52, + "merged_at": "2026-05-27T10:00:00Z", + "undo_deadline": "2026-05-28T10:00:00Z", + "undone": false, + "undo_expired": false + } + ] +} ``` | Field | Type | Description | |-------|------|-------------| -| `name` | string | Identity name | -| `source` | string | Identity source | -| `tmdb_id` | integer | TMDb ID (if source = tmdb) | -| `file_uuid` | string | Associated file | +| `merge_id` | string | Unique merge operation ID | +| `source_name` | string | Source identity name | +| `target_name` | string | Target identity name | +| `faces_transferred` | integer | Number of faces transferred | +| `merged_at` | datetime | When merge occurred | +| `undo_deadline` | datetime | 24hr deadline for undo | +| `undone` | bool | Whether merge was undone | +| `undo_expired` | bool | Whether undo deadline passed | + +--- + +### `GET /api/v1/identities/search` + +**Auth**: Required +**Scope**: global / file-level + +Search identity name → find associated chunks. Searches identity name and aliases, returns identities with their associated text chunks. 
+ +#### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `q` | string | Yes | — | Search text (ILIKE match on name and aliases) | +| `file_uuid` | string | No | — | Restrict to specific file. If omitted, searches all files (global search) | +| `limit` | integer | No | 50 | Max results | + +#### Example (Global Search) + +```bash +curl -s "$API/api/v1/identities/search?q=Audrey" -H "X-API-Key: $KEY" +``` + +#### Example (File-specific Search) + +```bash +curl -s "$API/api/v1/identities/search?q=Audrey&file_uuid=$FILE_UUID" -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "total": 5, + "results": [ + { + "identity_id": 9, + "name": "Audrey Hepburn", + "source": "tmdb", + "tmdb_id": 1932, + "file_uuid": "a6fb22eebefaef17e62af874997c5944", + "trace_id": 41, + "chunk_id": "llm_parent_..._204_207", + "start_time": 204.162, + "text_content": "...confrontation..." + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `results[].identity_id` | integer | Identity ID | +| `results[].name` | string | Identity name | +| `results[].source` | string | Identity source (`tmdb`, `user_defined`, etc.) | +| `results[].tmdb_id` | integer | TMDb person ID (if source = tmdb) | +| `results[].file_uuid` | string | File where identity appears | +| `results[].trace_id` | integer | Face trace ID | +| `results[].chunk_id` | string | Associated chunk ID | +| `results[].start_time` | float | Chunk start time | +| `results[].text_content` | string | Chunk text content | --- @@ -628,4 +786,4 @@ PATCH /api/v1/identity/:identity_uuid This **replaces** the entire `aliases` array. To add to existing aliases, include all existing entries in the request. 
--- -*Updated: 2026-05-25 +*Updated: 2026-05-25 — Added `GET /api/v1/file/:file_uuid/faces` with 4 binding states, filters, strangers table split diff --git a/docs_v1.0/doc_wasm/modules/14_identity_history.md b/docs_v1.0/doc_wasm/modules/14_identity_history.md new file mode 100644 index 0000000..d61df17 --- /dev/null +++ b/docs_v1.0/doc_wasm/modules/14_identity_history.md @@ -0,0 +1,696 @@ + + + + +## Identity Operation History + +Every mutation on an identity automatically records a before/after snapshot. Use undo/redo to revert or reapply changes, and history to inspect the operation log. + +Four independent undo/redo systems exist: + +| System | Storage | Operations Covered | +|--------|---------|-------------------| +| **PATCH** | PostgreSQL `identity_history` | `update` | +| **Bind** | PostgreSQL `identity_history` | `bind`, `unbind`, `bind_trace` | +| **Merge** | MongoDB `identity_merge_history` | mergeinto | +| **Delete** | PostgreSQL `identity_history` | `delete` | + +--- + +### 1. PATCH History & Undo/Redo + +#### Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table | +| Snapshot | Full identity record (all fields) before and after each PATCH | +| Max records | 256 per identity (oldest auto-deleted when limit exceeded) | +| Undo steps | Unlimited (no expiry, no step limit) | +| Redo stack | Cleared on new PATCH (`is_undone=true` + `operation='update'` records are deleted) | + +##### Stack Model + +``` +PATCH 1 → PATCH 2 → PATCH 3 (undo stack, is_undone=false) + ↓ undo +PATCH 1 → PATCH 2 (undo stack) + PATCH 3 (redo stack, is_undone=true) + ↓ redo +PATCH 1 → PATCH 2 → PATCH 3 (undo stack) +``` + +A new PATCH after undo clears only the operation='update' redo stack (PATCH 3 is lost). Bind/merge redo stacks are not affected. + +--- + +#### `POST /api/v1/identity/:identity_uuid/undo` + +**Auth**: Required +**Scope**: identity-level + +Undo the most recent PATCH operations.
Restores the identity's `before_snapshot` and marks the history records as undone. + +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of undo steps to apply (max records undone in one call) | + +##### Behavior + +- Queries `is_undone=false` records with `operation='update'`, ordered by `created_at DESC` +- Restores `name`, `identity_type`, `source`, `status`, `metadata`, `tmdb_id`, `tmdb_profile` from the last record's `before_snapshot` +- Marks the undone records as `is_undone=true` with `undone_at=NOW()` +- Syncs `identity.json` to disk +- Updates `_index.json` if name changed + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/undo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "undone_count": 1, + "current_state": { + "id": 9, + "uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "Cary Grant", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": {}, + "tmdb_id": 112, + "tmdb_profile": null + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `undone_count` | integer | Number of history records undone | +| `current_state` | object | Full identity state after undo | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No undo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `POST /api/v1/identity/:identity_uuid/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo previously undone PATCH operations. Restores the identity's `after_snapshot` and marks the history records as no longer undone. 
+ +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of redo steps to apply | + +##### Behavior + +- Queries `is_undone=true` records with `operation='update'`, ordered by `created_at DESC` +- Restores all identity fields from the last record's `after_snapshot` +- Marks records as `is_undone=false` with `undone_at=NULL` +- Syncs `identity.json` to disk +- Updates `_index.json` if name changed + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/redo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "redone_count": 1, + "current_state": { + "id": 9, + "uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "name": "John Smith", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": { "aliases": [...] }, + "tmdb_id": 112, + "tmdb_profile": null + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `redone_count` | integer | Number of history records redone | +| `current_state` | object | Full identity state after redo | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No redo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `GET /api/v1/identity/:identity_uuid/history` + +**Auth**: Required +**Scope**: identity-level + +Query the PATCH operation history for an identity. Returns paginated records with undo/redo stack counts (filtered to `operation='update'`). 
+ +##### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `page` | integer | No | `1` | Page number (1-indexed) | +| `limit` | integer | No | `20` | Items per page (max 100) | + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "total": 5, + "undo_stack_count": 3, + "redo_stack_count": 2, + "results": [ + { + "history_id": 42, + "operation": "update", + "is_undone": false, + "created_at": "2026-05-27T12:00:00Z", + "undone_at": null + }, + { + "history_id": 41, + "operation": "update", + "is_undone": true, + "created_at": "2026-05-27T11:30:00Z", + "undone_at": "2026-05-27T13:00:00Z" + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `total` | integer | Total PATCH history records for this identity | +| `undo_stack_count` | integer | Records available for undo (`is_undone=false`) | +| `redo_stack_count` | integer | Records available for redo (`is_undone=true`) | +| `results[].history_id` | integer | History record ID | +| `results[].operation` | string | Operation type (`"update"` for PATCH) | +| `results[].is_undone` | boolean | Whether the operation has been undone | +| `results[].created_at` | string | When the PATCH was applied | +| `results[].undone_at` | string | When the undo occurred (null if not undone) | + +##### Example + +```bash +curl -s "$API/api/v1/identity/$IDENTITY_UUID/history?page=1&limit=10" \ + -H "X-API-Key: $KEY" +``` + +##### Error Responses + +| HTTP | When | +|------|------| +| `404` | Identity not found | +| `500` | Database error | + +--- + +### 2. Bind/Unbind/Trace History & Undo/Redo + +All three operations (`bind`, `unbind`, `bind_trace`) share a single history table and undo/redo stack. 
+ +#### Bind Operation Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table (same table as PATCH) | +| Snapshot | `{"file_uuid", "face_id" (or "trace_id"), "identity_id_before/after"}` | +| Max records | 256 per identity (shared limit across all operation types) | +| Undo steps | Unlimited (`steps` param) | +| Redo stack | Cleared on new bind/unbind/bind_trace (`operation IN ('bind','unbind','bind_trace')` + `is_undone=true` records deleted) | +| Stack isolation | Bind redo stack is **independent** from PATCH redo stack — clearing one does not affect the other | + +##### Stack Model + +``` +bind face_1 (to id=9) → unbind face_1 → bind trace 906 (to id=9) +(undo stack, is_undone=false) (undo stack) (undo stack) + ↓ undo (first undone: bind_trace) + bind trace 906 (is_undone=true) + (redo stack) + ↓ redo +bind face_1 → unbind face_1 → bind trace 906 +(undo stack) +``` + +A new bind/unbind/trace after undo clears only the bind redo stack (operations with `IN ('bind','unbind','bind_trace')`). + +##### Snapshot Format + +**Before (bind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_before": null +} +``` + +**After (bind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_after": 9 +} +``` + +**Before (unbind) — binding existed before:** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_before": 9 +} +``` + +**After (unbind):** +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "identity_id_after": null +} +``` + +For `bind_trace`, the snapshot uses `trace_id` instead of `face_id`, with `identity_id_before` capturing the first face's identity in that trace. + +--- + +#### `POST /api/v1/identity/:identity_uuid/bind/undo` + +**Auth**: Required +**Scope**: identity-level + +Undo the most recent bind/unbind/bind_trace operations. 
Restores `identity_id_before` from the snapshot and marks records as undone. + +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of undo steps to apply | + +##### Behavior + +- Queries `is_undone=false` records with `operation IN ('bind','unbind','bind_trace')`, ordered by `created_at DESC` +- Restores `identity_id_before` — for bind this is `null` (face was unbound), for unbind this is the original identity (face goes back), for bind_trace this is the trace's previous identity +- Marks the undone records as `is_undone=true` with `undone_at=NOW()` + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/undo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "operation": "bind", + "undone_count": 1, + "affected_rows": 53 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `operation` | string | The actual operation undone (`bind`, `unbind`, or `bind_trace`) | +| `undone_count` | integer | Number of history records undone | +| `affected_rows` | integer | Number of `face_detections` rows updated | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No bind undo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `POST /api/v1/identity/:identity_uuid/bind/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo previously undone bind/unbind/bind_trace operations. Restores `identity_id_after` from the snapshot. 
+ +##### Request (JSON) + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `steps` | integer | No | `1` | Number of redo steps to apply | + +##### Behavior + +- Queries `is_undone=true` records with `operation IN ('bind','unbind','bind_trace')`, ordered by `created_at DESC` +- Restores `identity_id_after` — for bind this is the identity the face was bound to, for unbind this is `null` +- Marks records as `is_undone=false` with `undone_at=NULL` + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/redo" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"steps": 1}' +``` + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "operation": "unbind", + "redone_count": 1, + "affected_rows": 1 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `operation` | string | The actual operation redone (`bind`, `unbind`, or `bind_trace`) | +| `redone_count` | integer | Number of history records redone | +| `affected_rows` | integer | Number of `face_detections` rows updated | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | No bind redo operations available | +| `404` | Identity not found | +| `500` | Database error | + +--- + +#### `GET /api/v1/identity/:identity_uuid/bind/history` + +**Auth**: Required +**Scope**: identity-level + +Query the bind/unbind/bind_trace operation history for an identity. Returns paginated records with undo/redo stack counts. 
+ +##### Query Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `page` | integer | No | `1` | Page number (1-indexed) | +| `limit` | integer | No | `20` | Items per page (max 100) | + +##### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "total": 3, + "undo_stack_count": 2, + "redo_stack_count": 1, + "results": [ + { + "history_id": 52, + "operation": "bind_trace", + "is_undone": false, + "created_at": "2026-05-27T14:00:00Z", + "undone_at": null + }, + { + "history_id": 51, + "operation": "unbind", + "is_undone": true, + "created_at": "2026-05-27T13:00:00Z", + "undone_at": "2026-05-27T14:30:00Z" + }, + { + "history_id": 50, + "operation": "bind", + "is_undone": false, + "created_at": "2026-05-27T12:00:00Z", + "undone_at": null + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `total` | integer | Total bind history records for this identity | +| `undo_stack_count` | integer | Records available for undo (`is_undone=false`) | +| `redo_stack_count` | integer | Records available for redo (`is_undone=true`) | +| `results[].history_id` | integer | History record ID | +| `results[].operation` | string | Operation type (`bind`, `unbind`, or `bind_trace`) | +| `results[].is_undone` | boolean | Whether the operation has been undone | +| `results[].created_at` | string | When the operation was applied | +| `results[].undone_at` | string | When the undo occurred (null if not undone) | + +##### Example + +```bash +curl -s "$API/api/v1/identity/$IDENTITY_UUID/bind/history?page=1&limit=10" \ + -H "X-API-Key: $KEY" +``` + +##### Error Responses + +| HTTP | When | +|------|------| +| `404` | Identity not found | +| `500` | Database error | + +--- + +### 3. Merge History & Undo/Redo + +Merge operations use MongoDB for richer record-keeping, with a 24-hour undo deadline. 
+ +#### Merge Operation Overview + +| Property | Value | +|----------|-------| +| Storage | MongoDB `identity_merge_history` collection | +| Snapshot | Full source identity state + target identity state + aliases/metadata diffs | +| Trigger | Every mergeinto with `keep_history=true` | +| Undo deadline | 24 hours (renewed on redo) | +| Redo support | Yes — restores undone merges with new 24hr deadline | +| Max records | Unlimited | + +--- + +#### `POST /api/v1/identity/merge/:merge_id/undo` + +Already documented in [`07_identity.md`](07_identity.md#post-apiv1identitymergemerge_idundo). See that document for full details. + +--- + +#### `POST /api/v1/identity/merge/:merge_id/redo` + +**Auth**: Required +**Scope**: identity-level + +Redo a previously undone merge operation within the renewed 24-hour deadline. + +##### Request + +No body required. The merge ID is taken from the URL path. + +##### Behavior + +1. Validates the merge record exists and `undone=true` (not already active) +2. Checks the 24-hour undo deadline (if expired, the redo is rejected) +3. Re-applies face bindings: moves all faces from `source_identity` to `target_identity` again +4. Re-adds aliases that were removed by the undo (aliases with `source: "merge"` tag) +5. Re-adds metadata fields that were removed by the undo +6. If `keep_history=true`: sets `source_identity.status = 'merged'` again +7. If `keep_history=false`: recreates source identity from the `undone_snapshot` stored at undo time +8. Syncs both identity JSON files to disk +9. Sets `undone=false`, clears `undone_snapshot`, renews `undo_deadline = NOW() + 24h` +10.
Records `redone_by` user for audit + +##### Example + +```bash +curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/redo" \ + -H "X-API-Key: $KEY" +``` + +##### Response (200) + +```json +{ + "success": true, + "message": "Redo merge completed: merged 'stranger_13894' into 'Louis Viret' (52 faces transferred)", + "data": { + "merge_id": "550e8400-e29b-41d4-a716-446655440000", + "faces_transferred": 52, + "aliases_re_added": 1, + "metadata_fields_re_added": 2 + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `merge_id` | string | The merge operation ID | +| `faces_transferred` | integer | Number of faces transferred from source to target | +| `aliases_re_added` | integer | Number of aliases restored to target | +| `metadata_fields_re_added` | integer | Number of metadata fields restored to target | + +##### Error Responses + +| HTTP | When | +|------|------| +| `400` | Merge not undone, deadline expired, or cannot redo | +| `404` | Merge record not found | +| `500` | Database error | + +--- + +### 4. 
Delete History & Undo/Redo + +#### Delete Operation Overview + +| Property | Value | +|----------|-------| +| Storage | PostgreSQL `identity_history` table | +| Snapshot | `{"identity": {...full row...}, "unbound_faces": [{file_uuid, face_id, trace_id}, ...]}` | +| Max records | 1 active delete record per identity (redo stack cleared on new delete) | +| Undo support | Yes — recreates identity row, re-binds faces | +| Redo support | Yes — re-deletes the identity | +| Identity file | Deleted on delete, recreated on undo | + +#### Snapshot Format + +```json +{ + "identity": { + "id": 9, + "uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4", + "name": "Cary Grant", + "identity_type": "people", + "source": "tmdb", + "status": "confirmed", + "metadata": {}, + "tmdb_id": 112, + "tmdb_profile": null + }, + "unbound_faces": [ + { + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_5", + "trace_id": null + }, + { + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "face_id": "1_6", + "trace_id": 906 + } + ] +} +``` + +#### Stack Model + +``` +DELETE identity (undo stack, is_undone=false) + ↓ undo +Identity recreated, faces re-bound + → delete history marked is_undone=true + ↓ redo (re-delete) +Identity deleted again, faces unbound + → delete history marked is_undone=false +``` + +A new delete after an undo clears the delete redo stack (no redo possible for the old delete). + +#### Undo Behavior (via existing `POST /api/v1/identity/:identity_uuid/undo`) + +1. Normal identity lookup fails (row was deleted) +2. Checks `identity_history` for `operation='delete' AND is_undone=false` matching the UUID in the snapshot +3. Recreates the identity row (new internal `id`, same UUID) +4. Re-binds all faces listed in `unbound_faces` to the new identity +5. Marks the `identity_history` delete record as `is_undone=true` with `undone_at=NOW()` +6. Syncs `identity.json` to disk +7.
Updates `_index.json` + +#### Redo Behavior (via existing `POST /api/v1/identity/:identity_uuid/redo`) + +1. Identity lookup succeeds (identity was restored by prior undo) +2. Checks `identity_history` for `operation='delete' AND is_undone=true` matching the identity_id +3. Deletes `identity.json` from disk +4. Unbinds all faces (`identity_id = NULL`) +5. Deletes the identity row +6. Marks the delete history record as `is_undone=false` +7. Returns success + +#### Error Responses (delete undo/redo) + +| HTTP | Scenario | +|------|----------| +| `400` | No delete history available (either no delete or already undone/redone) | +| `404` | Identity not found (for redo — identity wasn't restored) | +| `500` | Database error | + +--- + +### Comparison: PATCH vs Bind vs Merge vs Delete Undo/Redo + +| Aspect | PATCH Undo/Redo | Bind Undo/Redo | Merge Undo/Redo | Delete Undo/Redo | +|--------|----------------|----------------|-----------------|------------------| +| Storage | PostgreSQL `identity_history` | PostgreSQL `identity_history` | MongoDB `identity_merge_history` | PostgreSQL `identity_history` | +| Operation filter | `operation='update'` | `operation IN ('bind','unbind','bind_trace')` | — | `operation='delete'` | +| Trigger | Every PATCH | Every bind/unbind/bind_trace | Every mergeinto with `keep_history=true` | Every DELETE | +| Undo deadline | None (unlimited) | None (unlimited) | 24 hours (renewed on redo) | None (unlimited) | +| Redo support | Yes | Yes | Yes | Yes | +| Step undo | Yes (`steps` param) | Yes (`steps` param) | No (full undo/redo only) | No (single record) | +| Max records | 256 per identity | 256 per identity (shared) | Unlimited | 256 per identity (shared) | +| User tracking | `user_id` + `user_source` | `user_id` + `user_source` | `performed_by_user` + `undone_by` / `redone_by` | `user_id` + `user_source` | + +--- + +*Updated: 2026-05-28* diff --git a/docs_v1.0/doc_wasm/pkg/md_wasm_bg.wasm b/docs_v1.0/doc_wasm/pkg/md_wasm_bg.wasm index 
c1fdac7..bad117d 100644 Binary files a/docs_v1.0/doc_wasm/pkg/md_wasm_bg.wasm and b/docs_v1.0/doc_wasm/pkg/md_wasm_bg.wasm differ diff --git a/migrations/033_create_identity_history_table.sql b/migrations/033_create_identity_history_table.sql new file mode 100644 index 0000000..2755dc7 --- /dev/null +++ b/migrations/033_create_identity_history_table.sql @@ -0,0 +1,28 @@ +-- Migration: Create identity_history table for undo/redo support +-- Description: Stores PATCH operation history for identity undo/redo functionality +-- Date: 2026-05-28 + +-- Create identity_history table +CREATE TABLE IF NOT EXISTS identity_history ( + id BIGSERIAL PRIMARY KEY, + identity_id INTEGER NOT NULL REFERENCES identities(id) ON DELETE CASCADE, + operation VARCHAR(20) NOT NULL, -- 'update', 'create', 'delete' + before_snapshot JSONB, -- 操作前完整狀態 + after_snapshot JSONB, -- 操作後完整狀態 + is_undone BOOLEAN DEFAULT FALSE, -- 是否已被 undo + undone_at TIMESTAMPTZ, -- undo 時間 + user_id VARCHAR(100), -- 操作者 + user_source VARCHAR(50), -- 'wordpress', 'api', 'cli' + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Create indexes for efficient undo/redo operations +CREATE INDEX IF NOT EXISTS idx_identity_history_identity_time + ON identity_history(identity_id, created_at DESC); + +CREATE INDEX IF NOT EXISTS idx_identity_history_not_undone + ON identity_history(identity_id, created_at DESC) + WHERE NOT is_undone; + +-- Add comment +COMMENT ON TABLE identity_history IS 'Stores identity PATCH operation history for undo/redo support. 
Max 256 records per identity.'; \ No newline at end of file diff --git a/migrations/3002_public_schema_pipeline_tables.sql b/migrations/3002_public_schema_pipeline_tables.sql new file mode 100644 index 0000000..ada9e26 --- /dev/null +++ b/migrations/3002_public_schema_pipeline_tables.sql @@ -0,0 +1,314 @@ +-- ============================================================ +-- 3002/3003 Schema Separation: Create pipeline tables in public +-- Generated: 2026-05-17 +-- ============================================================ +-- Run: /Users/accusys/pgsql/18.3/bin/psql "postgres://accusys@localhost:5432/momentry" -f migrations/3002_public_schema_pipeline_tables.sql +-- ============================================================ + +BEGIN; + +-- ============================================================ +-- 1. videos +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.videos_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.videos ( + id integer DEFAULT nextval('public.videos_id_seq') NOT NULL, + file_uuid character varying(32) NOT NULL, + file_path text NOT NULL, + file_name text NOT NULL, + duration double precision, + width integer, + height integer, + fps double precision, + probe_json jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + fs_video boolean DEFAULT false, + fs_json boolean DEFAULT false, + psql_chunk boolean DEFAULT false, + pobject_chunk boolean DEFAULT false, + mobject_chunk boolean DEFAULT false, + pvector_chunk boolean DEFAULT false, + qvector_chunk boolean DEFAULT false, + status character varying(20) DEFAULT 'pending'::character varying, + user_id bigint, + job_id integer, + registration_time timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + file_type character varying(20), + processing_status jsonb DEFAULT '{}'::jsonb, + birth_registration jsonb, + total_frames bigint DEFAULT 0, + 
parent_uuid character varying(32), + cut_done boolean DEFAULT false, + scene_done boolean DEFAULT false, + audio_tracks jsonb DEFAULT '[]'::jsonb, + cut_count integer DEFAULT 0, + cut_max_duration double precision DEFAULT 0, + content_hash text +); + +ALTER SEQUENCE public.videos_id_seq OWNED BY public.videos.id; + +-- ============================================================ +-- 2. chunk (with pgvector support) +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.chunks_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.chunk ( + id integer DEFAULT nextval('public.chunks_id_seq') NOT NULL, + file_uuid character varying(32) NOT NULL, + chunk_type character varying(32) NOT NULL, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + content jsonb NOT NULL, + vector_id character varying(64), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + file_id integer, + text_content text, + frame_count integer DEFAULT 0, + pre_chunk_ids integer[], + parent_chunk_id character varying(64), + child_chunk_ids text[], + search_vector tsvector, + fps double precision DEFAULT 24.0, + start_frame bigint DEFAULT 0, + end_frame bigint DEFAULT 0, + metadata jsonb, + updated_at timestamp with time zone DEFAULT now(), + visual_stats jsonb, + summary_text text, + chunk_id character varying(128) NOT NULL, + embedding public.vector, + old_chunk_id character varying(128), + chunk_index integer DEFAULT 0, + unique_key character varying +); + +ALTER SEQUENCE public.chunks_id_seq OWNED BY public.chunk.id; + +-- ============================================================ +-- 3. 
chunk_vectors +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.chunk_vectors_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.chunk_vectors ( + id integer DEFAULT nextval('public.chunk_vectors_id_seq') NOT NULL, + chunk_id character varying(64) NOT NULL, + uuid character varying(64) NOT NULL, + chunk_type character varying(32) DEFAULT 'sentence'::character varying NOT NULL, + embedding jsonb, + created_at timestamp with time zone DEFAULT now() +); + +ALTER SEQUENCE public.chunk_vectors_id_seq OWNED BY public.chunk_vectors.id; + +-- ============================================================ +-- 4. cuts +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.cuts_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.cuts ( + id integer DEFAULT nextval('public.cuts_id_seq') NOT NULL, + file_uuid character varying(32) NOT NULL, + cut_number integer NOT NULL, + start_frame bigint NOT NULL, + end_frame bigint NOT NULL, + start_time double precision, + end_time double precision, + fps double precision, + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT now() +); + +ALTER SEQUENCE public.cuts_id_seq OWNED BY public.cuts.id; + +-- ============================================================ +-- 5. 
frames +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.frames_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.frames ( + id integer DEFAULT nextval('public.frames_id_seq') NOT NULL, + file_id integer NOT NULL, + frame_number bigint NOT NULL, + timestamp double precision NOT NULL, + fps double precision DEFAULT 24.0, + yolo_objects jsonb, + ocr_results jsonb, + face_results jsonb, + frame_path text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +ALTER SEQUENCE public.frames_id_seq OWNED BY public.frames.id; + +-- ============================================================ +-- 6. monitor_jobs +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.monitor_jobs_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.monitor_jobs ( + id integer DEFAULT nextval('public.monitor_jobs_id_seq') NOT NULL, + uuid character varying(32) NOT NULL, + video_path character varying(512), + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + current_processor character varying(20), + progress_total integer DEFAULT 0, + progress_current integer DEFAULT 0, + error_count integer DEFAULT 0, + last_error text, + started_at timestamp without time zone, + updated_at timestamp with time zone, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + video_id bigint, + user_id bigint, + processors text[], + completed_processors text[], + failed_processors text[] +); + +ALTER SEQUENCE public.monitor_jobs_id_seq OWNED BY public.monitor_jobs.id; + +-- ============================================================ +-- 7. 
processor_results +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.processor_results_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.processor_results ( + id integer DEFAULT nextval('public.processor_results_id_seq') NOT NULL, + job_id integer, + video_id bigint, + processor character varying(20), + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + output_path text, + started_at timestamp with time zone, + completed_at timestamp with time zone, + error_message text, + progress_total integer DEFAULT 0, + progress_current integer DEFAULT 0, + last_checkpoint jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + duration_secs double precision, + chunks_produced integer DEFAULT 0, + frames_processed integer DEFAULT 0, + output_size_bytes bigint DEFAULT 0, + file_uuid character varying(32), + result jsonb, + output_data jsonb, + retry_count integer DEFAULT 0, + processor_type character varying(64), + uuid character varying(255) +); + +ALTER SEQUENCE public.processor_results_id_seq OWNED BY public.processor_results.id; + +-- ============================================================ +-- 8. processor_versions +-- ============================================================ +CREATE TABLE IF NOT EXISTS public.processor_versions ( + processor character varying(64) NOT NULL, + model_version character varying(128) NOT NULL, + processor_type character varying(32) DEFAULT 'processor'::character varying NOT NULL, + dependencies text[] DEFAULT '{}'::text[], + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + file_uuid character varying(64) +); + +-- ============================================================ +-- 9. 
parent_chunks +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.parent_chunks_id_seq AS integer START WITH 1; + +CREATE TABLE IF NOT EXISTS public.parent_chunks ( + id integer DEFAULT nextval('public.parent_chunks_id_seq') NOT NULL, + uuid character varying(32) NOT NULL, + chunk_id character varying(64), + summary_text text, + summary_tsvector tsvector, + metadata jsonb DEFAULT '{}'::jsonb +); + +ALTER SEQUENCE public.parent_chunks_id_seq OWNED BY public.parent_chunks.id; + +-- ============================================================ +-- 10. tkg_edges +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.tkg_edges_id_seq AS bigint START WITH 1; + +CREATE TABLE IF NOT EXISTS public.tkg_edges ( + id bigint DEFAULT nextval('public.tkg_edges_id_seq') NOT NULL, + edge_type character varying(64) NOT NULL, + source_node_id bigint NOT NULL, + target_node_id bigint NOT NULL, + file_uuid character varying(64) NOT NULL, + properties jsonb NOT NULL DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + +ALTER SEQUENCE public.tkg_edges_id_seq OWNED BY public.tkg_edges.id; + +-- ============================================================ +-- 11. 
tkg_nodes +-- ============================================================ +CREATE SEQUENCE IF NOT EXISTS public.tkg_nodes_id_seq AS bigint START WITH 1; + +CREATE TABLE IF NOT EXISTS public.tkg_nodes ( + id bigint DEFAULT nextval('public.tkg_nodes_id_seq') NOT NULL, + node_type character varying(64) NOT NULL, + external_id character varying(256) NOT NULL, + file_uuid character varying(64) NOT NULL, + label character varying(512), + properties jsonb NOT NULL DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + +ALTER SEQUENCE public.tkg_nodes_id_seq OWNED BY public.tkg_nodes.id; + +-- ============================================================ +-- Indexes & Constraints +-- ============================================================ + +-- videos +CREATE UNIQUE INDEX IF NOT EXISTS idx_videos_file_uuid ON public.videos (file_uuid); + +-- chunk +CREATE INDEX IF NOT EXISTS idx_chunk_file_uuid ON public.chunk (file_uuid); +CREATE INDEX IF NOT EXISTS idx_chunk_type ON public.chunk (chunk_type); +CREATE INDEX IF NOT EXISTS idx_chunk_parent ON public.chunk (parent_chunk_id); +CREATE INDEX IF NOT EXISTS idx_chunk_file_type ON public.chunk (file_uuid, chunk_type); +CREATE UNIQUE INDEX IF NOT EXISTS idx_chunk_file_old_id ON public.chunk (file_uuid, old_chunk_id) WHERE old_chunk_id IS NOT NULL; + +-- chunk_vectors +CREATE INDEX IF NOT EXISTS idx_chunk_vec_uuid ON public.chunk_vectors (uuid); +CREATE INDEX IF NOT EXISTS idx_chunk_vec_chunk ON public.chunk_vectors (chunk_id); + +-- cuts +CREATE INDEX IF NOT EXISTS idx_cuts_file_uuid ON public.cuts (file_uuid); + +-- frames +CREATE INDEX IF NOT EXISTS idx_frames_file_id ON public.frames (file_id); + +-- monitor_jobs +CREATE UNIQUE INDEX IF NOT EXISTS idx_monitor_jobs_uuid ON public.monitor_jobs (uuid); +CREATE INDEX IF NOT EXISTS idx_monitor_jobs_status ON public.monitor_jobs (status); + +-- processor_results +CREATE INDEX IF NOT EXISTS idx_pr_job_id ON public.processor_results (job_id); 
+CREATE INDEX IF NOT EXISTS idx_pr_uuid ON public.processor_results (uuid); +CREATE UNIQUE INDEX IF NOT EXISTS idx_pr_job_processor_type ON public.processor_results (job_id, processor_type); + +-- parent_chunks +CREATE INDEX IF NOT EXISTS idx_parent_chunks_uuid ON public.parent_chunks (uuid); + +-- tkg_edges +CREATE INDEX IF NOT EXISTS idx_tkg_edges_file_uuid ON public.tkg_edges (file_uuid); +CREATE INDEX IF NOT EXISTS idx_tkg_edges_type ON public.tkg_edges (edge_type); + +-- tkg_nodes +CREATE INDEX IF NOT EXISTS idx_tkg_nodes_file_uuid ON public.tkg_nodes (file_uuid); +CREATE INDEX IF NOT EXISTS idx_tkg_nodes_type ON public.tkg_nodes (node_type); +CREATE INDEX IF NOT EXISTS idx_tkg_nodes_external ON public.tkg_nodes (external_id); + +COMMIT; diff --git a/momentry_runtime/plist/com.momentry.api.plist b/momentry_runtime/plist/com.momentry.api.plist index 78a0b78..7a327be 100644 --- a/momentry_runtime/plist/com.momentry.api.plist +++ b/momentry_runtime/plist/com.momentry.api.plist @@ -12,43 +12,13 @@ staff WorkingDirectory - /Users/accusys/momentry_core_0.1 + /Users/accusys/momentry_core ProgramArguments - /Users/accusys/momentry_core_0.1/target/release/momentry - server - --port - 3002 + /Users/accusys/momentry_core/scripts/wrapper_production.sh - EnvironmentVariables - - PATH - /opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin - - DATABASE_URL - postgres://accusys@localhost:5432/momentry - - DB_MAX_CONNECTIONS - 50 - - DB_ACQUIRE_TIMEOUT - 30 - - REDIS_URL - redis://:accusys@localhost:6379 - - REDIS_PASSWORD - accusys - - OLLAMA_HOST - http://localhost:11434 - - QDRANT_URL - http://127.0.0.1:6333 - - RunAtLoad diff --git a/momentry_runtime/plist/com.momentry.embedding.plist b/momentry_runtime/plist/com.momentry.embedding.plist new file mode 100644 index 0000000..6b34587 --- /dev/null +++ b/momentry_runtime/plist/com.momentry.embedding.plist @@ -0,0 +1,34 @@ + + + + + Label + com.momentry.embedding + + UserName + accusys + + GroupName + staff + + 
WorkingDirectory + /Users/accusys/momentry_core + + ProgramArguments + + /Users/accusys/momentry_core/scripts/wrapper_embedding.sh + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + /Users/accusys/momentry/log/embedding.log + + StandardErrorPath + /Users/accusys/momentry/log/embedding.error.log + + diff --git a/momentry_runtime/plist/com.momentry.gitea.plist b/momentry_runtime/plist/com.momentry.gitea.plist index cc719cc..ec1d667 100644 --- a/momentry_runtime/plist/com.momentry.gitea.plist +++ b/momentry_runtime/plist/com.momentry.gitea.plist @@ -13,7 +13,7 @@ ProgramArguments - /opt/homebrew/opt/gitea/bin/gitea + /Users/accusys/gitea/bin/gitea web --config /Users/accusys/momentry/etc/gitea/app.ini diff --git a/momentry_runtime/plist/com.momentry.llamacpp-a4b.plist b/momentry_runtime/plist/com.momentry.llamacpp-a4b.plist new file mode 100644 index 0000000..ea1cf4a --- /dev/null +++ b/momentry_runtime/plist/com.momentry.llamacpp-a4b.plist @@ -0,0 +1,48 @@ + + + + + Label + com.momentry.llamacpp-a4b + + UserName + accusys + + WorkingDirectory + /Users/accusys + + ProgramArguments + + /Users/accusys/llama/bin/llama-server + -m + /Users/accusys/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf + --mmproj + /Users/accusys/models/gemma-4-26B-A4B-it.mmproj-f16.gguf + --host + 0.0.0.0 + --port + 8082 + -ngl + 99 + -c + 16384 + --temp + 0.1 + --mlock + --reasoning + off + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + /Users/accusys/momentry/log/llamacpp-a4b.log + + StandardErrorPath + /Users/accusys/momentry/log/llamacpp-a4b.error.log + + diff --git a/momentry_runtime/plist/com.momentry.llamacpp-e4b.plist b/momentry_runtime/plist/com.momentry.llamacpp-e4b.plist new file mode 100644 index 0000000..8c27cfb --- /dev/null +++ b/momentry_runtime/plist/com.momentry.llamacpp-e4b.plist @@ -0,0 +1,46 @@ + + + + + Label + com.momentry.llamacpp-e4b + + UserName + accusys + + WorkingDirectory + /Users/accusys + + ProgramArguments + + /Users/accusys/llama/bin/llama-server + -m + 
/Users/accusys/models/gemma-4-E4B-it-Q4_K_M.gguf + --mmproj + /Users/accusys/models/mmproj-gemma-4-E4B-it-BF16.gguf + --host + 0.0.0.0 + --port + 8083 + -ngl + 99 + -c + 16384 + --temp + 0.1 + --mlock + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + /Users/accusys/momentry/log/llamacpp-e4b.log + + StandardErrorPath + /Users/accusys/momentry/log/llamacpp-e4b.error.log + + diff --git a/momentry_runtime/plist/com.momentry.postgresql.plist b/momentry_runtime/plist/com.momentry.postgresql.plist index d799ab2..8a4dcf2 100644 --- a/momentry_runtime/plist/com.momentry.postgresql.plist +++ b/momentry_runtime/plist/com.momentry.postgresql.plist @@ -15,13 +15,13 @@ WorkingDirectory - /Users/accusys/momentry/var/postgresql + /Users/accusys ProgramArguments - /opt/homebrew/opt/postgresql@18/bin/postgres + /Users/accusys/pgsql/18.3/bin/postgres -D - /Users/accusys/momentry/var/postgresql + /Users/accusys/pgsql/data RunAtLoad diff --git a/momentry_runtime/plist/com.momentry.sftpgo.plist b/momentry_runtime/plist/com.momentry.sftpgo.plist index e6c3c60..fc3c9c9 100644 --- a/momentry_runtime/plist/com.momentry.sftpgo.plist +++ b/momentry_runtime/plist/com.momentry.sftpgo.plist @@ -9,14 +9,14 @@ accusys WorkingDirectory - /Users/accusys/workspace/sftpgo + /Users/accusys/momentry/var/sftpgo ProgramArguments /Users/accusys/bin/sftpgo serve - --config-file - /Users/accusys/momentry/etc/sftpgo/sftpgo.json + -c + /Users/accusys/momentry/etc/sftpgo EnvironmentVariables diff --git a/portal/src/components/Face3DViewer.vue b/portal/src/components/Face3DViewer.vue new file mode 100644 index 0000000..83a80db --- /dev/null +++ b/portal/src/components/Face3DViewer.vue @@ -0,0 +1,174 @@ + + + diff --git a/portal/src/components/IdentitySwimlane.vue b/portal/src/components/IdentitySwimlane.vue new file mode 100644 index 0000000..25dbfbc --- /dev/null +++ b/portal/src/components/IdentitySwimlane.vue @@ -0,0 +1,80 @@ + + + diff --git a/portal/src/components/ServiceStatusCard.vue 
b/portal/src/components/ServiceStatusCard.vue new file mode 100644 index 0000000..4195d2a --- /dev/null +++ b/portal/src/components/ServiceStatusCard.vue @@ -0,0 +1,33 @@ + + + \ No newline at end of file diff --git a/portal/src/components/SpaceTimeCube.vue b/portal/src/components/SpaceTimeCube.vue new file mode 100644 index 0000000..a427ae1 --- /dev/null +++ b/portal/src/components/SpaceTimeCube.vue @@ -0,0 +1,354 @@ + + + \ No newline at end of file diff --git a/portal/src/components/TraceDurationHistogram.vue b/portal/src/components/TraceDurationHistogram.vue new file mode 100644 index 0000000..4a49710 --- /dev/null +++ b/portal/src/components/TraceDurationHistogram.vue @@ -0,0 +1,41 @@ + + + diff --git a/portal/src/components/TraceSimilarityMatrix.vue b/portal/src/components/TraceSimilarityMatrix.vue new file mode 100644 index 0000000..5ae7a96 --- /dev/null +++ b/portal/src/components/TraceSimilarityMatrix.vue @@ -0,0 +1,63 @@ + + + diff --git a/portal/src/components/TraceThumbnailTimeline.vue b/portal/src/components/TraceThumbnailTimeline.vue new file mode 100644 index 0000000..f38dc7f --- /dev/null +++ b/portal/src/components/TraceThumbnailTimeline.vue @@ -0,0 +1,86 @@ + + + diff --git a/portal/src/test-setup.ts b/portal/src/test-setup.ts new file mode 100644 index 0000000..57a3826 --- /dev/null +++ b/portal/src/test-setup.ts @@ -0,0 +1,16 @@ +import { vi } from 'vitest' + +const mockStorage = new Map() + +Object.defineProperty(globalThis, 'localStorage', { + value: { + getItem: (key: string) => mockStorage.get(key) ?? null, + setItem: (key: string, value: string) => mockStorage.set(key, String(value)), + removeItem: (key: string) => mockStorage.delete(key), + clear: () => mockStorage.clear(), + get length() { return mockStorage.size }, + key: (index: number) => [...mockStorage.keys()][index] ?? 
null, + }, + configurable: true, + writable: true, +}) diff --git a/portal/src/views/LoginView.test.ts b/portal/src/views/LoginView.test.ts new file mode 100644 index 0000000..8717128 --- /dev/null +++ b/portal/src/views/LoginView.test.ts @@ -0,0 +1,184 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { mount } from '@vue/test-utils' +import { nextTick } from 'vue' + +const mockPush = vi.fn() +const mockReplace = vi.fn() +const mockUseRoute = vi.fn(() => ({ + query: { username: '', password: '' }, + path: '/login', +})) + +vi.mock('vue-router', () => ({ + useRouter: () => ({ push: mockPush, replace: mockReplace }), + useRoute: mockUseRoute, +})) + +const mockHttpFetch = vi.fn() +vi.mock('@/api/client', () => ({ + httpFetch: mockHttpFetch, + getCurrentConfig: () => ({ + api_base_url: 'http://localhost:3003', + api_key: '', + timeout_secs: 30, + }), + saveConfig: vi.fn(), +})) + +beforeEach(() => { + vi.clearAllMocks() + localStorage.clear() + mockUseRoute.mockReturnValue({ + query: { username: '', password: '' }, + path: '/login', + }) +}) + +async function createWrapper() { + const { default: LoginView } = await import('./LoginView.vue') + return mount(LoginView, { + attachTo: document.body, + }) +} + +describe('LoginView', () => { + it('renders login form', async () => { + const wrapper = await createWrapper() + expect(wrapper.find('h1').text()).toBe('Momentry') + expect(wrapper.find('input[type="text"]').exists()).toBe(true) + expect(wrapper.find('input[type="password"]').exists()).toBe(true) + expect(wrapper.find('button[type="submit"]').text()).toBe('Login') + }) + + it('updates username and password on input', async () => { + const wrapper = await createWrapper() + const usernameInput = wrapper.find('input[type="text"]') + const passwordInput = wrapper.find('input[type="password"]') + + await usernameInput.setValue('demo') + await passwordInput.setValue('secret') + + expect((usernameInput.element as 
HTMLInputElement).value).toBe('demo') + expect((passwordInput.element as HTMLInputElement).value).toBe('secret') + }) + + it('toggles password visibility', async () => { + const wrapper = await createWrapper() + const toggleBtn = wrapper.find('button[type="button"]') + const passwordInput = wrapper.find('input[type="password"]') + + expect(passwordInput.attributes('type')).toBe('password') + await toggleBtn.trigger('click') + expect(passwordInput.attributes('type')).toBe('text') + await toggleBtn.trigger('click') + expect(passwordInput.attributes('type')).toBe('password') + }) + + it('shows loading state on submit', async () => { + mockHttpFetch.mockImplementation(() => new Promise(() => {})) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('demo') + await wrapper.find('input[type="password"]').setValue('demo') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(wrapper.find('button[type="submit"]').text()).toBe('Logging in...') + expect(wrapper.find('button[type="submit"]').attributes('disabled')).toBeDefined() + }) + + it('shows error on login failure with message', async () => { + mockHttpFetch.mockResolvedValue({ success: false, message: 'Account disabled' }) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('demo') + await wrapper.find('input[type="password"]').setValue('demo') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(wrapper.text()).toContain('Account disabled') + }) + + it('shows generic error for 401', async () => { + mockHttpFetch.mockRejectedValue(new Error('401 Unauthorized')) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('bad') + await wrapper.find('input[type="password"]').setValue('creds') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(wrapper.text()).toContain('Invalid username or password') + }) + + 
it('shows connection error on network failure', async () => { + mockHttpFetch.mockRejectedValue(new Error('NetworkError')) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('demo') + await wrapper.find('input[type="password"]').setValue('demo') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(wrapper.text()).toContain('Connection error') + }) + + it('redirects on successful login', async () => { + mockHttpFetch.mockResolvedValue({ + success: true, + api_key: 'test_key_123', + user: { name: 'demo', role: 'admin' }, + }) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('admin') + await wrapper.find('input[type="password"]').setValue('admin') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(localStorage.getItem('momentry_user')).toBe( + JSON.stringify({ name: 'demo', role: 'admin' }), + ) + expect(localStorage.getItem('momentry_api_key')).toBe('test_key_123') + expect(mockPush).toHaveBeenCalledWith('/home') + }) + + it('redirects to original redirect path after login', async () => { + mockUseRoute.mockReturnValue({ + query: { redirect: '/search?q=test' }, + path: '/login', + }) + mockHttpFetch.mockResolvedValue({ + success: true, + api_key: 'key', + user: { name: 'demo' }, + }) + const wrapper = await createWrapper() + + await wrapper.find('input[type="text"]').setValue('demo') + await wrapper.find('input[type="password"]').setValue('demo') + await wrapper.find('form').trigger('submit.prevent') + await nextTick() + + expect(mockPush).toHaveBeenCalledWith('/search?q=test') + }) + + it('auto-submits when query params are present', async () => { + mockUseRoute.mockReturnValue({ + query: { username: 'auto', password: 'login' }, + path: '/login', + }) + mockHttpFetch.mockResolvedValue({ + success: true, + api_key: 'auto_key', + user: { name: 'auto_user' }, + }) + + await createWrapper() + await nextTick() + await 
nextTick() + + expect(mockHttpFetch).toHaveBeenCalled() + expect(localStorage.getItem('momentry_api_key')).toBe('auto_key') + }) +}) diff --git a/portal/src/views/NotFoundView.vue b/portal/src/views/NotFoundView.vue new file mode 100644 index 0000000..df26672 --- /dev/null +++ b/portal/src/views/NotFoundView.vue @@ -0,0 +1,11 @@ + \ No newline at end of file diff --git a/portal/src/views/PipelineProgressView.vue b/portal/src/views/PipelineProgressView.vue new file mode 100644 index 0000000..a3d57d1 --- /dev/null +++ b/portal/src/views/PipelineProgressView.vue @@ -0,0 +1,370 @@ + + + diff --git a/portal/src/views/TraceDetailView.vue b/portal/src/views/TraceDetailView.vue new file mode 100644 index 0000000..4f5dfc6 --- /dev/null +++ b/portal/src/views/TraceDetailView.vue @@ -0,0 +1,85 @@ + + + diff --git a/portal/src/views/TraceVizView.vue b/portal/src/views/TraceVizView.vue new file mode 100644 index 0000000..7e7af6b --- /dev/null +++ b/portal/src/views/TraceVizView.vue @@ -0,0 +1,64 @@ + + + \ No newline at end of file diff --git a/release/dev_upgrade_v1.0.3/UPGRADE.md b/release/dev_upgrade_v1.0.3/UPGRADE.md new file mode 100644 index 0000000..cc0eb9a --- /dev/null +++ b/release/dev_upgrade_v1.0.3/UPGRADE.md @@ -0,0 +1,15 @@ +# Dev Upgrade v1.0.3 + +## Steps + +```bash +# 1. Apply migration +psql -U accusys -d momentry < schema/migration_v1.0.3.sql + +# 2. Replace binary +cp bin/momentry_playground /Users/accusys/momentry_core_0.1/target/debug/ + +# 3. 
Restart +pkill momentry_playground +DATABASE_SCHEMA=dev /Users/accusys/momentry_core_0.1/target/debug/momentry_playground server --port 3003 +``` diff --git a/release/dev_upgrade_v1.0.3/bin/momentry_playground b/release/dev_upgrade_v1.0.3/bin/momentry_playground new file mode 100755 index 0000000..66a1e48 Binary files /dev/null and b/release/dev_upgrade_v1.0.3/bin/momentry_playground differ diff --git a/release/dev_upgrade_v1.0.3/schema/migration_v1.0.3.sql b/release/dev_upgrade_v1.0.3/schema/migration_v1.0.3.sql new file mode 100644 index 0000000..20b257e --- /dev/null +++ b/release/dev_upgrade_v1.0.3/schema/migration_v1.0.3.sql @@ -0,0 +1,10 @@ +-- Migration: dev schema v1.0.3 +-- Date: 2026-05-11 +-- Auto-generated from current DB schema + +SET search_path TO dev; + +ALTER TABLE dev.face_detections ADD COLUMN IF NOT EXISTS timestamp_secs float8; + +-- Chunk index removals (already applied) +-- chunk_vectors: standard columns diff --git a/release/public_schema_v1.0.0.sql b/release/public_schema_v1.0.0.sql new file mode 100644 index 0000000..5b15732 --- /dev/null +++ b/release/public_schema_v1.0.0.sql @@ -0,0 +1,4863 @@ +-- +-- PostgreSQL database dump +-- + +\restrict hRi4nBNv2E5FXxBTf47fTk0vxfJNiXtnegSYFeraY46zaCyMMlYNanEdl70C1E7 + +-- Dumped from database version 18.1 (Homebrew) +-- Dumped by pg_dump version 18.1 (Homebrew) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: public; Type: SCHEMA; Schema: -; Owner: pg_database_owner +-- + +CREATE SCHEMA public; + + +ALTER SCHEMA public OWNER TO pg_database_owner; + +-- +-- Name: SCHEMA public; Type: COMMENT; Schema: -; Owner: pg_database_owner +-- + +COMMENT ON SCHEMA public 
IS 'standard public schema'; + + +-- +-- Name: auto_match_face_speaker(character varying, double precision); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.auto_match_face_speaker(p_video_uuid character varying, p_threshold double precision DEFAULT 0.5) RETURNS TABLE(face_id character varying, speaker_id character varying, confidence double precision, match_count bigint) + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN QUERY + -- Find face detections that overlap with ASRX segments + SELECT + fd.face_id, + seg.speaker_id, + COUNT(*)::DOUBLE PRECISION / NULLIF(COUNT(DISTINCT seg.speaker_id), 0) AS confidence, + COUNT(*) AS match_count + FROM face_detections fd + CROSS JOIN LATERAL ( + SELECT + seg_data->>'speaker_id' AS speaker_id, + (seg_data->>'start')::DOUBLE PRECISION AS seg_start, + (seg_data->>'end')::DOUBLE PRECISION AS seg_end + FROM face_recognition_results frr, + jsonb_array_elements(frr.result_data->'frames') AS frame_data, + jsonb_array_elements(frame_data->'faces') AS face_data, + jsonb_array_elements(frr.result_data->'segments') AS seg_data + WHERE frr.video_uuid = p_video_uuid + AND face_data->>'face_id' = fd.face_id + ) seg + WHERE fd.video_uuid = p_video_uuid + AND fd.timestamp_secs >= seg.seg_start + AND fd.timestamp_secs <= seg.seg_end + AND fd.face_id IS NOT NULL + AND seg.speaker_id IS NOT NULL + GROUP BY fd.face_id, seg.speaker_id + HAVING COUNT(*)::DOUBLE PRECISION / NULLIF(COUNT(DISTINCT seg.speaker_id), 0) >= p_threshold + ORDER BY confidence DESC; +END; +$$; + + +ALTER FUNCTION public.auto_match_face_speaker(p_video_uuid character varying, p_threshold double precision) OWNER TO accusys; + +-- +-- Name: FUNCTION auto_match_face_speaker(p_video_uuid character varying, p_threshold double precision); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.auto_match_face_speaker(p_video_uuid character varying, p_threshold double precision) IS 'Automatically matches face detections with speaker 
segments'; + + +-- +-- Name: find_or_create_face_identity(character varying, character varying, public.vector, jsonb, jsonb); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.find_or_create_face_identity(p_face_id character varying, p_name character varying DEFAULT NULL::character varying, p_embedding public.vector DEFAULT NULL::public.vector, p_attributes jsonb DEFAULT NULL::jsonb, p_metadata jsonb DEFAULT '{}'::jsonb) RETURNS integer + LANGUAGE plpgsql + AS $$ +DECLARE + v_id INTEGER; +BEGIN + -- Try to find existing face identity + SELECT id INTO v_id + FROM face_identities + WHERE face_id = p_face_id; + + IF v_id IS NULL THEN + -- Create new face identity + INSERT INTO face_identities (face_id, name, embedding, attributes, metadata) + VALUES (p_face_id, p_name, p_embedding, p_attributes, p_metadata) + RETURNING id INTO v_id; + ELSE + -- Update existing face identity + UPDATE face_identities + SET + name = COALESCE(p_name, name), + embedding = COALESCE(p_embedding, embedding), + attributes = COALESCE(p_attributes, attributes), + metadata = COALESCE(p_metadata, metadata), + updated_at = CURRENT_TIMESTAMP + WHERE id = v_id; + END IF; + + RETURN v_id; +END; +$$; + + +ALTER FUNCTION public.find_or_create_face_identity(p_face_id character varying, p_name character varying, p_embedding public.vector, p_attributes jsonb, p_metadata jsonb) OWNER TO accusys; + +-- +-- Name: FUNCTION find_or_create_face_identity(p_face_id character varying, p_name character varying, p_embedding public.vector, p_attributes jsonb, p_metadata jsonb); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.find_or_create_face_identity(p_face_id character varying, p_name character varying, p_embedding public.vector, p_attributes jsonb, p_metadata jsonb) IS 'Finds or creates a face identity record'; + + +-- +-- Name: find_persons_at_time(character varying, double precision, double precision); Type: FUNCTION; Schema: public; Owner: accusys +-- 
+ +CREATE FUNCTION public.find_persons_at_time(p_video_uuid character varying, p_time double precision, p_tolerance double precision DEFAULT 0.0) RETURNS TABLE(person_id character varying, name character varying, confidence double precision, appearance_id integer) + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN QUERY + SELECT + pi.person_id, + pi.name, + pa.confidence, + pa.id AS appearance_id + FROM person_appearances pa + JOIN person_identities pi ON pa.person_id = pi.person_id + WHERE pa.video_uuid = p_video_uuid + AND pa.start_time <= p_time + p_tolerance + AND pa.end_time >= p_time - p_tolerance + ORDER BY pa.confidence DESC; +END; +$$; + + +ALTER FUNCTION public.find_persons_at_time(p_video_uuid character varying, p_time double precision, p_tolerance double precision) OWNER TO accusys; + +-- +-- Name: FUNCTION find_persons_at_time(p_video_uuid character varying, p_time double precision, p_tolerance double precision); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.find_persons_at_time(p_video_uuid character varying, p_time double precision, p_tolerance double precision) IS 'Finds persons appearing at a specific time in video'; + + +-- +-- Name: find_persons_in_range(character varying, double precision, double precision); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.find_persons_in_range(p_video_uuid character varying, p_start_time double precision, p_end_time double precision) RETURNS TABLE(person_id character varying, name character varying, overlap_duration double precision, confidence double precision) + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN QUERY + SELECT + pi.person_id, + pi.name, + LEAST(pa.end_time, p_end_time) - GREATEST(pa.start_time, p_start_time) AS overlap_duration, + AVG(pa.confidence) AS confidence + FROM person_appearances pa + JOIN person_identities pi ON pa.person_id = pi.person_id + WHERE pa.video_uuid = p_video_uuid + AND pa.start_time < p_end_time + AND pa.end_time > 
p_start_time + GROUP BY pi.person_id, pi.name, pa.end_time, pa.start_time + ORDER BY overlap_duration DESC; +END; +$$; + + +ALTER FUNCTION public.find_persons_in_range(p_video_uuid character varying, p_start_time double precision, p_end_time double precision) OWNER TO accusys; + +-- +-- Name: FUNCTION find_persons_in_range(p_video_uuid character varying, p_start_time double precision, p_end_time double precision); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.find_persons_in_range(p_video_uuid character varying, p_start_time double precision, p_end_time double precision) IS 'Finds persons appearing in a time range with overlap calculation'; + + +-- +-- Name: find_similar_faces(public.vector, double precision, integer); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.find_similar_faces(query_embedding public.vector, similarity_threshold double precision DEFAULT 0.6, limit_count integer DEFAULT 10) RETURNS TABLE(face_id character varying, name character varying, similarity double precision, attributes jsonb, metadata jsonb) + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN QUERY + SELECT + fi.face_id, + fi.name, + 1 - (fi.embedding <=> query_embedding) AS similarity, + fi.attributes, + fi.metadata + FROM face_identities fi + WHERE fi.is_active = TRUE + AND fi.embedding IS NOT NULL + AND 1 - (fi.embedding <=> query_embedding) >= similarity_threshold + ORDER BY fi.embedding <=> query_embedding + LIMIT limit_count; +END; +$$; + + +ALTER FUNCTION public.find_similar_faces(query_embedding public.vector, similarity_threshold double precision, limit_count integer) OWNER TO accusys; + +-- +-- Name: FUNCTION find_similar_faces(query_embedding public.vector, similarity_threshold double precision, limit_count integer); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.find_similar_faces(query_embedding public.vector, similarity_threshold double precision, limit_count integer) IS 'Finds similar 
faces based on embedding similarity'; + + +-- +-- Name: merge_person_identities(character varying, character varying[]); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.merge_person_identities(p_target_person_id character varying, p_source_person_ids character varying[]) RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + -- Update all appearances to point to target person + UPDATE person_appearances + SET person_id = p_target_person_id + WHERE person_id = ANY(p_source_person_ids); + + -- Delete source person identities + DELETE FROM person_identities + WHERE person_id = ANY(p_source_person_ids) + AND person_id != p_target_person_id; + + -- Update target person statistics + PERFORM update_person_appearance_stats(p_target_person_id); +END; +$$; + + +ALTER FUNCTION public.merge_person_identities(p_target_person_id character varying, p_source_person_ids character varying[]) OWNER TO accusys; + +-- +-- Name: FUNCTION merge_person_identities(p_target_person_id character varying, p_source_person_ids character varying[]); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.merge_person_identities(p_target_person_id character varying, p_source_person_ids character varying[]) IS 'Merges multiple person identities into one'; + + +-- +-- Name: trigger_update_person_stats(); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.trigger_update_person_stats() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF TG_OP = 'INSERT' THEN + PERFORM update_person_appearance_stats(NEW.person_id); + ELSIF TG_OP = 'UPDATE' THEN + PERFORM update_person_appearance_stats(NEW.person_id); + IF NEW.person_id != OLD.person_id THEN + PERFORM update_person_appearance_stats(OLD.person_id); + END IF; + ELSIF TG_OP = 'DELETE' THEN + PERFORM update_person_appearance_stats(OLD.person_id); + END IF; + + RETURN NULL; +END; +$$; + + +ALTER FUNCTION public.trigger_update_person_stats() OWNER TO accusys; + +-- +-- Name: 
update_cluster_centroid(character varying); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.update_cluster_centroid(cluster_uuid character varying) RETURNS void + LANGUAGE plpgsql + AS $$ +DECLARE + new_centroid VECTOR(512); +BEGIN + -- Calculate new centroid from all face embeddings in the cluster + SELECT AVG(embedding) INTO new_centroid + FROM face_detections + WHERE cluster_id = cluster_uuid + AND embedding IS NOT NULL; + + -- Update cluster centroid + UPDATE face_clusters + SET centroid = new_centroid, + size = (SELECT COUNT(*) FROM face_detections WHERE cluster_id = cluster_uuid) + WHERE cluster_id = cluster_uuid; +END; +$$; + + +ALTER FUNCTION public.update_cluster_centroid(cluster_uuid character varying) OWNER TO accusys; + +-- +-- Name: FUNCTION update_cluster_centroid(cluster_uuid character varying); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.update_cluster_centroid(cluster_uuid character varying) IS 'Updates cluster centroid from member embeddings'; + + +-- +-- Name: update_person_appearance_stats(character varying); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.update_person_appearance_stats(p_person_id character varying) RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + UPDATE person_identities + SET + appearance_count = ( + SELECT COUNT(*) + FROM person_appearances + WHERE person_id = p_person_id + ), + total_appearance_duration = ( + SELECT COALESCE(SUM(duration), 0.0) + FROM person_appearances + WHERE person_id = p_person_id + ), + first_appearance_time = ( + SELECT MIN(start_time) + FROM person_appearances + WHERE person_id = p_person_id + ), + last_appearance_time = ( + SELECT MAX(end_time) + FROM person_appearances + WHERE person_id = p_person_id + ), + updated_at = CURRENT_TIMESTAMP + WHERE person_id = p_person_id; +END; +$$; + + +ALTER FUNCTION public.update_person_appearance_stats(p_person_id character varying) OWNER TO accusys; + +-- +-- Name: 
FUNCTION update_person_appearance_stats(p_person_id character varying); Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON FUNCTION public.update_person_appearance_stats(p_person_id character varying) IS 'Updates person identity statistics from appearances'; + + +-- +-- Name: update_search_vector(); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.update_search_vector() RETURNS trigger + LANGUAGE plpgsql + AS $$ + BEGIN + NEW.search_vector := to_tsvector('english', COALESCE(NEW.text_content, '')); + RETURN NEW; + END; + $$; + + +ALTER FUNCTION public.update_search_vector() OWNER TO accusys; + +-- +-- Name: update_updated_at_column(); Type: FUNCTION; Schema: public; Owner: accusys +-- + +CREATE FUNCTION public.update_updated_at_column() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$; + + +ALTER FUNCTION public.update_updated_at_column() OWNER TO accusys; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: api_key_anomalies; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.api_key_anomalies ( + id integer NOT NULL, + key_id character varying(32) NOT NULL, + anomaly_type character varying(30) NOT NULL, + severity character varying(10) NOT NULL, + ip_address character varying(45), + request_count integer, + error_count integer, + error_rate double precision, + unique_ips integer, + details jsonb, + resolved boolean DEFAULT false, + resolved_at timestamp without time zone, + resolved_by character varying(128), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.api_key_anomalies OWNER TO accusys; + +-- +-- Name: api_key_anomalies_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.api_key_anomalies_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE 
public.api_key_anomalies_id_seq OWNER TO accusys; + +-- +-- Name: api_key_anomalies_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.api_key_anomalies_id_seq OWNED BY public.api_key_anomalies.id; + + +-- +-- Name: api_key_audit_log; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.api_key_audit_log ( + id integer NOT NULL, + key_id character varying(32) NOT NULL, + action character varying(50) NOT NULL, + actor character varying(128), + ip_address character varying(45), + user_agent text, + request_path text, + response_code integer, + anomaly_type character varying(30), + details jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.api_key_audit_log OWNER TO accusys; + +-- +-- Name: api_key_audit_log_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.api_key_audit_log_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.api_key_audit_log_id_seq OWNER TO accusys; + +-- +-- Name: api_key_audit_log_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.api_key_audit_log_id_seq OWNED BY public.api_key_audit_log.id; + + +-- +-- Name: api_keys; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.api_keys ( + id integer NOT NULL, + key_id character varying(48) NOT NULL, + key_hash character varying(64) NOT NULL, + key_prefix character varying(8) NOT NULL, + name character varying(128) NOT NULL, + key_type character varying(20) DEFAULT 'user'::character varying NOT NULL, + user_id bigint, + service_name character varying(64), + permissions jsonb DEFAULT '["read", "write"]'::jsonb, + expires_at timestamp with time zone, + last_used_at timestamp with time zone, + last_used_ip character varying(45), + usage_count bigint DEFAULT 0, + status character varying(20) DEFAULT 'active'::character varying NOT NULL, + 
rotation_required boolean DEFAULT false, + rotation_reason text, + grace_period_end timestamp with time zone, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.api_keys OWNER TO accusys; + +-- +-- Name: api_keys_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.api_keys_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.api_keys_id_seq OWNER TO accusys; + +-- +-- Name: api_keys_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.api_keys_id_seq OWNED BY public.api_keys.id; + + +-- +-- Name: backup_history; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.backup_history ( + id integer NOT NULL, + service_name character varying(50) NOT NULL, + operation character varying(20), + backup_file character varying(500), + backup_tier character varying(20), + source_tier character varying(20), + dest_tier character varying(20), + file_count bigint, + size_bytes bigint, + duration_seconds integer, + status character varying(20), + error_message text, + executed_at timestamp without time zone DEFAULT now(), + CONSTRAINT backup_history_operation_check CHECK (((operation)::text = ANY ((ARRAY['backup'::character varying, 'restore'::character varying, 'tier_migration'::character varying, 'cleanup'::character varying, 'verify'::character varying])::text[]))), + CONSTRAINT backup_history_status_check CHECK (((status)::text = ANY ((ARRAY['success'::character varying, 'failed'::character varying, 'partial'::character varying])::text[]))) +); + + +ALTER TABLE public.backup_history OWNER TO accusys; + +-- +-- Name: backup_history_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.backup_history_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + 
+ +ALTER SEQUENCE public.backup_history_id_seq OWNER TO accusys; + +-- +-- Name: backup_history_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.backup_history_id_seq OWNED BY public.backup_history.id; + + +-- +-- Name: backup_registry; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.backup_registry ( + id integer NOT NULL, + service_name character varying(50) NOT NULL, + backup_file character varying(500) NOT NULL, + backup_size_bytes bigint, + backup_type character varying(20), + status character varying(20), + created_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.backup_registry OWNER TO accusys; + +-- +-- Name: backup_registry_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.backup_registry_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.backup_registry_id_seq OWNER TO accusys; + +-- +-- Name: backup_registry_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.backup_registry_id_seq OWNED BY public.backup_registry.id; + + +-- +-- Name: backup_storage_stats; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.backup_storage_stats ( + id integer NOT NULL, + tier character varying(20), + file_count bigint, + total_size_bytes bigint, + record_time timestamp without time zone DEFAULT now(), + CONSTRAINT backup_storage_stats_tier_check CHECK (((tier)::text = ANY ((ARRAY['daily'::character varying, 'weekly'::character varying, 'monthly'::character varying, 'archive'::character varying, 'total'::character varying])::text[]))) +); + + +ALTER TABLE public.backup_storage_stats OWNER TO accusys; + +-- +-- Name: backup_storage_stats_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.backup_storage_stats_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + 
CACHE 1; + + +ALTER SEQUENCE public.backup_storage_stats_id_seq OWNER TO accusys; + +-- +-- Name: backup_storage_stats_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.backup_storage_stats_id_seq OWNED BY public.backup_storage_stats.id; + + +-- +-- Name: castings; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.castings ( + id bigint NOT NULL, + character_id bigint, + talent_id bigint, + track_type character varying(32) DEFAULT 'original'::character varying, + role_type character varying(32) DEFAULT 'both'::character varying +); + + +ALTER TABLE public.castings OWNER TO accusys; + +-- +-- Name: TABLE castings; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.castings IS 'Talent 與 Character 的飾演關係'; + + +-- +-- Name: castings_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.castings_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.castings_id_seq OWNER TO accusys; + +-- +-- Name: castings_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.castings_id_seq OWNED BY public.castings.id; + + +-- +-- Name: characters; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.characters ( + id bigint NOT NULL, + video_uuid text NOT NULL, + name text NOT NULL, + language_track text DEFAULT 'original'::text, + is_voice_only boolean DEFAULT false, + metadata jsonb DEFAULT '{}'::jsonb +); + + +ALTER TABLE public.characters OWNER TO accusys; + +-- +-- Name: TABLE characters; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.characters IS '視頻中的劇中角色'; + + +-- +-- Name: characters_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.characters_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.characters_id_seq OWNER TO accusys; + 
+-- +-- Name: characters_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.characters_id_seq OWNED BY public.characters.id; + + +-- +-- Name: child_chunks; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.child_chunks ( + id integer NOT NULL, + parent_id integer, + uuid text NOT NULL, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + raw_text text, + raw_text_vector public.vector(768), + speaker_ids text[], + tags text[], + created_at timestamp with time zone DEFAULT now(), + face_ids text[] DEFAULT '{}'::text[], + start_frame bigint, + end_frame bigint, + fps double precision DEFAULT 24.0 +); + + +ALTER TABLE public.child_chunks OWNER TO accusys; + +-- +-- Name: child_chunks_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.child_chunks_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.child_chunks_id_seq OWNER TO accusys; + +-- +-- Name: child_chunks_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.child_chunks_id_seq OWNED BY public.child_chunks.id; + + +-- +-- Name: chunk_vectors; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.chunk_vectors ( + id integer NOT NULL, + chunk_id character varying(64) NOT NULL, + uuid character varying(32) NOT NULL, + chunk_type character varying(32) NOT NULL, + start_time double precision, + end_time double precision, + embedding text, + metadata jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + embedding_vector public.vector(768), + file_id integer +); + + +ALTER TABLE public.chunk_vectors OWNER TO accusys; + +-- +-- Name: chunk_vectors_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.chunk_vectors_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE 
public.chunk_vectors_id_seq OWNER TO accusys; + +-- +-- Name: chunk_vectors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.chunk_vectors_id_seq OWNED BY public.chunk_vectors.id; + + +-- +-- Name: chunks; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.chunks ( + id integer NOT NULL, + uuid character varying(32) NOT NULL, + chunk_id character varying(64) NOT NULL, + chunk_index integer NOT NULL, + chunk_type character varying(32) NOT NULL, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + content jsonb NOT NULL, + vector_id character varying(64), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + fps double precision DEFAULT 24.0, + start_frame bigint DEFAULT 0, + end_frame bigint DEFAULT 0, + metadata jsonb, + updated_at timestamp without time zone DEFAULT now(), + file_id integer, + text_content text, + frame_count integer DEFAULT 0, + pre_chunk_ids integer[], + parent_chunk_id character varying(64), + child_chunk_ids text[], + search_vector tsvector, + speaker_ids text[] DEFAULT '{}'::text[], + face_ids text[] DEFAULT '{}'::text[], + visual_stats jsonb DEFAULT '{}'::jsonb +); + + +ALTER TABLE public.chunks OWNER TO accusys; + +-- +-- Name: chunks_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.chunks_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.chunks_id_seq OWNER TO accusys; + +-- +-- Name: chunks_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.chunks_id_seq OWNED BY public.chunks.id; + + +-- +-- Name: face_clusters; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.face_clusters ( + id integer NOT NULL, + cluster_id character varying(255) NOT NULL, + video_uuid character varying(255) NOT NULL, + centroid public.vector(512), + size integer DEFAULT 0 NOT NULL, + 
representative_face_id character varying(255), + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.face_clusters OWNER TO accusys; + +-- +-- Name: TABLE face_clusters; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.face_clusters IS 'Stores face clusters from video analysis'; + + +-- +-- Name: face_clusters_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.face_clusters_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.face_clusters_id_seq OWNER TO accusys; + +-- +-- Name: face_clusters_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.face_clusters_id_seq OWNED BY public.face_clusters.id; + + +-- +-- Name: face_detections; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.face_detections ( + id integer NOT NULL, + video_uuid character varying(255) NOT NULL, + frame_number bigint NOT NULL, + timestamp_secs double precision NOT NULL, + face_id character varying(255), + x integer NOT NULL, + y integer NOT NULL, + width integer NOT NULL, + height integer NOT NULL, + confidence double precision NOT NULL, + embedding public.vector(512), + attributes jsonb, + identity_id integer, + identity_confidence double precision, + cluster_id character varying(255), + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.face_detections OWNER TO accusys; + +-- +-- Name: TABLE face_detections; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.face_detections IS 'Stores individual face detections from videos'; + + +-- +-- Name: face_detections_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.face_detections_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE 
public.face_detections_id_seq OWNER TO accusys; + +-- +-- Name: face_detections_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.face_detections_id_seq OWNED BY public.face_detections.id; + + +-- +-- Name: face_identities; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.face_identities ( + id integer NOT NULL, + face_id character varying(255) NOT NULL, + name character varying(255), + embedding public.vector(512), + attributes jsonb, + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + is_active boolean DEFAULT true +); + + +ALTER TABLE public.face_identities OWNER TO accusys; + +-- +-- Name: TABLE face_identities; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.face_identities IS 'Stores registered face identities with embeddings'; + + +-- +-- Name: face_identities_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.face_identities_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.face_identities_id_seq OWNER TO accusys; + +-- +-- Name: face_identities_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.face_identities_id_seq OWNED BY public.face_identities.id; + + +-- +-- Name: face_recognition_results; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.face_recognition_results ( + id integer NOT NULL, + video_uuid character varying(255) NOT NULL, + frame_count bigint DEFAULT 0 NOT NULL, + fps double precision DEFAULT 0.0 NOT NULL, + total_faces integer DEFAULT 0 NOT NULL, + recognized_faces integer DEFAULT 0 NOT NULL, + clusters_count integer DEFAULT 0 NOT NULL, + result_data jsonb NOT NULL, + processing_time_secs double precision, + created_at timestamp with time zone DEFAULT 
CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.face_recognition_results OWNER TO accusys; + +-- +-- Name: TABLE face_recognition_results; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.face_recognition_results IS 'Stores face recognition processing results'; + + +-- +-- Name: face_recognition_results_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.face_recognition_results_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.face_recognition_results_id_seq OWNER TO accusys; + +-- +-- Name: face_recognition_results_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.face_recognition_results_id_seq OWNED BY public.face_recognition_results.id; + + +-- +-- Name: file_lifecycle; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.file_lifecycle ( + id integer NOT NULL, + file_uuid uuid, + file_path text, + user_cluster character varying(50), + storage_tier character varying(20), + created_at timestamp without time zone, + last_accessed_at timestamp without time zone, + last_modified_at timestamp without time zone, + access_count integer DEFAULT 0, + current_status character varying(20) DEFAULT 'active'::character varying, + tier_migration_count integer DEFAULT 0, + migrated_at timestamp without time zone +); + + +ALTER TABLE public.file_lifecycle OWNER TO accusys; + +-- +-- Name: file_lifecycle_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.file_lifecycle_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.file_lifecycle_id_seq OWNER TO accusys; + +-- +-- Name: file_lifecycle_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.file_lifecycle_id_seq OWNED BY 
public.file_lifecycle.id; + + +-- +-- Name: file_registry; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.file_registry ( + file_uuid uuid DEFAULT gen_random_uuid() NOT NULL, + file_name character varying(255) NOT NULL, + file_path text NOT NULL, + file_path_hash character varying(64) NOT NULL, + file_size bigint NOT NULL, + file_hash character varying(64), + mime_type character varying(100), + user_cluster character varying(50), + owner_id character varying(100), + storage_tier character varying(20) DEFAULT 'hot'::character varying, + storage_location character varying(500), + status character varying(20) DEFAULT 'active'::character varying, + is_registered boolean DEFAULT true, + created_at timestamp without time zone DEFAULT now(), + updated_at timestamp without time zone DEFAULT now(), + last_accessed_at timestamp without time zone, + access_count integer DEFAULT 0, + archived_at timestamp without time zone, + archive_location character varying(500), + retention_until timestamp without time zone, + CONSTRAINT file_registry_status_check CHECK (((status)::text = ANY ((ARRAY['active'::character varying, 'temporary'::character varying, 'archived'::character varying, 'deleted'::character varying])::text[]))), + CONSTRAINT file_registry_storage_tier_check CHECK (((storage_tier)::text = ANY ((ARRAY['hot'::character varying, 'warm'::character varying, 'cold'::character varying])::text[]))), + CONSTRAINT file_registry_user_cluster_check CHECK (((user_cluster)::text = ANY ((ARRAY['family'::character varying, 'work'::character varying, 'wordpress'::character varying, 'shared'::character varying, 'system'::character varying])::text[]))) +); + + +ALTER TABLE public.file_registry OWNER TO accusys; + +-- +-- Name: frames; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.frames ( + id integer NOT NULL, + file_id integer NOT NULL, + frame_number bigint NOT NULL, + "timestamp" double precision NOT NULL, + fps double precision DEFAULT 
24.0 NOT NULL, + yolo_objects jsonb, + ocr_results jsonb, + face_results jsonb, + frame_path text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + pose_results jsonb +); + + +ALTER TABLE public.frames OWNER TO accusys; + +-- +-- Name: frames_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.frames_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.frames_id_seq OWNER TO accusys; + +-- +-- Name: frames_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.frames_id_seq OWNED BY public.frames.id; + + +-- +-- Name: gitea_tokens; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.gitea_tokens ( + id integer NOT NULL, + gitea_token_id bigint NOT NULL, + gitea_user character varying(128) NOT NULL, + token_name character varying(128) NOT NULL, + token_last_eight character varying(8) NOT NULL, + scopes jsonb DEFAULT '[]'::jsonb, + api_key_id character varying(48), + last_verified timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.gitea_tokens OWNER TO accusys; + +-- +-- Name: gitea_tokens_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.gitea_tokens_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.gitea_tokens_id_seq OWNER TO accusys; + +-- +-- Name: gitea_tokens_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.gitea_tokens_id_seq OWNED BY public.gitea_tokens.id; + + +-- +-- Name: identities; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.identities ( + id integer NOT NULL, + name character varying(255) NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb, + uuid uuid DEFAULT gen_random_uuid(), + identity_type character varying(30) DEFAULT 
'people'::character varying, + source character varying(20) DEFAULT 'manual'::character varying, + status character varying(20) DEFAULT 'pending'::character varying, + voice_embedding public.vector(192), + identity_embedding public.vector(768), + reference_data jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT now(), + updated_at timestamp with time zone DEFAULT now(), + tmdb_id integer, + tmdb_profile text, + face_embedding public.vector(512), + CONSTRAINT identities_identity_type_check CHECK (((identity_type)::text = ANY ((ARRAY['people'::character varying, 'brand'::character varying, 'object'::character varying, 'concept'::character varying, 'logo'::character varying, 'symbol'::character varying, 'scene'::character varying, 'sound'::character varying, 'animal'::character varying, 'environmental'::character varying])::text[]))) +); + + +ALTER TABLE public.identities OWNER TO accusys; + +-- +-- Name: COLUMN identities.identity_type; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.identity_type IS 'Identity type: people, brand, object, concept, logo, symbol, scene, sound, animal, environmental'; + + +-- +-- Name: COLUMN identities.source; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.source IS 'Identity source: manual, tmdb, agent_suggested, ai_detection'; + + +-- +-- Name: COLUMN identities.status; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.status IS 'Identity status: pending, confirmed, skipped'; + + +-- +-- Name: COLUMN identities.voice_embedding; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.voice_embedding IS 'ECAPA-TDNN 192-dim voice embedding'; + + +-- +-- Name: COLUMN identities.identity_embedding; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.identity_embedding IS 'CLIP ViT-L/14 768-dim embedding for logo/symbol/object 
identity'; + + +-- +-- Name: COLUMN identities.reference_data; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.reference_data IS 'JSONB: {face_embeddings[], voice_embeddings[], identity_embeddings[], sound_embeddings[], image_urls[]}'; + + +-- +-- Name: COLUMN identities.tmdb_id; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.tmdb_id IS 'TMDB person ID'; + + +-- +-- Name: COLUMN identities.tmdb_profile; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.tmdb_profile IS 'TMDB profile image URL'; + + +-- +-- Name: COLUMN identities.face_embedding; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.identities.face_embedding IS 'InsightFace ArcFace 512-dim embedding'; + + +-- +-- Name: identities_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.identities_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.identities_id_seq OWNER TO accusys; + +-- +-- Name: identities_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.identities_id_seq OWNED BY public.identities.id; + + +-- +-- Name: identity_bindings; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.identity_bindings ( + id integer NOT NULL, + identity_id integer, + uuid text NOT NULL, + binding_type character varying(32) NOT NULL, + binding_value character varying(64) NOT NULL +); + + +ALTER TABLE public.identity_bindings OWNER TO accusys; + +-- +-- Name: identity_bindings_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.identity_bindings_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.identity_bindings_id_seq OWNER TO accusys; + +-- +-- Name: identity_bindings_id_seq; Type: SEQUENCE OWNED BY; Schema: 
public; Owner: accusys +-- + +ALTER SEQUENCE public.identity_bindings_id_seq OWNED BY public.identity_bindings.id; + + +-- +-- Name: merge_history; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.merge_history ( + id integer NOT NULL, + merge_id uuid DEFAULT gen_random_uuid() NOT NULL, + target_person_id character varying(255) NOT NULL, + source_person_ids text[] NOT NULL, + original_target_stats jsonb NOT NULL, + original_source_stats jsonb NOT NULL, + merged_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + merged_by character varying(255), + is_undone boolean DEFAULT false, + undone_at timestamp with time zone, + metadata jsonb DEFAULT '{}'::jsonb +); + + +ALTER TABLE public.merge_history OWNER TO accusys; + +-- +-- Name: TABLE merge_history; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.merge_history IS 'Tracks person merges for undo capability'; + + +-- +-- Name: merge_history_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.merge_history_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.merge_history_id_seq OWNER TO accusys; + +-- +-- Name: merge_history_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.merge_history_id_seq OWNED BY public.merge_history.id; + + +-- +-- Name: monitor_anomalies; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_anomalies ( + id integer NOT NULL, + anomaly_type character varying(50), + severity character varying(20), + source_type character varying(20), + username character varying(100), + source_ip character varying(45), + description text, + details jsonb, + detected_at timestamp without time zone DEFAULT now(), + resolved boolean DEFAULT false, + resolved_at timestamp without time zone, + CONSTRAINT monitor_anomalies_anomaly_type_check CHECK (((anomaly_type)::text = ANY 
((ARRAY['brute_force'::character varying, 'privilege_escalation'::character varying, 'unusual_access'::character varying, 'unusual_time'::character varying, 'excessive_queries'::character varying, 'idle_session'::character varying, 'schema_change'::character varying])::text[]))), + CONSTRAINT monitor_anomalies_severity_check CHECK (((severity)::text = ANY ((ARRAY['low'::character varying, 'medium'::character varying, 'high'::character varying, 'critical'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_anomalies OWNER TO accusys; + +-- +-- Name: monitor_anomalies_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_anomalies_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_anomalies_id_seq OWNER TO accusys; + +-- +-- Name: monitor_anomalies_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_anomalies_id_seq OWNED BY public.monitor_anomalies.id; + + +-- +-- Name: monitor_config; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_config ( + id integer NOT NULL, + config_key character varying(50) NOT NULL, + config_value text, + description character varying(255), + updated_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_config OWNER TO accusys; + +-- +-- Name: monitor_config_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_config_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_config_id_seq OWNER TO accusys; + +-- +-- Name: monitor_config_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_config_id_seq OWNED BY public.monitor_config.id; + + +-- +-- Name: monitor_databases; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE 
public.monitor_databases ( + id integer NOT NULL, + db_type character varying(20) NOT NULL, + db_name character varying(50), + metric_name character varying(50) NOT NULL, + metric_value jsonb, + checked_at timestamp without time zone DEFAULT now(), + CONSTRAINT monitor_databases_db_type_check CHECK (((db_type)::text = ANY ((ARRAY['postgresql'::character varying, 'redis'::character varying, 'qdrant'::character varying, 'mariadb'::character varying, 'mongodb'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_databases OWNER TO accusys; + +-- +-- Name: monitor_databases_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_databases_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_databases_id_seq OWNER TO accusys; + +-- +-- Name: monitor_databases_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_databases_id_seq OWNED BY public.monitor_databases.id; + + +-- +-- Name: monitor_external; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_external ( + id integer NOT NULL, + target_name character varying(50) NOT NULL, + target_type character varying(20), + target_host character varying(255), + is_reachable boolean, + response_time_ms integer, + dns_resolved_ip character varying(45), + error_message text, + checked_at timestamp without time zone DEFAULT now(), + CONSTRAINT monitor_external_target_type_check CHECK (((target_type)::text = ANY ((ARRAY['ddns'::character varying, 'gateway'::character varying, 'internet'::character varying, 'api'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_external OWNER TO accusys; + +-- +-- Name: monitor_external_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_external_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + 
+ALTER SEQUENCE public.monitor_external_id_seq OWNER TO accusys; + +-- +-- Name: monitor_external_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_external_id_seq OWNED BY public.monitor_external.id; + + +-- +-- Name: monitor_jobs; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_jobs ( + id integer NOT NULL, + uuid character varying(16) NOT NULL, + video_path character varying(512), + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + current_processor character varying(20), + progress_total integer DEFAULT 0, + progress_current integer DEFAULT 0, + error_count integer DEFAULT 0, + last_error text, + started_at timestamp without time zone, + updated_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + video_id bigint, + user_id bigint, + processors character varying(20)[] DEFAULT '{asr,cut,yolo,ocr,face,pose,asrx,caption,story}'::character varying[], + completed_processors character varying(20)[] DEFAULT '{}'::character varying[], + failed_processors character varying(20)[] DEFAULT '{}'::character varying[], + CONSTRAINT chk_monitor_jobs_status CHECK (((status)::text = ANY ((ARRAY['pending'::character varying, 'running'::character varying, 'completed'::character varying, 'failed'::character varying, 'cancelled'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_jobs OWNER TO accusys; + +-- +-- Name: COLUMN monitor_jobs.video_id; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.monitor_jobs.video_id IS 'Foreign key to videos.id'; + + +-- +-- Name: COLUMN monitor_jobs.user_id; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.monitor_jobs.user_id IS 'WordPress user ID'; + + +-- +-- Name: COLUMN monitor_jobs.processors; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.monitor_jobs.processors IS 'Processors to 
run: asr, cut, yolo, ocr, face, pose, asrx'; + + +-- +-- Name: COLUMN monitor_jobs.completed_processors; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.monitor_jobs.completed_processors IS 'Successfully completed processors'; + + +-- +-- Name: COLUMN monitor_jobs.failed_processors; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.monitor_jobs.failed_processors IS 'Failed processors'; + + +-- +-- Name: monitor_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_jobs_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_jobs_id_seq OWNER TO accusys; + +-- +-- Name: monitor_jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_jobs_id_seq OWNED BY public.monitor_jobs.id; + + +-- +-- Name: monitor_logins; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_logins ( + id integer NOT NULL, + user_type character varying(20), + username character varying(100), + source_ip character varying(45), + user_agent text, + login_method character varying(20), + success boolean, + failure_reason character varying(200), + login_at timestamp without time zone DEFAULT now(), + CONSTRAINT monitor_logins_user_type_check CHECK (((user_type)::text = ANY ((ARRAY['system'::character varying, 'wordpress'::character varying, 'n8n'::character varying, 'gitea'::character varying, 'sftpgo'::character varying, 'database'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_logins OWNER TO accusys; + +-- +-- Name: monitor_logins_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_logins_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_logins_id_seq OWNER TO accusys; + +-- +-- Name: monitor_logins_id_seq; Type: 
SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_logins_id_seq OWNED BY public.monitor_logins.id; + + +-- +-- Name: monitor_pg_schema_changes; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_pg_schema_changes ( + id integer NOT NULL, + database_name character varying(50), + schema_name character varying(50), + table_name character varying(100), + change_type character varying(20), + column_name character varying(100), + old_value text, + new_value text, + detected_at timestamp without time zone DEFAULT now(), + CONSTRAINT monitor_pg_schema_changes_change_type_check CHECK (((change_type)::text = ANY ((ARRAY['table_created'::character varying, 'table_dropped'::character varying, 'column_added'::character varying, 'column_removed'::character varying, 'column_type_changed'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_pg_schema_changes OWNER TO accusys; + +-- +-- Name: monitor_pg_schema_changes_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_pg_schema_changes_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_pg_schema_changes_id_seq OWNER TO accusys; + +-- +-- Name: monitor_pg_schema_changes_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_pg_schema_changes_id_seq OWNED BY public.monitor_pg_schema_changes.id; + + +-- +-- Name: monitor_pg_tables; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_pg_tables ( + id integer NOT NULL, + database_name character varying(50), + schema_name character varying(50), + table_name character varying(100), + table_type character varying(20), + row_count bigint, + table_size_bytes bigint, + index_size_bytes bigint, + snapshot_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_pg_tables OWNER TO accusys; + +-- +-- Name: 
monitor_pg_tables_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_pg_tables_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_pg_tables_id_seq OWNER TO accusys; + +-- +-- Name: monitor_pg_tables_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_pg_tables_id_seq OWNED BY public.monitor_pg_tables.id; + + +-- +-- Name: monitor_portal_pages; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_portal_pages ( + id integer NOT NULL, + page_url character varying(500) NOT NULL, + page_type character varying(20), + is_accessible boolean, + response_time_ms integer, + http_status integer, + error_message text, + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_portal_pages OWNER TO accusys; + +-- +-- Name: monitor_portal_pages_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_portal_pages_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_portal_pages_id_seq OWNER TO accusys; + +-- +-- Name: monitor_portal_pages_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_portal_pages_id_seq OWNED BY public.monitor_portal_pages.id; + + +-- +-- Name: monitor_portal_users; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_portal_users ( + id integer NOT NULL, + user_id bigint, + username character varying(100), + email character varying(255), + role character varying(50), + is_active boolean, + last_login timestamp without time zone, + created_at timestamp without time zone, + detected_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_portal_users OWNER TO accusys; + +-- +-- Name: monitor_portal_users_id_seq; Type: SEQUENCE; 
Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_portal_users_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_portal_users_id_seq OWNER TO accusys; + +-- +-- Name: monitor_portal_users_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_portal_users_id_seq OWNED BY public.monitor_portal_users.id; + + +-- +-- Name: monitor_qdrant_collections; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_qdrant_collections ( + id integer NOT NULL, + collection_name character varying(100), + vectors_count bigint, + points_count bigint, + disk_size_bytes bigint, + status character varying(20), + snapshot_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_qdrant_collections OWNER TO accusys; + +-- +-- Name: monitor_qdrant_collections_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_qdrant_collections_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_qdrant_collections_id_seq OWNER TO accusys; + +-- +-- Name: monitor_qdrant_collections_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_qdrant_collections_id_seq OWNED BY public.monitor_qdrant_collections.id; + + +-- +-- Name: monitor_resource_usage; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_resource_usage ( + id integer NOT NULL, + user_type character varying(20), + username character varying(100), + service_name character varying(50), + cpu_percent numeric(5,2), + memory_mb integer, + disk_io_read_mb bigint, + disk_io_write_mb bigint, + network_rx_mb bigint, + network_tx_mb bigint, + recorded_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_resource_usage OWNER TO accusys; + +-- +-- Name: 
monitor_resource_usage_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_resource_usage_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_resource_usage_id_seq OWNER TO accusys; + +-- +-- Name: monitor_resource_usage_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_resource_usage_id_seq OWNED BY public.monitor_resource_usage.id; + + +-- +-- Name: monitor_services; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_services ( + id integer NOT NULL, + service_name character varying(50) NOT NULL, + service_type character varying(20), + port integer, + status character varying(20), + response_time_ms integer, + error_message text, + checked_at timestamp without time zone DEFAULT now(), + CONSTRAINT monitor_services_status_check CHECK (((status)::text = ANY ((ARRAY['up'::character varying, 'down'::character varying, 'degraded'::character varying, 'unknown'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_services OWNER TO accusys; + +-- +-- Name: monitor_services_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_services_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_services_id_seq OWNER TO accusys; + +-- +-- Name: monitor_services_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_services_id_seq OWNED BY public.monitor_services.id; + + +-- +-- Name: monitor_sessions; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_sessions ( + id integer NOT NULL, + session_type character varying(20), + service_name character varying(50), + username character varying(100), + source_ip character varying(45), + source_port integer, + connected_at timestamp without time 
zone, + last_activity_at timestamp without time zone, + disconnected_at timestamp without time zone, + bytes_sent bigint, + bytes_received bigint, + status character varying(20), + CONSTRAINT monitor_sessions_session_type_check CHECK (((session_type)::text = ANY ((ARRAY['ssh'::character varying, 'web'::character varying, 'db'::character varying, 'sftp'::character varying, 'rdp'::character varying])::text[]))), + CONSTRAINT monitor_sessions_status_check CHECK (((status)::text = ANY ((ARRAY['active'::character varying, 'disconnected'::character varying, 'timeout'::character varying])::text[]))) +); + + +ALTER TABLE public.monitor_sessions OWNER TO accusys; + +-- +-- Name: monitor_sessions_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_sessions_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_sessions_id_seq OWNER TO accusys; + +-- +-- Name: monitor_sessions_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_sessions_id_seq OWNED BY public.monitor_sessions.id; + + +-- +-- Name: monitor_sudo_history; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_sudo_history ( + id integer NOT NULL, + username character varying(100), + command text, + run_as character varying(100), + tty character varying(50), + source_ip character varying(45), + exit_code integer, + executed_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_sudo_history OWNER TO accusys; + +-- +-- Name: monitor_sudo_history_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_sudo_history_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_sudo_history_id_seq OWNER TO accusys; + +-- +-- Name: monitor_sudo_history_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: 
accusys +-- + +ALTER SEQUENCE public.monitor_sudo_history_id_seq OWNED BY public.monitor_sudo_history.id; + + +-- +-- Name: monitor_workflows; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.monitor_workflows ( + id integer NOT NULL, + workflow_id character varying(50) NOT NULL, + workflow_name character varying(255), + workflow_type character varying(50), + is_active boolean DEFAULT false, + last_executed_at timestamp without time zone, + execution_count integer DEFAULT 0, + success_count integer DEFAULT 0, + failure_count integer DEFAULT 0, + avg_duration_ms integer, + has_schedule boolean DEFAULT false, + has_webhook boolean DEFAULT false, + idle_days integer, + suggestion character varying(100), + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.monitor_workflows OWNER TO accusys; + +-- +-- Name: monitor_workflows_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.monitor_workflows_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.monitor_workflows_id_seq OWNER TO accusys; + +-- +-- Name: monitor_workflows_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.monitor_workflows_id_seq OWNED BY public.monitor_workflows.id; + + +-- +-- Name: n8n_api_keys; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.n8n_api_keys ( + id integer NOT NULL, + n8n_key_id character varying(64) NOT NULL, + label character varying(100) NOT NULL, + api_key_last_eight character varying(8) NOT NULL, + momentry_api_key_id character varying(48), + expires_at timestamp with time zone, + last_verified timestamp with time zone, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP +); + + +ALTER TABLE public.n8n_api_keys OWNER TO accusys; + +-- +-- Name: n8n_api_keys_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE 
public.n8n_api_keys_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.n8n_api_keys_id_seq OWNER TO accusys; + +-- +-- Name: n8n_api_keys_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.n8n_api_keys_id_seq OWNED BY public.n8n_api_keys.id; + + +-- +-- Name: node_process_tracking; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.node_process_tracking ( + id integer NOT NULL, + process_name character varying(100) NOT NULL, + pid integer, + command character varying(500), + node_version character varying(20), + is_managed boolean DEFAULT false, + started_at timestamp without time zone, + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.node_process_tracking OWNER TO accusys; + +-- +-- Name: node_process_tracking_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.node_process_tracking_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.node_process_tracking_id_seq OWNER TO accusys; + +-- +-- Name: node_process_tracking_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.node_process_tracking_id_seq OWNED BY public.node_process_tracking.id; + + +-- +-- Name: node_version_baseline; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.node_version_baseline ( + id integer NOT NULL, + runtime_name character varying(50) NOT NULL, + required_version character varying(20) NOT NULL, + current_version character varying(20), + process_name character varying(100), + process_path text, + is_compliant boolean, + locked_path character varying(500), + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.node_version_baseline OWNER TO accusys; + +-- +-- Name: node_version_baseline_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + 
+CREATE SEQUENCE public.node_version_baseline_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.node_version_baseline_id_seq OWNER TO accusys; + +-- +-- Name: node_version_baseline_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.node_version_baseline_id_seq OWNED BY public.node_version_baseline.id; + + +-- +-- Name: parent_chunks; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.parent_chunks ( + id integer NOT NULL, + uuid text NOT NULL, + scene_order integer, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + summary_text text, + summary_vector public.vector(768), + metadata jsonb, + created_at timestamp with time zone DEFAULT now(), + rule_3_markers jsonb DEFAULT '{}'::jsonb, + start_frame bigint, + end_frame bigint, + fps double precision DEFAULT 24.0 +); + + +ALTER TABLE public.parent_chunks OWNER TO accusys; + +-- +-- Name: parent_chunks_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.parent_chunks_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.parent_chunks_id_seq OWNER TO accusys; + +-- +-- Name: parent_chunks_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.parent_chunks_id_seq OWNED BY public.parent_chunks.id; + + +-- +-- Name: parent_chunks_poc; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.parent_chunks_poc ( + id integer NOT NULL, + uuid text NOT NULL, + scene_order integer, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + summary_text text, + summary_vector public.vector(768), + metadata jsonb, + created_at timestamp with time zone DEFAULT now() +); + + +ALTER TABLE public.parent_chunks_poc OWNER TO accusys; + +-- +-- Name: parent_chunks_poc_id_seq; Type: SEQUENCE; Schema: public; 
Owner: accusys +-- + +CREATE SEQUENCE public.parent_chunks_poc_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.parent_chunks_poc_id_seq OWNER TO accusys; + +-- +-- Name: parent_chunks_poc_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.parent_chunks_poc_id_seq OWNED BY public.parent_chunks_poc.id; + + +-- +-- Name: person_appearances; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.person_appearances ( + id integer NOT NULL, + person_id character varying(255) NOT NULL, + video_uuid character varying(255) NOT NULL, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + duration double precision NOT NULL, + face_detection_id integer, + asrx_segment_start double precision, + asrx_segment_end double precision, + confidence double precision DEFAULT 0.0, + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT person_appearances_confidence_check CHECK (((confidence >= (0.0)::double precision) AND (confidence <= (1.0)::double precision))), + CONSTRAINT person_appearances_duration_check CHECK ((duration > (0)::double precision)), + CONSTRAINT person_appearances_end_time_check CHECK ((end_time >= (0)::double precision)), + CONSTRAINT person_appearances_start_time_check CHECK ((start_time >= (0)::double precision)), + CONSTRAINT valid_appearance_time CHECK ((end_time > start_time)), + CONSTRAINT valid_duration CHECK (((end_time - start_time) = duration)) +); + + +ALTER TABLE public.person_appearances OWNER TO accusys; + +-- +-- Name: TABLE person_appearances; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.person_appearances IS 'Stores individual person appearance records with time ranges'; + + +-- +-- Name: person_appearances_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE 
public.person_appearances_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.person_appearances_id_seq OWNER TO accusys; + +-- +-- Name: person_appearances_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.person_appearances_id_seq OWNED BY public.person_appearances.id; + + +-- +-- Name: person_identities; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.person_identities ( + id integer NOT NULL, + person_id character varying(255) NOT NULL, + face_identity_id integer, + speaker_id character varying(64), + video_uuid character varying(255) NOT NULL, + confidence double precision DEFAULT 0.0, + name character varying(255), + metadata jsonb DEFAULT '{}'::jsonb, + first_appearance_time double precision, + last_appearance_time double precision, + total_appearance_duration double precision DEFAULT 0.0, + appearance_count integer DEFAULT 0, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + is_confirmed boolean DEFAULT false, + aliases jsonb DEFAULT '[]'::jsonb, + original_name character varying(255), + character_name character varying(255), + age integer, + gender character varying(20), + CONSTRAINT person_identities_confidence_check CHECK (((confidence >= (0.0)::double precision) AND (confidence <= (1.0)::double precision))), + CONSTRAINT valid_time_range CHECK (((first_appearance_time IS NULL) OR (last_appearance_time IS NULL) OR (last_appearance_time >= first_appearance_time))) +); + + +ALTER TABLE public.person_identities OWNER TO accusys; + +-- +-- Name: TABLE person_identities; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.person_identities IS 'Stores person identity associations linking face and speaker identities'; + + +-- +-- Name: person_identities_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE 
SEQUENCE public.person_identities_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.person_identities_id_seq OWNER TO accusys; + +-- +-- Name: person_identities_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.person_identities_id_seq OWNED BY public.person_identities.id; + + +-- +-- Name: processor_results; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.processor_results ( + id integer NOT NULL, + job_id integer, + video_id bigint, + processor character varying(20) NOT NULL, + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + output_path text, + started_at timestamp without time zone, + completed_at timestamp without time zone, + error_message text, + progress_total integer DEFAULT 0, + progress_current integer DEFAULT 0, + last_checkpoint jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + output_data jsonb, + duration_secs double precision GENERATED ALWAYS AS ( +CASE + WHEN ((completed_at IS NOT NULL) AND (started_at IS NOT NULL)) THEN EXTRACT(epoch FROM (completed_at - started_at)) + ELSE NULL::numeric +END) STORED, + processor_version character varying(50), + model_name character varying(100), + model_version character varying(50), + contract_version character varying(20), + CONSTRAINT chk_processor_results_processor CHECK (((processor)::text = ANY ((ARRAY['asr'::character varying, 'cut'::character varying, 'yolo'::character varying, 'ocr'::character varying, 'face'::character varying, 'pose'::character varying, 'asrx'::character varying])::text[]))), + CONSTRAINT chk_processor_results_status CHECK (((status)::text = ANY ((ARRAY['pending'::character varying, 'running'::character varying, 'completed'::character varying, 'failed'::character varying, 'skipped'::character varying])::text[]))) +); + + +ALTER 
TABLE public.processor_results OWNER TO accusys; + +-- +-- Name: TABLE processor_results; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON TABLE public.processor_results IS 'Tracks individual processor execution status'; + + +-- +-- Name: COLUMN processor_results.status; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.processor_results.status IS 'pending, running, completed, failed, skipped'; + + +-- +-- Name: COLUMN processor_results.output_data; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.processor_results.output_data IS 'JSON output from processor execution'; + + +-- +-- Name: COLUMN processor_results.duration_secs; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.processor_results.duration_secs IS 'Computed duration in seconds (completed - started) as double precision'; + + +-- +-- Name: processor_results_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.processor_results_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.processor_results_id_seq OWNER TO accusys; + +-- +-- Name: processor_results_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.processor_results_id_seq OWNED BY public.processor_results.id; + + +-- +-- Name: python_script_tracking; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.python_script_tracking ( + id integer NOT NULL, + script_path text NOT NULL, + shebang_version character varying(20), + actual_version character varying(20), + is_compliant boolean DEFAULT false, + last_run_at timestamp without time zone, + exit_code integer, + error_output text, + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.python_script_tracking OWNER TO accusys; + +-- +-- Name: python_script_tracking_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- 
+ +CREATE SEQUENCE public.python_script_tracking_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.python_script_tracking_id_seq OWNER TO accusys; + +-- +-- Name: python_script_tracking_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.python_script_tracking_id_seq OWNED BY public.python_script_tracking.id; + + +-- +-- Name: python_version_baseline; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.python_version_baseline ( + id integer NOT NULL, + runtime_name character varying(50) NOT NULL, + required_version character varying(20) NOT NULL, + current_version character varying(20), + interpreter_path character varying(500), + is_compliant boolean, + checked_at timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.python_version_baseline OWNER TO accusys; + +-- +-- Name: python_version_baseline_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.python_version_baseline_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.python_version_baseline_id_seq OWNER TO accusys; + +-- +-- Name: python_version_baseline_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.python_version_baseline_id_seq OWNED BY public.python_version_baseline.id; + + +-- +-- Name: storage_access_logs; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.storage_access_logs ( + id integer NOT NULL, + user_cluster character varying(50), + owner_id character varying(100), + file_path text, + access_type character varying(20), + access_time timestamp without time zone DEFAULT now(), + client_ip character varying(45), + access_method character varying(20), + CONSTRAINT storage_access_logs_access_type_check CHECK (((access_type)::text = ANY ((ARRAY['read'::character varying, 'write'::character varying, 
'delete'::character varying, 'download'::character varying, 'move'::character varying])::text[]))) +); + + +ALTER TABLE public.storage_access_logs OWNER TO accusys; + +-- +-- Name: storage_access_logs_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.storage_access_logs_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.storage_access_logs_id_seq OWNER TO accusys; + +-- +-- Name: storage_access_logs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.storage_access_logs_id_seq OWNED BY public.storage_access_logs.id; + + +-- +-- Name: storage_usage_stats; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.storage_usage_stats ( + id integer NOT NULL, + user_cluster character varying(50), + storage_tier character varying(20), + file_count bigint, + total_size_bytes bigint, + record_time timestamp without time zone DEFAULT now() +); + + +ALTER TABLE public.storage_usage_stats OWNER TO accusys; + +-- +-- Name: storage_usage_stats_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.storage_usage_stats_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.storage_usage_stats_id_seq OWNER TO accusys; + +-- +-- Name: storage_usage_stats_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.storage_usage_stats_id_seq OWNED BY public.storage_usage_stats.id; + + +-- +-- Name: v_idle_workflows; Type: VIEW; Schema: public; Owner: accusys +-- + +CREATE VIEW public.v_idle_workflows AS + SELECT workflow_name, + idle_days, + suggestion, + last_executed_at + FROM public.monitor_workflows + WHERE ((idle_days > 30) AND (is_active = true)) + ORDER BY idle_days DESC; + + +ALTER VIEW public.v_idle_workflows OWNER TO accusys; + +-- +-- Name: v_recent_anomalies; Type: VIEW; Schema: public; Owner: 
accusys +-- + +CREATE VIEW public.v_recent_anomalies AS + SELECT anomaly_type, + severity, + username, + source_ip, + description, + detected_at + FROM public.monitor_anomalies + WHERE (detected_at > (now() - '24:00:00'::interval)) + ORDER BY detected_at DESC; + + +ALTER VIEW public.v_recent_anomalies OWNER TO accusys; + +-- +-- Name: v_service_health; Type: VIEW; Schema: public; Owner: accusys +-- + +CREATE VIEW public.v_service_health AS + SELECT service_name, + status, + count(*) AS check_count, + count(*) FILTER (WHERE ((status)::text = 'up'::text)) AS up_count, + count(*) FILTER (WHERE ((status)::text = 'down'::text)) AS down_count, + avg(response_time_ms) AS avg_response_time, + max(checked_at) AS last_check + FROM public.monitor_services + WHERE (checked_at > (now() - '24:00:00'::interval)) + GROUP BY service_name, status; + + +ALTER VIEW public.v_service_health OWNER TO accusys; + +-- +-- Name: v_storage_overview; Type: VIEW; Schema: public; Owner: accusys +-- + +CREATE VIEW public.v_storage_overview AS + SELECT user_cluster, + storage_tier, + count(*) AS file_count, + sum(file_size) AS total_size + FROM public.file_registry + WHERE ((status)::text = 'active'::text) + GROUP BY user_cluster, storage_tier; + + +ALTER VIEW public.v_storage_overview OWNER TO accusys; + +-- +-- Name: video_events; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.video_events ( + id integer NOT NULL, + uuid text NOT NULL, + start_time double precision NOT NULL, + end_time double precision NOT NULL, + event_type text NOT NULL, + confidence double precision DEFAULT 0.0, + metadata jsonb, + created_at timestamp with time zone DEFAULT now() +); + + +ALTER TABLE public.video_events OWNER TO accusys; + +-- +-- Name: video_events_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.video_events_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.video_events_id_seq 
OWNER TO accusys; + +-- +-- Name: video_events_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.video_events_id_seq OWNED BY public.video_events.id; + + +-- +-- Name: video_identities; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.video_identities ( + id integer NOT NULL, + uuid text NOT NULL, + name text NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp with time zone DEFAULT now() +); + + +ALTER TABLE public.video_identities OWNER TO accusys; + +-- +-- Name: video_identities_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.video_identities_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.video_identities_id_seq OWNER TO accusys; + +-- +-- Name: video_identities_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.video_identities_id_seq OWNED BY public.video_identities.id; + + +-- +-- Name: videos; Type: TABLE; Schema: public; Owner: accusys +-- + +CREATE TABLE public.videos ( + id integer NOT NULL, + file_uuid character varying(32) CONSTRAINT videos_uuid_not_null NOT NULL, + file_path text NOT NULL, + file_name text NOT NULL, + duration double precision, + width integer, + height integer, + fps double precision, + probe_json jsonb, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + fs_video boolean DEFAULT false, + fs_json boolean DEFAULT false, + psql_chunk boolean DEFAULT false, + pobject_chunk boolean DEFAULT false, + mobject_chunk boolean DEFAULT false, + pvector_chunk boolean DEFAULT false, + qvector_chunk boolean DEFAULT false, + status character varying(20) DEFAULT 'pending'::character varying, + user_id bigint, + job_id integer, + registered_at timestamp without time zone, + registration_time timestamp with time zone, + total_frames bigint 
DEFAULT 0, + processing_status jsonb DEFAULT '{}'::jsonb, + file_type character varying(20), + birth_registration jsonb DEFAULT '{}'::jsonb, + CONSTRAINT chk_videos_status CHECK (((status)::text = ANY ((ARRAY['pending'::character varying, 'processing'::character varying, 'completed'::character varying, 'failed'::character varying])::text[]))) +); + + +ALTER TABLE public.videos OWNER TO accusys; + +-- +-- Name: COLUMN videos.status; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.videos.status IS 'Video processing status: pending, processing, completed, failed'; + + +-- +-- Name: COLUMN videos.user_id; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.videos.user_id IS 'WordPress user ID (for user association tracking)'; + + +-- +-- Name: COLUMN videos.job_id; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.videos.job_id IS 'Associated monitor_jobs ID'; + + +-- +-- Name: COLUMN videos.processing_status; Type: COMMENT; Schema: public; Owner: accusys +-- + +COMMENT ON COLUMN public.videos.processing_status IS 'Processing progress JSON: {"active_processors": [...], "progress": {...}}'; + + +-- +-- Name: videos_id_seq; Type: SEQUENCE; Schema: public; Owner: accusys +-- + +CREATE SEQUENCE public.videos_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +ALTER SEQUENCE public.videos_id_seq OWNER TO accusys; + +-- +-- Name: videos_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: accusys +-- + +ALTER SEQUENCE public.videos_id_seq OWNED BY public.videos.id; + + +-- +-- Name: api_key_anomalies id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_key_anomalies ALTER COLUMN id SET DEFAULT nextval('public.api_key_anomalies_id_seq'::regclass); + + +-- +-- Name: api_key_audit_log id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_key_audit_log ALTER COLUMN id SET DEFAULT 
nextval('public.api_key_audit_log_id_seq'::regclass); + + +-- +-- Name: api_keys id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_keys ALTER COLUMN id SET DEFAULT nextval('public.api_keys_id_seq'::regclass); + + +-- +-- Name: backup_history id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_history ALTER COLUMN id SET DEFAULT nextval('public.backup_history_id_seq'::regclass); + + +-- +-- Name: backup_registry id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_registry ALTER COLUMN id SET DEFAULT nextval('public.backup_registry_id_seq'::regclass); + + +-- +-- Name: backup_storage_stats id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_storage_stats ALTER COLUMN id SET DEFAULT nextval('public.backup_storage_stats_id_seq'::regclass); + + +-- +-- Name: castings id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.castings ALTER COLUMN id SET DEFAULT nextval('public.castings_id_seq'::regclass); + + +-- +-- Name: characters id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.characters ALTER COLUMN id SET DEFAULT nextval('public.characters_id_seq'::regclass); + + +-- +-- Name: child_chunks id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.child_chunks ALTER COLUMN id SET DEFAULT nextval('public.child_chunks_id_seq'::regclass); + + +-- +-- Name: chunk_vectors id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunk_vectors ALTER COLUMN id SET DEFAULT nextval('public.chunk_vectors_id_seq'::regclass); + + +-- +-- Name: chunks id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunks ALTER COLUMN id SET DEFAULT nextval('public.chunks_id_seq'::regclass); + + +-- +-- Name: face_clusters id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_clusters ALTER 
COLUMN id SET DEFAULT nextval('public.face_clusters_id_seq'::regclass); + + +-- +-- Name: face_detections id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_detections ALTER COLUMN id SET DEFAULT nextval('public.face_detections_id_seq'::regclass); + + +-- +-- Name: face_identities id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_identities ALTER COLUMN id SET DEFAULT nextval('public.face_identities_id_seq'::regclass); + + +-- +-- Name: face_recognition_results id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_recognition_results ALTER COLUMN id SET DEFAULT nextval('public.face_recognition_results_id_seq'::regclass); + + +-- +-- Name: file_lifecycle id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.file_lifecycle ALTER COLUMN id SET DEFAULT nextval('public.file_lifecycle_id_seq'::regclass); + + +-- +-- Name: frames id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.frames ALTER COLUMN id SET DEFAULT nextval('public.frames_id_seq'::regclass); + + +-- +-- Name: gitea_tokens id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.gitea_tokens ALTER COLUMN id SET DEFAULT nextval('public.gitea_tokens_id_seq'::regclass); + + +-- +-- Name: identities id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identities ALTER COLUMN id SET DEFAULT nextval('public.identities_id_seq'::regclass); + + +-- +-- Name: identity_bindings id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identity_bindings ALTER COLUMN id SET DEFAULT nextval('public.identity_bindings_id_seq'::regclass); + + +-- +-- Name: merge_history id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.merge_history ALTER COLUMN id SET DEFAULT nextval('public.merge_history_id_seq'::regclass); + + +-- +-- Name: monitor_anomalies id; Type: DEFAULT; 
Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_anomalies ALTER COLUMN id SET DEFAULT nextval('public.monitor_anomalies_id_seq'::regclass); + + +-- +-- Name: monitor_config id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_config ALTER COLUMN id SET DEFAULT nextval('public.monitor_config_id_seq'::regclass); + + +-- +-- Name: monitor_databases id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_databases ALTER COLUMN id SET DEFAULT nextval('public.monitor_databases_id_seq'::regclass); + + +-- +-- Name: monitor_external id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_external ALTER COLUMN id SET DEFAULT nextval('public.monitor_external_id_seq'::regclass); + + +-- +-- Name: monitor_jobs id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_jobs ALTER COLUMN id SET DEFAULT nextval('public.monitor_jobs_id_seq'::regclass); + + +-- +-- Name: monitor_logins id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_logins ALTER COLUMN id SET DEFAULT nextval('public.monitor_logins_id_seq'::regclass); + + +-- +-- Name: monitor_pg_schema_changes id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_pg_schema_changes ALTER COLUMN id SET DEFAULT nextval('public.monitor_pg_schema_changes_id_seq'::regclass); + + +-- +-- Name: monitor_pg_tables id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_pg_tables ALTER COLUMN id SET DEFAULT nextval('public.monitor_pg_tables_id_seq'::regclass); + + +-- +-- Name: monitor_portal_pages id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_portal_pages ALTER COLUMN id SET DEFAULT nextval('public.monitor_portal_pages_id_seq'::regclass); + + +-- +-- Name: monitor_portal_users id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER 
TABLE ONLY public.monitor_portal_users ALTER COLUMN id SET DEFAULT nextval('public.monitor_portal_users_id_seq'::regclass); + + +-- +-- Name: monitor_qdrant_collections id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_qdrant_collections ALTER COLUMN id SET DEFAULT nextval('public.monitor_qdrant_collections_id_seq'::regclass); + + +-- +-- Name: monitor_resource_usage id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_resource_usage ALTER COLUMN id SET DEFAULT nextval('public.monitor_resource_usage_id_seq'::regclass); + + +-- +-- Name: monitor_services id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_services ALTER COLUMN id SET DEFAULT nextval('public.monitor_services_id_seq'::regclass); + + +-- +-- Name: monitor_sessions id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_sessions ALTER COLUMN id SET DEFAULT nextval('public.monitor_sessions_id_seq'::regclass); + + +-- +-- Name: monitor_sudo_history id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_sudo_history ALTER COLUMN id SET DEFAULT nextval('public.monitor_sudo_history_id_seq'::regclass); + + +-- +-- Name: monitor_workflows id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_workflows ALTER COLUMN id SET DEFAULT nextval('public.monitor_workflows_id_seq'::regclass); + + +-- +-- Name: n8n_api_keys id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.n8n_api_keys ALTER COLUMN id SET DEFAULT nextval('public.n8n_api_keys_id_seq'::regclass); + + +-- +-- Name: node_process_tracking id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.node_process_tracking ALTER COLUMN id SET DEFAULT nextval('public.node_process_tracking_id_seq'::regclass); + + +-- +-- Name: node_version_baseline id; Type: DEFAULT; Schema: public; Owner: accusys +-- + 
+ALTER TABLE ONLY public.node_version_baseline ALTER COLUMN id SET DEFAULT nextval('public.node_version_baseline_id_seq'::regclass); + + +-- +-- Name: parent_chunks id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.parent_chunks ALTER COLUMN id SET DEFAULT nextval('public.parent_chunks_id_seq'::regclass); + + +-- +-- Name: parent_chunks_poc id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.parent_chunks_poc ALTER COLUMN id SET DEFAULT nextval('public.parent_chunks_poc_id_seq'::regclass); + + +-- +-- Name: person_appearances id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_appearances ALTER COLUMN id SET DEFAULT nextval('public.person_appearances_id_seq'::regclass); + + +-- +-- Name: person_identities id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_identities ALTER COLUMN id SET DEFAULT nextval('public.person_identities_id_seq'::regclass); + + +-- +-- Name: processor_results id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.processor_results ALTER COLUMN id SET DEFAULT nextval('public.processor_results_id_seq'::regclass); + + +-- +-- Name: python_script_tracking id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.python_script_tracking ALTER COLUMN id SET DEFAULT nextval('public.python_script_tracking_id_seq'::regclass); + + +-- +-- Name: python_version_baseline id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.python_version_baseline ALTER COLUMN id SET DEFAULT nextval('public.python_version_baseline_id_seq'::regclass); + + +-- +-- Name: storage_access_logs id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.storage_access_logs ALTER COLUMN id SET DEFAULT nextval('public.storage_access_logs_id_seq'::regclass); + + +-- +-- Name: storage_usage_stats id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER 
TABLE ONLY public.storage_usage_stats ALTER COLUMN id SET DEFAULT nextval('public.storage_usage_stats_id_seq'::regclass); + + +-- +-- Name: video_events id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.video_events ALTER COLUMN id SET DEFAULT nextval('public.video_events_id_seq'::regclass); + + +-- +-- Name: video_identities id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.video_identities ALTER COLUMN id SET DEFAULT nextval('public.video_identities_id_seq'::regclass); + + +-- +-- Name: videos id; Type: DEFAULT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.videos ALTER COLUMN id SET DEFAULT nextval('public.videos_id_seq'::regclass); + + +-- +-- Name: api_key_anomalies api_key_anomalies_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_key_anomalies + ADD CONSTRAINT api_key_anomalies_pkey PRIMARY KEY (id); + + +-- +-- Name: api_key_audit_log api_key_audit_log_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_key_audit_log + ADD CONSTRAINT api_key_audit_log_pkey PRIMARY KEY (id); + + +-- +-- Name: api_keys api_keys_key_id_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_keys + ADD CONSTRAINT api_keys_key_id_key UNIQUE (key_id); + + +-- +-- Name: api_keys api_keys_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.api_keys + ADD CONSTRAINT api_keys_pkey PRIMARY KEY (id); + + +-- +-- Name: backup_history backup_history_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_history + ADD CONSTRAINT backup_history_pkey PRIMARY KEY (id); + + +-- +-- Name: backup_registry backup_registry_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_registry + ADD CONSTRAINT backup_registry_pkey PRIMARY KEY (id); + + +-- +-- Name: backup_storage_stats 
backup_storage_stats_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.backup_storage_stats + ADD CONSTRAINT backup_storage_stats_pkey PRIMARY KEY (id); + + +-- +-- Name: castings castings_character_id_talent_id_track_type_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.castings + ADD CONSTRAINT castings_character_id_talent_id_track_type_key UNIQUE (character_id, talent_id, track_type); + + +-- +-- Name: castings castings_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.castings + ADD CONSTRAINT castings_pkey PRIMARY KEY (id); + + +-- +-- Name: characters characters_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.characters + ADD CONSTRAINT characters_pkey PRIMARY KEY (id); + + +-- +-- Name: characters characters_video_uuid_name_language_track_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.characters + ADD CONSTRAINT characters_video_uuid_name_language_track_key UNIQUE (video_uuid, name, language_track); + + +-- +-- Name: child_chunks child_chunks_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.child_chunks + ADD CONSTRAINT child_chunks_pkey PRIMARY KEY (id); + + +-- +-- Name: chunk_vectors chunk_vectors_chunk_id_uuid_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunk_vectors + ADD CONSTRAINT chunk_vectors_chunk_id_uuid_key UNIQUE (chunk_id, uuid); + + +-- +-- Name: chunk_vectors chunk_vectors_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunk_vectors + ADD CONSTRAINT chunk_vectors_pkey PRIMARY KEY (id); + + +-- +-- Name: chunks chunks_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunks + ADD CONSTRAINT chunks_pkey PRIMARY KEY (id); + + +-- +-- Name: chunks chunks_uuid_chunk_id_key; Type: CONSTRAINT; Schema: public; Owner: 
accusys +-- + +ALTER TABLE ONLY public.chunks + ADD CONSTRAINT chunks_uuid_chunk_id_key UNIQUE (uuid, chunk_id); + + +-- +-- Name: face_clusters face_clusters_cluster_id_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_clusters + ADD CONSTRAINT face_clusters_cluster_id_key UNIQUE (cluster_id); + + +-- +-- Name: face_clusters face_clusters_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_clusters + ADD CONSTRAINT face_clusters_pkey PRIMARY KEY (id); + + +-- +-- Name: face_detections face_detections_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_detections + ADD CONSTRAINT face_detections_pkey PRIMARY KEY (id); + + +-- +-- Name: face_identities face_identities_face_id_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_identities + ADD CONSTRAINT face_identities_face_id_key UNIQUE (face_id); + + +-- +-- Name: face_identities face_identities_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_identities + ADD CONSTRAINT face_identities_pkey PRIMARY KEY (id); + + +-- +-- Name: face_recognition_results face_recognition_results_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_recognition_results + ADD CONSTRAINT face_recognition_results_pkey PRIMARY KEY (id); + + +-- +-- Name: face_recognition_results face_recognition_results_video_uuid_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_recognition_results + ADD CONSTRAINT face_recognition_results_video_uuid_key UNIQUE (video_uuid); + + +-- +-- Name: file_lifecycle file_lifecycle_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.file_lifecycle + ADD CONSTRAINT file_lifecycle_pkey PRIMARY KEY (id); + + +-- +-- Name: file_registry file_registry_file_path_hash_key; Type: CONSTRAINT; Schema: public; Owner: 
accusys +-- + +ALTER TABLE ONLY public.file_registry + ADD CONSTRAINT file_registry_file_path_hash_key UNIQUE (file_path_hash); + + +-- +-- Name: file_registry file_registry_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.file_registry + ADD CONSTRAINT file_registry_pkey PRIMARY KEY (file_uuid); + + +-- +-- Name: frames frames_file_id_frame_number_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.frames + ADD CONSTRAINT frames_file_id_frame_number_key UNIQUE (file_id, frame_number); + + +-- +-- Name: frames frames_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.frames + ADD CONSTRAINT frames_pkey PRIMARY KEY (id); + + +-- +-- Name: gitea_tokens gitea_tokens_gitea_user_token_name_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.gitea_tokens + ADD CONSTRAINT gitea_tokens_gitea_user_token_name_key UNIQUE (gitea_user, token_name); + + +-- +-- Name: gitea_tokens gitea_tokens_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.gitea_tokens + ADD CONSTRAINT gitea_tokens_pkey PRIMARY KEY (id); + + +-- +-- Name: identities identities_name_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identities + ADD CONSTRAINT identities_name_key UNIQUE (name); + + +-- +-- Name: identities identities_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identities + ADD CONSTRAINT identities_pkey PRIMARY KEY (id); + + +-- +-- Name: identity_bindings identity_bindings_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identity_bindings + ADD CONSTRAINT identity_bindings_pkey PRIMARY KEY (id); + + +-- +-- Name: identity_bindings identity_bindings_uuid_binding_type_binding_value_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identity_bindings + ADD CONSTRAINT 
identity_bindings_uuid_binding_type_binding_value_key UNIQUE (uuid, binding_type, binding_value); + + +-- +-- Name: merge_history merge_history_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.merge_history + ADD CONSTRAINT merge_history_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_anomalies monitor_anomalies_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_anomalies + ADD CONSTRAINT monitor_anomalies_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_config monitor_config_config_key_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_config + ADD CONSTRAINT monitor_config_config_key_key UNIQUE (config_key); + + +-- +-- Name: monitor_config monitor_config_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_config + ADD CONSTRAINT monitor_config_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_databases monitor_databases_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_databases + ADD CONSTRAINT monitor_databases_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_external monitor_external_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_external + ADD CONSTRAINT monitor_external_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_jobs monitor_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_jobs + ADD CONSTRAINT monitor_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_logins monitor_logins_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_logins + ADD CONSTRAINT monitor_logins_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_pg_schema_changes monitor_pg_schema_changes_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_pg_schema_changes + ADD CONSTRAINT 
monitor_pg_schema_changes_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_pg_tables monitor_pg_tables_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_pg_tables + ADD CONSTRAINT monitor_pg_tables_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_portal_pages monitor_portal_pages_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_portal_pages + ADD CONSTRAINT monitor_portal_pages_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_portal_users monitor_portal_users_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_portal_users + ADD CONSTRAINT monitor_portal_users_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_qdrant_collections monitor_qdrant_collections_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_qdrant_collections + ADD CONSTRAINT monitor_qdrant_collections_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_resource_usage monitor_resource_usage_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_resource_usage + ADD CONSTRAINT monitor_resource_usage_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_services monitor_services_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_services + ADD CONSTRAINT monitor_services_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_sessions monitor_sessions_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_sessions + ADD CONSTRAINT monitor_sessions_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_sudo_history monitor_sudo_history_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_sudo_history + ADD CONSTRAINT monitor_sudo_history_pkey PRIMARY KEY (id); + + +-- +-- Name: monitor_workflows monitor_workflows_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY 
public.monitor_workflows + ADD CONSTRAINT monitor_workflows_pkey PRIMARY KEY (id); + + +-- +-- Name: n8n_api_keys n8n_api_keys_n8n_key_id_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.n8n_api_keys + ADD CONSTRAINT n8n_api_keys_n8n_key_id_key UNIQUE (n8n_key_id); + + +-- +-- Name: n8n_api_keys n8n_api_keys_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.n8n_api_keys + ADD CONSTRAINT n8n_api_keys_pkey PRIMARY KEY (id); + + +-- +-- Name: node_process_tracking node_process_tracking_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.node_process_tracking + ADD CONSTRAINT node_process_tracking_pkey PRIMARY KEY (id); + + +-- +-- Name: node_version_baseline node_version_baseline_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.node_version_baseline + ADD CONSTRAINT node_version_baseline_pkey PRIMARY KEY (id); + + +-- +-- Name: parent_chunks parent_chunks_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.parent_chunks + ADD CONSTRAINT parent_chunks_pkey PRIMARY KEY (id); + + +-- +-- Name: parent_chunks_poc parent_chunks_poc_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.parent_chunks_poc + ADD CONSTRAINT parent_chunks_poc_pkey PRIMARY KEY (id); + + +-- +-- Name: person_appearances person_appearances_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_appearances + ADD CONSTRAINT person_appearances_pkey PRIMARY KEY (id); + + +-- +-- Name: person_identities person_identities_person_id_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_identities + ADD CONSTRAINT person_identities_person_id_key UNIQUE (person_id); + + +-- +-- Name: person_identities person_identities_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_identities + ADD 
CONSTRAINT person_identities_pkey PRIMARY KEY (id); + + +-- +-- Name: processor_results processor_results_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.processor_results + ADD CONSTRAINT processor_results_pkey PRIMARY KEY (id); + + +-- +-- Name: python_script_tracking python_script_tracking_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.python_script_tracking + ADD CONSTRAINT python_script_tracking_pkey PRIMARY KEY (id); + + +-- +-- Name: python_version_baseline python_version_baseline_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.python_version_baseline + ADD CONSTRAINT python_version_baseline_pkey PRIMARY KEY (id); + + +-- +-- Name: storage_access_logs storage_access_logs_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.storage_access_logs + ADD CONSTRAINT storage_access_logs_pkey PRIMARY KEY (id); + + +-- +-- Name: storage_usage_stats storage_usage_stats_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.storage_usage_stats + ADD CONSTRAINT storage_usage_stats_pkey PRIMARY KEY (id); + + +-- +-- Name: face_detections unique_detection_per_frame; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_detections + ADD CONSTRAINT unique_detection_per_frame UNIQUE (video_uuid, frame_number, x, y, width, height); + + +-- +-- Name: processor_results unique_job_processor; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.processor_results + ADD CONSTRAINT unique_job_processor UNIQUE (job_id, processor); + + +-- +-- Name: person_identities unique_person_identity; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_identities + ADD CONSTRAINT unique_person_identity UNIQUE (video_uuid, face_identity_id, speaker_id); + + +-- +-- Name: video_events video_events_pkey; Type: CONSTRAINT; Schema: 
public; Owner: accusys +-- + +ALTER TABLE ONLY public.video_events + ADD CONSTRAINT video_events_pkey PRIMARY KEY (id); + + +-- +-- Name: video_identities video_identities_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.video_identities + ADD CONSTRAINT video_identities_pkey PRIMARY KEY (id); + + +-- +-- Name: videos videos_pkey; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.videos + ADD CONSTRAINT videos_pkey PRIMARY KEY (id); + + +-- +-- Name: videos videos_uuid_key; Type: CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.videos + ADD CONSTRAINT videos_uuid_key UNIQUE (file_uuid); + + +-- +-- Name: idx_anomalies_key_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_anomalies_key_id ON public.api_key_anomalies USING btree (key_id); + + +-- +-- Name: idx_anomalies_resolved; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_anomalies_resolved ON public.api_key_anomalies USING btree (resolved); + + +-- +-- Name: idx_api_keys_hash; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_api_keys_hash ON public.api_keys USING btree (key_hash); + + +-- +-- Name: idx_api_keys_key_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_api_keys_key_id ON public.api_keys USING btree (key_id); + + +-- +-- Name: idx_api_keys_status; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_api_keys_status ON public.api_keys USING btree (status); + + +-- +-- Name: idx_api_keys_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_api_keys_type ON public.api_keys USING btree (key_type); + + +-- +-- Name: idx_audit_action; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_audit_action ON public.api_key_audit_log USING btree (action); + + +-- +-- Name: idx_audit_created_at; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_audit_created_at ON 
public.api_key_audit_log USING btree (created_at); + + +-- +-- Name: idx_audit_key_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_audit_key_id ON public.api_key_audit_log USING btree (key_id); + + +-- +-- Name: idx_backup_history_service; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_history_service ON public.backup_history USING btree (service_name); + + +-- +-- Name: idx_backup_history_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_history_time ON public.backup_history USING btree (executed_at); + + +-- +-- Name: idx_backup_registry_service; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_registry_service ON public.backup_registry USING btree (service_name); + + +-- +-- Name: idx_backup_registry_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_registry_time ON public.backup_registry USING btree (created_at); + + +-- +-- Name: idx_backup_storage_stats_tier; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_storage_stats_tier ON public.backup_storage_stats USING btree (tier); + + +-- +-- Name: idx_backup_storage_stats_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_backup_storage_stats_time ON public.backup_storage_stats USING btree (record_time); + + +-- +-- Name: idx_bindings_identity; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_bindings_identity ON public.identity_bindings USING btree (identity_id); + + +-- +-- Name: idx_bindings_lookup; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_bindings_lookup ON public.identity_bindings USING btree (uuid, binding_type, binding_value); + + +-- +-- Name: idx_child_parent; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_child_parent ON public.child_chunks USING btree (parent_id); + + +-- +-- Name: idx_chunk_vectors_chunk_id; Type: INDEX; Schema: public; Owner: 
accusys +-- + +CREATE INDEX idx_chunk_vectors_chunk_id ON public.chunk_vectors USING btree (chunk_id); + + +-- +-- Name: idx_chunk_vectors_embedding_hnsw; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunk_vectors_embedding_hnsw ON public.chunk_vectors USING hnsw (embedding_vector public.vector_cosine_ops); + + +-- +-- Name: idx_chunk_vectors_file_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunk_vectors_file_id ON public.chunk_vectors USING btree (file_id); + + +-- +-- Name: idx_chunk_vectors_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunk_vectors_uuid ON public.chunk_vectors USING btree (uuid); + + +-- +-- Name: idx_chunks_content_gin; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_content_gin ON public.chunks USING gin (content); + + +-- +-- Name: idx_chunks_file_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_file_id ON public.chunks USING btree (file_id); + + +-- +-- Name: idx_chunks_search_vector; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_search_vector ON public.chunks USING gin (search_vector); + + +-- +-- Name: idx_chunks_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_time ON public.chunks USING btree (start_time, end_time); + + +-- +-- Name: idx_chunks_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_type ON public.chunks USING btree (chunk_type); + + +-- +-- Name: idx_chunks_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_uuid ON public.chunks USING btree (uuid); + + +-- +-- Name: idx_chunks_uuid_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_chunks_uuid_type ON public.chunks USING btree (uuid, chunk_type); + + +-- +-- Name: idx_face_clusters_video_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_clusters_video_uuid ON 
public.face_clusters USING btree (video_uuid); + + +-- +-- Name: idx_face_detections_cluster; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_cluster ON public.face_detections USING btree (cluster_id); + + +-- +-- Name: idx_face_detections_embedding; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_embedding ON public.face_detections USING ivfflat (embedding public.vector_cosine_ops) WITH (lists='100'); + + +-- +-- Name: idx_face_detections_face_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_face_id ON public.face_detections USING btree (face_id); + + +-- +-- Name: idx_face_detections_frame; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_frame ON public.face_detections USING btree (video_uuid, frame_number); + + +-- +-- Name: idx_face_detections_identity; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_identity ON public.face_detections USING btree (identity_id); + + +-- +-- Name: idx_face_detections_video_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_detections_video_uuid ON public.face_detections USING btree (video_uuid); + + +-- +-- Name: idx_face_identities_embedding; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_identities_embedding ON public.face_identities USING ivfflat (embedding public.vector_cosine_ops) WITH (lists='100'); + + +-- +-- Name: idx_face_recognition_results_created_at; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_face_recognition_results_created_at ON public.face_recognition_results USING btree (created_at); + + +-- +-- Name: idx_file_registry_cluster; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_file_registry_cluster ON public.file_registry USING btree (user_cluster); + + +-- +-- Name: idx_file_registry_status; Type: INDEX; Schema: public; Owner: accusys 
+-- + +CREATE INDEX idx_file_registry_status ON public.file_registry USING btree (status); + + +-- +-- Name: idx_file_registry_tier; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_file_registry_tier ON public.file_registry USING btree (storage_tier); + + +-- +-- Name: idx_frames_face_gin; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_face_gin ON public.frames USING gin (face_results); + + +-- +-- Name: idx_frames_file_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_file_id ON public.frames USING btree (file_id); + + +-- +-- Name: idx_frames_frame; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_frame ON public.frames USING btree (file_id, frame_number); + + +-- +-- Name: idx_frames_ocr_gin; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_ocr_gin ON public.frames USING gin (ocr_results); + + +-- +-- Name: idx_frames_pose_gin; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_pose_gin ON public.frames USING gin (pose_results); + + +-- +-- Name: idx_frames_timestamp; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_timestamp ON public.frames USING btree (file_id, "timestamp"); + + +-- +-- Name: idx_frames_yolo_gin; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_frames_yolo_gin ON public.frames USING gin (yolo_objects); + + +-- +-- Name: idx_gitea_tokens_key_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_gitea_tokens_key_id ON public.gitea_tokens USING btree (api_key_id); + + +-- +-- Name: idx_gitea_tokens_user; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_gitea_tokens_user ON public.gitea_tokens USING btree (gitea_user); + + +-- +-- Name: idx_identities_face_embedding; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_identities_face_embedding ON public.identities USING ivfflat (face_embedding 
public.vector_cosine_ops) WITH (lists='100'); + + +-- +-- Name: idx_identities_identity_embedding; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_identities_identity_embedding ON public.identities USING ivfflat (identity_embedding public.vector_cosine_ops) WITH (lists='100'); + + +-- +-- Name: idx_identities_reference_data; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_identities_reference_data ON public.identities USING gin (reference_data); + + +-- +-- Name: idx_identities_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_identities_uuid ON public.identities USING btree (uuid); + + +-- +-- Name: idx_identities_voice_embedding; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_identities_voice_embedding ON public.identities USING ivfflat (voice_embedding public.vector_cosine_ops) WITH (lists='50'); + + +-- +-- Name: idx_monitor_anomalies_severity; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_anomalies_severity ON public.monitor_anomalies USING btree (severity); + + +-- +-- Name: idx_monitor_anomalies_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_anomalies_time ON public.monitor_anomalies USING btree (detected_at); + + +-- +-- Name: idx_monitor_anomalies_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_anomalies_type ON public.monitor_anomalies USING btree (anomaly_type); + + +-- +-- Name: idx_monitor_databases_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_databases_time ON public.monitor_databases USING btree (checked_at); + + +-- +-- Name: idx_monitor_databases_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_databases_type ON public.monitor_databases USING btree (db_type); + + +-- +-- Name: idx_monitor_external_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_external_name ON 
public.monitor_external USING btree (target_name); + + +-- +-- Name: idx_monitor_external_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_external_time ON public.monitor_external USING btree (checked_at); + + +-- +-- Name: idx_monitor_jobs_created_at; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_created_at ON public.monitor_jobs USING btree (created_at); + + +-- +-- Name: idx_monitor_jobs_processors; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_processors ON public.monitor_jobs USING gin (processors); + + +-- +-- Name: idx_monitor_jobs_status; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_status ON public.monitor_jobs USING btree (status); + + +-- +-- Name: idx_monitor_jobs_status_created; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_status_created ON public.monitor_jobs USING btree (status, created_at); + + +-- +-- Name: idx_monitor_jobs_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_uuid ON public.monitor_jobs USING btree (uuid); + + +-- +-- Name: idx_monitor_jobs_video_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_jobs_video_id ON public.monitor_jobs USING btree (video_id); + + +-- +-- Name: idx_monitor_logins_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_logins_time ON public.monitor_logins USING btree (login_at); + + +-- +-- Name: idx_monitor_logins_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_logins_type ON public.monitor_logins USING btree (user_type); + + +-- +-- Name: idx_monitor_portal_pages_url; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_portal_pages_url ON public.monitor_portal_pages USING btree (page_url); + + +-- +-- Name: idx_monitor_portal_users_username; Type: INDEX; Schema: public; Owner: accusys 
+-- + +CREATE INDEX idx_monitor_portal_users_username ON public.monitor_portal_users USING btree (username); + + +-- +-- Name: idx_monitor_services_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_services_name ON public.monitor_services USING btree (service_name); + + +-- +-- Name: idx_monitor_services_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_services_time ON public.monitor_services USING btree (checked_at); + + +-- +-- Name: idx_monitor_sessions_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_sessions_type ON public.monitor_sessions USING btree (session_type); + + +-- +-- Name: idx_monitor_sessions_username; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_sessions_username ON public.monitor_sessions USING btree (username); + + +-- +-- Name: idx_monitor_workflows_active; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_workflows_active ON public.monitor_workflows USING btree (is_active); + + +-- +-- Name: idx_monitor_workflows_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_workflows_id ON public.monitor_workflows USING btree (workflow_id); + + +-- +-- Name: idx_monitor_workflows_idle; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_monitor_workflows_idle ON public.monitor_workflows USING btree (idle_days); + + +-- +-- Name: idx_n8n_api_keys_key_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_n8n_api_keys_key_id ON public.n8n_api_keys USING btree (momentry_api_key_id); + + +-- +-- Name: idx_n8n_api_keys_label; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_n8n_api_keys_label ON public.n8n_api_keys USING btree (label); + + +-- +-- Name: idx_node_process_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_node_process_name ON public.node_process_tracking USING btree (process_name); + + +-- 
+-- Name: idx_node_version_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_node_version_name ON public.node_version_baseline USING btree (runtime_name); + + +-- +-- Name: idx_person_appearances_face; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_appearances_face ON public.person_appearances USING btree (face_detection_id) WHERE (face_detection_id IS NOT NULL); + + +-- +-- Name: idx_person_appearances_person; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_appearances_person ON public.person_appearances USING btree (person_id); + + +-- +-- Name: idx_person_appearances_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_appearances_time ON public.person_appearances USING btree (video_uuid, start_time, end_time); + + +-- +-- Name: idx_person_appearances_video; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_appearances_video ON public.person_appearances USING btree (video_uuid); + + +-- +-- Name: idx_person_identities_confirmed; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_identities_confirmed ON public.person_identities USING btree (is_confirmed) WHERE (is_confirmed = true); + + +-- +-- Name: idx_person_identities_face; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_identities_face ON public.person_identities USING btree (face_identity_id) WHERE (face_identity_id IS NOT NULL); + + +-- +-- Name: idx_person_identities_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_identities_name ON public.person_identities USING btree (name) WHERE (name IS NOT NULL); + + +-- +-- Name: idx_person_identities_speaker; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_identities_speaker ON public.person_identities USING btree (speaker_id) WHERE (speaker_id IS NOT NULL); + + +-- +-- Name: idx_person_identities_video_uuid; Type: INDEX; 
Schema: public; Owner: accusys +-- + +CREATE INDEX idx_person_identities_video_uuid ON public.person_identities USING btree (video_uuid); + + +-- +-- Name: idx_processor_results_job; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_processor_results_job ON public.processor_results USING btree (job_id); + + +-- +-- Name: idx_processor_results_output_data; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_processor_results_output_data ON public.processor_results USING gin (output_data); + + +-- +-- Name: idx_processor_results_status; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_processor_results_status ON public.processor_results USING btree (status); + + +-- +-- Name: idx_processor_results_video; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_processor_results_video ON public.processor_results USING btree (video_id); + + +-- +-- Name: idx_python_script_path; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_python_script_path ON public.python_script_tracking USING btree (script_path); + + +-- +-- Name: idx_python_version_name; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_python_version_name ON public.python_version_baseline USING btree (runtime_name); + + +-- +-- Name: idx_storage_usage_cluster; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_storage_usage_cluster ON public.storage_usage_stats USING btree (user_cluster); + + +-- +-- Name: idx_storage_usage_time; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_storage_usage_time ON public.storage_usage_stats USING btree (record_time); + + +-- +-- Name: idx_video_events_type; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_video_events_type ON public.video_events USING btree (event_type); + + +-- +-- Name: idx_video_events_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_video_events_uuid ON public.video_events 
USING btree (uuid); + + +-- +-- Name: idx_video_identities_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_video_identities_uuid ON public.video_identities USING btree (uuid); + + +-- +-- Name: idx_videos_file_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_videos_file_uuid ON public.videos USING btree (file_uuid); + + +-- +-- Name: idx_videos_job_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_videos_job_id ON public.videos USING btree (job_id); + + +-- +-- Name: idx_videos_status; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_videos_status ON public.videos USING btree (status); + + +-- +-- Name: idx_videos_user_id; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_videos_user_id ON public.videos USING btree (user_id); + + +-- +-- Name: idx_videos_uuid; Type: INDEX; Schema: public; Owner: accusys +-- + +CREATE INDEX idx_videos_uuid ON public.videos USING btree (file_uuid); + + +-- +-- Name: chunks chunks_search_vector_trigger; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER chunks_search_vector_trigger BEFORE INSERT OR UPDATE ON public.chunks FOR EACH ROW EXECUTE FUNCTION public.update_search_vector(); + + +-- +-- Name: person_appearances trigger_update_person_appearance_stats; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER trigger_update_person_appearance_stats AFTER INSERT OR DELETE OR UPDATE ON public.person_appearances FOR EACH ROW EXECUTE FUNCTION public.trigger_update_person_stats(); + + +-- +-- Name: face_identities update_face_identities_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER update_face_identities_updated_at BEFORE UPDATE ON public.face_identities FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: face_recognition_results update_face_recognition_results_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE 
TRIGGER update_face_recognition_results_updated_at BEFORE UPDATE ON public.face_recognition_results FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: monitor_jobs update_monitor_jobs_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER update_monitor_jobs_updated_at BEFORE UPDATE ON public.monitor_jobs FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: person_identities update_person_identities_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER update_person_identities_updated_at BEFORE UPDATE ON public.person_identities FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: processor_results update_processor_results_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER update_processor_results_updated_at BEFORE UPDATE ON public.processor_results FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: videos update_videos_updated_at; Type: TRIGGER; Schema: public; Owner: accusys +-- + +CREATE TRIGGER update_videos_updated_at BEFORE UPDATE ON public.videos FOR EACH ROW EXECUTE FUNCTION public.update_updated_at_column(); + + +-- +-- Name: castings castings_character_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.castings + ADD CONSTRAINT castings_character_id_fkey FOREIGN KEY (character_id) REFERENCES public.characters(id) ON DELETE CASCADE; + + +-- +-- Name: child_chunks child_chunks_parent_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.child_chunks + ADD CONSTRAINT child_chunks_parent_id_fkey FOREIGN KEY (parent_id) REFERENCES public.parent_chunks(id) ON DELETE CASCADE; + + +-- +-- Name: chunk_vectors chunk_vectors_file_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunk_vectors + ADD CONSTRAINT chunk_vectors_file_id_fkey FOREIGN KEY 
(file_id) REFERENCES public.videos(id); + + +-- +-- Name: chunks chunks_file_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.chunks + ADD CONSTRAINT chunks_file_id_fkey FOREIGN KEY (file_id) REFERENCES public.videos(id); + + +-- +-- Name: face_detections face_detections_identity_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.face_detections + ADD CONSTRAINT face_detections_identity_id_fkey FOREIGN KEY (identity_id) REFERENCES public.face_identities(id) ON DELETE SET NULL; + + +-- +-- Name: file_lifecycle file_lifecycle_file_uuid_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.file_lifecycle + ADD CONSTRAINT file_lifecycle_file_uuid_fkey FOREIGN KEY (file_uuid) REFERENCES public.file_registry(file_uuid); + + +-- +-- Name: monitor_jobs fk_monitor_jobs_video_id; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.monitor_jobs + ADD CONSTRAINT fk_monitor_jobs_video_id FOREIGN KEY (video_id) REFERENCES public.videos(id) ON DELETE CASCADE; + + +-- +-- Name: videos fk_videos_job_id; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.videos + ADD CONSTRAINT fk_videos_job_id FOREIGN KEY (job_id) REFERENCES public.monitor_jobs(id) ON DELETE SET NULL; + + +-- +-- Name: frames frames_file_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.frames + ADD CONSTRAINT frames_file_id_fkey FOREIGN KEY (file_id) REFERENCES public.videos(id); + + +-- +-- Name: identity_bindings identity_bindings_identity_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.identity_bindings + ADD CONSTRAINT identity_bindings_identity_id_fkey FOREIGN KEY (identity_id) REFERENCES public.video_identities(id) ON DELETE CASCADE; + + +-- +-- Name: person_appearances person_appearances_face_detection_id_fkey; Type: FK CONSTRAINT; Schema: 
public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_appearances + ADD CONSTRAINT person_appearances_face_detection_id_fkey FOREIGN KEY (face_detection_id) REFERENCES public.face_detections(id) ON DELETE SET NULL; + + +-- +-- Name: person_appearances person_appearances_person_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_appearances + ADD CONSTRAINT person_appearances_person_id_fkey FOREIGN KEY (person_id) REFERENCES public.person_identities(person_id) ON DELETE CASCADE; + + +-- +-- Name: person_identities person_identities_face_identity_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.person_identities + ADD CONSTRAINT person_identities_face_identity_id_fkey FOREIGN KEY (face_identity_id) REFERENCES public.face_identities(id) ON DELETE SET NULL; + + +-- +-- Name: processor_results processor_results_job_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.processor_results + ADD CONSTRAINT processor_results_job_id_fkey FOREIGN KEY (job_id) REFERENCES public.monitor_jobs(id) ON DELETE CASCADE; + + +-- +-- Name: processor_results processor_results_video_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: accusys +-- + +ALTER TABLE ONLY public.processor_results + ADD CONSTRAINT processor_results_video_id_fkey FOREIGN KEY (video_id) REFERENCES public.videos(id) ON DELETE CASCADE; + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict hRi4nBNv2E5FXxBTf47fTk0vxfJNiXtnegSYFeraY46zaCyMMlYNanEdl70C1E7 + diff --git a/scripts/__pycache__/redis_publisher.cpython-311.pyc b/scripts/__pycache__/redis_publisher.cpython-311.pyc deleted file mode 100644 index 5217280..0000000 Binary files a/scripts/__pycache__/redis_publisher.cpython-311.pyc and /dev/null differ diff --git a/scripts/asrx_processor.py b/scripts/asrx_processor.py index aaa2bc7..6511b33 100755 --- a/scripts/asrx_processor.py +++ b/scripts/asrx_processor.py @@ -1,124 
+1,320 @@ #!/opt/homebrew/bin/python3.11 """ -ASRX Processor - Speaker Diarization -Uses whisperx for speaker diarization (local model) +ASRX Processor - Hybrid Pipeline Wrapper + +Pipeline: + 1. ffprobe → select best audio track → ffmpeg → 16kHz mono WAV + 2. SelfASRXFixed.process() (7-step hybrid speaker diarization) + 3. Convert to Rust-expected format """ import sys import json import argparse import os +import subprocess +import tempfile +from pathlib import Path sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +sys.path.insert( + 0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "asrx_self") +) + from redis_publisher import RedisPublisher -def process_asrx(video_path: str, output_path: str, uuid: str = ""): - """Process video for speaker diarization using whisperx""" +def probe_audio_tracks(video_path: str) -> list: + """ffprobe 列出所有音軌""" + cmd = [ + "ffprobe", "-v", "quiet", "-print_format", "json", + "-show_streams", "-select_streams", "a", video_path, + ] + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + data = json.loads(result.stdout) + tracks = [] + for stream in data.get("streams", []): + tracks.append({ + "index": stream.get("index"), + "codec": stream.get("codec_name"), + "language": stream.get("tags", {}).get("language", "und"), + "channels": stream.get("channels", 0), + "sample_rate": stream.get("sample_rate", "0"), + }) + return tracks + except Exception as e: + print(f"[ASRX] ffprobe failed: {e}") + return [] + +def select_best_track(tracks: list) -> int: + """選最佳音軌: English > 最多channels > 0""" + if not tracks: + return 0 + for i, t in enumerate(tracks): + if t["language"] in ("eng", "en"): + return i + best = 0 + for i, t in enumerate(tracks): + if t["channels"] > tracks[best]["channels"]: + best = i + return best + + +def extract_audio_to_wav(video_path: str, track_index: int, output_wav: str) -> bool: + """ffmpeg 提取音軌為 16kHz mono WAV""" + cmd = [ + "ffmpeg", "-y", "-v", "quiet", + "-i", 
video_path, + "-map", f"0:{track_index}", + "-ar", "16000", + "-ac", "1", + "-sample_fmt", "s16", + output_wav, + ] + try: + subprocess.run(cmd, check=True, capture_output=True, timeout=300) + return True + except Exception as e: + print(f"[ASRX] ffmpeg extraction failed: {e}") + return False + + +def _cleanup(tmp_dir): + if tmp_dir and os.path.exists(tmp_dir): + import shutil + shutil.rmtree(tmp_dir, ignore_errors=True) + + +def _atomic_write(path: str, data: dict): + tmp = path + ".tmp" + with open(tmp, "w") as f: + json.dump(data, f, indent=2) + os.rename(tmp, path) + + +def _shared_audio_setup(video_path): + """提取音頻,回傳 (tmp_dir, wav_path)""" + tracks = probe_audio_tracks(video_path) + track_idx = select_best_track(tracks) if tracks else 0 + actual_track_index = tracks[track_idx]["index"] if tracks else track_idx + + tmp_dir = tempfile.mkdtemp(prefix="asrx_") + wav_path = os.path.join(tmp_dir, "audio.wav") + + if extract_audio_to_wav(video_path, actual_track_index, wav_path): + return tmp_dir, wav_path + print("[ASRX] Audio extraction failed, falling back to original file", + file=sys.stderr) + return tmp_dir, video_path + + +def _convert_result(result, output_path): + """Stage 3: 將 SelfASRXFixed result 轉為 Rust-expected format""" + fps = 30.0 + base_name = os.path.basename(output_path) + uuid_part = base_name.split(".")[0] + probe_path = os.path.join(os.path.dirname(output_path), + f"{uuid_part}.probe.json") + if os.path.exists(probe_path): + try: + with open(probe_path) as pf: + probe_data = json.load(pf) + if "fps" in probe_data: + fps = float(probe_data["fps"]) + except Exception: + pass + + output_result = { + "language": result.get("language"), + "segments": [], + "n_speakers": result.get("n_speakers", 0), + "speaker_stats": result.get("speaker_stats", {}), + } + + for seg in result.get("segments", []): + start_sec = seg["start"] + end_sec = seg["end"] + output_result["segments"].append({ + "start_time": start_sec, + "end_time": end_sec, + "start_frame": 
int(start_sec * fps), + "end_frame": int(end_sec * fps), + "text": seg.get("text", ""), + "speaker_id": seg.get("speaker_id", seg.get("speaker", "")), + "language": seg.get("language", ""), + "lang_prob": seg.get("lang_prob", 0.0), + "quality": seg.get("quality", 0.0), + }) + + if "references" in result: + output_result["references"] = result["references"] + + return output_result + + +def process_asrx(video_path: str, output_path: str, uuid: str = "", + file_uuid: str = "", resume: bool = False): + """主處理函數""" publisher = RedisPublisher(uuid) if uuid else None if publisher: publisher.info("asrx", "ASRX_START") - try: - import whisperx - import torch - except ImportError: - if publisher: - publisher.error("asrx", "whisperx not installed") - result = {"language": None, "segments": []} - if publisher: - publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - return result + checkpoint_path = output_path + ".stage1.json" - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") + # ── Phase 2: Resume from checkpoint (Steps 4-7 only) ── + if resume and os.path.exists(checkpoint_path): + print(f"[ASRX] Found checkpoint, resuming from Step 4...") + tmp_dir, audio_input = _shared_audio_setup(video_path) + try: + from asrx_self.main_fixed import SelfASRXFixed + asrx = SelfASRXFixed() + + result = asrx.resume_from_checkpoint( + checkpoint_path, audio_input, output_path=output_path, + ) + + if "error" in result: + if publisher: + publisher.error("asrx", result["error"]) + output_result = {"language": None, "segments": []} + _atomic_write(output_path, output_result) + if publisher: + publisher.complete("asrx", "0 segments") + _cleanup(tmp_dir) + return output_result + + output_result = _convert_result(result, output_path) + + if publisher: + publisher.info("asrx", + f"ASRX_COMPLETE:{len(output_result['segments'])}") + + _atomic_write(output_path, output_result) + + if publisher: + publisher.complete( + "asrx", 
f"{len(output_result['segments'])} segments") + + print(f"[ASRX] Saved {len(output_result['segments'])} segments " + f"to {output_path}", file=sys.stderr) + + # 刪除 checkpoint(完成後清理) + try: + os.remove(checkpoint_path) + print(f"[ASRX] Removed checkpoint: {checkpoint_path}") + except Exception: + pass + + _cleanup(tmp_dir) + return output_result + except Exception as e: + if publisher: + publisher.error("asrx", str(e)) + import traceback + traceback.print_exc() + output_result = {"language": None, "segments": []} + _atomic_write(output_path, output_result) + if publisher: + publisher.complete("asrx", "0 segments") + _cleanup(tmp_dir) + return output_result + + # ── Phase 1: Full 7-step pipeline ── + tmp_dir = None try: - # Fix for PyTorch 2.6+ compatibility - # Allow omegaconf types in torch.load - import omegaconf + # Stage 1: Audio Track Preprocessing + tmp_dir, audio_input = _shared_audio_setup(video_path) - torch.serialization.add_safe_globals( - [omegaconf.listconfig.ListConfig, omegaconf.dictconfig.DictConfig] - ) + # Stage 2: SelfASRXFixed 7-step pipeline + from asrx_self.main_fixed import SelfASRXFixed - # Load model - using faster-whisper for better performance - # You can also use: "large-v3", "medium", "small", "base", "tiny" - model = whisperx.load_model("base", device="cpu", compute_type="int8") + if publisher: + publisher.info("asrx", "ASRX_LOADING_MODEL") + + asrx = SelfASRXFixed() if publisher: publisher.info("asrx", "ASRX_TRANSCRIBING") - # Transcribe audio - result = model.transcribe(video_path, language="en") - - # Align timestamps - model_a, metadata = whisperx.load_align_model(language_code=result["language"]) - result = whisperx.align( - result["segments"], model_a, metadata, video_path, device="cpu" + result = asrx.process( + audio_input, + output_path=None, + file_uuid=file_uuid or None, + max_speakers=10, + quality_threshold=0.85, + checkpoint_path=checkpoint_path, ) - # Diarization (speaker segmentation) - try: - from whisperx.diarize 
import DiarizationPipeline - - # DiarizationPipeline parameters: model_name, token, device, cache_dir - diarize_model = DiarizationPipeline( - model_name="pyannote/speaker-diarization", - token=None, # HuggingFace token (None for public models) - device="cpu", - ) - diarize_segments = diarize_model(video_path) - - # Assign speaker labels - result = whisperx.assign_word_speakers(diarize_segments, result) - except Exception as e: + if "error" in result: if publisher: - publisher.info("asrx", f"Diarization skipped: {e}") + publisher.error("asrx", result["error"]) + output_result = {"language": None, "segments": []} + _atomic_write(output_path, output_result) + if publisher: + publisher.complete("asrx", "0 segments") + _cleanup(tmp_dir) + return output_result - # Build output - segments = [] - for seg in result.get("segments", []): - text = seg.get("text", "").strip() - if text: - segments.append( - { - "start": seg.get("start", 0.0), - "end": seg.get("end", 0.0), - "text": text, - "speaker_id": seg.get("speaker", None), - } - ) - - output_result = {"language": result.get("language"), "segments": segments} + # Stage 3: Convert to Rust-expected format + output_result = _convert_result(result, output_path) if publisher: - publisher.complete("asrx", f"{len(segments)} segments") + publisher.info("asrx", f"ASRX_COMPLETE:{len(output_result['segments'])}") - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2) + _atomic_write(output_path, output_result) + if publisher: + publisher.complete("asrx", + f"{len(output_result['segments'])} segments") + + print(f"[ASRX] Saved {len(output_result['segments'])} segments " + f"to {output_path}", file=sys.stderr) + + _cleanup(tmp_dir) return output_result except Exception as e: if publisher: - publisher.error("asrx", f"Error: {e}") - result = {"language": None, "segments": []} + publisher.error("asrx", str(e)) + import traceback + traceback.print_exc() + + output_result = {"language": None, "segments": []} + 
_atomic_write(output_path, output_result) if publisher: publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - return result + # 如果 checkpoint 已存在(Step 3 完成後 crash),保留 WAV 給 resume + if not os.path.exists(checkpoint_path): + _cleanup(tmp_dir) + else: + print(f"[ASRX] Checkpoint saved, keeping temp dir for resume: {tmp_dir}") + return output_result if __name__ == "__main__": - parser = argparse.ArgumentParser(description="ASRX Speaker Diarization") - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") + parser = argparse.ArgumentParser(description="ASRX Processor (Hybrid Pipeline)") + parser.add_argument("video_path", help="Path to video/audio file") + parser.add_argument("output_path", help="Path to output JSON file") + parser.add_argument("--uuid", help="UUID for Redis publishing", default="") + parser.add_argument("--file-uuid", help="File UUID for Qdrant storage", default="") + parser.add_argument("--resume", action="store_true", + help="Resume from checkpoint (skip Steps 1-3)") + args = parser.parse_args() - process_asrx(args.video_path, args.output_path, args.uuid) + if not args.resume and not Path(args.video_path).exists(): + print(f"Error: Video file not found: {args.video_path}") + sys.exit(1) + + result = process_asrx(args.video_path, args.output_path, args.uuid, + args.file_uuid, resume=args.resume) + + print("\n[Summary]") + print(f" Total segments: {len(result.get('segments', []))}") + if "speaker_stats" in result: + print(f" Detected speakers: {len(result['speaker_stats'])}") + for speaker, stats in result["speaker_stats"].items(): + print(f" {speaker}: {stats['count']} segments") diff --git a/scripts/asrx_processor_contract_v1.py b/scripts/asrx_processor_contract_v1.py deleted file mode 100644 index a06bcc3..0000000 --- 
a/scripts/asrx_processor_contract_v1.py +++ /dev/null @@ -1,584 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX Processor - AI-Driven Processor Contract Version 1.0 - -Compliant with AI-Driven Processor Contract v1.0 -Effective Date: 2025-03-27 - -Features: -1. Standardized command-line interface -2. Redis progress reporting -3. Signal handling (SIGTERM, SIGINT) -4. Health check mode -5. Resource monitoring -6. Contract-compliant JSON output -7. Unified configuration -""" - -import sys -import json -import os -import argparse -import signal -import time -import subprocess -import traceback -from datetime import datetime -from typing import Dict, Any - -# Redis Publisher for progress reporting -try: - sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - from redis_publisher import RedisPublisher - - REDIS_AVAILABLE = True -except ImportError: - REDIS_AVAILABLE = False - print( - "WARNING: RedisPublisher not available, progress reporting disabled", - file=sys.stderr, - ) - -# Contract version -CONTRACT_VERSION = "1.0" -PROCESSOR_NAME = ( - "/Users/accusys/momentry_core_0.1/scripts/asrx_processor_contract_v1.py" -) -PROCESSOR_VERSION = "1.0.0" -MODEL_NAME = "pyannote" -MODEL_VERSION = "3.1" - -# Unified configuration defaults -DEFAULT_TIMEOUT = 7200 # 2 hours for speaker diarization -DEFAULT_MODEL_SIZE = "base" -DEFAULT_DEVICE = "cpu" -DEFAULT_LANGUAGE = "auto" -DEFAULT_BATCH_SIZE = 16 -DEFAULT_DIARIZATION = True -DEFAULT_MIN_SPEAKERS = 1 -DEFAULT_MAX_SPEAKERS = 10 - - -# Signal handling with timeout support -class SignalHandler: - """Handle system signals for graceful shutdown""" - - def __init__(self): - self.should_exit = False - self.exit_code = 0 - signal.signal(signal.SIGTERM, self.handle_signal) - signal.signal(signal.SIGINT, self.handle_signal) - - def handle_signal(self, signum, frame): - """Handle termination signals""" - print(f"\n收到信号 {signum},正在优雅关闭...") - self.should_exit = True - self.exit_code = 128 + signum - - def should_stop(self): - 
"""Check if should stop processing""" - return self.should_exit - - -# Timeout manager -class TimeoutManager: - """Manage processing timeouts""" - - def __init__(self, timeout_seconds: int): - self.timeout_seconds = timeout_seconds - self.start_time = time.time() - self.timer = None - - def check_timeout(self) -> bool: - """Check if timeout has been reached""" - elapsed = time.time() - self.start_time - return elapsed > self.timeout_seconds - - def get_remaining_time(self) -> float: - """Get remaining time in seconds""" - elapsed = time.time() - self.start_time - return max(0, self.timeout_seconds - elapsed) - - def format_remaining_time(self) -> str: - """Format remaining time as HH:MM:SS""" - remaining = self.get_remaining_time() - hours = int(remaining // 3600) - minutes = int((remaining % 3600) // 60) - seconds = int(remaining % 60) - return f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - -# Health check functions -def check_environment() -> Dict[str, Any]: - """Check environment and dependencies""" - checks = [] - - # Check 1: whisperx for speaker diarization - try: - import whisperx - - checks.append( - { - "name": "whisperx", - "status": "available", - "version": getattr(whisperx, "__version__", "unknown"), - } - ) - except ImportError: - checks.append({"name": "whisperx", "status": "missing", "version": None}) - - # Check 2: FFmpeg/FFprobe - try: - ffprobe_result = subprocess.run( - ["ffprobe", "-version"], - capture_output=True, - text=True, - timeout=5, - ) - if ffprobe_result.returncode == 0: - version_line = ffprobe_result.stdout.split("\n")[0] - checks.append( - {"name": "ffprobe", "status": "available", "version": version_line} - ) - else: - checks.append({"name": "ffprobe", "status": "error", "version": None}) - except (subprocess.TimeoutExpired, FileNotFoundError): - checks.append({"name": "ffprobe", "status": "missing", "version": None}) - - # Check 3: Redis (optional) - checks.append( - { - "name": "redis", - "status": "available" if 
REDIS_AVAILABLE else "optional", - "version": None, - } - ) - - # Check 4: Python version - checks.append( - { - "name": "python", - "status": "available", - "version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", - } - ) - - # Check 5: CUDA/GPU availability (optional) - try: - import torch - - cuda_available = torch.cuda.is_available() - checks.append( - { - "name": "cuda", - "status": "available" if cuda_available else "optional", - "version": torch.version.cuda if cuda_available else None, - } - ) - except ImportError: - checks.append({"name": "cuda", "status": "optional", "version": None}) - - return { - "timestamp": datetime.now().isoformat(), - "processor_name": PROCESSOR_NAME, - "processor_version": PROCESSOR_VERSION, - "contract_version": CONTRACT_VERSION, - "model_name": MODEL_NAME, - "model_version": MODEL_VERSION, - "checks": checks, - } - - -def check_video_file(video_path: str) -> Dict[str, Any]: - """Check video file properties""" - try: - result = subprocess.run( - [ - "ffprobe", - "-v", - "error", - "-select_streams", - "v:0", - "-show_entries", - "stream=codec_name,width,height,duration,r_frame_rate", - "-show_entries", - "format=duration,size", - "-of", - "json", - video_path, - ], - capture_output=True, - text=True, - timeout=10, - ) - - if result.returncode != 0: - return { - "valid": False, - "error": result.stderr[:200] if result.stderr else "Unknown error", - } - - info = json.loads(result.stdout) - - video_info = {} - if "streams" in info and len(info["streams"]) > 0: - stream = info["streams"][0] - video_info = { - "codec": stream.get("codec_name", "unknown"), - "width": int(stream.get("width", 0)), - "height": int(stream.get("height", 0)), - "duration": float(stream.get("duration", 0)), - "frame_rate": stream.get("r_frame_rate", "0/0"), - } - - format_info = {} - if "format" in info: - format_info = { - "format_duration": float(info["format"].get("duration", 0)), - "file_size": 
int(info["format"].get("size", 0)), - } - - return { - "valid": True, - "video_info": video_info, - "format_info": format_info, - "exists": os.path.exists(video_path), - "file_size": os.path.getsize(video_path) - if os.path.exists(video_path) - else 0, - } - - except Exception as e: - return {"valid": False, "error": str(e)} - - -# Main processing function -def process_asrx( - video_path: str, - output_path: str, - uuid: str = "", - model_size: str = DEFAULT_MODEL_SIZE, - device: str = DEFAULT_DEVICE, - language: str = DEFAULT_LANGUAGE, - batch_size: int = DEFAULT_BATCH_SIZE, - diarization: bool = DEFAULT_DIARIZATION, - min_speakers: int = DEFAULT_MIN_SPEAKERS, - max_speakers: int = DEFAULT_MAX_SPEAKERS, - timeout: int = DEFAULT_TIMEOUT, -) -> Dict[str, Any]: - """Process video for speaker diarization using whisperx""" - - # Initialize - signal_handler = SignalHandler() - timeout_manager = TimeoutManager(timeout) - publisher = RedisPublisher(uuid) if REDIS_AVAILABLE and uuid else None - - def publish(stage: str, message: str, data: Dict = None): - if publisher: - publisher.info(PROCESSOR_NAME, stage, message, data) - - publish("ASRX_START", f"开始处理: {os.path.basename(video_path)}") - - result = { - "processor_name": PROCESSOR_NAME, - "processor_version": PROCESSOR_VERSION, - "contract_version": CONTRACT_VERSION, - "model_name": MODEL_NAME, - "model_version": MODEL_VERSION, - "video_path": video_path, - "output_path": output_path, - "uuid": uuid, - "timestamp": datetime.now().isoformat(), - "parameters": { - "model_size": model_size, - "device": device, - "language": language, - "batch_size": batch_size, - "diarization": diarization, - "min_speakers": min_speakers, - "max_speakers": max_speakers, - "timeout": timeout, - }, - "success": False, - "error": None, - "segments": [], - "speakers": [], - "processing_time": 0, - "resource_usage": {}, - } - - start_time = time.time() - - try: - # Check timeout - if timeout_manager.check_timeout(): - raise TimeoutError(f"超时 
({timeout} 秒)") - - # Check if should exit - if signal_handler.should_stop(): - raise KeyboardInterrupt("收到停止信号") - - # Check video file - publish("ASRX_CHECK_VIDEO", "检查视频文件") - video_check = check_video_file(video_path) - if not video_check.get("valid", False): - raise ValueError(f"无效的视频文件: {video_check.get('error', '未知错误')}") - - result["video_info"] = video_check.get("video_info", {}) - result["format_info"] = video_check.get("format_info", {}) - - # Import whisperx - publish("ASRX_LOAD_MODEL", f"加载模型: {model_size}") - try: - import whisperx - except ImportError as e: - raise ImportError(f"whisperx 未安装: {e}") - - # Load model - publish("ASRX_LOADING", f"加载 whisperx 模型 ({model_size}, {device})") - model = whisperx.load_model( - model_size, - device=device, - compute_type="int8" if device == "cpu" else "float16", - ) - - # Transcribe - publish("ASRX_TRANSCRIBING", "转录音频") - transcript = model.transcribe( - video_path, - language=language if language != "auto" else None, - batch_size=batch_size, - ) - - # Align timestamps - publish("ASRX_ALIGNING", "对齐时间戳") - model_a, metadata = whisperx.load_align_model( - language_code=transcript["language"] - ) - transcript = whisperx.align( - transcript["segments"], - model_a, - metadata, - video_path, - device, - return_char_alignments=False, - ) - - # Speaker diarization - if diarization: - publish("ASRX_DIARIZATION", "说话人分离") - diarize_model = whisperx.DiarizationPipeline( - use_auth_token=None, device=device - ) - - # Add min/max speakers - diarize_segments = diarize_model( - video_path, - min_speakers=min_speakers, - max_speakers=max_speakers, - ) - - transcript = whisperx.assign_word_speakers(diarize_segments, transcript) - - # Extract speaker information - speakers = {} - for segment in transcript["segments"]: - if "speaker" in segment: - speaker_id = segment["speaker"] - if speaker_id not in speakers: - speakers[speaker_id] = { - "id": speaker_id, - "segment_count": 0, - "total_words": 0, - "total_duration": 0.0, - } - 
- speakers[speaker_id]["segment_count"] += 1 - speakers[speaker_id]["total_words"] += len( - segment.get("text", "").split() - ) - speakers[speaker_id]["total_duration"] += segment.get( - "end", 0 - ) - segment.get("start", 0) - - result["speakers"] = list(speakers.values()) - - # Format segments - segments = [] - for segment in transcript.get("segments", []): - segments.append( - { - "start": segment.get("start", 0.0), - "end": segment.get("end", 0.0), - "text": segment.get("text", ""), - "speaker": segment.get("speaker", None), - "words": segment.get("words", []), - "confidence": segment.get("confidence", 0.0), - } - ) - - result["segments"] = segments - result["language"] = transcript.get("language", "unknown") - result["success"] = True - - publish("ASRX_COMPLETE", f"完成: {len(segments)} 个片段") - - except TimeoutError as e: - result["error"] = f"处理超时: {e}" - publish("ASRX_TIMEOUT", f"超时: {e}") - except KeyboardInterrupt: - result["error"] = "处理被用户中断" - publish("ASRX_INTERRUPTED", "处理被中断") - except ImportError as e: - result["error"] = f"依赖缺失: {e}" - publish("ASRX_MISSING_DEPS", f"缺少依赖: {e}") - except Exception as e: - result["error"] = f"处理错误: {str(e)}" - publish("ASRX_ERROR", f"错误: {str(e)}") - traceback.print_exc() - - # Calculate processing time - processing_time = time.time() - start_time - result["processing_time"] = processing_time - - # Add resource usage - try: - import psutil - - process = psutil.Process() - memory_info = process.memory_info() - result["resource_usage"] = { - "cpu_percent": process.cpu_percent(), - "memory_mb": memory_info.rss / (1024 * 1024), - "user_time": process.cpu_times().user, - "system_time": process.cpu_times().system, - } - except ImportError: - result["resource_usage"] = {"error": "psutil not available"} - - # Save result - try: - with open(output_path, "w") as f: - json.dump(result, f, indent=2, ensure_ascii=False) - publish("ASRX_SAVED", f"结果保存到: {output_path}") - except Exception as e: - result["error"] = f"保存结果失败: 
{str(e)}" - publish("ASRX_SAVE_ERROR", f"保存失败: {str(e)}") - - return result - - -def main(): - """Main entry point""" - parser = argparse.ArgumentParser( - description=f"{PROCESSOR_NAME.upper()} Processor v{PROCESSOR_VERSION} - Speaker Diarization" - ) - parser.add_argument("video_path", help="Path to input video file") - parser.add_argument("output_path", help="Path to output JSON file") - parser.add_argument("--uuid", help="UUID for progress tracking", default="") - parser.add_argument( - "--model-size", - help=f"Model size (default: {DEFAULT_MODEL_SIZE})", - default=DEFAULT_MODEL_SIZE, - choices=["tiny", "base", "small", "medium", "large-v3"], - ) - parser.add_argument( - "--device", - help=f"Device to use (default: {DEFAULT_DEVICE})", - default=DEFAULT_DEVICE, - choices=["cpu", "cuda"], - ) - parser.add_argument( - "--language", - help=f"Language code or 'auto' (default: {DEFAULT_LANGUAGE})", - default=DEFAULT_LANGUAGE, - ) - parser.add_argument( - "--batch-size", - help=f"Batch size for processing (default: {DEFAULT_BATCH_SIZE})", - type=int, - default=DEFAULT_BATCH_SIZE, - ) - parser.add_argument( - "--no-diarization", - help="Disable speaker diarization", - action="store_true", - ) - parser.add_argument( - "--min-speakers", - help=f"Minimum number of speakers (default: {DEFAULT_MIN_SPEAKERS})", - type=int, - default=DEFAULT_MIN_SPEAKERS, - ) - parser.add_argument( - "--max-speakers", - help=f"Maximum number of speakers (default: {DEFAULT_MAX_SPEAKERS})", - type=int, - default=DEFAULT_MAX_SPEAKERS, - ) - parser.add_argument( - "--timeout", - help=f"Timeout in seconds (default: {DEFAULT_TIMEOUT})", - type=int, - default=DEFAULT_TIMEOUT, - ) - parser.add_argument( - "--health-check", - help="Run health check and exit", - action="store_true", - ) - parser.add_argument( - "--check-video", - help="Check video file and exit", - action="store_true", - ) - - args = parser.parse_args() - - # Health check mode - if args.health_check: - health = check_environment() - 
print(json.dumps(health, indent=2, ensure_ascii=False)) - return ( - 0 - if all(c["status"] in ["available", "optional"] for c in health["checks"]) - else 1 - ) - - # Video check mode - if args.check_video: - video_check = check_video_file(args.video_path) - print(json.dumps(video_check, indent=2, ensure_ascii=False)) - return 0 if video_check.get("valid", False) else 1 - - # Normal processing mode - result = process_asrx( - video_path=args.video_path, - output_path=args.output_path, - uuid=args.uuid, - model_size=args.model_size, - device=args.device, - language=args.language, - batch_size=args.batch_size, - diarization=not args.no_diarization, - min_speakers=args.min_speakers, - max_speakers=args.max_speakers, - timeout=args.timeout, - ) - - # Print result summary - if result.get("success", False): - print(f"✅ {PROCESSOR_NAME.upper()} 处理成功") - print(f" 片段数: {len(result.get('segments', []))}") - print(f" 说话人数: {len(result.get('speakers', []))}") - print(f" 处理时间: {result.get('processing_time', 0):.1f} 秒") - print(f" 输出文件: {args.output_path}") - return 0 - else: - print(f"❌ {PROCESSOR_NAME.upper()} 处理失败") - print(f" 错误: {result.get('error', '未知错误')}") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/asrx_processor_custom.py b/scripts/asrx_processor_custom.py deleted file mode 100644 index 33168e8..0000000 --- a/scripts/asrx_processor_custom.py +++ /dev/null @@ -1,328 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX Processor - Custom Implementation Wrapper -Uses SpeechBrain ECAPA-TDNN (no HuggingFace token required) - -Pipeline: - 1. Preprocess: ffprobe audio tracks → select best track → extract WAV - 2. Process: VAD (Silero) → Speaker embedding (ECAPA-TDNN) → Spectral clustering - 3. 
Output: segments with speaker_id -""" - -import sys -import json -import argparse -import os -import subprocess -import tempfile -from pathlib import Path - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -sys.path.insert( - 0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "asrx_self") -) - -from redis_publisher import RedisPublisher - - -def probe_audio_tracks(video_path: str) -> list: - """Use ffprobe to list all audio tracks in the video file.""" - cmd = [ - "ffprobe", "-v", "quiet", "-print_format", "json", - "-show_streams", "-select_streams", "a", video_path, - ] - try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - data = json.loads(result.stdout) - tracks = [] - for stream in data.get("streams", []): - track = { - "index": stream.get("index"), - "codec": stream.get("codec_name"), - "language": stream.get("tags", {}).get("language", "und"), - "channels": stream.get("channels", 0), - "sample_rate": stream.get("sample_rate", "0"), - } - tracks.append(track) - return tracks - except Exception as e: - print(f"[ASRX] ffprobe failed: {e}") - return [] - - -def select_best_track(tracks: list) -> int: - """Select the best audio track: English > first available > fallback to 0.""" - if not tracks: - return 0 - - # Priority 1: English track - for i, t in enumerate(tracks): - if t["language"] == "eng" or t["language"] == "en": - print(f"[ASRX] Selected English track (index {t['index']})") - return i - - # Priority 2: First track with the most channels - best = 0 - for i, t in enumerate(tracks): - if t["channels"] > tracks[best]["channels"]: - best = i - - print(f"[ASRX] Selected track {best} (lang={tracks[best]['language']}, ch={tracks[best]['channels']})") - return best - - -def extract_audio_to_wav(video_path: str, track_index: int, output_wav: str) -> bool: - """Extract selected audio track to 16kHz mono WAV using ffmpeg.""" - cmd = [ - "ffmpeg", "-y", "-v", "quiet", - "-i", video_path, - "-map", 
f"0:{track_index}", - "-ar", "16000", - "-ac", "1", - "-sample_fmt", "s16", - output_wav, - ] - try: - subprocess.run(cmd, check=True, capture_output=True, timeout=300) - return True - except Exception as e: - print(f"[ASRX] ffmpeg extraction failed: {e}") - return False - - -def _cleanup(tmp_dir): - """Clean up temporary directory.""" - if tmp_dir and os.path.exists(tmp_dir): - import shutil - shutil.rmtree(tmp_dir, ignore_errors=True) - - -def process_asrx_custom(video_path: str, output_path: str, uuid: str = ""): - """Process video for speaker diarization using custom implementation""" - - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("asrx", "ASRX_START") - - tmp_dir = None - - try: - # Ensure working directory is the scripts dir for model loading - script_dir = os.path.dirname(os.path.abspath(__file__)) - os.chdir(script_dir) - - # Debug: check ffmpeg availability - import shutil - ffmpeg_path = shutil.which("ffmpeg") - print(f"[ASRX] ffmpeg: {ffmpeg_path}", file=sys.stderr) - print(f"[ASRX] CWD: {os.getcwd()}", file=sys.stderr) - - # ---- Stage 1: Audio Track Preprocessing ---- - print("\n[ASRX] ===== Stage 1: Audio Track Analysis =====", file=sys.stderr) - print(f"[ASRX] Input: {video_path}", file=sys.stderr) - - tracks = probe_audio_tracks(video_path) - if tracks: - print(f"[ASRX] Found {len(tracks)} audio track(s):", file=sys.stderr) - for t in tracks: - print(f" Track {t['index']}: {t['codec']} {t['channels']}ch {t['sample_rate']}Hz lang={t['language']}", file=sys.stderr) - else: - print("[ASRX] No audio tracks found via ffprobe, using raw file", file=sys.stderr) - - # Select best track - track_idx = select_best_track(tracks) if tracks else 0 - actual_track_index = tracks[track_idx]["index"] if tracks else track_idx - - # Extract audio to WAV - tmp_dir = tempfile.mkdtemp(prefix="asrx_") - wav_path = os.path.join(tmp_dir, "audio.wav") - - if extract_audio_to_wav(video_path, actual_track_index, wav_path): - wav_size = 
os.path.getsize(wav_path) - print(f"[ASRX] Audio extracted: {wav_path} ({wav_size / 1024 / 1024:.1f}MB)", file=sys.stderr) - audio_input = wav_path - else: - print("[ASRX] Audio extraction failed, falling back to original file", file=sys.stderr) - audio_input = video_path - - # ---- Stage 2: Load ASR segments for time alignment ---- - # Try multiple paths to find ASR JSON - asr_segments = [] - asr_fallback_reason = "" - asr_candidates = [ - output_path.replace(".asrx.json", ".asr.json") if output_path else "", - os.path.join(os.path.dirname(output_path) if output_path else ".", os.path.basename(video_path).rsplit(".", 1)[0] + ".asr.json"), - os.path.join(os.path.dirname(output_path) if output_path else ".", "dd61fda85fee441fdd00ab5528213ff7.asr.json"), - ] - asr_path = "" - for candidate in asr_candidates: - if candidate and os.path.exists(candidate): - asr_path = candidate - break - if asr_path: - try: - with open(asr_path) as f: - asr_data = json.load(f) - asr_segments = asr_data.get("segments", []) - print(f"[ASRX] Loaded {len(asr_segments)} ASR segments from {asr_path}", file=sys.stderr) - asr_fallback_reason = f"loaded_{len(asr_segments)}_segments" - except Exception as e: - asr_fallback_reason = f"load_error_{e}" - print(f"[ASRX] Failed to load ASR segments: {e}", file=sys.stderr) - else: - asr_fallback_reason = f"asr_json_not_found_tried_{len(asr_candidates)}_paths" - print(f"[ASRX] ASR output not found, tried {len(asr_candidates)} paths. 
First candidate: {asr_candidates[0]}", file=sys.stderr) - - # ---- Stage 3: ASRX Processing ---- - from asrx_self.main_fixed import SelfASRXFixed - - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") - - asrx = SelfASRXFixed() - - if publisher: - publisher.info("asrx", "ASRX_TRANSCRIBING") - - if asr_segments: - # Use ASR segment boundaries for speaker embedding extraction - print(f"[ASRX] Using {len(asr_segments)} ASR segments for diarization", file=sys.stderr) - result = asrx.process_with_segments( - audio_input, - asr_segments, - output_path=None, - ) - else: - # Fallback: VAD-based diarization - result = asrx.process( - audio_input, - output_path=None, - min_speech_duration_ms=500, - max_speakers=10, - ) - - if "error" in result: - if publisher: - publisher.error("asrx", result["error"]) - - # Return empty result - output_result = {"language": None, "segments": []} - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2) - - if publisher: - publisher.complete("asrx", "0 segments") - - _cleanup(tmp_dir) - return output_result - - # Convert to Rust-expected format (start_frame/end_frame/speaker) - # Read fps from probe json ({file_uuid}.probe.json) - _debug = {"asr_fallback": asr_fallback_reason, "asr_path": asr_path} - fps = 30.0 - output_dir = os.path.dirname(output_path) if output_path else "." 
- base_name = os.path.basename(output_path) if output_path else "" - # Extract uuid from {uuid}.{type}.json format - uuid_part = base_name.split(".")[0] if base_name else "" - probe_candidates = [ - os.path.join(output_dir, f"{uuid_part}.probe.json"), - ] - for p in probe_candidates: - if os.path.exists(p): - try: - with open(p) as pf: - probe_data = json.load(pf) - if "fps" in probe_data: - fps = float(probe_data["fps"]) - print(f"[ASRX] FPS from probe: {fps}", file=sys.stderr) - break - except: - pass - output_result = { - "language": None, - "segments": [], - } - - # Convert segments - for seg in result["segments"]: - start_sec = seg["start"] - end_sec = seg["end"] - output_result["segments"].append( - { - "start_time": start_sec, - "end_time": end_sec, - "start_frame": int(start_sec * fps), - "end_frame": int(end_sec * fps), - "text": "", - "speaker_id": seg["speaker"], - } - ) - - # Add speaker_stats as optional metadata - if "speaker_stats" in result: - output_result["speaker_stats"] = result["speaker_stats"] - - # 傳遞 embeddings(每個 segment 對應的 192-D speaker embedding) - if "embeddings" in result: - output_result["embeddings"] = result["embeddings"] - - if publisher: - publisher.info("asrx", f"ASRX_COMPLETE:{len(output_result['segments'])}") - - # Save output - output_result["_debug"] = _debug - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2) - - if publisher: - publisher.complete("asrx", f"{len(output_result['segments'])} segments") - - print(f"[ASRX-Custom] Saved {len(output_result['segments'])} segments to {output_path}", file=sys.stderr) - - _cleanup(tmp_dir) - return output_result - - except Exception as e: - if publisher: - publisher.error("asrx", str(e)) - - import traceback - - traceback.print_exc() - - # Return empty result on error - output_result = {"language": None, "segments": []} - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2) - - if publisher: - publisher.complete("asrx", "0 segments") - - 
_cleanup(tmp_dir) - return output_result - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="ASRX Processor (Custom Implementation)" - ) - parser.add_argument("video_path", help="Path to video/audio file") - parser.add_argument("output_path", help="Path to output JSON file") - parser.add_argument("--uuid", help="UUID for Redis publishing", default="") - - args = parser.parse_args() - - if not Path(args.video_path).exists(): - print(f"Error: Video file not found: {args.video_path}") - sys.exit(1) - - result = process_asrx_custom(args.video_path, args.output_path, args.uuid) - - print("\n[Summary]") - print(f" Total segments: {len(result['segments'])}") - if "speaker_stats" in result: - print(f" Detected speakers: {len(result['speaker_stats'])}") - for speaker, stats in result["speaker_stats"].items(): - print(f" {speaker}: {stats['count']} segments") diff --git a/scripts/asrx_processor_simplified.py b/scripts/asrx_processor_simplified.py deleted file mode 100755 index deace63..0000000 --- a/scripts/asrx_processor_simplified.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX 處理器 - 簡化版 -先做轉錄,說話人分離可選 -修復 PyTorch 2.6 兼容性問題 -""" - -# Fix for PyTorch 2.6+ compatibility - MUST be set before importing torch -import os -os.environ["TORCH_FORCE_WEIGHTS_ONLY_LOAD"] = "0" - -import sys -import json -import argparse -import signal -import subprocess - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from redis_publisher import RedisPublisher - - -def signal_handler(signum, frame): - print(f"ASRX: Received signal {signum}, exiting...") - sys.exit(1) - - -def has_audio_stream(video_path): - """Check if video file has audio stream using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", - "error", - "-select_streams", - "a", - "-show_entries", - "stream=codec_type", - "-of", - "csv=p=0", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return 
bool(result.stdout.strip()) - except subprocess.CalledProcessError: - return False - except FileNotFoundError: - print("WARNING: ffprobe not found, assuming audio exists") - return True - - -def process_asrx(video_path: str, output_path: str, uuid: str = "", skip_diarization: bool = True): - """ - Process video for speaker diarization using whisperx - - Args: - video_path: Path to video file - output_path: Path to output JSON - uuid: UUID for Redis progress - skip_diarization: Skip speaker diarization (only transcription) - """ - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("asrx", "ASRX_START") - - try: - import whisperx - import torch - except ImportError as e: - if publisher: - publisher.error("asrx", f"Missing dependency: {e}") - result = {"language": None, "segments": []} - if publisher: - publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - # Check for audio stream - if not has_audio_stream(video_path): - if publisher: - publisher.info("asrx", "No audio stream detected, skipping transcription") - output = {"language": "", "language_probability": 0.0, "segments": []} - with open(output_path, "w") as f: - json.dump(output, f, indent=2) - if publisher: - publisher.complete("asrx", "0 segments (no audio)") - sys.stderr.write("ASRX: No audio stream, skipping transcription\n") - sys.stderr.flush() - sys.exit(0) - - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") - - try: - # Load model - if publisher: - publisher.info("asrx", "Loading whisperx base model (this may take a while)...") - - model = whisperx.load_model("base", device="cpu", compute_type="int8") - - if publisher: - publisher.info("asrx", "ASRX_TRANSCRIBING") - - # Transcribe with language detection - result = model.transcribe(video_path) - - if publisher: - publisher.info("asrx", 
f"ASRX_LANGUAGE:{result.get('language', 'unknown')}") - - # Build output (without diarization for now) - segments = [] - for seg in result.get("segments", []): - text = seg.get("text", "").strip() - if text: - segments.append( - { - "start": seg.get("start", 0.0), - "end": seg.get("end", 0.0), - "text": text, - "speaker_id": None, # Will be added when diarization is enabled - } - ) - - output_result = { - "language": result.get("language"), - "language_probability": result.get("language_probability", 0), - "segments": segments, - "diarization_enabled": not skip_diarization - } - - if publisher: - publisher.complete("asrx", f"{len(segments)} segments") - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2, ensure_ascii=False) - - sys.stderr.write( - f"ASRX: Transcription complete, {len(segments)} segments written to {output_path}\n" - ) - sys.stderr.flush() - sys.exit(0) - - except Exception as e: - if publisher: - publisher.error("asrx", f"Error: {e}") - import traceback - traceback.print_exc() - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments (error)") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="ASRX Speaker Diarization (Simplified)") - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") - parser.add_argument( - "--skip-diarization", - action="store_true", - help="Skip speaker diarization (only transcription)" - ) - args = parser.parse_args() - - process_asrx( - args.video_path, - args.output_path, - args.uuid, - args.skip_diarization - ) diff --git a/scripts/asrx_processor_v2.py b/scripts/asrx_processor_v2.py deleted file mode 100755 index 61a4faf..0000000 --- a/scripts/asrx_processor_v2.py +++ /dev/null 
@@ -1,212 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX 處理器 v2 - 說話人分離 -使用 whisperx 進行轉錄和說話人分離 -需要 PyTorch 2.5.0 + torchvision 0.20.0 + torchaudio 2.5.0 -""" - -# Fix for PyTorch 2.5 compatibility -import os -os.environ["TORCH_FORCE_WEIGHTS_ONLY_LOAD"] = "0" - -import sys -import json -import argparse -import signal -import subprocess - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from redis_publisher import RedisPublisher - - -def signal_handler(signum, frame): - print(f"ASRX: Received signal {signum}, exiting...") - sys.exit(1) - - -def has_audio_stream(video_path): - """Check if video file has audio stream using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", - "error", - "-select_streams", - "a", - "-show_entries", - "stream=codec_type", - "-of", - "csv=p=0", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return bool(result.stdout.strip()) - except subprocess.CalledProcessError: - return False - except FileNotFoundError: - print("WARNING: ffprobe not found, assuming audio exists") - return True - - -def process_asrx(video_path: str, output_path: str, uuid: str = "", skip_diarization: bool = False): - """ - Process video for speaker diarization using whisperx - - Args: - video_path: Path to video file - output_path: Path to output JSON - uuid: UUID for Redis progress - skip_diarization: Skip speaker diarization (only transcription) - """ - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("asrx", "ASRX_START") - - # Check for audio stream - if not has_audio_stream(video_path): - if publisher: - publisher.info("asrx", "No audio stream detected, skipping transcription") - output = {"language": "", "language_probability": 0.0, "segments": []} - with open(output_path, "w") as f: - json.dump(output, f, indent=2) - if publisher: - publisher.complete("asrx", "0 
segments (no audio)") - sys.stderr.write("ASRX: No audio stream, skipping transcription\n") - sys.stderr.flush() - sys.exit(0) - - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") - - try: - import whisperx - import torch - except ImportError as e: - if publisher: - publisher.error("asrx", f"Missing dependency: {e}") - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - try: - # Load model - if publisher: - publisher.info("asrx", "Loading whisperx base model (this may take a while)...") - - model = whisperx.load_model("base", device="cpu", compute_type="int8") - - if publisher: - publisher.info("asrx", "ASRX_TRANSCRIBING") - - # Transcribe with language detection - result = model.transcribe(video_path) - - if publisher: - publisher.info("asrx", f"ASRX_LANGUAGE:{result.get('language', 'unknown')}") - - # Align timestamps - if publisher: - publisher.info("asrx", "ASRX_ALIGNING_TIMESTAMPS") - - model_a, metadata = whisperx.load_align_model( - language_code=result["language"], - device="cpu" - ) - result = whisperx.align( - result["segments"], - model_a, - metadata, - video_path, - device="cpu" - ) - - # Diarization (speaker segmentation) - if not skip_diarization: - if publisher: - publisher.info("asrx", "ASRX_DIARIZATION") - - try: - diarize_model = whisperx.DiarizationPipeline(use_auth_token=None) - diarize_segments = diarize_model(video_path) - - # Assign speaker labels - result = whisperx.assign_word_speakers(diarize_segments, result) - - if publisher: - publisher.info("asrx", "Diarization completed") - except Exception as e: - if publisher: - publisher.info("asrx", f"Diarization skipped: {e}") - sys.stderr.write(f"ASRX: Diarization failed: {e}\n") - - # Build output - segments = [] - for seg in result.get("segments", []): - text = seg.get("text", "").strip() - if text: - segments.append( - { - 
"start": seg.get("start", 0.0), - "end": seg.get("end", 0.0), - "text": text, - "speaker_id": seg.get("speaker", None), - } - ) - - output_result = { - "language": result.get("language"), - "language_probability": result.get("language_probability", 0), - "segments": segments, - "diarization_enabled": not skip_diarization - } - - if publisher: - publisher.complete("asrx", f"{len(segments)} segments") - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2, ensure_ascii=False) - - sys.stderr.write( - f"ASRX: Transcription complete, {len(segments)} segments written to {output_path}\n" - ) - sys.stderr.flush() - sys.exit(0) - - except Exception as e: - if publisher: - publisher.error("asrx", f"Error: {e}") - import traceback - traceback.print_exc() - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments (error)") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="ASRX Speaker Diarization v2") - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") - parser.add_argument( - "--skip-diarization", - action="store_true", - help="Skip speaker diarization (only transcription)" - ) - args = parser.parse_args() - - process_asrx( - args.video_path, - args.output_path, - args.uuid, - args.skip_diarization - ) diff --git a/scripts/asrx_processor_v2_noalign.py b/scripts/asrx_processor_v2_noalign.py deleted file mode 100755 index 85c9664..0000000 --- a/scripts/asrx_processor_v2_noalign.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX 處理器 v2 - 快速版(跳過對齊) -使用 whisperx 進行轉錄和說話人分離 -跳過時間戳對齊以避開 PyTorch 版本問題 -""" - -import os -os.environ["TORCH_FORCE_WEIGHTS_ONLY_LOAD"] = "0" - -import sys -import json -import argparse 
-import signal -import subprocess - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from redis_publisher import RedisPublisher - - -def signal_handler(signum, frame): - print(f"ASRX: Received signal {signum}, exiting...") - sys.exit(1) - - -def has_audio_stream(video_path): - """Check if video file has audio stream using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", - "error", - "-select_streams", - "a", - "-show_entries", - "stream=codec_type", - "-of", - "csv=p=0", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return bool(result.stdout.strip()) - except subprocess.CalledProcessError: - return False - except FileNotFoundError: - print("WARNING: ffprobe not found, assuming audio exists") - return True - - -def process_asrx(video_path: str, output_path: str, uuid: str = ""): - """ - Process video for speaker diarization using whisperx (no alignment) - - Args: - video_path: Path to video file - output_path: Path to output JSON - uuid: UUID for Redis progress - """ - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("asrx", "ASRX_START") - - # Check for audio stream - if not has_audio_stream(video_path): - if publisher: - publisher.info("asrx", "No audio stream detected") - output = {"language": "", "language_probability": 0.0, "segments": []} - with open(output_path, "w") as f: - json.dump(output, f, indent=2) - if publisher: - publisher.complete("asrx", "0 segments (no audio)") - sys.exit(0) - - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") - - try: - import whisperx - import torch - except ImportError as e: - if publisher: - publisher.error("asrx", f"Missing dependency: {e}") - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, 
f, indent=2) - sys.exit(1) - - try: - # Load model - if publisher: - publisher.info("asrx", "Loading whisperx base model...") - - model = whisperx.load_model("base", device="cpu", compute_type="int8") - - if publisher: - publisher.info("asrx", "ASRX_TRANSCRIBING") - - # Transcribe with language detection - result = model.transcribe(video_path) - - if publisher: - publisher.info("asrx", f"ASRX_LANGUAGE:{result.get('language', 'unknown')}") - - # Skip alignment (requires PyTorch 2.6+) - # Go directly to diarization - if publisher: - publisher.info("asrx", "ASRX_DIARIZATION") - - try: - diarize_model = whisperx.DiarizationPipeline(use_auth_token=None) - diarize_segments = diarize_model(video_path) - - # Assign speaker labels - result = whisperx.assign_word_speakers(diarize_segments, result) - - if publisher: - publisher.info("asrx", "Diarization completed") - except Exception as e: - if publisher: - publisher.info("asrx", f"Diarization info: {e}") - sys.stderr.write(f"ASRX: Diarization note: {e}\n") - - # Build output - segments = [] - for seg in result.get("segments", []): - text = seg.get("text", "").strip() - if text: - segments.append( - { - "start": seg.get("start", 0.0), - "end": seg.get("end", 0.0), - "text": text, - "speaker_id": seg.get("speaker", None), - } - ) - - output_result = { - "language": result.get("language"), - "language_probability": result.get("language_probability", 0), - "segments": segments, - "diarization_enabled": True, - "alignment_enabled": False, - "note": "Alignment skipped due to PyTorch version compatibility" - } - - if publisher: - publisher.complete("asrx", f"{len(segments)} segments") - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2, ensure_ascii=False) - - sys.stderr.write( - f"ASRX: Transcription complete, {len(segments)} segments written to {output_path}\n" - ) - sys.stderr.flush() - sys.exit(0) - - except Exception as e: - if publisher: - publisher.error("asrx", f"Error: {e}") - import traceback - 
traceback.print_exc() - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments (error)") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="ASRX Speaker Diarization v2 (No Alignment)") - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") - args = parser.parse_args() - - process_asrx(args.video_path, args.output_path, args.uuid) diff --git a/scripts/asrx_processor_v2_transcribe.py b/scripts/asrx_processor_v2_transcribe.py deleted file mode 100755 index a6e92d7..0000000 --- a/scripts/asrx_processor_v2_transcribe.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -ASRX 處理器 v2 - 轉錄版 -使用 whisperx 進行轉錄(不含說話人分離) -說話人分離需要額外安裝 pyannote.audio 並配置 HuggingFace token -""" - -import os -os.environ["TORCH_FORCE_WEIGHTS_ONLY_LOAD"] = "0" - -import sys -import json -import argparse -import signal -import subprocess - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from redis_publisher import RedisPublisher - - -def signal_handler(signum, frame): - print(f"ASRX: Received signal {signum}, exiting...") - sys.exit(1) - - -def has_audio_stream(video_path): - """Check if video file has audio stream using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", - "error", - "-select_streams", - "a", - "-show_entries", - "stream=codec_type", - "-of", - "csv=p=0", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return bool(result.stdout.strip()) - except subprocess.CalledProcessError: - return False - except FileNotFoundError: - print("WARNING: ffprobe not found, assuming audio exists") - return True - - -def process_asrx(video_path: str, output_path: str, uuid: str = ""): - """ - Process 
video for transcription using whisperx - - Args: - video_path: Path to video file - output_path: Path to output JSON - uuid: UUID for Redis progress - """ - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("asrx", "ASRX_START") - - # Check for audio stream - if not has_audio_stream(video_path): - if publisher: - publisher.info("asrx", "No audio stream detected") - output = {"language": "", "language_probability": 0.0, "segments": []} - with open(output_path, "w") as f: - json.dump(output, f, indent=2) - if publisher: - publisher.complete("asrx", "0 segments (no audio)") - sys.exit(0) - - if publisher: - publisher.info("asrx", "ASRX_LOADING_MODEL") - - try: - import whisperx - import torch - except ImportError as e: - if publisher: - publisher.error("asrx", f"Missing dependency: {e}") - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - try: - # Load model - if publisher: - publisher.info("asrx", "Loading whisperx base model...") - - model = whisperx.load_model("base", device="cpu", compute_type="int8") - - if publisher: - publisher.info("asrx", "ASRX_TRANSCRIBING") - - # Transcribe with language detection - result = model.transcribe(video_path) - - if publisher: - publisher.info("asrx", f"ASRX_LANGUAGE:{result.get('language', 'unknown')}") - - # Build output (without alignment and diarization due to PyTorch version) - segments = [] - for seg in result.get("segments", []): - text = seg.get("text", "").strip() - if text: - segments.append( - { - "start": seg.get("start", 0.0), - "end": seg.get("end", 0.0), - "text": text, - "speaker_id": None, # Requires pyannote.audio + HuggingFace token - } - ) - - output_result = { - "language": result.get("language"), - "language_probability": 
result.get("language_probability", 0), - "segments": segments, - "diarization_enabled": False, - "alignment_enabled": False, - "note": "PyTorch 2.5.0 compatibility - alignment and diarization require additional setup" - } - - if publisher: - publisher.complete("asrx", f"{len(segments)} segments") - - with open(output_path, "w") as f: - json.dump(output_result, f, indent=2, ensure_ascii=False) - - sys.stderr.write( - f"ASRX: Transcription complete, {len(segments)} segments written to {output_path}\n" - ) - sys.stderr.flush() - sys.exit(0) - - except Exception as e: - if publisher: - publisher.error("asrx", f"Error: {e}") - import traceback - traceback.print_exc() - result = {"language": None, "segments": [], "error": str(e)} - if publisher: - publisher.complete("asrx", "0 segments (error)") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - sys.exit(1) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="ASRX Transcription (PyTorch 2.5.0)") - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") - args = parser.parse_args() - - process_asrx(args.video_path, args.output_path, args.uuid) diff --git a/scripts/asrx_self/integrate_face_asrx_speaker.py b/scripts/asrx_self/integrate_face_asrx_speaker.py deleted file mode 100755 index bc4141a..0000000 --- a/scripts/asrx_self/integrate_face_asrx_speaker.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -整合 Face + ASRX 說話人分離(版本 3 - 修復 face_detected 檢查) -""" - -import json -import argparse -from pathlib import Path -from typing import Dict, List - - -def load_json(path: str): - """載入 JSON 文件""" - with open(path, 'r', encoding='utf-8') as f: - return json.load(f) - - -def match_face_with_speaker_v3(face_data: Dict, asrx_data: Dict, - time_threshold: float = 3.0) -> List[Dict]: - """ - 匹配人臉與說話人(版本 3 - 修復版) - - 
修復:Face 數據沒有 face_detected 欄位,改用 faces 列表是否為空判斷 - """ - face_frames = face_data.get('frames', []) - asrx_segments = asrx_data.get('segments', []) - - # 將 Face 幀按時間排序 - face_frames_sorted = sorted(face_frames, key=lambda x: x.get('timestamp', 0)) - - print(f" Face frames: {len(face_frames_sorted)}") - print(f" ASRX segments: {len(asrx_segments)}") - - # 匹配 - integrated = [] - - for i, seg in enumerate(asrx_segments): - start = seg['start'] - end = seg['end'] - speaker = seg['speaker'] - mid_time = (start + end) / 2 - - # 找到時間範圍內的人臉 - faces_in_range = [] - for frame in face_frames_sorted: - ts = frame.get('timestamp', 0) - - # 檢查是否在時間範圍內 - if start - time_threshold <= ts <= end + time_threshold: - # 檢查是否有人臉(faces 列表不為空) - faces = frame.get('faces', []) - if faces and len(faces) > 0: - faces_in_range.append({ - 'timestamp': ts, - 'faces': faces, - 'distance_from_mid': abs(ts - mid_time) - }) - - # 選擇最接近片段中間的人臉 - if faces_in_range: - faces_in_range.sort(key=lambda x: x['distance_from_mid']) - best_face = faces_in_range[0] - else: - best_face = None - - # 建立整合結果 - integrated.append({ - 'start': start, - 'end': end, - 'duration': seg.get('duration', end - start), - 'speaker': speaker, - 'has_face': best_face is not None, - 'face_timestamp': best_face['timestamp'] if best_face else None, - 'face_location': best_face['faces'][0] if best_face and best_face['faces'] else None, - 'face_count_in_range': len(faces_in_range) - }) - - # 進度顯示 - if (i + 1) % 200 == 0: - print(f" Processed {i+1}/{len(asrx_segments)} segments...") - - return integrated - - -def analyze_speaker_face(integrated: List[Dict]): - """分析說話人與人臉的對應""" - speaker_stats = {} - - for item in integrated: - speaker = item['speaker'] - if speaker not in speaker_stats: - speaker_stats[speaker] = { - 'total_segments': 0, - 'with_face': 0, - 'without_face': 0, - 'total_duration': 0 - } - - speaker_stats[speaker]['total_segments'] += 1 - speaker_stats[speaker]['total_duration'] += item['duration'] - - if 
item['has_face']: - speaker_stats[speaker]['with_face'] += 1 - else: - speaker_stats[speaker]['without_face'] += 1 - - return speaker_stats - - -def main(): - parser = argparse.ArgumentParser(description='整合 Face + ASRX 說話人') - parser.add_argument('face_json', help='Face 檢測結果 JSON') - parser.add_argument('asrx_json', help='ASRX 說話人分離 JSON') - parser.add_argument('-o', '--output', help='輸出整合結果 JSON') - parser.add_argument('--threshold', type=float, default=3.0, - help='時間閾值(秒)') - parser.add_argument('--stats', action='store_true', help='只显示統計') - - args = parser.parse_args() - - # 載入數據 - print(f"[Load] Face: {args.face_json}") - face_data = load_json(args.face_json) - - print(f"[Load] ASRX: {args.asrx_json}") - asrx_data = load_json(args.asrx_json) - - # 匹配 - print(f"\n[Match] Matching faces with speakers (threshold={args.threshold}s)...") - integrated = match_face_with_speaker_v3(face_data, asrx_data, args.threshold) - - # 分析 - print("\n[Analyze] Analyzing speaker-face correspondence...") - speaker_stats = analyze_speaker_face(integrated) - - # 顯示統計 - print(f"\n{'='*70}") - print("說話人 - 人臉對應統計") - print(f"{'='*70}") - - total_segments = len(integrated) - total_with_face = sum(1 for item in integrated if item['has_face']) - - for speaker, stats in sorted(speaker_stats.items()): - with_face_pct = stats['with_face'] / stats['total_segments'] * 100 if stats['total_segments'] > 0 else 0 - print(f"\n🔊 {speaker}:") - print(f" 總片段:{stats['total_segments']}") - print(f" 有人臉:{stats['with_face']} ({with_face_pct:.1f}%)") - print(f" 無人臉:{stats['without_face']}") - print(f" 總時長:{stats['total_duration']:.1f}s ({stats['total_duration']/60:.1f}分鐘)") - - print(f"\n{'='*70}") - print(f"總計:{total_segments} 片段,{total_with_face} 片段有人臉 ({total_with_face/total_segments*100:.1f}%)") - print(f"{'='*70}") - - # 保存結果 - if args.output: - output_path = Path(args.output) - output_path.parent.mkdir(parents=True, exist_ok=True) - - result = { - 'face_source': str(args.face_json), - 
'asrx_source': str(args.asrx_json), - 'time_threshold': args.threshold, - 'integrated_segments': integrated, - 'speaker_stats': speaker_stats - } - - with open(output_path, 'w', encoding='utf-8') as f: - json.dump(result, f, indent=2, ensure_ascii=False) - - print(f"\n[Save] Results saved to: {output_path}") - - return integrated, speaker_stats - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/main.py b/scripts/asrx_self/main.py deleted file mode 100644 index e26a419..0000000 --- a/scripts/asrx_self/main.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Self-implemented ASRX - 自實作說話人分離系統 -基於聲紋嵌入 + 譜聚類 - -技術架構: -1. VAD (Silero VAD) - 語音活動檢測 -2. Speaker Encoder (ECAPA-TDNN) - 聲紋特徵提取 -3. Spectral Clustering - 譜聚類 -4. Post-processing - 後處理 - -流程: -音頻 → VAD → 語音片段 → 聲紋嵌入 → 相似度矩陣 → 譜聚類 → 說話人 ID -""" - -import sys -import json -import time -from pathlib import Path - -# 導入自定義模組 -from vad import load_vad_model, extract_speech_segments -from speaker_encoder import ( - load_speaker_encoder, - extract_speaker_embeddings_batch, - compute_similarity_matrix, - normalize_embeddings, -) -from speaker_cluster import spectral_clustering_speaker, smooth_speaker_labels - - -class SelfASRX: - """ - 自實作說話人分離系統 - """ - - def __init__(self): - """初始化模型""" - print("[SelfASRX] Initializing models...") - - # 載入 VAD 模型 - print("[SelfASRX] Loading VAD model (Silero)...") - self.vad_model, self.vad_utils = load_vad_model() - - # 載入聲紋模型 - print("[SelfASRX] Loading speaker encoder (ECAPA-TDNN)...") - self.speaker_encoder = load_speaker_encoder() - - print("[SelfASRX] Models loaded successfully") - - def process( - self, - audio_path, - output_path=None, - min_speech_duration_ms=500, - n_speakers=None, - smooth_window=5, - ): - """ - 處理音頻文件進行說話人分離 - - Args: - audio_path: 音頻文件路徑 - output_path: 輸出 JSON 路徑(可選) - min_speech_duration_ms: 最小語音持續時間 - n_speakers: 說話人數量(None=自動估計) - smooth_window: 平滑窗口大小 - - Returns: - result: 說話人分離結果 - """ - start_time = 
time.time() - print(f"\n[SelfASRX] Processing: {audio_path}") - print("=" * 60) - - # 步驟 1: VAD - 語音活動檢測 - print("\n[Step 1] Voice Activity Detection...") - step1_start = time.time() - - speech_segments, wav, sample_rate = extract_speech_segments( - audio_path, - self.vad_model, - self.vad_utils, - min_speech_duration_ms=min_speech_duration_ms, - ) - - step1_time = time.time() - step1_start - print(f" Speech segments: {len(speech_segments)}") - print(f" Total duration: {len(wav) / sample_rate:.2f}s") - print(f" VAD time: {step1_time:.2f}s") - - if len(speech_segments) == 0: - print("[SelfASRX] No speech detected!") - return {"error": "No speech detected", "segments": []} - - # 步驟 2: 聲紋特徵提取 - print("\n[Step 2] Speaker embedding extraction...") - step2_start = time.time() - - # 提取語音片段音頻 - audio_segments = [] - for start_sec, end_sec in speech_segments: - start_sample = int(start_sec * sample_rate) - end_sample = int(end_sec * sample_rate) - audio_segments.append(wav[start_sample:end_sample]) - - # 批量提取嵌入 - embeddings = extract_speaker_embeddings_batch( - self.speaker_encoder, audio_segments, sample_rate - ) - - # 正規化 - embeddings = normalize_embeddings(embeddings) - - step2_time = time.time() - step2_start - print(f" Embedding shape: {embeddings.shape}") - print(f" Embedding time: {step2_time:.2f}s") - - # 步驟 3: 計算相似度矩陣 - print("\n[Step 3] Computing similarity matrix...") - step3_start = time.time() - - similarity_matrix = compute_similarity_matrix(embeddings, method="cosine") - - step3_time = time.time() - step3_start - print(f" Similarity matrix shape: {similarity_matrix.shape}") - print(f" Similarity time: {step3_time:.2f}s") - - # 步驟 4: 譜聚類 - print("\n[Step 4] Spectral clustering...") - step4_start = time.time() - - speaker_labels, estimated_n_speakers = spectral_clustering_speaker( - similarity_matrix, n_speakers=n_speakers, auto_estimate=(n_speakers is None) - ) - - # 平滑標籤 - if smooth_window > 1: - speaker_labels = smooth_speaker_labels( - speaker_labels, 
window_size=smooth_window - ) - - step4_time = time.time() - step4_start - print(f" Estimated speakers: {estimated_n_speakers}") - print(f" Clustering time: {step4_time:.2f}s") - - # 步驟 5: 建立輸出結果 - print("\n[Step 5] Building output...") - - result = { - "audio_path": str(audio_path), - "total_duration": len(wav) / sample_rate, - "n_speech_segments": len(speech_segments), - "n_speakers": int(estimated_n_speakers), - "segments": [], - } - - for i, ((start, end), label) in enumerate(zip(speech_segments, speaker_labels)): - result["segments"].append( - { - "index": i, - "start": round(start, 3), - "end": round(end, 3), - "duration": round(end - start, 3), - "speaker": f"SPEAKER_{int(label)}", - } - ) - - # 統計每個說話人的總時長 - speaker_stats = {} - for seg in result["segments"]: - speaker = seg["speaker"] - if speaker not in speaker_stats: - speaker_stats[speaker] = {"count": 0, "duration": 0} - speaker_stats[speaker]["count"] += 1 - speaker_stats[speaker]["duration"] += seg["duration"] - - result["speaker_stats"] = speaker_stats - - total_time = time.time() - start_time - result["processing_time"] = round(total_time, 2) - result["realtime_factor"] = round(result["total_duration"] / total_time, 2) - - print("\n[SelfASRX] Processing completed!") - print(f" Total time: {total_time:.2f}s") - print(f" Realtime factor: {result['realtime_factor']:.2f}x") - print(f" Detected speakers: {estimated_n_speakers}") - - # 保存結果 - if output_path: - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - - with open(output_path, "w", encoding="utf-8") as f: - json.dump(result, f, indent=2, ensure_ascii=False) - - print(f" Results saved to: {output_path}") - - print("=" * 60) - - return result - - -def main(): - """主函數""" - import argparse - - parser = argparse.ArgumentParser( - description="Self-implemented ASRX - Speaker Diarization" - ) - parser.add_argument("audio_path", help="Path to audio file") - parser.add_argument("-o", "--output", help="Output JSON 
path") - parser.add_argument( - "--min-speech-duration", - type=int, - default=500, - help="Minimum speech duration in ms (default: 500)", - ) - parser.add_argument( - "--n-speakers", - type=int, - default=None, - help="Number of speakers (default: auto-estimate)", - ) - parser.add_argument( - "--smooth-window", - type=int, - default=5, - help="Smoothing window size (default: 5)", - ) - - args = parser.parse_args() - - # 檢查文件是否存在 - if not Path(args.audio_path).exists(): - print(f"Error: Audio file not found: {args.audio_path}") - sys.exit(1) - - # 創建 ASRX 實例並處理 - asrx = SelfASRX() - result = asrx.process( - args.audio_path, - args.output, - min_speech_duration_ms=args.min_speech_duration, - n_speakers=args.n_speakers, - smooth_window=args.smooth_window, - ) - - # 顯示結果摘要 - if "error" not in result: - print("\n[Summary]") - print(f" Audio duration: {result['total_duration']:.2f}s") - print(f" Speech segments: {result['n_speech_segments']}") - print(f" Detected speakers: {result['n_speakers']}") - print(f" Processing time: {result['processing_time']:.2f}s") - print(f" Realtime factor: {result['realtime_factor']:.2f}x") - - print("\n[Speaker Statistics]") - for speaker, stats in result["speaker_stats"].items(): - pct = stats["duration"] / result["total_duration"] * 100 - print( - f" {speaker}: {stats['count']} segments, " - + f"{stats['duration']:.2f}s ({pct:.1f}%)" - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/main_fixed.py b/scripts/asrx_self/main_fixed.py index 34b498f..4def1a3 100755 --- a/scripts/asrx_self/main_fixed.py +++ b/scripts/asrx_self/main_fixed.py @@ -1,308 +1,728 @@ -#!/opt/homebrew/bin/python3.11 """ -Self-implemented ASRX - Fixed Version -使用魯棒的聚類算法 +SelfASRXFixed - 7 步 Hybrid Speaker Diarization Pipeline + +Pipeline: + 1. whisper.transcribe(full_audio) → rough segments + text + language + 2. VAD scan each rough segment → refined segments + 3. whisper per refined segment → {text, language, lang_prob} + 4. 
ECAPA-TDNN per refined segment → 192-dim embeddings + 5. AgglomerativeClustering → speaker_labels + 6. Store all embeddings in Qdrant (payload: file_uuid, speaker_id, text, ...) + 7. High-quality embeddings → gender classify + store reference in Qdrant """ import sys import json import time +import os import numpy as np from pathlib import Path +from urllib.request import Request, urlopen +from urllib.error import URLError -# 導入自定義模組 -from vad import load_vad_model, extract_speech_segments -from speaker_encoder import ( - load_speaker_encoder, - extract_speaker_embeddings_batch, - normalize_embeddings -) -from speaker_cluster_fixed import robust_speaker_clustering + +def _load_audio(path): + """載入音頻文件,回傳 (wav_numpy, sample_rate)""" + import soundfile as sf + wav, sr = sf.read(path) + if len(wav.shape) > 1: + wav = np.mean(wav, axis=1) + return wav, sr + + +def _load_whisper_model(size="small"): + from whisper_local import load_model + return load_model(size) + + +def _load_vad(): + from vad import load_vad_model + return load_vad_model() + + +def _load_speaker_encoder(): + from speaker_encoder import load_speaker_encoder + return load_speaker_encoder() + + +def _load_gender_classifier(): + try: + from speechbrain.inference.classifiers import EncoderClassifier + classifier = EncoderClassifier.from_hparams( + source="speechbrain/gender-recognition-ecapa", + run_opts={"device": "cpu"}, + ) + print("[Gender] Classifier loaded: speechbrain/gender-recognition-ecapa") + return classifier + except Exception as e: + print(f"[Gender] Classifier not available: {e}") + return None + + +def _ensure_speaker_collection(qdrant_url, api_key, collection): + """確認 Qdrant speaker collection 存在,不存在則建立 (dim=192, cosine)""" + try: + url = f"{qdrant_url}/collections/{collection}" + req = Request(url, method="GET", + headers={"api-key": api_key} if api_key else {}) + try: + urlopen(req) + return True + except URLError as e: + if getattr(e, "code", None) == 404: + body = json.dumps({ + 
"vectors": { + "size": 192, + "distance": "Cosine" + } + }).encode() + req = Request(url, data=body, method="PUT", + headers={"Content-Type": "application/json", + **({"api-key": api_key} if api_key else {})}) + urlopen(req) + print(f"[Qdrant] Created collection: {collection} (dim=192)") + return True + raise + except Exception as e: + print(f"[Qdrant] Cannot access Qdrant: {e}") + return False + + +def _qdrant_upsert(qdrant_url, api_key, collection, points): + """批量寫入 Qdrant points""" + try: + url = f"{qdrant_url}/collections/{collection}/points?wait=true" + body = json.dumps({"points": points}).encode() + headers = {"Content-Type": "application/json"} + if api_key: + headers["api-key"] = api_key + req = Request(url, data=body, headers=headers, method="PUT") + urlopen(req) + return True + except Exception as e: + print(f"[Qdrant] Upsert failed: {e}") + return False + + +def _hash_point_id(file_uuid, label): + """產生一致的 point ID""" + s = f"{file_uuid}_{label}" + return hash(s) & 0x7FFFFFFFFFFFFFFF + + +def _save_checkpoint(path: str, data: dict): + """原子寫入 checkpoint(先 .tmp 再 rename)""" + tmp = path + ".tmp" + Path(tmp).parent.mkdir(parents=True, exist_ok=True) + with open(tmp, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + os.replace(tmp, path) + + +def compute_embedding_quality(embeddings, labels): + """每個 embedding 到所屬 cluster centroid 的餘弦相似度""" + from sklearn.metrics.pairwise import cosine_similarity + unique_labels = set(labels) + centroids = {} + for label in unique_labels: + mask = labels == label + centroid = np.mean(embeddings[mask], axis=0) + norm = np.linalg.norm(centroid) + if norm > 0: + centroid = centroid / norm + centroids[label] = centroid + qualities = [] + for emb, label in zip(embeddings, labels): + sim = cosine_similarity([emb], [centroids[label]])[0][0] + qualities.append(sim) + return np.array(qualities) class SelfASRXFixed: - """自實作說話人分離系統(修復版)""" - + """7 步 Hybrid Speaker Diarization Pipeline""" + def 
__init__(self): - print("[SelfASRX-Fixed] Initializing models...") - - # 載入 VAD 模型 - print("[SelfASRX-Fixed] Loading VAD model (Silero)...") - self.vad_model, self.vad_utils = load_vad_model() - - # 載入聲紋模型 - print("[SelfASRX-Fixed] Loading speaker encoder (ECAPA-TDNN)...") - self.speaker_encoder = load_speaker_encoder() - - print("[SelfASRX-Fixed] Models loaded successfully") - - def process(self, audio_path, output_path=None, - min_speech_duration_ms=500, - n_speakers=None, - max_speakers=10): - """處理音頻文件""" - start_time = time.time() - print(f"\n[SelfASRX-Fixed] Processing: {audio_path}") - print("=" * 60) - - # 步驟 1: VAD - print("\n[Step 1] Voice Activity Detection...") - step1_start = time.time() - - speech_segments, wav, sample_rate = extract_speech_segments( - audio_path, self.vad_model, self.vad_utils, - min_speech_duration_ms=min_speech_duration_ms - ) - - step1_time = time.time() - step1_start - print(f" Speech segments: {len(speech_segments)}") - print(f" Total duration: {len(wav)/sample_rate:.2f}s") - print(f" VAD time: {step1_time:.2f}s") - - if len(speech_segments) == 0: - print("[SelfASRX-Fixed] No speech detected!") - return {"error": "No speech detected", "segments": []} - - # 步驟 2: 聲紋特徵提取 - print("\n[Step 2] Speaker embedding extraction...") - step2_start = time.time() - - # 提取語音片段音頻 - audio_segments = [] - for start_sec, end_sec in speech_segments: - start_sample = int(start_sec * sample_rate) - end_sample = int(end_sec * sample_rate) - audio_segments.append(wav[start_sample:end_sample]) - - # 批量提取嵌入 - embeddings = extract_speaker_embeddings_batch( - self.speaker_encoder, audio_segments, sample_rate - ) - - # 正規化 - embeddings = normalize_embeddings(embeddings) - - step2_time = time.time() - step2_start - print(f" Embedding shape: {embeddings.shape}") - print(f" Embedding time: {step2_time:.2f}s") - - # 步驟 3: 魯棒聚類 - print("\n[Step 3] Robust speaker clustering...") - step3_start = time.time() - - speaker_labels, estimated_n_speakers = 
robust_speaker_clustering( - embeddings, - n_speakers=n_speakers, - max_speakers=max_speakers - ) - - step3_time = time.time() - step3_start - print(f" Clustering time: {step3_time:.2f}s") - - # 步驟 4: 建立輸出 - print("\n[Step 4] Building output...") - - result = { - "audio_path": str(audio_path), - "total_duration": len(wav) / sample_rate, - "n_speech_segments": len(speech_segments), - "n_speakers": int(estimated_n_speakers), - "segments": [] - } - - for i, ((start, end), label) in enumerate(zip(speech_segments, speaker_labels)): - result["segments"].append({ - "index": i, - "start": round(start, 3), - "end": round(end, 3), - "duration": round(end - start, 3), - "speaker": f"SPEAKER_{int(label)}" - }) - - # 統計每個說話人的總時長 - speaker_stats = {} - for seg in result["segments"]: - speaker = seg["speaker"] - if speaker not in speaker_stats: - speaker_stats[speaker] = {"count": 0, "duration": 0} - speaker_stats[speaker]["count"] += 1 - speaker_stats[speaker]["duration"] += seg["duration"] - - result["speaker_stats"] = speaker_stats - - total_time = time.time() - start_time - result["processing_time"] = round(total_time, 2) - result["realtime_factor"] = round(result["total_duration"] / total_time, 2) - - print("\n[SelfASRX-Fixed] Processing completed!") - print(f" Total time: {total_time:.2f}s") - print(f" Realtime factor: {result['realtime_factor']:.2f}x") - print(f" Detected speakers: {estimated_n_speakers}") - - # 保存結果 - if output_path: - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - - with open(output_path, 'w', encoding='utf-8') as f: - json.dump(result, f, indent=2, ensure_ascii=False) - - print(f" Results saved to: {output_path}") - - print("=" * 60) - - return result + print("[SelfASRX] Initializing models...") + print("[SelfASRX] Loading whisper model...") + self.whisper = _load_whisper_model("small") + + print("[SelfASRX] Loading VAD model (Silero)...") + self.vad_model, self.vad_utils = _load_vad() + + print("[SelfASRX] 
Loading speaker encoder (ECAPA-TDNN)...") + self.speaker_encoder = _load_speaker_encoder() + + print("[SelfASRX] Loading gender classifier...") + self.gender_classifier = _load_gender_classifier() + + # Qdrant 設定 + self.qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") + self.qdrant_api_key = os.environ.get("QDRANT_API_KEY", "") + schema = os.environ.get("DATABASE_SCHEMA", "public") + self.qdrant_collection = os.environ.get( + "QDRANT_SPEAKER_COLLECTION", + f"momentry_{schema}_speaker" + ) + self._qdrant_ok = False + + print("[SelfASRX] Models loaded successfully") + + def process(self, audio_path, output_path=None, file_uuid=None, + max_speakers=10, quality_threshold=0.85, + checkpoint_path=None): + """7 步 speaker diarization pipeline - def process_with_segments(self, audio_path, asr_segments, output_path=None): - """ - 使用 ASR segment 邊界進行 speaker diarization,取代 VAD 步驟。 - Args: - audio_path: 音頻文件路徑(WAV) - asr_segments: ASR segment 列表,每個包含 start/end(秒) - output_path: 輸出 JSON 路徑(可選) + audio_path: 音頻文件路徑 (WAV 16kHz mono) + output_path: 輸出 JSON 路徑 (可選) + file_uuid: 檔案 UUID (用於 Qdrant 儲存) + max_speakers: 最大說話人數 + quality_threshold: 高品質聲紋門檻 (0-1) + checkpoint_path: Step 3 完成後儲存 checkpoint 路徑 + + Returns: + dict: segments, speaker_stats, n_speakers, total_duration, references """ start_time = time.time() - print(f"\n[SelfASRX-Fixed] Processing with {len(asr_segments)} ASR segments: {audio_path}") + print(f"\n[SelfASRX] Processing: {audio_path}") print("=" * 60) - # 載入完整音頻 - import soundfile as sf - wav, sample_rate = sf.read(audio_path) - if len(wav.shape) > 1: - wav = np.mean(wav, axis=1) # 轉 mono - print(f" Audio loaded: {len(wav)/sample_rate:.2f}s, {sample_rate}Hz") + # 載入音頻 + wav, sample_rate = _load_audio(audio_path) + total_duration = len(wav) / sample_rate + print(f" Audio: {total_duration:.2f}s, {sample_rate}Hz") - # 使用 ASR segments 取代 VAD (audio处理用time) - speech_segments = [(s["start_time"], s["end_time"]) for s in asr_segments] - print(f" 
Speech segments from ASR: {len(speech_segments)}") + # ── Step 1: whisper 粗略定位 (faster-whisper) ── + print("\n[Step 1] Initial whisper transcription...") + t1 = time.time() + seg_gen, info = self.whisper.transcribe(audio_path) + rough_segments = [] + for seg in seg_gen: + rough_segments.append({"start": seg.start, "end": seg.end, "text": seg.text}) + language = info.language if info else None + print(f" Rough segments: {len(rough_segments)}") + print(f" Language: {language}") + print(f" Step 1 time: {time.time() - t1:.2f}s") - if len(speech_segments) == 0: - print("[SelfASRX-Fixed] No ASR segments provided!") - return {"error": "No ASR segments", "segments": []} + if not rough_segments: + print("[SelfASRX] No speech detected by whisper!") + return {"error": "No speech detected", "segments": []} - # 提取語音片段 - audio_segments = [] - for start_sec, end_sec in speech_segments: - start_sample = int(start_sec * sample_rate) - end_sample = int(end_sec * sample_rate) - if start_sample >= len(wav): + # ── Step 2: VAD scan 每個 rough segment 細切 ── + print("\n[Step 2] VAD scan for refined segmentation...") + t2 = time.time() + refined_segments = [] + for seg in rough_segments: + s = seg["start"] + e = seg["end"] + sub = self._vad_scan_segment(wav, sample_rate, s, e) + if sub: + refined_segments.extend(sub) + else: + refined_segments.append((s, e)) + print(f" Refined segments: {len(refined_segments)}") + print(f" Step 2 time: {time.time() - t2:.2f}s") + + if not refined_segments: + return {"error": "No segments after VAD scan", "segments": []} + + # ── Step 3: whisper per refined segment ── + print("\n[Step 3] Per-segment transcription...") + t3 = time.time() + CHECKPOINT_INTERVAL = 50 + + segment_texts = [] + resume_from = 0 + + # 載入既有 partial checkpoint(中斷續接) + if checkpoint_path and os.path.exists(checkpoint_path): + try: + with open(checkpoint_path, "r") as f: + cp = json.load(f) + if cp.get("checkpoint_version") == 2 and not cp.get("step3_completed"): + saved = 
cp.get("segment_texts", []) + if saved: + resume_from = len(saved) + segment_texts = saved + print(f"[Step 3] Resuming from #{resume_from}/{len(refined_segments)}") + except Exception: + pass + + for i, (start_sec, end_sec) in enumerate(refined_segments): + if i < resume_from: continue - audio_segments.append(wav[start_sample:min(end_sample, len(wav))]) + seg_text = self._transcribe_segment(wav, sample_rate, start_sec, end_sec) + segment_texts.append(seg_text) - print(f" Audio segments extracted: {len(audio_segments)}") + if checkpoint_path and (i + 1) % CHECKPOINT_INTERVAL == 0: + _save_checkpoint(checkpoint_path, { + "checkpoint_version": 2, + "step3_completed": False, + "step3_progress": i + 1, + "language": language, + "total_duration": total_duration, + "refined_segments": [[s, e] for s, e in refined_segments], + "segment_texts": [{ + "text": st["text"], + "language": st["language"], + "lang_prob": st["lang_prob"], + } for st in segment_texts], + "file_uuid": file_uuid, + "max_speakers": max_speakers, + "quality_threshold": quality_threshold, + }) + print(f"[Checkpoint] Step 3: {i+1}/{len(refined_segments)}") - # 批量提取聲紋嵌入 - print("\n[Step 2] Speaker embedding extraction...") - step2_start = time.time() + print(f" Step 3 time: {time.time() - t3:.2f}s") + + # ── Save final checkpoint after Step 3 ── + if checkpoint_path: + _save_checkpoint(checkpoint_path, { + "checkpoint_version": 2, + "step3_completed": True, + "language": language, + "total_duration": total_duration, + "refined_segments": [[s, e] for s, e in refined_segments], + "segment_texts": [{ + "text": st["text"], + "language": st["language"], + "lang_prob": st["lang_prob"], + } for st in segment_texts], + "file_uuid": file_uuid, + "max_speakers": max_speakers, + "quality_threshold": quality_threshold, + }) + print(f"[Checkpoint] Step 3 complete, saved to {checkpoint_path}") + + # ── Step 4: ECAPA-TDNN per refined segment ── + print("\n[Step 4] Speaker embedding extraction...") + t4 = time.time() + 
audio_segments = [] + for start_sec, end_sec in refined_segments: + s = int(start_sec * sample_rate) + e = int(end_sec * sample_rate) + audio_segments.append(wav[s:min(e, len(wav))]) + + from speaker_encoder import extract_speaker_embeddings_batch, normalize_embeddings embeddings = extract_speaker_embeddings_batch( self.speaker_encoder, audio_segments, sample_rate ) embeddings = normalize_embeddings(embeddings) - step2_time = time.time() - step2_start - print(f" Embedding shape: {embeddings.shape}") - print(f" Embedding time: {step2_time:.2f}s") + print(f" Embeddings: {embeddings.shape}") + print(f" Step 4 time: {time.time() - t4:.2f}s") - # 聚類 - print("\n[Step 3] Robust speaker clustering...") - step3_start = time.time() + # ── Step 5: AgglomerativeClustering ── + print("\n[Step 5] Speaker clustering...") + t5 = time.time() + from speaker_cluster_fixed import robust_speaker_clustering speaker_labels, estimated_n_speakers = robust_speaker_clustering( - embeddings, n_speakers=None, max_speakers=10 + embeddings, n_speakers=None, max_speakers=max_speakers ) - step3_time = time.time() - step3_start - print(f" Clustering time: {step3_time:.2f}s") + print(f" Speakers: {estimated_n_speakers}") + print(f" Step 5 time: {time.time() - t5:.2f}s") - # 建立輸出 - result = { - "audio_path": str(audio_path), - "total_duration": len(wav) / sample_rate, - "n_speech_segments": len(speech_segments), - "n_speakers": int(estimated_n_speakers), - "segments": [] - } + # 品質計算 + qualities = compute_embedding_quality(embeddings, speaker_labels) - for i, ((start, end), label) in enumerate(zip(speech_segments, speaker_labels)): - result["segments"].append({ - "index": i, - "start": round(start, 3), - "end": round(end, 3), - "duration": round(end - start, 3), - "speaker": f"SPEAKER_{int(label)}" - }) - - # 加入 embeddings(每個 segment 對應的 192-D speaker embedding) - result["embeddings"] = [] - for emb in embeddings: - result["embeddings"].append(emb.tolist()) + # 建立輸出 segments + segments = [] + for i, 
((start_sec, end_sec), label) in enumerate( + zip(refined_segments, speaker_labels)): + seg = { + "start": round(start_sec, 3), + "end": round(end_sec, 3), + "start_frame": int(start_sec * 30), + "end_frame": int(end_sec * 30), + "text": segment_texts[i]["text"], + "language": segment_texts[i]["language"], + "lang_prob": segment_texts[i]["lang_prob"], + "speaker": f"SPEAKER_{int(label)}", + "speaker_id": f"SPEAKER_{int(label)}", + "quality": float(qualities[i]), + } + segments.append(seg) # 統計 speaker_stats = {} - for seg in result["segments"]: - speaker = seg["speaker"] - if speaker not in speaker_stats: - speaker_stats[speaker] = {"count": 0, "duration": 0} - speaker_stats[speaker]["count"] += 1 - speaker_stats[speaker]["duration"] += seg["duration"] - result["speaker_stats"] = speaker_stats + for seg in segments: + spk = seg["speaker_id"] + dur = seg["end"] - seg["start"] + if spk not in speaker_stats: + speaker_stats[spk] = {"count": 0, "duration": 0} + speaker_stats[spk]["count"] += 1 + speaker_stats[spk]["duration"] += dur + + result = { + "language": language or "", + "segments": segments, + "n_speakers": int(estimated_n_speakers), + "speaker_stats": speaker_stats, + "total_duration": total_duration, + "n_segments": len(segments), + } + + # ── Step 6: Store embeddings in Qdrant ── + if file_uuid: + print("\n[Step 6] Storing embeddings in Qdrant...") + t6 = time.time() + self._store_speaker_embeddings(segments, embeddings, speaker_labels, + file_uuid) + print(f" Step 6 time: {time.time() - t6:.2f}s") + + # ── Step 7: High-quality classification ── + if file_uuid: + print("\n[Step 7] Classifying high-quality embeddings...") + t7 = time.time() + references = self._classify_high_quality_speakers( + segments, embeddings, speaker_labels, file_uuid, + wav, sample_rate, quality_threshold + ) + if references: + result["references"] = references + print(f" Step 7 time: {time.time() - t7:.2f}s") total_time = time.time() - start_time result["processing_time"] = 
round(total_time, 2) - result["realtime_factor"] = round(result["total_duration"] / total_time, 2) - - print("\n[SelfASRX-Fixed] Processing completed!") - print(f" Total time: {total_time:.2f}s") - print(f" Realtime factor: {result['realtime_factor']:.2f}x") - print(f" Detected speakers: {estimated_n_speakers}") + if total_duration > 0: + result["realtime_factor"] = round(total_duration / total_time, 2) + # 保存輸出 if output_path: - import json - with open(output_path, 'w', encoding='utf-8') as f: + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: json.dump(result, f, indent=2, ensure_ascii=False) - print(f" Results saved to: {output_path}") + print(f"\n[SelfASRX] Saved to: {output_path}") + + print(f"\n[SelfASRX] Done! {len(segments)} segments, " + f"{estimated_n_speakers} speakers, " + f"{total_time:.2f}s") - print("=" * 60) return result + def resume_from_checkpoint(self, checkpoint_path, audio_path, + output_path=None): + """從 checkpoint 載入 Steps 1-3 結果,執行 Steps 4-7""" + print(f"\n[SelfASRX] Resuming from checkpoint: {checkpoint_path}") + print("=" * 60) + + with open(checkpoint_path, "r", encoding="utf-8") as f: + cp = json.load(f) + + if not cp.get("step3_completed"): + error_msg = f"Checkpoint step3 not completed (progress: {cp.get('step3_progress', '?')})" + print(f"[SelfASRX] {error_msg}") + return {"error": error_msg, "segments": []} + + wav, sample_rate = _load_audio(audio_path) + refined_segments = [tuple(s) for s in cp["refined_segments"]] + segment_texts = cp["segment_texts"] + language = cp.get("language", "") + total_duration = cp.get("total_duration", 0) + file_uuid = cp.get("file_uuid") + max_speakers = cp.get("max_speakers", 10) + quality_threshold = cp.get("quality_threshold", 0.85) + + print(f" Loaded checkpoint: {len(refined_segments)} segments, " + f"language={language}, duration={total_duration:.2f}s") + + start_time = time.time() + + # ── Step 4: ECAPA-TDNN per refined segment ── 
+ print("\n[Step 4] Speaker embedding extraction...") + t4 = time.time() + audio_segments = [] + for start_sec, end_sec in refined_segments: + s = int(start_sec * sample_rate) + e = int(end_sec * sample_rate) + audio_segments.append(wav[s:min(e, len(wav))]) + + from speaker_encoder import extract_speaker_embeddings_batch, normalize_embeddings + embeddings = extract_speaker_embeddings_batch( + self.speaker_encoder, audio_segments, sample_rate + ) + embeddings = normalize_embeddings(embeddings) + print(f" Embeddings: {embeddings.shape}") + print(f" Step 4 time: {time.time() - t4:.2f}s") + + # ── Step 5: AgglomerativeClustering ── + print("\n[Step 5] Speaker clustering...") + t5 = time.time() + from speaker_cluster_fixed import robust_speaker_clustering + speaker_labels, estimated_n_speakers = robust_speaker_clustering( + embeddings, n_speakers=None, max_speakers=max_speakers + ) + print(f" Speakers: {estimated_n_speakers}") + print(f" Step 5 time: {time.time() - t5:.2f}s") + + # 品質計算 + qualities = compute_embedding_quality(embeddings, speaker_labels) + + # 建立輸出 segments + segments = [] + for i, ((start_sec, end_sec), label) in enumerate( + zip(refined_segments, speaker_labels)): + seg = { + "start": round(start_sec, 3), + "end": round(end_sec, 3), + "start_frame": int(start_sec * 30), + "end_frame": int(end_sec * 30), + "text": segment_texts[i]["text"], + "language": segment_texts[i]["language"], + "lang_prob": segment_texts[i]["lang_prob"], + "speaker": f"SPEAKER_{int(label)}", + "speaker_id": f"SPEAKER_{int(label)}", + "quality": float(qualities[i]), + } + segments.append(seg) + + # 統計 + speaker_stats = {} + for seg in segments: + spk = seg["speaker_id"] + dur = seg["end"] - seg["start"] + if spk not in speaker_stats: + speaker_stats[spk] = {"count": 0, "duration": 0} + speaker_stats[spk]["count"] += 1 + speaker_stats[spk]["duration"] += dur + + result = { + "language": language or "", + "segments": segments, + "n_speakers": int(estimated_n_speakers), + 
"speaker_stats": speaker_stats, + "total_duration": total_duration, + "n_segments": len(segments), + } + + # ── Step 6: Store embeddings in Qdrant ── + if file_uuid: + print("\n[Step 6] Storing embeddings in Qdrant...") + t6 = time.time() + self._store_speaker_embeddings(segments, embeddings, speaker_labels, + file_uuid) + print(f" Step 6 time: {time.time() - t6:.2f}s") + + # ── Step 7: High-quality classification ── + if file_uuid: + print("\n[Step 7] Classifying high-quality embeddings...") + t7 = time.time() + references = self._classify_high_quality_speakers( + segments, embeddings, speaker_labels, file_uuid, + wav, sample_rate, quality_threshold + ) + if references: + result["references"] = references + print(f" Step 7 time: {time.time() - t7:.2f}s") + + total_time = time.time() - start_time + result["processing_time"] = round(total_time, 2) + if total_duration > 0: + result["realtime_factor"] = round(total_duration / total_time, 2) + + # 保存輸出 + if output_path: + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + print(f"\n[SelfASRX] Saved to: {output_path}") + + print(f"\n[SelfASRX] Done! 
{len(segments)} segments, " + f"{estimated_n_speakers} speakers, " + f"{total_time:.2f}s") + + return result + + # ── Internal helpers ── + + def _vad_scan_segment(self, wav, sample_rate, start_sec, end_sec): + """VAD 細切單一段落""" + from vad import scan_within_segment + return scan_within_segment( + wav, sample_rate, start_sec, end_sec, + self.vad_model, self.vad_utils + ) + + def _transcribe_segment(self, wav, sample_rate, start_sec, end_sec): + """轉錄單一段落""" + from whisper_local import transcribe_segment + return transcribe_segment(wav, sample_rate, start_sec, end_sec, self.whisper) + + def _store_speaker_embeddings(self, segments, embeddings, labels, file_uuid): + """Step 6: 所有 embedding 存入 Qdrant""" + if not self._ensure_qdrant(): + return + + points = [] + for i, (seg, emb, label) in enumerate( + zip(segments, embeddings, labels)): + point_id = _hash_point_id(file_uuid, f"{i}") + points.append({ + "id": point_id, + "vector": emb.tolist(), + "payload": { + "type": "speaker_embedding", + "file_uuid": file_uuid, + "speaker_id": seg["speaker_id"], + "text": seg["text"], + "language": seg["language"], + "start_time": seg["start"], + "end_time": seg["end"], + } + }) + + ok = _qdrant_upsert(self.qdrant_url, self.qdrant_api_key, + self.qdrant_collection, points) + if ok: + print(f" Stored {len(points)} speaker embeddings to Qdrant") + return ok + + def _classify_high_quality_speakers(self, segments, embeddings, labels, + file_uuid, wav, sample_rate, + threshold=0.85): + """Step 7: 高品質聲紋分級 + 性別分類 → Qdrant reference""" + qualities = compute_embedding_quality(embeddings, labels) + high_mask = qualities >= threshold + + if not np.any(high_mask): + print(" No high-quality embeddings found") + return [] + + unique_labels = set(labels) + references = [] + for label in unique_labels: + mask = (labels == label) & high_mask + if not np.any(mask): + continue + high_indices = [i for i in range(len(segments)) if mask[i]] + high_segs = [segments[i] for i in high_indices] + + # 取品質最高的 
segment index + best_idx = high_indices[int(np.argmax(qualities[mask]))] + best_seg = segments[best_idx] + + centroid = np.mean(embeddings[mask], axis=0) + norm = np.linalg.norm(centroid) + if norm > 0: + centroid = centroid / norm + + avg_quality = float(np.mean(qualities[mask])) + speaker_id = f"SPEAKER_{int(label)}" + text_samples = [s["text"] for s in high_segs[:5] if s["text"]] + total_dur = sum(s["end"] - s["start"] for s in high_segs) + + ref_id = _hash_point_id(file_uuid, f"ref_{label}") + ref_payload = { + "type": "speaker_reference", + "file_uuid": file_uuid, + "speaker_id": speaker_id, + "n_segments": int(np.sum(mask)), + "avg_quality": avg_quality, + "total_duration": round(total_dur, 2), + "language": best_seg.get("language", ""), + "text_samples": text_samples, + } + + # 性別分類:用最佳 segment 的音頻 + if self.gender_classifier is not None: + try: + import torch + s = int(best_seg["start"] * sample_rate) + e = int(best_seg["end"] * sample_rate) + seg_wav = wav[s:min(e, len(wav))] + seg_tensor = torch.from_numpy(seg_wav).float().unsqueeze(0) + # SpeechBrain gender classifier 接受音頻 + out = self.gender_classifier.classify_batch(seg_tensor) + probs = torch.softmax(out[0], dim=-1).squeeze().cpu().detach().numpy() + if len(probs) >= 2: + idx = int(np.argmax(probs)) + ref_payload["gender"] = "male" if idx == 0 else "female" + ref_payload["gender_conf"] = float(probs[idx]) + else: + ref_payload["gender"] = "unknown" + ref_payload["gender_conf"] = 0.0 + except Exception as e: + print(f"[Gender] Classify error: {e}") + ref_payload["gender"] = "unknown" + ref_payload["gender_conf"] = 0.0 + else: + ref_payload["gender"] = "unknown" + ref_payload["gender_conf"] = 0.0 + + _qdrant_upsert(self.qdrant_url, self.qdrant_api_key, + self.qdrant_collection, [{ + "id": ref_id, + "vector": centroid.tolist(), + "payload": ref_payload, + }]) + + references.append({ + "speaker_id": speaker_id, + "n_segments": int(np.sum(mask)), + "avg_quality": avg_quality, + "gender": 
ref_payload["gender"], + }) + + print(f" Ref: {speaker_id}, gender={ref_payload['gender']}" + f" ({ref_payload['gender_conf']:.2f}), q={avg_quality:.3f}") + + return references + + def _ensure_qdrant(self): + """確保 Qdrant collection 可用""" + if not self._qdrant_ok: + ok = _ensure_speaker_collection( + self.qdrant_url, self.qdrant_api_key, self.qdrant_collection + ) + self._qdrant_ok = ok + return self._qdrant_ok + def main(): import argparse - - parser = argparse.ArgumentParser(description="Self-implemented ASRX (Fixed)") - parser.add_argument("audio_path", help="Path to audio file") + parser = argparse.ArgumentParser(description="SelfASRX - Hybrid Speaker Diarization") + parser.add_argument("audio_path", help="Path to audio file (WAV)") parser.add_argument("-o", "--output", help="Output JSON path") - parser.add_argument("--min-speech-duration", type=int, default=500) - parser.add_argument("--n-speakers", type=int, default=None) + parser.add_argument("--file-uuid", help="File UUID for Qdrant storage") parser.add_argument("--max-speakers", type=int, default=10) - + parser.add_argument("--quality-threshold", type=float, default=0.85) + parser.add_argument("--resume", help="Checkpoint path to resume from") + parser.add_argument("--checkpoint", help="Save checkpoint path after Step 3") args = parser.parse_args() - - if not Path(args.audio_path).exists(): - print(f"Error: Audio file not found: {args.audio_path}") - sys.exit(1) - + asrx = SelfASRXFixed() - result = asrx.process( - args.audio_path, - args.output, - min_speech_duration_ms=args.min_speech_duration, - n_speakers=args.n_speakers, - max_speakers=args.max_speakers - ) - + + if args.resume: + if not Path(args.resume).exists(): + print(f"Error: Checkpoint not found: {args.resume}") + sys.exit(1) + result = asrx.resume_from_checkpoint( + args.resume, args.audio_path, + output_path=args.output, + ) + else: + if not Path(args.audio_path).exists(): + print(f"Error: Audio file not found: {args.audio_path}") + 
sys.exit(1) + + result = asrx.process( + args.audio_path, + output_path=args.output, + file_uuid=args.file_uuid, + max_speakers=args.max_speakers, + quality_threshold=args.quality_threshold, + checkpoint_path=args.checkpoint, + ) + if "error" not in result: print("\n[Summary]") - print(f" Audio duration: {result['total_duration']:.2f}s") - print(f" Speech segments: {result['n_speech_segments']}") - print(f" Detected speakers: {result['n_speakers']}") - print(f" Processing time: {result['processing_time']:.2f}s") - print(f" Realtime factor: {result['realtime_factor']:.2f}x") - - print("\n[Speaker Statistics]") - for speaker, stats in result['speaker_stats'].items(): - pct = stats['duration'] / result['total_duration'] * 100 - print(f" {speaker}: {stats['count']} segments, " + - f"{stats['duration']:.2f}s ({pct:.1f}%)") + print(f" Duration: {result['total_duration']:.2f}s") + print(f" Segments: {result['n_segments']}") + print(f" Speakers: {result['n_speakers']}") + if "references" in result: + for ref in result["references"]: + print(f" {ref['speaker_id']}: gender={ref['gender']}, " + f"quality={ref['avg_quality']:.3f}") if __name__ == "__main__": diff --git a/scripts/asrx_self/speaker_audio_player.py b/scripts/asrx_self/speaker_audio_player.py deleted file mode 100644 index 7f26275..0000000 --- a/scripts/asrx_self/speaker_audio_player.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Speaker Audio Player - 說話人語音播放器 -從 ASRX 結果中提取並播放每個說話人的語音片段 -""" - -import json -import argparse -import subprocess -import tempfile -import os -from pathlib import Path -from typing import List, Dict - - -def load_asrx_result(result_path: str) -> Dict: - """載入 ASRX 結果""" - with open(result_path, "r", encoding="utf-8") as f: - return json.load(f) - - -def extract_audio_segment( - audio_path: str, start_sec: float, end_sec: float, output_path: str -) -> bool: - """ - 使用 ffmpeg 提取音頻片段 - - Args: - audio_path: 原始音頻路徑 - start_sec: 開始時間(秒) - end_sec: 結束時間(秒) - 
output_path: 輸出路徑 - - Returns: - bool: 是否成功 - """ - duration = end_sec - start_sec - - cmd = [ - "ffmpeg", - "-y", - "-i", - audio_path, - "-ss", - str(start_sec), - "-t", - str(duration), - "-acodec", - "pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - output_path, - ] - - try: - result = subprocess.run(cmd, capture_output=True, text=True) - return result.returncode == 0 - except Exception as e: - print(f"Error extracting audio: {e}") - return False - - -def play_audio(audio_path: str) -> bool: - """ - 播放音頻文件 - - 使用 macOS 的 afplay 或 Linux 的 aplay - """ - try: - # 嘗試使用 afplay (macOS) - if os.path.exists("/usr/bin/afplay"): - subprocess.run(["afplay", audio_path], check=True) - # 嘗試使用 aplay (Linux) - elif os.path.exists("/usr/bin/aplay"): - subprocess.run(["aplay", audio_path], check=True) - else: - print( - "No audio player found. Please install afplay (macOS) or aplay (Linux)" - ) - return False - return True - except Exception as e: - print(f"Error playing audio: {e}") - return False - - -def group_segments_by_speaker(segments: List[Dict]) -> Dict[str, List[Dict]]: - """將語音片段按說話人分組""" - speaker_segments = {} - - for seg in segments: - speaker = seg["speaker"] - if speaker not in speaker_segments: - speaker_segments[speaker] = [] - speaker_segments[speaker].append(seg) - - # 按開始時間排序 - for speaker in speaker_segments: - speaker_segments[speaker].sort(key=lambda x: x["start"]) - - return speaker_segments - - -def play_speaker_segments( - audio_path: str, - result_path: str, - speaker_id: str = None, - limit: int = None, - temp_dir: str = None, -): - """ - 播放指定說話人的語音片段 - - Args: - audio_path: 原始音頻路徑 - result_path: ASRX 結果 JSON 路徑 - speaker_id: 說話人 ID(None=播放所有) - limit: 最多播放幾個片段(None=全部) - temp_dir: 臨時目錄 - """ - # 載入結果 - print(f"[Load] Loading ASRX result: {result_path}") - result = load_asrx_result(result_path) - - segments = result.get("segments", []) - total_duration = result.get("total_duration", 0) - - print(f"[Info] Total segments: {len(segments)}") - 
print(f"[Info] Total duration: {total_duration / 60:.1f} minutes") - - # 分組 - speaker_segments = group_segments_by_speaker(segments) - - # 選擇說話人 - if speaker_id: - speakers_to_play = [speaker_id] - else: - speakers_to_play = sorted(speaker_segments.keys()) - - # 創建臨時目錄 - if temp_dir is None: - temp_dir = tempfile.mkdtemp(prefix="speaker_audio_") - - print(f"\n[Info] Temp directory: {temp_dir}") - print(f"[Info] Speakers to play: {speakers_to_play}") - print("=" * 60) - - # 播放每個說話人的片段 - for speaker in speakers_to_play: - if speaker not in speaker_segments: - print(f"\n[Warning] Speaker {speaker} not found!") - continue - - segs = speaker_segments[speaker] - if limit: - segs = segs[:limit] - - print(f"\n▶️ {speaker} ({len(segs)} segments)") - print("-" * 60) - - for i, seg in enumerate(segs, 1): - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - - # 提取音頻 - temp_audio = os.path.join(temp_dir, f"{speaker}_{i:03d}.wav") - - print( - f" [{i:3d}] {start:7.2f}s - {end:7.2f}s ({duration:5.2f}s) ... 
", - end="", - flush=True, - ) - - if extract_audio_segment(audio_path, start, end, temp_audio): - print("✅", end="", flush=True) - - # 播放 - if play_audio(temp_audio): - print(" ▶️ Played") - else: - print(" ❌ Play failed") - else: - print(" ❌ Extract failed") - - print() - - -def show_speaker_stats(result_path: str): - """顯示說話人統計資訊""" - result = load_asrx_result(result_path) - - segments = result.get("segments", []) - speaker_segments = group_segments_by_speaker(segments) - - print("\n" + "=" * 60) - print("說話人統計") - print("=" * 60) - - # 按時長排序 - speaker_stats = [] - for speaker, segs in speaker_segments.items(): - total_duration = sum(seg["duration"] for seg in segs) - speaker_stats.append((speaker, len(segs), total_duration)) - - speaker_stats.sort(key=lambda x: x[2], reverse=True) - - total_duration = result.get("total_duration", 0) - - for speaker, count, duration in speaker_stats: - pct = duration / total_duration * 100 if total_duration > 0 else 0 - print(f"{speaker:12} {count:4} segments {duration:8.1f}s ({pct:5.1f}%)") - - print("=" * 60) - - -def main(): - parser = argparse.ArgumentParser( - description="Speaker Audio Player - 播放說話人語音片段", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # 顯示說話人統計 - python3 speaker_audio_player.py --stats result.json - - # 播放所有說話人的前 3 個片段 - python3 speaker_audio_player.py audio.wav result.json --limit 3 - - # 播放特定說話人的所有片段 - python3 speaker_audio_player.py audio.wav result.json --speaker SPEAKER_0 - - # 播放 SPEAKER_1 的前 5 個片段 - python3 speaker_audio_player.py audio.wav result.json --speaker SPEAKER_1 --limit 5 - """, - ) - - parser.add_argument("audio_path", nargs="?", help="原始音頻文件路徑") - parser.add_argument("result_path", help="ASRX 結果 JSON 路徑") - parser.add_argument("--stats", action="store_true", help="只显示說話人統計") - parser.add_argument("--speaker", type=str, help="指定說話人 ID(如 SPEAKER_0)") - parser.add_argument( - "--limit", - type=int, - default=None, - help="每個說話人最多播放幾個片段(None=全部)", - ) - 
parser.add_argument("--temp-dir", type=str, default=None, help="臨時目錄路徑") - - args = parser.parse_args() - - if args.stats: - show_speaker_stats(args.result_path) - return - - if not args.audio_path: - print("Error: audio_path is required unless --stats is specified") - parser.print_help() - return - - if not Path(args.audio_path).exists(): - print(f"Error: Audio file not found: {args.audio_path}") - return - - if not Path(args.result_path).exists(): - print(f"Error: Result file not found: {args.result_path}") - return - - play_speaker_segments( - args.audio_path, - args.result_path, - speaker_id=args.speaker, - limit=args.limit, - temp_dir=args.temp_dir, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/speaker_classifier.py b/scripts/asrx_self/speaker_classifier.py new file mode 100644 index 0000000..f22f20d --- /dev/null +++ b/scripts/asrx_self/speaker_classifier.py @@ -0,0 +1,65 @@ +""" +Speaker Classifier - 聲紋品質評估與性別分類 + +提供品質計算與性別分類功能,作為 main_fixed.py 的輔助模組。 +""" + +import numpy as np + + +def compute_embedding_quality(embeddings, labels): + """每個 embedding 到所屬 cluster centroid 的餘弦相似度 + + Args: + embeddings: [n_segments, 192] 聲紋向量矩陣 + labels: [n_segments] 聚類標籤 + + Returns: + qualities: [n_segments] 品質分數 (0-1) + """ + from sklearn.metrics.pairwise import cosine_similarity + + unique_labels = set(labels) + centroids = {} + for label in unique_labels: + mask = labels == label + centroid = np.mean(embeddings[mask], axis=0) + norm = np.linalg.norm(centroid) + if norm > 0: + centroid = centroid / norm + centroids[label] = centroid + + qualities = [] + for emb, label in zip(embeddings, labels): + sim = cosine_similarity([emb], [centroids[label]])[0][0] + qualities.append(sim) + + return np.array(qualities) + + +def classify_gender(audio_wav, sample_rate, classifier): + """從音頻段分類性別 + + Args: + audio_wav: 音頻波形 (numpy array) + sample_rate: 採樣率 + classifier: SpeechBrain EncoderClassifier (gender-recognition-ecapa) + + Returns: + dict: {"gender": 
"male"|"female"|"unknown", "confidence": float} + """ + default = {"gender": "unknown", "confidence": 0.0} + if classifier is None or len(audio_wav) == 0: + return default + try: + import torch + seg_tensor = torch.from_numpy(audio_wav).float().unsqueeze(0) + out = classifier.classify_batch(seg_tensor) + probs = torch.softmax(out[0], dim=-1).squeeze().cpu().detach().numpy() + if len(probs) >= 2: + idx = int(np.argmax(probs)) + label = "male" if idx == 0 else "female" + return {"gender": label, "confidence": float(probs[idx])} + except Exception as e: + pass + return default diff --git a/scripts/asrx_self/speaker_cluster.py b/scripts/asrx_self/speaker_cluster.py deleted file mode 100644 index 6c60a9a..0000000 --- a/scripts/asrx_self/speaker_cluster.py +++ /dev/null @@ -1,310 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Speaker Clustering - 說話人聚類 -使用譜聚類算法將聲紋嵌入分組 - -技術來源: -- 譜聚類:Shi & Malik (2000), IEEE TPAMI -- 論文:https://ieeexplore.ieee.org/document/868688 -- 應用於說話人分離:Wooters & Huijbregts (2008), ICASSP -""" - -import numpy as np -from sklearn.cluster import SpectralClustering, AgglomerativeClustering -from sklearn.metrics.pairwise import cosine_similarity - - -def estimate_n_speakers_eigengap(similarity_matrix, max_speakers=10): - """ - 使用特徵值間隙方法估計說話人數量 - - 技術來源: - - 特徵值間隙理論:Lu et al. 
(2010) - - 原理:相似度矩陣的特徵值分佈中,最大間隙對應最佳聚類數 - - Args: - similarity_matrix: 相似度矩陣 [n, n] - max_speakers: 最大說話人數 - - Returns: - n_speakers: 估計的說話人數量 - """ - # 計算特徵值 - eigenvalues = np.linalg.eigvalsh(similarity_matrix) - - # 降序排列 - eigenvalues = np.sort(eigenvalues)[::-1] - - # 只考慮前 max_speakers 個特徵值 - eigenvalues = eigenvalues[:max_speakers] - - # 計算間隙 - gaps = np.diff(eigenvalues) - - # 找到最大間隙的位置 - if len(gaps) > 0: - n_speakers = np.argmax(np.abs(gaps)) + 1 - else: - n_speakers = 1 - - # 限制範圍 - n_speakers = max(2, min(n_speakers, max_speakers)) - - return n_speakers - - -def estimate_n_speakers_silhouette(embeddings, max_speakers=10): - """ - 使用輪廓係數估計說話人數量 - - Args: - embeddings: 嵌入矩陣 [n, d] - max_speakers: 最大說話人數 - - Returns: - n_speakers: 估計的說話人數量 - """ - from sklearn.metrics import silhouette_score - - best_score = -1 - best_n = 2 - - for n in range(2, min(max_speakers + 1, len(embeddings))): - clustering = AgglomerativeClustering(n_clusters=n) - labels = clustering.fit_predict(embeddings) - - if len(np.unique(labels)) > 1: - score = silhouette_score(embeddings, labels) - if score > best_score: - best_score = score - best_n = n - - return best_n - - -def spectral_clustering_speaker( - similarity_matrix, n_speakers=None, auto_estimate=True, max_speakers=10 -): - """ - 使用譜聚類進行說話人分離 - - Args: - similarity_matrix: 相似度矩陣 [n, n] - n_speakers: 說話人數量(可選,如果為 None 則自動估計) - auto_estimate: 是否自動估計說話人數量 - max_speakers: 最大說話人數 - - Returns: - speaker_labels: 說話人標籤 [n,] - n_speakers: 使用的說話人數量 - """ - n_segments = len(similarity_matrix) - - # 清洗相似度矩陣 - similarity_matrix = np.nan_to_num( - similarity_matrix, nan=0.5, posinf=1.0, neginf=-1.0 - ) - - # 確保對角線為 1 - np.fill_diagonal(similarity_matrix, 1.0) - - # 確保值在 [-1, 1] 範圍 - similarity_matrix = np.clip(similarity_matrix, -1.0, 1.0) - - # 自動估計說話人數量 - if n_speakers is None and auto_estimate: - n_speakers = estimate_n_speakers_eigengap( - similarity_matrix, max_speakers=max_speakers - ) - print(f"[Clustering] Estimated n_speakers: 
{n_speakers}") - - if n_speakers is None: - n_speakers = 2 # 預設值 - - # 確保 n_speakers 不超過樣本數 - n_speakers = min(n_speakers, n_segments) - - print(f"[Clustering] Running spectral clustering with {n_speakers} clusters...") - - # 譜聚類 - try: - clustering = SpectralClustering( - n_clusters=int(n_speakers), - affinity="precomputed", - assign_labels="kmeans", - random_state=42, - n_init=10, - ) - - speaker_labels = clustering.fit_predict(similarity_matrix) - - print("[Clustering] Spectral clustering completed") - print(f"[Clustering] n_speakers: {n_speakers}") - print(f"[Clustering] n_segments: {n_segments}") - - return speaker_labels, n_speakers - - except Exception as e: - print(f"[Clustering] Spectral clustering failed: {e}") - print("[Clustering] Using fallback: 2 speakers") - # 簡單分配:前一半是 SPEAKER_0,後一半是 SPEAKER_1 - speaker_labels = np.array( - [0] * (n_segments // 2) + [1] * (n_segments - n_segments // 2) - ) - return speaker_labels, 2 - - -def agglomerative_clustering_speaker( - embeddings, n_speakers=None, threshold=0.5, max_speakers=10 -): - """ - 使用層次聚類進行說話人分離 - - Args: - embeddings: 嵌入矩陣 [n, d] - n_speakers: 說話人數量(可選) - threshold: 距離閾值(用於自動決定聚類數) - max_speakers: 最大說話人數 - - Returns: - speaker_labels: 說話人標籤 [n,] - n_speakers: 使用的說話人數量 - """ - n_segments = len(embeddings) - - if n_speakers is None: - # 使用距離閾值自動決定 - from sklearn.metrics.pairwise import cosine_distances - - distances = cosine_distances(embeddings) - - # 計算平均最近鄰距離 - avg_distances = [] - for i in range(min(100, n_segments)): - dists = distances[i] - dists = np.sort(dists) - if len(dists) > 1: - avg_distances.append(dists[1]) # 最近鄰(排除自己) - - if avg_distances: - avg_dist = np.mean(avg_distances) - # 根據平均距離估計聚類數 - n_speakers = max(2, int(avg_dist / threshold)) - n_speakers = min(n_speakers, max_speakers) - else: - n_speakers = 2 - - n_speakers = min(n_speakers, n_segments) - - # 層次聚類 - clustering = AgglomerativeClustering( - n_clusters=n_speakers, metric="cosine", linkage="average" - ) - - speaker_labels = 
clustering.fit_predict(embeddings) - - print("[Clustering] Agglomerative clustering completed") - print(f"[Clustering] n_speakers: {n_speakers}") - - return speaker_labels, n_speakers - - -def smooth_speaker_labels(speaker_labels, window_size=5): - """ - 平滑說話人標籤(去除噪聲) - - Args: - speaker_labels: 原始說話人標籤 - window_size: 平滑窗口大小 - - Returns: - smoothed_labels: 平滑後的標籤 - """ - from scipy import stats - - smoothed = np.copy(speaker_labels) - half_window = window_size // 2 - - for i in range(len(speaker_labels)): - start = max(0, i - half_window) - end = min(len(speaker_labels), i + half_window + 1) - - window_labels = speaker_labels[start:end] - mode_result = stats.mode(window_labels, keepdims=True) - smoothed[i] = mode_result.mode[0] - - return smoothed - - -def compute_diarization_purity(speaker_labels, ground_truth_labels=None): - """ - 計算說話人分離純度(如果有 ground truth) - - Args: - speaker_labels: 預測的說話人標籤 - ground_truth_labels: 真實的說話人標籤(可選) - - Returns: - purity: 純度分數(0-1) - """ - if ground_truth_labels is None: - # 沒有 ground truth,使用聚類純度近似 - - # 使用餘弦相似度作為距離 - purity = 0.5 # 預設值 - else: - # 計算純度 - from sklearn.metrics import adjusted_rand_score - - purity = adjusted_rand_score(ground_truth_labels, speaker_labels) - - return purity - - -if __name__ == "__main__": - # 測試聚類算法 - print("[Test] Testing speaker clustering algorithms") - - # 生成模擬數據 - np.random.seed(42) - n_speakers = 3 - n_segments_per_speaker = 20 - - # 生成 3 個說話人的嵌入 - embeddings = [] - for i in range(n_speakers): - # 每個說話人有不同的中心 - center = np.random.randn(192) * 2 + i * 3 - # 添加噪聲 - for _ in range(n_segments_per_speaker): - emb = center + np.random.randn(192) * 0.5 - embeddings.append(emb) - - embeddings = np.array(embeddings) - print(f"[Test] Generated {len(embeddings)} embeddings for {n_speakers} speakers") - - # 計算相似度矩陣 - similarity = cosine_similarity(embeddings) - print(f"[Test] Similarity matrix shape: {similarity.shape}") - - # 估計說話人數量 - estimated_n = estimate_n_speakers_eigengap(similarity, 
max_speakers=10) - print(f"[Test] Estimated n_speakers (eigengap): {estimated_n}") - - estimated_n_silhouette = estimate_n_speakers_silhouette(embeddings, max_speakers=10) - print(f"[Test] Estimated n_speakers (silhouette): {estimated_n_silhouette}") - - # 譜聚類 - labels, n_clusters = spectral_clustering_speaker( - similarity, n_speakers=None, auto_estimate=True - ) - - print("\n[Test] Clustering results:") - print(f" True n_speakers: {n_speakers}") - print(f" Estimated n_speakers: {n_clusters}") - print(f" Unique labels: {np.unique(labels)}") - - # 計算每個聚類的大小 - for label in np.unique(labels): - count = np.sum(labels == label) - print(f" Cluster {label}: {count} segments") diff --git a/scripts/asrx_self/speaker_player_gui.py b/scripts/asrx_self/speaker_player_gui.py deleted file mode 100644 index fe21d1f..0000000 --- a/scripts/asrx_self/speaker_player_gui.py +++ /dev/null @@ -1,431 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Speaker Player GUI - 說話人語音播放器(圖形界面) -使用 tkinter 顯示播放進度和 Speaker ID -""" - -import json -import subprocess -import tempfile -import os -import threading -import time -from pathlib import Path - -try: - import tkinter as tk - from tkinter import ttk, filedialog, messagebox - - HAS_TKINTER = True -except ImportError: - HAS_TKINTER = False - - -class SpeakerPlayerGUI: - """說話人語音播放器 GUI""" - - def __init__(self, root): - self.root = root - self.root.title("🎬 Speaker Audio Player - Face Integration") - self.root.geometry("1100x800") - - # 數據 - self.audio_path = None - self.result_path = None - self.face_path = None - self.result_data = None - self.face_data = None - self.integrated_data = None - self.speaker_segments = {} - self.speakers = [] - self.current_speaker_idx = 0 - self.is_playing = False - self.stop_flag = False - - # 創建界面 - self.create_widgets() - - def create_widgets(self): - """創建界面組件""" - # 頂部:文件選擇 - top_frame = ttk.Frame(self.root, padding="10") - top_frame.pack(fill=tk.X) - - ttk.Label(top_frame, text="📁 Audio:").pack(side=tk.LEFT) - 
self.audio_label = ttk.Label(top_frame, text="未選擇", width=50) - self.audio_label.pack(side=tk.LEFT, padx=5) - ttk.Button(top_frame, text="選擇音頻", command=self.select_audio).pack( - side=tk.LEFT, padx=5 - ) - - ttk.Label(top_frame, text=" 📊 Result:").pack(side=tk.LEFT, padx=(20, 0)) - self.result_label = ttk.Label(top_frame, text="未選擇", width=50) - self.result_label.pack(side=tk.LEFT, padx=5) - ttk.Button(top_frame, text="選擇結果", command=self.select_result).pack( - side=tk.LEFT, padx=5 - ) - - # 中間:說話人列表和片段列表 - mid_frame = ttk.Frame(self.root, padding="10") - mid_frame.pack(fill=tk.BOTH, expand=True) - - # 左側:說話人列表 - left_frame = ttk.LabelFrame(mid_frame, text="📢 說話人列表", padding="10") - left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False) - - self.speaker_listbox = tk.Listbox( - left_frame, width=35, height=20, font=("Arial", 11) - ) - self.speaker_listbox.pack(fill=tk.BOTH, expand=True) - self.speaker_listbox.bind("<>", self.on_speaker_select) - - # 右側:片段列表 - right_frame = ttk.LabelFrame(mid_frame, text="🎵 語音片段", padding="10") - right_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=10) - - # 片段列表(带滚动条) - list_frame = ttk.Frame(right_frame) - list_frame.pack(fill=tk.BOTH, expand=True) - - scrollbar = ttk.Scrollbar(list_frame) - scrollbar.pack(side=tk.RIGHT, fill=tk.Y) - - self.segment_listbox = tk.Listbox( - list_frame, - width=50, - height=20, - font=("Courier", 10), - yscrollcommand=scrollbar.set, - ) - self.segment_listbox.pack(fill=tk.BOTH, expand=True) - scrollbar.config(command=self.segment_listbox.yview) - - self.segment_listbox.bind("", self.on_segment_double_click) - - # 底部:播放控制和進度 - bottom_frame = ttk.Frame(self.root, padding="10") - bottom_frame.pack(fill=tk.X) - - # 播放控制 - control_frame = ttk.Frame(bottom_frame) - control_frame.pack(fill=tk.X) - - self.play_button = ttk.Button( - control_frame, text="▶️ 播放所選", command=self.play_selected, width=15 - ) - self.play_button.pack(side=tk.LEFT, padx=5) - - self.stop_button = ttk.Button( - 
control_frame, text="⏹️ 停止", command=self.stop_playing, width=10 - ) - self.stop_button.pack(side=tk.LEFT, padx=5) - self.stop_button.config(state=tk.DISABLED) - - self.play_all_button = ttk.Button( - control_frame, text="▶️▶️ 播放全部", command=self.play_all, width=15 - ) - self.play_all_button.pack(side=tk.LEFT, padx=5) - - # 進度條 - progress_frame = ttk.Frame(bottom_frame) - progress_frame.pack(fill=tk.X, pady=(10, 0)) - - ttk.Label(progress_frame, text="⏱️ 進度:").pack(side=tk.LEFT) - self.progress_bar = ttk.Progressbar(progress_frame, mode="determinate") - self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10) - - self.progress_label = ttk.Label(progress_frame, text="0:00 / 0:00", width=20) - self.progress_label.pack(side=tk.LEFT) - - # 狀態欄 - self.status_label = ttk.Label( - bottom_frame, text="就緒", relief=tk.SUNKEN, anchor=tk.W - ) - self.status_label.pack(fill=tk.X, pady=(10, 0)) - - def select_audio(self): - """選擇音頻文件""" - filename = filedialog.askopenfilename( - title="選擇音頻文件", - filetypes=[("WAV files", "*.wav"), ("All files", "*.*")], - ) - if filename: - self.audio_path = filename - self.audio_label.config(text=Path(filename).name) - self.check_ready() - - def select_result(self): - """選擇結果文件""" - filename = filedialog.askopenfilename( - title="選擇 ASRX 結果文件", - filetypes=[("JSON files", "*.json"), ("All files", "*.*")], - ) - if filename: - self.result_path = filename - self.result_label.config(text=Path(filename).name) - self.load_result() - self.check_ready() - - def load_result(self): - """載入 ASRX 結果""" - try: - with open(self.result_path, "r", encoding="utf-8") as f: - self.result_data = json.load(f) - - # 分組 - self.speaker_segments = {} - for seg in self.result_data.get("segments", []): - speaker = seg["speaker"] - if speaker not in self.speaker_segments: - self.speaker_segments[speaker] = [] - self.speaker_segments[speaker].append(seg) - - # 排序 - for speaker in self.speaker_segments: - self.speaker_segments[speaker].sort(key=lambda x: 
x["start"]) - - # 說話人列表(按時長排序) - self.speakers = sorted( - self.speaker_segments.keys(), - key=lambda s: sum(seg["duration"] for seg in self.speaker_segments[s]), - reverse=True, - ) - - # 更新列表框 - self.speaker_listbox.delete(0, tk.END) - for speaker in self.speakers: - segs = self.speaker_segments[speaker] - total_dur = sum(seg["duration"] for seg in segs) - total_dur_min = total_dur / 60 - self.speaker_listbox.insert( - tk.END, - f"🔊 {speaker:12} | {len(segs):4d}段 | {total_dur_min:5.1f}分鐘", - ) - - self.status_label.config( - text=f"載入成功:{len(self.speakers)} 個說話人,{len(self.result_data.get('segments', []))} 個片段" - ) - - except Exception as e: - messagebox.showerror("錯誤", f"載入結果文件失敗:{e}") - self.result_path = None - self.result_label.config(text="載入失敗") - - def check_ready(self): - """檢查是否就緒""" - if self.audio_path and self.result_path: - self.status_label.config(text="✅ 就緒 - 請選擇說話人並播放") - self.play_button.config(state=tk.NORMAL) - self.play_all_button.config(state=tk.NORMAL) - else: - self.status_label.config(text="⚠️ 請選擇音頻和結果文件") - self.play_button.config(state=tk.DISABLED) - self.play_all_button.config(state=tk.DISABLED) - - def on_speaker_select(self, event): - """說話人選擇事件""" - selection = self.speaker_listbox.curselection() - if not selection: - return - - self.current_speaker_idx = selection[0] - speaker = self.speakers[self.current_speaker_idx] - - # 更新片段列表 - self.segment_listbox.delete(0, tk.END) - for i, seg in enumerate(self.speaker_segments[speaker], 1): - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - self.segment_listbox.insert( - tk.END, - f"[{i:4d}] {speaker:12} | {start:7.2f}s - {end:7.2f}s ({duration:5.2f}s)", - ) - - self.status_label.config( - text=f"選擇:{speaker} - {len(self.speaker_segments[speaker])} 個片段" - ) - - def on_segment_double_click(self, event): - """片段雙擊事件""" - self.play_selected() - - def extract_and_play(self, start_sec: float, end_sec: float) -> bool: - """提取並播放音頻""" - duration = end_sec - start_sec - 
temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) - temp_path = temp_file.name - temp_file.close() - - try: - # 提取 - cmd = [ - "ffmpeg", - "-y", - "-loglevel", - "quiet", - "-i", - self.audio_path, - "-ss", - str(start_sec), - "-t", - str(duration), - "-acodec", - "pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - temp_path, - ] - - result = subprocess.run(cmd, capture_output=True) - if result.returncode != 0: - return False - - # 播放 - if os.path.exists("/usr/bin/afplay"): - subprocess.run(["afplay", temp_path], capture_output=True) - elif os.path.exists("/usr/bin/aplay"): - subprocess.run(["aplay", temp_path], capture_output=True) - else: - return False - - return True - finally: - if os.path.exists(temp_path): - os.unlink(temp_path) - - def play_segment(self, speaker: str, seg: dict, seg_idx: int, total: int): - """播放單個片段""" - if self.stop_flag: - return False - - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - - # 更新 UI - self.root.after( - 0, - lambda: self.status_label.config( - text=f"▶️ {speaker} [{seg_idx}/{total}] {start:.2f}s - {end:.2f}s" - ), - ) - - # 更新進度 - progress = (seg_idx / total) * 100 - self.root.after(0, lambda: self.progress_bar.config(value=progress)) - self.root.after( - 0, lambda: self.progress_label.config(text=f"{seg_idx}:{total}") - ) - - # 播放 - if self.extract_and_play(start, end): - return True - else: - self.root.after( - 0, - lambda: messagebox.showwarning( - "警告", f"播放失敗:{speaker} [{seg_idx}]" - ), - ) - return True - - def play_selected(self): - """播放所選片段""" - selection = self.segment_listbox.curselection() - if not selection: - # 如果沒選擇,播放第一個 - if self.speakers: - speaker = self.speakers[self.current_speaker_idx] - segs = self.speaker_segments[speaker] - if segs: - self.play_all() - return - - # 播放所選 - seg_idx = selection[0] - speaker = self.speakers[self.current_speaker_idx] - seg = self.speaker_segments[speaker][seg_idx] - - self.is_playing = True - self.stop_flag = False - 
self.play_button.config(state=tk.DISABLED) - self.stop_button.config(state=tk.NORMAL) - - # 在後台線程播放 - def play_thread(): - success = self.play_segment(speaker, seg, seg_idx + 1, 1) - self.root.after(0, lambda: self.on_play_done()) - - thread = threading.Thread(target=play_thread, daemon=True) - thread.start() - - def play_all(self): - """播放所選說話人的所有片段""" - if not self.speakers: - return - - speaker = self.speakers[self.current_speaker_idx] - segs = self.speaker_segments[speaker] - - if not segs: - return - - self.is_playing = True - self.stop_flag = False - self.play_button.config(state=tk.DISABLED) - self.play_all_button.config(state=tk.DISABLED) - self.stop_button.config(state=tk.NORMAL) - - # 在後台線程播放 - def play_thread(): - for i, seg in enumerate(segs, 1): - if self.stop_flag: - break - self.play_segment(speaker, seg, i, len(segs)) - time.sleep(0.3) # 片段間隔 - - self.root.after(0, lambda: self.on_play_done()) - - thread = threading.Thread(target=play_thread, daemon=True) - thread.start() - - def stop_playing(self): - """停止播放""" - self.stop_flag = True - self.is_playing = False - self.on_play_done() - - def on_play_done(self): - """播放完成""" - self.is_playing = False - self.stop_flag = False - self.play_button.config(state=tk.NORMAL) - self.play_all_button.config(state=tk.NORMAL) - self.stop_button.config(state=tk.DISABLED) - self.progress_bar.config(value=0) - self.progress_label.config(text="0:00 / 0:00") - - if self.stop_flag: - self.status_label.config(text="⏹️ 已停止") - else: - self.status_label.config(text="✅ 播放完成") - - -def main(): - """主函數""" - if not HAS_TKINTER: - print("❌ tkinter 未安裝") - print("請使用以下命令安裝:") - print(" brew install python-tk@3.9") - return - - root = tk.Tk() - app = SpeakerPlayerGUI(root) - root.mainloop() - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/speaker_player_gui_face.py b/scripts/asrx_self/speaker_player_gui_face.py deleted file mode 100644 index 22c1f51..0000000 --- 
a/scripts/asrx_self/speaker_player_gui_face.py +++ /dev/null @@ -1,522 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Speaker Player GUI - 說話人語音播放器(Face 整合版) -使用 tkinter 顯示播放進度、Speaker ID 和人臉信息 -""" - -import json -import subprocess -import tempfile -import os -import threading -import time -from pathlib import Path - -try: - import tkinter as tk - from tkinter import ttk, filedialog, messagebox - - HAS_TKINTER = True -except ImportError: - HAS_TKINTER = False - - -class SpeakerPlayerGUI: - """說話人語音播放器 GUI(Face 整合版)""" - - def __init__(self, root): - self.root = root - self.root.title("🎬 Speaker Player - Face Integration") - self.root.geometry("1200x800") - - # 數據 - self.audio_path = None - self.result_path = None - self.face_path = None - self.result_data = None - self.face_data = None - self.integrated_data = None - self.speaker_segments = {} - self.speakers = [] - self.current_speaker_idx = 0 - self.is_playing = False - self.stop_flag = False - - # 創建界面 - self.create_widgets() - - def create_widgets(self): - """創建界面組件""" - # 頂部:文件選擇 - top_frame = ttk.Frame(self.root, padding="10") - top_frame.pack(fill=tk.X) - - # 第一行:音頻和 ASRX 結果 - row1_frame = ttk.Frame(top_frame) - row1_frame.pack(fill=tk.X) - - ttk.Label(row1_frame, text="📁 Audio:").pack(side=tk.LEFT) - self.audio_label = ttk.Label(row1_frame, text="未選擇", width=50) - self.audio_label.pack(side=tk.LEFT, padx=5) - ttk.Button(row1_frame, text="選擇音頻", command=self.select_audio).pack( - side=tk.LEFT, padx=5 - ) - - ttk.Label(row1_frame, text=" 📊 ASRX:").pack(side=tk.LEFT, padx=(20, 0)) - self.result_label = ttk.Label(row1_frame, text="未選擇", width=50) - self.result_label.pack(side=tk.LEFT, padx=5) - ttk.Button(row1_frame, text="選擇結果", command=self.select_result).pack( - side=tk.LEFT, padx=5 - ) - - # 第二行:Face 結果 - row2_frame = ttk.Frame(top_frame) - row2_frame.pack(fill=tk.X, pady=(5, 0)) - - ttk.Label(row2_frame, text="👤 Face:").pack(side=tk.LEFT) - self.face_label = ttk.Label(row2_frame, text="未選擇 (可選)", width=50) 
- self.face_label.pack(side=tk.LEFT, padx=5) - ttk.Button(row2_frame, text="選擇 Face", command=self.select_face).pack( - side=tk.LEFT, padx=5 - ) - self.integrate_button = ttk.Button( - row2_frame, - text="🔗 整合 Face", - command=self.integrate_face, - state=tk.DISABLED, - ) - self.integrate_button.pack(side=tk.LEFT, padx=5) - - # 中間:說話人列表和片段列表 - mid_frame = ttk.Frame(self.root, padding="10") - mid_frame.pack(fill=tk.BOTH, expand=True) - - # 左側:說話人列表(帶 Face 統計) - left_frame = ttk.LabelFrame(mid_frame, text="📢 說話人列表", padding="10") - left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False) - - self.speaker_listbox = tk.Listbox( - left_frame, width=45, height=20, font=("Arial", 11) - ) - self.speaker_listbox.pack(fill=tk.BOTH, expand=True) - self.speaker_listbox.bind("<>", self.on_speaker_select) - - # 右側:片段列表(帶 Face 信息) - right_frame = ttk.LabelFrame( - mid_frame, text="🎵 語音片段 + 👥 人臉", padding="10" - ) - right_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=10) - - # 片段列表(带滚动条) - list_frame = ttk.Frame(right_frame) - list_frame.pack(fill=tk.BOTH, expand=True) - - scrollbar = ttk.Scrollbar(list_frame) - scrollbar.pack(side=tk.RIGHT, fill=tk.Y) - - self.segment_listbox = tk.Listbox( - list_frame, - width=65, - height=20, - font=("Courier", 9), - yscrollcommand=scrollbar.set, - ) - self.segment_listbox.pack(fill=tk.BOTH, expand=True) - scrollbar.config(command=self.segment_listbox.yview) - - self.segment_listbox.bind("", self.on_segment_double_click) - - # 底部:播放控制和進度 - bottom_frame = ttk.Frame(self.root, padding="10") - bottom_frame.pack(fill=tk.X) - - # 播放控制 - control_frame = ttk.Frame(bottom_frame) - control_frame.pack(fill=tk.X) - - self.play_button = ttk.Button( - control_frame, text="▶️ 播放所選", command=self.play_selected, width=15 - ) - self.play_button.pack(side=tk.LEFT, padx=5) - self.play_button.config(state=tk.DISABLED) - - self.stop_button = ttk.Button( - control_frame, text="⏹️ 停止", command=self.stop_playing, width=10 - ) - 
self.stop_button.pack(side=tk.LEFT, padx=5) - self.stop_button.config(state=tk.DISABLED) - - self.play_all_button = ttk.Button( - control_frame, text="▶️▶️ 播放全部", command=self.play_all, width=15 - ) - self.play_all_button.pack(side=tk.LEFT, padx=5) - self.play_all_button.config(state=tk.DISABLED) - - # 進度條 - progress_frame = ttk.Frame(bottom_frame) - progress_frame.pack(fill=tk.X, pady=(10, 0)) - - ttk.Label(progress_frame, text="⏱️ 進度:").pack(side=tk.LEFT) - self.progress_bar = ttk.Progressbar(progress_frame, mode="determinate") - self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10) - - self.progress_label = ttk.Label(progress_frame, text="0:00 / 0:00", width=20) - self.progress_label.pack(side=tk.LEFT) - - # 狀態欄 - self.status_label = ttk.Label( - bottom_frame, text="就緒", relief=tk.SUNKEN, anchor=tk.W - ) - self.status_label.pack(fill=tk.X, pady=(10, 0)) - - def select_audio(self): - """選擇音頻文件""" - filename = filedialog.askopenfilename( - title="選擇音頻文件", - filetypes=[("WAV files", "*.wav"), ("All files", "*.*")], - ) - if filename: - self.audio_path = filename - self.audio_label.config(text=Path(filename).name) - self.check_ready() - - def select_result(self): - """選擇 ASRX 結果文件""" - filename = filedialog.askopenfilename( - title="選擇 ASRX 結果文件", - filetypes=[("JSON files", "*.json"), ("All files", "*.*")], - ) - if filename: - self.result_path = filename - self.result_label.config(text=Path(filename).name) - self.load_result() - self.check_ready() - - def select_face(self): - """選擇 Face 結果文件""" - filename = filedialog.askopenfilename( - title="選擇 Face 檢測結果", - filetypes=[("JSON files", "*.json"), ("All files", "*.*")], - ) - if filename: - self.face_path = filename - self.face_label.config(text=Path(filename).name) - self.integrate_button.config(state=tk.NORMAL) - self.status_label.config(text="✅ Face 已選擇 - 請點擊整合") - - def integrate_face(self): - """整合 Face 與 ASRX""" - if not self.face_path or not self.result_path: - messagebox.showwarning("警告", 
"請先選擇 Face 和 ASRX 文件") - return - - self.status_label.config(text="🔄 整合中...") - self.root.update() - - try: - # 載入 Face 數據 - with open(self.face_path, "r", encoding="utf-8") as f: - self.face_data = json.load(f) - - # 重新載入 ASRX 數據並整合 - self.load_result(integrate_with_face=True) - - self.status_label.config(text="✅ Face 整合完成") - self.integrate_button.config(state=tk.DISABLED) - - except Exception as e: - messagebox.showerror("錯誤", f"整合失敗:{e}") - self.status_label.config(text="❌ 整合失敗") - - def load_result(self, integrate_with_face=False): - """載入 ASRX 結果""" - try: - with open(self.result_path, "r", encoding="utf-8") as f: - self.result_data = json.load(f) - - # 分組 - self.speaker_segments = {} - for seg in self.result_data.get("segments", []): - speaker = seg["speaker"] - if speaker not in self.speaker_segments: - self.speaker_segments[speaker] = [] - self.speaker_segments[speaker].append(seg) - - # 排序 - for speaker in self.speaker_segments: - self.speaker_segments[speaker].sort(key=lambda x: x["start"]) - - # 說話人列表(按時長排序) - self.speakers = sorted( - self.speaker_segments.keys(), - key=lambda s: sum(seg["duration"] for seg in self.speaker_segments[s]), - reverse=True, - ) - - # 更新列表框 - self.speaker_listbox.delete(0, tk.END) - for speaker in self.speakers: - segs = self.speaker_segments[speaker] - total_dur = sum(seg["duration"] for seg in segs) - total_dur_min = total_dur / 60 - - # 如果有 Face 數據,計算有人臉的片段數 - face_info = "" - if integrate_with_face and self.integrated_data: - speaker_integrated = [ - item - for item in self.integrated_data - if item["speaker"] == speaker - ] - with_face = sum( - 1 for item in speaker_integrated if item.get("has_face", False) - ) - face_info = f" | 👥 {with_face}/{len(segs)}" - - self.speaker_listbox.insert( - tk.END, - f"🔊 {speaker:12} | {len(segs):4d}段 | {total_dur_min:5.1f}分鐘{face_info}", - ) - - total_segments = len(self.result_data.get("segments", [])) - self.status_label.config( - text=f"載入成功:{len(self.speakers)} 
個說話人,{total_segments} 個片段" - ) - - except Exception as e: - messagebox.showerror("錯誤", f"載入結果文件失敗:{e}") - self.result_path = None - self.result_label.config(text="載入失敗") - - def check_ready(self): - """檢查是否就緒""" - if self.audio_path and self.result_path: - self.status_label.config(text="✅ 就緒 - 請選擇說話人並播放") - self.play_button.config(state=tk.NORMAL) - self.play_all_button.config(state=tk.NORMAL) - else: - self.status_label.config(text="⚠️ 請選擇音頻和結果文件") - self.play_button.config(state=tk.DISABLED) - self.play_all_button.config(state=tk.DISABLED) - - def on_speaker_select(self, event): - """說話人選擇事件""" - selection = self.speaker_listbox.curselection() - if not selection: - return - - self.current_speaker_idx = selection[0] - speaker = self.speakers[self.current_speaker_idx] - - # 更新片段列表 - self.segment_listbox.delete(0, tk.END) - for i, seg in enumerate(self.speaker_segments[speaker], 1): - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - - # 如果有整合 Face 數據 - face_info = "" - if self.integrated_data: - matching = [ - item - for item in self.integrated_data - if abs(item["start"] - start) < 0.1 and item["speaker"] == speaker - ] - if matching and matching[0].get("has_face", False): - face_info = " 👥✅" - elif matching: - face_info = " 👥❌" - - self.segment_listbox.insert( - tk.END, - f"[{i:4d}] {speaker:12} | {start:7.2f}s - {end:7.2f}s ({duration:5.2f}s){face_info}", - ) - - self.status_label.config( - text=f"選擇:{speaker} - {len(self.speaker_segments[speaker])} 個片段" - ) - - def on_segment_double_click(self, event): - """片段雙擊事件""" - self.play_selected() - - def extract_and_play(self, start_sec: float, end_sec: float) -> bool: - """提取並播放音頻""" - duration = end_sec - start_sec - temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) - temp_path = temp_file.name - temp_file.close() - - try: - # 提取 - cmd = [ - "ffmpeg", - "-y", - "-loglevel", - "quiet", - "-i", - self.audio_path, - "-ss", - str(start_sec), - "-t", - str(duration), - "-acodec", - 
"pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - temp_path, - ] - - result = subprocess.run(cmd, capture_output=True) - if result.returncode != 0: - return False - - # 播放 - if os.path.exists("/usr/bin/afplay"): - subprocess.run(["afplay", temp_path], capture_output=True) - elif os.path.exists("/usr/bin/aplay"): - subprocess.run(["aplay", temp_path], capture_output=True) - else: - return False - - return True - finally: - if os.path.exists(temp_path): - os.unlink(temp_path) - - def play_segment(self, speaker: str, seg: dict, seg_idx: int, total: int): - """播放單個片段""" - if self.stop_flag: - return False - - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - - # 更新 UI - self.root.after( - 0, - lambda: self.status_label.config( - text=f"▶️ {speaker} [{seg_idx}/{total}] {start:.2f}s - {end:.2f}s" - ), - ) - - # 更新進度 - progress = (seg_idx / total) * 100 - self.root.after(0, lambda: self.progress_bar.config(value=progress)) - self.root.after( - 0, lambda: self.progress_label.config(text=f"{seg_idx}:{total}") - ) - - # 播放 - if self.extract_and_play(start, end): - return True - else: - self.root.after( - 0, - lambda: messagebox.showwarning( - "警告", f"播放失敗:{speaker} [{seg_idx}]" - ), - ) - return True - - def play_selected(self): - """播放所選片段""" - selection = self.segment_listbox.curselection() - if not selection: - # 如果沒選擇,播放第一個 - if self.speakers: - speaker = self.speakers[self.current_speaker_idx] - segs = self.speaker_segments[speaker] - if segs: - self.play_all() - return - - # 播放所選 - seg_idx = selection[0] - speaker = self.speakers[self.current_speaker_idx] - seg = self.speaker_segments[speaker][seg_idx] - - self.is_playing = True - self.stop_flag = False - self.play_button.config(state=tk.DISABLED) - self.stop_button.config(state=tk.NORMAL) - - # 在後台線程播放 - def play_thread(): - success = self.play_segment(speaker, seg, seg_idx + 1, 1) - self.root.after(0, lambda: self.on_play_done()) - - thread = threading.Thread(target=play_thread, daemon=True) - 
thread.start() - - def play_all(self): - """播放所選說話人的所有片段""" - if not self.speakers: - return - - speaker = self.speakers[self.current_speaker_idx] - segs = self.speaker_segments[speaker] - - if not segs: - return - - self.is_playing = True - self.stop_flag = False - self.play_button.config(state=tk.DISABLED) - self.play_all_button.config(state=tk.DISABLED) - self.stop_button.config(state=tk.NORMAL) - - # 在後台線程播放 - def play_thread(): - for i, seg in enumerate(segs, 1): - if self.stop_flag: - break - self.play_segment(speaker, seg, i, len(segs)) - time.sleep(0.3) # 片段間隔 - - self.root.after(0, lambda: self.on_play_done()) - - thread = threading.Thread(target=play_thread, daemon=True) - thread.start() - - def stop_playing(self): - """停止播放""" - self.stop_flag = True - self.is_playing = False - self.on_play_done() - - def on_play_done(self): - """播放完成""" - self.is_playing = False - self.stop_flag = False - self.play_button.config(state=tk.NORMAL) - self.play_all_button.config(state=tk.NORMAL) - self.stop_button.config(state=tk.DISABLED) - self.progress_bar.config(value=0) - self.progress_label.config(text="0:00 / 0:00") - - if self.stop_flag: - self.status_label.config(text="⏹️ 已停止") - else: - self.status_label.config(text="✅ 播放完成") - - -def main(): - """主函數""" - if not HAS_TKINTER: - print("❌ tkinter 未安裝") - print("請使用以下命令安裝:") - print(" brew install python-tk@3.9") - return - - root = tk.Tk() - app = SpeakerPlayerGUI(root) - root.mainloop() - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/speaker_player_interactive.py b/scripts/asrx_self/speaker_player_interactive.py deleted file mode 100644 index e844c89..0000000 --- a/scripts/asrx_self/speaker_player_interactive.py +++ /dev/null @@ -1,267 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Interactive Speaker Audio Player - 交互式說話人語音播放器 -可以選擇播放哪個說話人的哪些片段 -""" - -import json -import subprocess -import tempfile -import os -from pathlib import Path -from typing import List, Dict - - -def 
load_asrx_result(result_path: str) -> Dict: - """載入 ASRX 結果""" - with open(result_path, "r", encoding="utf-8") as f: - return json.load(f) - - -def extract_and_play(audio_path: str, start_sec: float, end_sec: float) -> bool: - """提取並播放音頻片段""" - duration = end_sec - start_sec - temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) - temp_path = temp_file.name - temp_file.close() - - try: - # 提取 - cmd = [ - "ffmpeg", - "-y", - "-loglevel", - "quiet", - "-i", - audio_path, - "-ss", - str(start_sec), - "-t", - str(duration), - "-acodec", - "pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - temp_path, - ] - - result = subprocess.run(cmd, capture_output=True) - if result.returncode != 0: - return False - - # 播放 - if os.path.exists("/usr/bin/afplay"): - subprocess.run(["afplay", temp_path], capture_output=True) - elif os.path.exists("/usr/bin/aplay"): - subprocess.run(["aplay", temp_path], capture_output=True) - else: - print(" ⚠️ No audio player found") - return False - - return True - finally: - if os.path.exists(temp_path): - os.unlink(temp_path) - - -def show_menu(speaker_segments: Dict[str, List[Dict]], speaker_id: str): - """顯示選單""" - segs = speaker_segments[speaker_id] - total_duration = sum(seg["duration"] for seg in segs) - - print(f"\n{'=' * 70}") - print(f"🔊 {speaker_id}") - print(f"{'=' * 70}") - print(f" Segments: {len(segs)}") - print( - f" Total duration: {total_duration / 60:.1f} minutes ({total_duration:.1f}s)" - ) - print(f"{'=' * 70}") - - # 顯示前 20 個片段 - for i, seg in enumerate(segs[:20], 1): - start = seg["start"] - end = seg["end"] - duration = seg["duration"] - print( - f" [{i:3d}] {speaker_id:12} | {start:7.2f}s - {end:7.2f}s ({duration:5.2f}s)" - ) - - if len(segs) > 20: - print(f" ... 
and {len(segs) - 20} more segments") - - print(f"\n{'=' * 70}") - print("Commands:") - print(f" [1-{min(20, len(segs))}] Play specific segment") - print(" all Play all segments (may take a while)") - print(" first N Play first N segments") - print(" next Next speaker") - print(" prev Previous speaker") - print(" list List all speakers") - print(" quit Exit") - print(f"{'=' * 70}") - - -def interactive_player(audio_path: str, result_path: str): - """交互式播放器""" - # 載入結果 - result = load_asrx_result(result_path) - segments = result.get("segments", []) - total_duration = result.get("total_duration", 0) - - # 分組 - speaker_segments = {} - for seg in segments: - speaker = seg["speaker"] - if speaker not in speaker_segments: - speaker_segments[speaker] = [] - speaker_segments[speaker].append(seg) - - # 排序 - for speaker in speaker_segments: - speaker_segments[speaker].sort(key=lambda x: x["start"]) - - # 說話人列表 - speakers = sorted( - speaker_segments.keys(), - key=lambda s: sum(seg["duration"] for seg in speaker_segments[s]), - reverse=True, - ) - - current_speaker_idx = 0 - - print("\n🎬 Speaker Audio Player") - print(f"📁 Audio: {audio_path}") - print(f"📊 Speakers: {len(speakers)}") - print(f"{'=' * 70}") - - while True: - current_speaker = speakers[current_speaker_idx] - show_menu(speaker_segments, current_speaker) - - try: - cmd = input(f"\n▶️ {current_speaker} > ").strip().lower() - except (EOFError, KeyboardInterrupt): - print("\n\nExiting...") - break - - if not cmd: - continue - - # 播放特定片段 - if cmd.isdigit(): - idx = int(cmd) - 1 - if 0 <= idx < len(speaker_segments[current_speaker]): - seg = speaker_segments[current_speaker][idx] - print(f"\n 🔊 {current_speaker} - Segment {idx + 1}") - print( - f" ⏱️ {seg['start']:.2f}s - {seg['end']:.2f}s ({seg['duration']:.2f}s)" - ) - print(" ▶️ Playing...", end="", flush=True) - if extract_and_play(audio_path, seg["start"], seg["end"]): - print(" ✅ Done") - else: - print(" ❌ Failed") - else: - print( - f" Invalid segment number 
(1-{len(speaker_segments[current_speaker])})" - ) - - # 播放所有 - elif cmd == "all": - print( - f"\n 🔊 {current_speaker} - Playing all {len(speaker_segments[current_speaker])} segments..." - ) - print("=" * 70) - for i, seg in enumerate(speaker_segments[current_speaker], 1): - print( - f" [{i:3d}/{len(speaker_segments[current_speaker])}] {current_speaker} | " - + f"{seg['start']:7.2f}s - {seg['end']:7.2f}s ({seg['duration']:5.2f}s)", - end="", - flush=True, - ) - if extract_and_play(audio_path, seg["start"], seg["end"]): - print(" ✅") - else: - print(" ❌") - print("=" * 70) - - # 播放前 N 個 - elif cmd.startswith("first "): - try: - n = int(cmd.split()[1]) - print(f"\n 🔊 {current_speaker} - Playing first {n} segments...") - print("=" * 70) - for i, seg in enumerate(speaker_segments[current_speaker][:n], 1): - print( - f" [{i:3d}/{n}] {current_speaker} | " - + f"{seg['start']:7.2f}s - {seg['end']:7.2f}s ({seg['duration']:5.2f}s)", - end="", - flush=True, - ) - if extract_and_play(audio_path, seg["start"], seg["end"]): - print(" ✅") - else: - print(" ❌") - print("=" * 70) - except (IndexError, ValueError): - print(" Usage: first N") - - # 下一個說話人 - elif cmd == "next": - current_speaker_idx = (current_speaker_idx + 1) % len(speakers) - - # 上一個說話人 - elif cmd == "prev": - current_speaker_idx = (current_speaker_idx - 1) % len(speakers) - - # 列出所有說話人 - elif cmd == "list": - print(f"\n{'=' * 70}") - print("📢 All speakers:") - print(f"{'=' * 70}") - for i, speaker in enumerate(speakers, 1): - segs = speaker_segments[speaker] - total_dur = sum(seg["duration"] for seg in segs) - pct = total_dur / total_duration * 100 if total_duration > 0 else 0 - print( - f" {i:2d}. 
🔊 {speaker:12} | {len(segs):4d} segments, " - + f"{total_dur:7.1f}s ({pct:5.1f}%)" - ) - print(f"{'=' * 70}") - print(f" Current: 🔊 {speakers[current_speaker_idx]}") - print(f"{'=' * 70}") - - # 退出 - elif cmd == "quit" or cmd == "exit" or cmd == "q": - print("\nExiting...") - break - - else: - print(f" Unknown command: {cmd}") - - -def main(): - import argparse - - parser = argparse.ArgumentParser(description="Interactive Speaker Audio Player") - parser.add_argument("audio_path", help="原始音頻文件路徑") - parser.add_argument("result_path", help="ASRX 結果 JSON 路徑") - - args = parser.parse_args() - - if not Path(args.audio_path).exists(): - print(f"Error: Audio file not found: {args.audio_path}") - return - - if not Path(args.result_path).exists(): - print(f"Error: Result file not found: {args.result_path}") - return - - interactive_player(args.audio_path, args.result_path) - - -if __name__ == "__main__": - main() diff --git a/scripts/asrx_self/test_gui_face_player.py b/scripts/asrx_self/test_gui_face_player.py deleted file mode 100755 index 08dd112..0000000 --- a/scripts/asrx_self/test_gui_face_player.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -GUI Face Player 自動化測試腳本 -測試所有功能並生成測試報告 -""" - -import json -import subprocess -from pathlib import Path - - -def check_file_exists(path, description): - """檢查文件是否存在""" - exists = Path(path).exists() - status = "✅" if exists else "❌" - size = Path(path).stat().st_size / 1024 / 1024 if exists else 0 - print(f"{status} {description}: {path} ({size:.1f} MB)") - return exists - - -def check_process_running(pattern): - """檢查進程是否運行""" - result = subprocess.run(['pgrep', '-f', pattern], capture_output=True, text=True) - running = result.returncode == 0 - status = "✅" if running else "❌" - print(f"{status} 進程:{pattern} ({'運行中' if running else '未運行'})") - return running - - -def test_json_structure(path, required_keys, description): - """測試 JSON 文件結構""" - try: - with open(path, 'r', encoding='utf-8') as f: - data = 
json.load(f) - - missing_keys = [key for key in required_keys if key not in data] - if missing_keys: - print(f"❌ {description}: 缺少鍵 {missing_keys}") - return False - else: - print(f"✅ {description}: 結構正確") - return True - except Exception as e: - print(f"❌ {description}: {e}") - return False - - -def test_integration_script(): - """測試整合腳本""" - print("\n" + "="*70) - print("測試整合腳本") - print("="*70) - - cmd = [ - 'python3', - 'integrate_face_asrx_speaker.py', - '/tmp/face_long.json', - '/tmp/asrx_charade_optimized.json', - '--threshold', '3.0', - '--stats' - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) - - # 檢查輸出 - if '99.8%' in result.stdout: - print("✅ 整合腳本:匹配率正確 (99.8%)") - return True - else: - print("❌ 整合腳本:匹配率異常") - print(result.stdout) - return False - - -def test_gui_startup(): - """測試 GUI 啟動""" - print("\n" + "="*70) - print("測試 GUI 啟動") - print("="*70) - - # 檢查進程 - running = check_process_running('speaker_player_gui_face') - - if running: - print("✅ GUI 進程:正常運行") - return True - else: - print("❌ GUI 進程:未運行") - return False - - -def main(): - """主測試函數""" - print("="*70) - print("GUI Face Player 自動化測試") - print("="*70) - - # 測試文件 - print("\n" + "="*70) - print("測試文件") - print("="*70) - - files_ok = True - files_ok &= check_file_exists('/tmp/charade_audio.wav', '音頻文件') - files_ok &= check_file_exists('/tmp/asrx_charade_optimized.json', 'ASRX 結果') - files_ok &= check_file_exists('/tmp/face_long.json', 'Face 結果') - files_ok &= check_file_exists('/tmp/charade_integrated.json', '整合結果') - - # 測試 JSON 結構 - print("\n" + "="*70) - print("測試 JSON 結構") - print("="*70) - - json_ok = True - json_ok &= test_json_structure( - '/tmp/asrx_charade_optimized.json', - ['segments', 'n_speakers'], - 'ASRX 結果' - ) - json_ok &= test_json_structure( - '/tmp/face_long.json', - ['frames', 'frame_count'], - 'Face 結果' - ) - json_ok &= test_json_structure( - '/tmp/charade_integrated.json', - ['integrated_segments', 'speaker_stats'], - '整合結果' - ) - - # 
測試整合腳本 - integration_ok = test_integration_script() - - # 測試 GUI - gui_ok = test_gui_startup() - - # 總結 - print("\n" + "="*70) - print("測試總結") - print("="*70) - - all_ok = files_ok and json_ok and integration_ok and gui_ok - - if all_ok: - print("✅ 所有測試通過!") - else: - print("❌ 部分測試失敗") - if not files_ok: - print(" - 文件測試失敗") - if not json_ok: - print(" - JSON 結構測試失敗") - if not integration_ok: - print(" - 整合腳本測試失敗") - if not gui_ok: - print(" - GUI 啟動測試失敗") - - print("\n" + "="*70) - - return all_ok - - -if __name__ == "__main__": - success = main() - exit(0 if success else 1) diff --git a/scripts/asrx_self/test_long_movie.py b/scripts/asrx_self/test_long_movie.py deleted file mode 100755 index 1b5ee60..0000000 --- a/scripts/asrx_self/test_long_movie.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -長影片(Charade 1963,114 分鐘)完整測試腳本 -""" - -import json -import subprocess -from pathlib import Path -from datetime import datetime - - -def print_header(title): - """打印標題""" - print("\n" + "="*70) - print(f" {title}") - print("="*70) - - -def test_data_files(): - """測試數據文件""" - print_header("1. 數據文件測試") - - files = { - '音頻文件': '/tmp/charade_audio.wav', - 'ASRX 結果': '/tmp/asrx_charade_optimized.json', - 'Face 結果': '/tmp/face_long.json', - '整合結果': '/tmp/charade_integrated.json' - } - - all_ok = True - for name, path in files.items(): - exists = Path(path).exists() - size = Path(path).stat().st_size / 1024 / 1024 if exists else 0 - status = "✅" if exists else "❌" - print(f"{status} {name}: {size:.1f} MB") - all_ok = all_ok and exists - - return all_ok - - -def test_asrx_results(): - """測試 ASRX 結果""" - print_header("2. 
ASRX 結果測試") - - with open('/tmp/asrx_charade_optimized.json', 'r', encoding='utf-8') as f: - data = json.load(f) - - total_duration = data.get('total_duration', 0) - n_speakers = data.get('n_speakers', 0) - n_segments = data.get('n_speech_segments', 0) - - print(f"📊 影片時長:{total_duration/60:.1f} 分鐘 ({total_duration:.1f}秒)") - print(f" 說話人數量:{n_speakers}") - print(f"📊 語音片段:{n_segments}") - - # 說話人統計 - print("\n📢 說話人分佈:") - speaker_stats = data.get('speaker_stats', {}) - for speaker, stats in sorted(speaker_stats.items(), key=lambda x: x[1]['duration'], reverse=True): - duration = stats.get('duration', 0) - count = stats.get('count', 0) - pct = duration / total_duration * 100 if total_duration > 0 else 0 - print(f" {speaker}: {count} 片段,{duration/60:.1f}分鐘 ({pct:.1f}%)") - - return n_speakers >= 2 and n_segments > 100 - - -def test_face_results(): - """測試 Face 結果""" - print_header("3. Face 結果測試") - - with open('/tmp/face_long.json', 'r', encoding='utf-8') as f: - data = json.load(f) - - total_frames = data.get('frame_count', 0) - detected_frames = data.get('frames', []) - fps = data.get('fps', 0) - - print(f"📊 總數:{total_frames:,}") - print(f"📊 檢測到人臉:{len(detected_frames):,}") - print(f"📊 FPS: {fps:.2f}") - print(f"📊 檢測率:{len(detected_frames)/total_frames*100:.2f}%") - - return len(detected_frames) > 0 - - -def test_integration(): - """測試整合結果""" - print_header("4. 
Face + ASRX 整合測試") - - with open('/tmp/charade_integrated.json', 'r', encoding='utf-8') as f: - data = json.load(f) - - segments = data.get('integrated_segments', []) - total = len(segments) - with_face = sum(1 for seg in segments if seg.get('has_face', False)) - match_rate = with_face / total * 100 if total > 0 else 0 - - print(f"📊 總片段:{total}") - print(f"📊 有人臉:{with_face}") - print(f"📊 匹配率:{match_rate:.2f}%") - - # 說話人匹配統計 - print("\n📢 說話人匹配詳情:") - speaker_stats = data.get('speaker_stats', {}) - for speaker, stats in sorted(speaker_stats.items()): - total_seg = stats.get('total_segments', 0) - with_face_seg = stats.get('with_face', 0) - rate = with_face_seg / total_seg * 100 if total_seg > 0 else 0 - status = "✅" if rate >= 99 else "⚠️" if rate >= 50 else "❌" - print(f" {status} {speaker}: {with_face_seg}/{total_seg} ({rate:.1f}%)") - - return match_rate >= 95 - - -def test_gui_process(): - """測試 GUI 進程""" - print_header("5. GUI 進程測試") - - result = subprocess.run(['pgrep', '-f', 'speaker_player_gui_face'], - capture_output=True, text=True) - running = result.returncode == 0 - - if running: - pid = result.stdout.strip() - print(f"✅ GUI 進程運行中 (PID: {pid})") - - # 檢查進程資源使用 - ps_result = subprocess.run(['ps', 'aux'], capture_output=True, text=True) - for line in ps_result.stdout.split('\n'): - if 'speaker_player_gui_face' in line and 'grep' not in line: - parts = line.split() - if len(parts) >= 8: - cpu = parts[2] - mem = parts[3] - print(f" CPU: {cpu}%, 記憶體:{mem}%") - else: - print("❌ GUI 進程未運行") - - return running - - -def test_playback(): - """測試播放功能(模擬)""" - print_header("6. 
播放功能測試") - - # 測試 ffmpeg 是否可用 - result = subprocess.run(['which', 'ffmpeg'], capture_output=True, text=True) - ffmpeg_ok = result.returncode == 0 - print(f"{'✅' if ffmpeg_ok else '❌'} ffmpeg: {'可用' if ffmpeg_ok else '不可用'}") - - # 測試 afplay 是否可用 - result = subprocess.run(['which', 'afplay'], capture_output=True, text=True) - afplay_ok = result.returncode == 0 - print(f"{'✅' if afplay_ok else '❌'} afplay: {'可用' if afplay_ok else '不可用'}") - - # 測試音頻提取(第一個片段) - with open('/tmp/asrx_charade_optimized.json', 'r', encoding='utf-8') as f: - asrx_data = json.load(f) - - first_seg = asrx_data['segments'][0] - start = first_seg['start'] - end = first_seg['end'] - duration = end - start - - print("\n🎵 測試提取第一個片段:") - print(f" 時間:{start:.2f}s - {end:.2f}s ({duration:.2f}s)") - - # 實際提取測試 - temp_file = '/tmp/test_segment.wav' - cmd = [ - 'ffmpeg', '-y', '-loglevel', 'quiet', - '-i', '/tmp/charade_audio.wav', - '-ss', str(start), - '-t', str(duration), - temp_file - ] - - result = subprocess.run(cmd, capture_output=True) - extract_ok = result.returncode == 0 and Path(temp_file).exists() - - print(f"{'✅' if extract_ok else '❌'} 音頻提取: {'成功' if extract_ok else '失敗'}") - - if extract_ok: - size = Path(temp_file).stat().st_size / 1024 - print(f" 文件大小:{size:.1f} KB") - Path(temp_file).unlink() # 清理 - - return ffmpeg_ok and afplay_ok and extract_ok - - -def generate_report(): - """生成測試報告""" - print_header("測試報告") - - tests = [ - ("數據文件", test_data_files()), - ("ASRX 結果", test_asrx_results()), - ("Face 結果", test_face_results()), - ("整合結果", test_integration()), - ("GUI 進程", test_gui_process()), - ("播放功能", test_playback()) - ] - - passed = sum(1 for _, result in tests if result) - total = len(tests) - - print("\n" + "="*70) - print(f" 測試總結:{passed}/{total} 通過") - print("="*70) - - for name, result in tests: - status = "✅" if result else "❌" - print(f"{status} {name}") - - if passed == total: - print("\n🎉 所有測試通過!") - else: - print(f"\n⚠️ {total - passed} 個測試失敗") - - # 保存報告 - report_path = 
'/tmp/long_movie_test_report.md' - with open(report_path, 'w', encoding='utf-8') as f: - f.write("# 長影片測試報告\n\n") - f.write(f"**測試時間**: {datetime.now().isoformat()}\n") - f.write("**測試影片**: Charade 1963 (114.7 分鐘)\n\n") - f.write("## 結果\n\n") - f.write(f"**通過**: {passed}/{total}\n\n") - for name, result in tests: - status = "✅" if result else "❌" - f.write(f"- {status} {name}\n") - - print(f"\n📄 報告已保存:{report_path}") - - return passed == total - - -if __name__ == "__main__": - success = generate_report() - exit(0 if success else 1) diff --git a/scripts/asrx_self/vad.py b/scripts/asrx_self/vad.py index b900709..4527c73 100644 --- a/scripts/asrx_self/vad.py +++ b/scripts/asrx_self/vad.py @@ -126,6 +126,52 @@ def extract_speech_audio(audio_path, model, utils, output_dir=None): return speech_audios, speech_segments +def scan_within_segment(wav, sample_rate, start_sec, end_sec, model, utils, + min_speech_duration_ms=500, min_silence_duration_ms=300): + """ + 在一個時間範圍內執行 VAD 掃描,切出子片段。 + + 用途: whisper 給出的粗略時間段內,利用句間停頓細切。 + + Args: + wav: 完整音頻波形 (numpy array) + sample_rate: 採樣率 + start_sec: 掃描起始時間 (秒) + end_sec: 掃描結束時間 (秒) + model: VAD 模型 + utils: VAD 工具函數 + min_speech_duration_ms: 最小語音持續時間 + min_silence_duration_ms: 最小靜音持續時間 + + Returns: + sub_segments: [(start_sec, end_sec), ...] 
子片段列表 (原始時間軸) + """ + get_speech_timestamps, _, _, _, _ = utils + + # 提取該時間範圍內的音頻 + start_sample = int(start_sec * sample_rate) + end_sample = int(end_sec * sample_rate) + segment_wav = wav[start_sample:end_sample] + + # 在子音頻上執行 VAD + speech_ts = get_speech_timestamps( + segment_wav, + model, + sampling_rate=sample_rate, + min_speech_duration_ms=min_speech_duration_ms, + min_silence_duration_ms=min_silence_duration_ms, + return_seconds=True, + ) + + # 轉換回原始時間軸 + sub_segments = [ + (ts["start"] + start_sec, ts["end"] + start_sec) + for ts in speech_ts + ] + + return sub_segments + + if __name__ == "__main__": # 測試 VAD import sys diff --git a/scripts/asrx_self/whisper_local.py b/scripts/asrx_self/whisper_local.py new file mode 100644 index 0000000..7029731 --- /dev/null +++ b/scripts/asrx_self/whisper_local.py @@ -0,0 +1,35 @@ +""" +Whisper Local - uses faster-whisper for per-segment transcription +""" + +import numpy as np + + +def load_model(size="small"): + from faster_whisper import WhisperModel + return WhisperModel(size, device="cpu", compute_type="int8") + + +def transcribe_segment(wav, sample_rate, start_sec, end_sec, model): + start_sample = int(start_sec * sample_rate) + end_sample = int(end_sec * sample_rate) + if start_sample >= len(wav): + return {"text": "", "language": "", "lang_prob": 0.0, "segments": []} + segment_wav = wav[start_sample:min(end_sample, len(wav))] + + segments_generator, info = model.transcribe(segment_wav, language=None) + + text = "" + lang_prob = info.language_probability if info else 0.0 + language = info.language if info else "" + + segs = list(segments_generator) + for seg in segs: + text += seg.text + " " + + return { + "text": text.strip(), + "language": language, + "lang_prob": lang_prob, + "segments": segs, + } diff --git a/scripts/checksums.sha256 b/scripts/checksums.sha256 index e0068a5..6ec1fe9 100644 --- a/scripts/checksums.sha256 +++ b/scripts/checksums.sha256 @@ -1,346 +1,293 @@ 
-2bfe6a1c1263f35916d4a28981814515fc40cb473f7bbc801f84842904c888f6 ./add_yolo_to_chunks.py -f61f7126698018b346c8bafc45501708c17e3b45d9db54be5f0109afeee63176 ./age_benchmark.py -8efb13239db2a25a728abbdebd92affe685b69402a277cceb0d76e62ed9451ac ./analyze_asr_lip.py -432b3e3b30578e71ef973aca758bd1964102cbbb19530620df8ac02df00eefb8 ./analyze_video_faces.py -732609ef1882e14dc7ed60488697f6ae7e2607ec90b240a86ea9e585f052b9be ./apply_asr_corrections.py -790bd25424e93ca5a0743ea1a740a9a70f6ae6f8a9ca411012eb1e9b03907eb4 ./asr_benchmark_runner.py -18744dc3bebdce0d89ea7076b5e43febd35ad3c84064bb52adde4d128d50bc9f ./asr_face_stats.py -1577d055328a73561f9ccfaf0c54727532e3dddcd1bf0f33e3c38081415cced8 ./asr_model_benchmark.py -fcbb81639f53e9e08bee436853c84d918c0eeac09d985b34634d5ddc00055b61 ./asr_processor_base.py -25948a204e45ce844d43606b7e45c9532321d48df44887d261fc886748276b10 ./asr_processor_contract_v1.py -e9209cf028a11bdc45514124826374e58458ee06b054cfedffe8013d751735ea ./asr_processor_contract_v2.py -407dd0ec772027e0df27af0b66ea8130cb390595ccdeca4350e7bdc210acee6c ./asr_processor_debug.py -dcee1b80071b47c974bcffe3d27ec2f2269f4b8de7e7409ceaec7e6f271d31aa ./asr_processor_legacy_v2.py -10728a05a6ff2d56a70bb831abb51e05b03309e45bc5fa068c5a0702a4c73769 ./asr_processor_legacy.py -9106bfe07de9cfc920f4f4d2f821dc024df612f4c2a8f5f75d35f012d26440f0 ./asr_processor_simplified.py -7eabdcf7320302ee65c67e801f3ac7ca5801abc76165faa182348d30a8113e9f ./asr_processor_small_multilingual.py -2714f7be88f286635ea8465daf8fa969e6b27d2b2d1f73ac5e98f5e496139cad ./asr_processor_small.py -1089ff10b9b0a9f528cac79580aec25e33f8eeea485ac44b6aaf8c7c0cab5b42 ./asr_processor_v2.py -e9e622d737990bea8ecc139fa310a7cb4b0ca0309d6783f8105e74f864dfb850 ./asr_processor.py -5431b57d4369a841d51a6d6c5e1fb5e6c2932cb97cb4601f5e1b41ffe9f7ecaf ./asr_side_by_side_comparison.py -6c11efc3d40e559bfbeadcbf4f51eb353b744cc4f765bd8abc472a701e3f33cb ./asrx_processor_contract_v1.py 
-93501463af84d6541405057da3783d40492aec5e536b4210dcaffe460cdb5503 ./asrx_processor_custom.py -6adfbee842d134b9d180e2d1104694ed5cdc1fa4febcd0c502801b8f87b3ce66 ./asrx_processor_simplified.py -60fc3465f9c461583f8d0b888e85b3a6e04e1f252a1e1c21d036b52e1ce4b43c ./asrx_processor_v2_noalign.py -82d65b71bd86874e484870c40214d3fbd9343c39d5d635896fb4d257d13a410f ./asrx_processor_v2_transcribe.py -5a0c9905a2e10c847aa74f108e4054de4704bbafb2004589db15bf33833ea3c7 ./asrx_processor_v2.py -b16b00cf9e5de96abc512022af9bb81196405b10988f5a39dfd3a9b6471f1155 ./asrx_processor.py -d570fbe89bf84c50f180e8f3ec26c30092e07e3fa4883fb83a644670c13b8588 ./asrx_self/__init__.py -3b7a788e5fe2fa1a7518bf2a639ccd09b304b264b952c88a3e6612aba30faef5 ./asrx_self/integrate_face_asrx_speaker.py -1fe4b9ac1d04c2f2ef5361d8325cf9333e434b126be6a53a4c0d40a04f32a34d ./asrx_self/main_fixed.py -e4a2894bd4207f6d034c86e1d232001e2e0f9e65856c89d84d8a038473a5e50b ./asrx_self/main.py -46f61075b403729e4ff9bf0b05367b5319acf5d8c696a0517033699dcba36276 ./asrx_self/speaker_audio_player.py -2a072521662906e5ca84ec54cb1963930a1c795f8d64906b66e889c0f442198b ./asrx_self/speaker_cluster_fixed.py -db4ddc98d563bf4a8c34fcd1fe40edd34fab63fa8c293644a8a40ae87be521dd ./asrx_self/speaker_cluster.py -a50d0ae549b733532f940332e4656a4dcf0623703240eb74832524eedf54f888 ./asrx_self/speaker_encoder.py -42f325168e1f6edd514eb00321f18ce581f7b61d18c50798271c3da8410cb248 ./asrx_self/speaker_player_gui_face.py -54a847a8862e2f7400c4d8425f4bebaeb230fd50932933734819fbb6729bb560 ./asrx_self/speaker_player_gui.py -43508b714f2f1aa8bacdb9c4f52152f3fd14f6c2e2529460e5b24b29846c8c37 ./asrx_self/speaker_player_interactive.py -e25e789552fef129bd6f536140ec4deead8e242091ab60ab679b544ff9d43307 ./asrx_self/test_gui_face_player.py -788014df1faf7cfa09fbce16781f8bf9da1acef75e8891592b3b4d51b91e93f2 ./asrx_self/test_long_movie.py -8bac63ea24cd06b9d398c2650ac396e10db64e33f0686a01bd460e17286e7574 ./asrx_self/vad.py 
-f11b67ada6167540d2f95cb2af93d0e3a0de55bce659745baa37c4aa4805212e ./audio_taxonomy_processor_v2.py -ded810b81cda24e31e82de14ba9846770ee2b18d84d52b9d570de5877e9e2513 ./audio_taxonomy_processor.py -f7c53be5a031a8bff15c3165543586529932d81c4312521654d132b1f0ed6bc3 ./auto_identify_persons.py -5497a6f1f7ae267c796a398a9f020ea485aa45f980f2eca932b904ad61ce9b40 ./backfill_demographics.py -39a479ca4f8986f3255b0bcd0d9162a1f2ae339bb4dcf081f931ff9b304797a1 ./backfill_frame_data.py -308c8e3f3d45ee273504f9f415eaf6c025f06aaf1cca33156a66431ed6e64f43 ./build_semantic_index_poc.py -4eb37768edd252d94f0d751f219c317e905bc093f414b2a6350efb8294131138 ./build_semantic_index.py -debbd058957d09c2397f3f4c028edaa0a658002921dcca95eae2a20070ba95fb ./caption_processor_contract_v1.py -7236cdb5deaeada266cc246ee11380248bb9f2255888c25a152b2f6ab1f981cc ./caption_processor.py -e73cbb688dade5c5b6fc4276f0c78b377903ff83f3830b63d8bcdacd8da8aecf ./check_all_stamps.py -7ecdbd4b1f94be8ebab9935ea210a868330e7030b6e19c73229c579c1189fd5c ./check_architecture_all.py -7179ed1a87241904af29542f9018398f8afd9b9dd89af7bb11909310ab7b49e0 ./check_architecture_docs.py -7e6bd7d14582e494baf8b28354bbded3f79b43f0bd271ab33874da55b9086311 ./check_code_document_consistency.py -5ffca7c55edafad755e84499981553fcb48ce6056ca7b04130acafb9e6a9b1c3 ./check_frame_112_36.py -f49c7b0cfa53b657f69b2ad97a6e18393741cc2151b32c9d7dde2e078b75953f ./check_frame_91_59.py -d2cb7475262ee711a4b06e53559f0927242be4a924a56e7fe212225f318f4193 ./chinese_vector_test.py -ecde3d3df773916f62de4e34f8d8693feaedf112a3ef9955e22417c8421722bd ./chunk_statistics.py -2588ecf27c13020d894e46ba70a76de89f09556b475f555dae59db36da0b90a0 ./clean_sentence_text.py -98ab1129032f42fddc020f9b3492d1fc133851d1af33ddeb57e2385d88425af4 ./clip_logo_integration.py -bf6f74c09b8f8c7f25c5fffb9c36f16a8afb483a7b65903cfc75e2ea641bdf49 ./compare_asr_content.py -1f2caadcded724aa04a929018a35ace53dd79d172f5ee2720308fbd4581b0c6c ./compare_asr_models.py 
-1ed8a9530f40e304b556ff76c7cac40468c86a0cd32ff2a8bc7bf2a69669121d ./compare_models_gun_test.py -6bf790fe75a7a2a5220052ca14c31e90a97eabc4558cd5e9059280913862a81e ./compare_search.py -875e7a598982c8ad7222a51b7b147e91cd5e1a930f41214b3942107cb932fc5c ./compare_segmentation.py -e432b6f2364d5a9aaf207a1de0dca3fb14ab8d118c53ee34306abfe6fd211ba8 ./comprehensive_search_test.py -43df85cf860ac28e083de35b511bb2a7b91ed48f596757f52f19487768987500 ./coreml_embed_server.py -9149ccc8de5adfec69c6f3f2ec502ae7d5e7844518a228ba587af2e08cb38805 ./crop_opencv_stamp.py -fc36ecbb1455d959456945266e193b601a29c4210b4938a3f0d4a9aaf44b5cee ./crop_real_stamps.py -34a694624ce94d916b06a847bc4d41e7665985b85e55a626a4bc3a4370c21acf ./crop_stamp_112_36.py -27099dc9c8ee52a6949ce18c505089afef1720fe70858b90d0801972c3b43fff ./crop_stamp_closeup.py -01b5a3b091ebcffc0c1e2637b7af8192ba597239fa80d152738e3b8cfdf8174d ./crop_stamp.py -71b2a362b5395c6e4d70e62766820db92d94eaf140d98eecb2880bcd98d55be9 ./crop_top_candidates.py -60f18c5fa03ffbc80c209337cd1c8b6acd0b8471e600119340aa8cdfeef14f5b ./cut_benchmark_runner.py -deba86a1645ca5b1acf413dd9edfad77b93ff213897d739a32de1ba629bfce52 ./cut_processor_contract_v1.py -01024f947f0326c124293a30e4f2cdb859f21cfb2d4c07f9c1030e2934f7bc44 ./cut_processor.py -ff092ad2373b57321f87d1dd123fff8a99c8207057591e8526e56cb1424d47c6 ./dashboard.py -f184bf3e546db0253ffb71895e8d42aeb06588c71c4914c2fe656f42ef463c9a ./debug_face_registration.py -a9acce1ebd6ea821a8dc5009b8fc40586a98d31c23e93c97fd844bdadbda4ed2 ./deep_analysis_112_36.py -7767ee7455a956d14d286ad558c4c312c2ad3ccee1c73adc1bc8f761c96ad72a ./demo_dashboard.py -425290c12161c5cfcb0c505a737ba3951656b39e425e792919d4812e15b9b8e3 ./demo_face_learning.py -d7e3e27e6a65b1fa62530ee954c227dbb4f97593c5a5dcc48b39e5ebae4656e5 ./dense_scan_traces.py -df79b7fc7a03a8e754de5123a23bb33b1d5c23d832adc1886fb846ca517dd24d ./detect_language.py -f6f8047e24ebbec81ef27dd38f4242e63385f8ebe5be471cae156b8aa5fc4477 ./detect_objects_keyframes.py 
-e61d2ef5043bda3674a0050d83ba3bc6a70c47f54e456124a736b4328f0c0638 ./detect_stamp_shapes.py -f23a382113e9c7de2ec3b24e95160daef48f9336ae6d4ec9ee7a18f4bf529f6d ./download_places365_classes.py -a747e5e17960b972549714786bb9e28ea578e10e6c80788e298a0149c970bcc5 ./embed_faces.py -f1a2b3820e1a763eba6d8d905a5bb87f5a9b4a2f005e709e313bb7505ba7ddaa ./embeddinggemma_server.py -43c540c02c1be992e7d44ab4fc76a759815db3ed5f25bcbb594328b50ed7c73b ./export_file_package.py -19d23e4604d5532928412afe4d5d39ff49194ab4a046825286ae1be154326a1f ./export_file.py -5f10bab1dcb0b5fad233a74069f9e2f89043e7c848c9c38ae7e2806e6940c75d ./export_identities.py -2a1d0a1b853fd2c28f9a404871d33912f93521358576833be0999271bae02bcb ./export_person_thumbnails.py -a81bf1d6af78c052e638f5d5677b4edb512d0de5441025d86fd970d3e7993922 ./export_sqlite.py -8b5cc0ff437fb4dd0df28b7b20a78469cdca3621e2eeb4b6d46ad2391acb0596 ./extract_female_faces.py -bdecbaf0496bf536dce2ef4897f7090749820d15dcca03492d4d736ab0f8c6c5 ./face_benchmark_runner.py -22319a38bd684fb235fec681ddc60f45821e4bb2181f2b31fdf945f7ad9a1b85 ./face_clustering_processor.py -5adce4e444743331fa592e13d71e52f26554eadb9744d350a7654a449a8fb8a3 ./face_count_comparison.py -3574454c74eaf11021f9052f77d93044cca4ae0285d0f2630b4016c2ec0df783 ./face_cross_validate.py -4f09b3b66b14a5eefb14fcf915a1ad1e9147010f6ae7671731566679b1cae461 ./face_embedding_extractor.py -87f1b69affbac03fbd87331a99cd7c4faba6c72d359ffcfebb62d6ad8f70445b ./face_landmark_qc.py -28776dfcc6ac40e9481c25467438745fed60fecdfd4fc19f9f4c7396397591a7 ./face_mediapipe_test.py -f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 ./face_processor_contract_v1.py -802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e ./face_processor_mps.py -96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 ./face_processor_optimized.py -17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8 ./face_processor_v1.py -d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 
./face_processor.py -8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 ./face_recognition_processor.py -9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 ./face_registration.py -72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b ./face_statistics_report.py -499f197a06f50839ebd5350af380fa56506ce08f073ba40c0e863b8e02b34133 ./fast_face_clustering_processor.py -0191781635b98d0675969fb87733af19525d7b5c148723346c5378c08a00fe33 ./fast_stamp_search.py -00e7e8ed06f6a0f2c46c84a47d7e7f5d366acee941d546a52c4b1b7885c71e08 ./filter_stamp_colors.py -5341fd648cffafc77568070313b06417636943d50ff3b4380a61381260acaafa ./final_face_validation.py -213793ab719f4ef42ec9b22f351dd86d4739211c17be486a46b76ba7e64fd8f1 ./find_blue_stamp_opencv.py -e1490317c0f56b895f73cfbb6f57c8e3ea5c65304bfdd7663f103f6b564e148c ./find_kids_pose.py -08d4cba0650f6a22fc134d07fd15fe8784c8472c3ba687b587e31e0b980e2b1c ./find_kids_refined.py -aecec0784ce5d0e98176c15798f05d4f67ab6a686f9ffafba71fbd82157027f8 ./find_magnifying_glass.py -620db08dd84f00af0c6d744dac54c68360548dd5b2cc26b12ddcefd936239b2e ./find_pink_stamp.py -1f4555b3578f4dc6bc08aa37e34eda1d91ea25d8134439771678d1a57bfdaeb9 ./find_realistic_stamp_opencv.py -277aa3b48eec2e739de3bb95ef501ffbd24104aa2a1bdef28c844ef44fd75013 ./find_small_stamp_opencv.py -fc73bbc9605938db495bd33ea74955e454e9384130531a16d42f25dbd9b515d8 ./find_stamp_in_hands.py -c6ed0f12e78c12df977ddca5d699f58edb174b47199f584e7a24dbdc3b7d02b1 ./find_stamp_in_magnifier_scene.py -ecf12e346619c27a985452e9f84ee262c2da25de9df0ff6e0b293279ccba559b ./find_stamp_opencv.py -4ff93cbcc781a5cff023f78006f1aebbe2d954405ae7d00a473fef6b41b2ebee ./fix_asr_text.py -4090cb892115843a909aa41426c0f39c5a53d8d88a5db69499ec8bafcb780d77 ./florence2_scan_stamps.py -e90e4447db3328b64a2062ca13ed41f6a045220d8fb640542dff5b790d3c4d3b ./gdino_comparison_test.py -7071a9999057c347e2275381f1f0c58e19aa8581d70a572d3170ed14a295a48d ./gdino_frame_api.py 
-891410310b415ff68a0f7ee0aa39e84eef7f2c75887487bdb88b8f4718d40e94 ./generate_asr1.py -24efe7db016387b40bd9caae449f0445a3d47eb878c00399803bb6e78e6dd5fc ./generate_benchmark_summary.py -dc956a78a3ed26686f45dd6d6d9cb42c023751fcd9b8789585450b6df63670a1 ./generate_chunk_summaries.py -8a0922d75fdc7c5994ebfb31881d765db4b105cbcddfcaa4b4c49d11950b8df4 ./generate_chunk_visual_stats.py -4860bfd00cc6c1c842c2f8e17e725eebca191d81067af3cb5a28661b45d74bd3 ./generate_parent_chunks_gemma4.py -e9fca223a8329ff6bdcb8552fecedb2d8b4607c6516c373c3023f29edfd42e06 ./generate_sentence_summaries.py -cbae7c3e85457274e8c284005196c39dc97f9d9200ed6b0e4ea266e48a381d3a ./generate_synonyms_llamacpp.py -57512cd7a5ec2f52813717fd3d81dec1aaa69dc9c91a9edbca847e7012b1c86f ./generate_synonyms_ollama.py -dc495cb8127858fa03a5f8b8bb4a772c5934ada1abecf97459bf71de80417672 ./gun_detector_scan.py -1a7cfb72723b3b94e3f4fe368477ba693ac3d20ac7af7351962bc548c700b451 ./head_shoulder_bench.py -b2fe8e4d8d7d1057ba928fc5e190f4a06cb60e83e2a02c5d7c423791596c11b8 ./head_shoulder_quick.py -ba5e67a97cb465e6a1a942c2f7342406031759ffcea2b897ae963bee4bc551c4 ./hybrid_stamp_search.py -f5847b6c8ed4c7c51290df9032d5a192317b5f03b5ff418ead1181a6e1b655f2 ./identity_agent.py -61bea1980af5861a02d6e9b47ac5ad0bd04a4fd633af477d2179b7361ae58c01 ./identity_bind.py -046aa90eb4a4b830910912362a9865d1e6170f5bc176fae42be630f967f9d3ff ./import_file_package.py -7cc260d4411ab13559803686f8b645afa07738d652d9459830aecac268597fa7 ./import_file.py -071e3a5141d04cb9e6bd31489a835c778608785896b18ea7fa65e8db9f1547e5 ./insert_chunks.py -d3d53f44daa7f1526488677b141e90fbf4aa5625369b96a3ca275b802414802f ./integrate_face_asrx.py -4cb6a93ef8006cb69e8bdb1bc72899ee9bab1bf7eceaafe9896923bb7023bbd5 ./integrate_rule3_markers.py -75aa3e4bffc9f9cb8b9254db19095c93c3efb43d465fb5dcca8c7b9b730f5c59 ./integrated_body_action_decoder.py -f4dd2e21fb6b668bdf0c51cc56e214188b46937b96a2b4a10d13783e171d0472 ./language_router.py 
-bef426641645fcf7dcc68c87e3325a6edf3f70925febaf1df84f7c6ff87681e5 ./lip_analyzer.py -7f98b0cc8379b3759cc7e805dd56f736cc518093e83f43b2e5ecf559a19b95f0 ./lip_processor_cv.py -a1473eeba17fce25e4678234fe4e8793a132514e0566b03b36a0bec04eb93acb ./lip_processor_media.py -0df61396756ee22d35356776c189b354458661916c8baf85bcef97c9f8b62ec8 ./lip_processor_mp.py -3202aeca29e651ef1a54f47681c6b3b2d0680555fe3c6d318a932bb12b49e58c ./lip_processor_simple.py -fed15bafb5e09715cc03962f465b2ff618bf05ebeafdf932643690c9635c9840 ./lip_processor.py -1773054e8d563b493865880d0d8bda105e3eb6fb536a25817517237b3bb76afe ./magnifying_glass_analyze.py -7d4d048c452bf273f4a6d96da13eb7bab6aa60ca9dd51de5ca0fb0a01e587b13 ./magnifying_glass_extract.py -8528bbf89d2770fa5a23f461274038898be251fb6e48c5d3adece5aab3bf976d ./magnifying_glass_owl.py -cb645f5e29ee5a36b2f97812039abfdaed7328386bcd25ad7b742af6a6b16399 ./map_speakers_v2.py -a90bd3fb729a05010c29a213134c60cc0bdd17769e27a7d3f1250919b7bf1613 ./match_face_identity.py -2d864dc831c2fd0142b19b8ad2cda169c2a05facd9662d31861d29bb710c4979 ./match_face_with_pose_filtering.py -889d4853707896885ed96ab945d4266acb213f4b122e2ba7c4563eb0e3e9e865 ./match_identities_to_tmdb.py -b34ec373bcf65139e08e41967f58a2fc8ebb67a59c361074d3590cd16541415a ./match_speakers_to_chunks.py -fe6260a94d01d8b43d0d3b59eb820cfd7b4711c907343a1261c69f9010ae990d ./mediapipe_holistic_processor.py -bb36844b4d13bba8edc1b7f0703f02081b62bea795535b8cd8dcbfdb4281f402 ./migrate_asr_to_children.py -819312cbfce6e68a0d8d731e02d283946f79de6044f207991ddf9a28ac853d79 ./migrate_face_results.py -c418f6e50054fa7eae1d0d879e28997b98f57437acec48b53ecb09f332728867 ./migrate_to_4188.py -6f60aa899e06f05e575cb5b461ea517481119cc32644566245d74c96eccde722 ./multi_stage_stamp_search.py -b24e2289c00f803c8339f59c34d44ed6c53a3c19dafc13e72c4b260d6bb312a6 ./music_segmentation_processor.py -da2546f84d0dbd711c8800ae4e32e59d9c38de9e62e1b423c4518fa1fda1dbea ./natural_language_top10.py 
-78c3d1a9302dbfacdf9b3655dab07348957fd9dbb4af94aae83eefecd5343a33 ./natural_language_vector_detailed.py -e924f04d68c9a8211ad373da811aa6671d2c5654281c1634dbf8b1e5e5b51533 ./natural_language_vector_test.py -df6ac92367b1afb50c0af958e362d87555fe569f608a8d213e0a593e2a43cde8 ./object_search_agent.py -fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd ./object_search.py -42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ./ocr_benchmark_runner.py -7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ./ocr_processor_contract_v1.py -271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ./ocr_processor_mps.py -2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ./ocr_processor.py -62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 ./opencv_stamp_search.py -b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 ./paligemma_vs_gdino.py -1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc ./parent_chunk_5w1h.py -5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 ./pipeline_checklist.py -8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f ./pipeline_status.py -94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c ./pose_processor_contract_v1.py -167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 ./pose_processor_mps.py -a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c ./pose_processor.py -45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 ./probe_file.py -139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b ./qa/executor.py -4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea ./qa/judges/facenet.py -0dcea0258ae3309cdec93dc4dd534d1a42511c327d528a117c8e3085f5b30386 ./qa/judges/gdino.py -7c9392436477662bc1b49d719f0c78f96e8e7e180fd281d4c59c36fd241a3e6a ./qa/judges/gemma4.py -84c6f793538981bdafdc08bb9bd5f12401b442441fae54936f610a758d18e972 ./qa/judges/maskformer.py 
-2f9b5dd3373fdec77a84f117ab620230e208f96d015c960275ab60a0656575b6 ./qa/judges/paligemma.py -52dedc276f6f9806710f1ef510aabd88032afe4abad364f5963fd2bd5b6cf14d ./qa/judges/yolo.py -c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd ./qa/pipeline.py -96f5ab509622118db307641082a19daff6b9a36bcc66451c35ed2abee4fe4249 ./qa/query_generator.py -00b1716423a184856bbe44d4132fd6d84ca13f3ae018964caa6f3389c1ab98a5 ./qa/scorer.py -01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 ./quick_stamp_search.py -e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 ./rebuild_parents.py -4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 ./rebuild_story_content.py -205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb ./redis_publisher.py -750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d ./refine_search.py -0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 ./regenerate_parent_5w1h.py -3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 ./register_sample_faces.py -334782f0f66d0ad3818a51adf6343186a2de65467378ab68a81ade806e496af9 ./release_manager.py -9a44cdd155953778b52ac0cfb118504c56eb6b1141984365ffbb717e28f3e65b ./release_pack.py -3906b48f3a7764d19605def2bf8ef84a54a6afe64c9291a7cc0881a91472a826 ./render_face_heatmap.py -44e432c31a35211a37dd26695772b7e250487ac42ba4f16a56f843277c2fabbf ./render_offline_report.py -3fac1e6a4125042185a2ce82771f695c562b3137c7aa58a912bada00ad8ecf78 ./rescan_single_frame_traces.py -9c3212cb455c2a6230be918448560fee00c153a8956ffd04fcb62974d5e1abff ./resume_framework.py -7c95ec08daf4f980bd53233503b7a4fa01afc08660e8fe8cd031ea3613ead8f7 ./save_events_to_db.py -24795e1531fe05e33d515104e4fb2f9567b46d802ef1b5a38f11268cf105be76 ./scan_charade_stamps.py -cad2da5073577f851c5cb2abdbd7cab05b39caa0d1179ccc89c378a7df2736c8 ./scan_full_video_stamps.py -03ae71470331fe5b7f8e394f7f789eee08cad4ed5ec9196b46ab2c9dbefa7fec ./scan_handheld_objects.py 
-d3935ba498786cf260d9d5370ca60d3af7bc4fd438f6be33ce23cfd0b7bab593 ./scan_keyframes_opencv.py -12c9b35212f587f5adb37584bf3c3844804d2bc642ebfc5d82b86b44f46d2472 ./scan_keyframes.py -f386130ac203308c904ba7efea09ce0ca0d640d36762b113bf0cfedc24d7f885 ./scene_classifier.py -482edae04e5467a68c77729760db53d3653e8d7654fa49e5ec9a36f1f8f22616 ./search_blue_stamp.py -e3786422932138272d1096ad4c800594e62c9640952a286a9158372a1e5443e3 ./search_envelope.py -2df1e259c2e52d10d79b20856cb94ffff5a9bfdbe47cee587b1148b2f1c16101 ./search_objects_in_hands.py -9fd49be8ab16f94fd82efc5ae035c029372a7ddeb7fd779b557f1917cdc14592 ./search_vase.py -7a6d8e7c435368f6218db972c04a7be16d7d6680d8d4374f82c05b7162716b9d ./select_face_reference_vectors_v2.py -2bcf7c1b3c407b51a134a5ee4982713f0ea387cfd6df01ed75554c94603971a6 ./select_face_reference_vectors_v3.py -d52098fcf1f9f7ba14f31a9a90bc5b3bc933e1a5e5697e3d09eff389c153cb18 ./select_face_reference_vectors.py -a02cb37639275d86ae0b4504d21f50963b45aaf94630c59472ba30d07722e50c ./simple_api_test.py -02516ab1616c1756c4f8041f48ff12811cc5d672c53b34850b84ce682fefdff1 ./simple_face_stats.py -b024d9bfe244d0d058daae0acd314b9344d6f0912e4f3b02dbc618f9fe3e4949 ./simple_test.py -af8703506769f3cdb89ff7849b071c2421307717850596dd86d2fe0b053e7809 ./smart_stamp_v2.py -5e5f86d47ea2b75bcaa8662689f73af1963645149c0da688dc43482616aa4e76 ./sound_event_detector.py -bab7697e4b4b05e93babc116e0c5b13cbaf1f4d419a65acd5dc1de5bdfc510dc ./speaker_assign.py -381ff240ce806ead7d6463ee40c5b830035eb6252180b4b0901b3c8313fa4bbd ./speaker_bind_lip.py -5eede29fa0966974c1943792d7fcca2dd9179d4f23570cf1a3964dc97bc9ac1e ./specific_stamp_search.py -d5363d832272bdb3c1d6f6d93eee7b7894893b9164a3f5ad5fa08a4a0eaeeb47 ./split_asr_segments.py -8e1269f173f2c72de78857c2d83d3111b62ec89bd79f4fb00c3f57390986ae4f ./step3_asr_fine.py -7592df8be5dc58376b33960bfa7fc0003c51114b70ebc01f1589f39ee9568d3b ./store_traced_faces.py -7ac32c1e2146a19e6654ab3e4bbbfd42e1a6540fb8717d40d55c61e9f5d1bf71 ./story_embed.py 
-74cc24b328a075f48b1f44a465611157f44eadc8f5dabf6d95cd5cc5f80dd9dc ./story_pipeline_full.py -97628f0f1270825dabafdf0a69f10ef12c4ffe2be4ac12941315f06bfb084e7c ./story_processor_contract_v1.py -1b1f42fc4bbff26551f26f4ac1e8a995dfe3ff98b940a29c9e130410965d0fa0 ./story_processor.py -cdbc7ef88551e2b3a3771eac5be5e0360989e71fa009ac28c97e548507e08a5e ./sync_face_speaker_to_chunks.py -8b08e9a33f5917aad10e070d6aa48805f5e7c23f905ba8fff3b8697b2109d962 ./sync_to_mongodb.py -f64cc6dcb72f54d3e97aa981b40591aef4804ca769e1f14628d901b98bc6aeac ./terminology_manager.py -455546b9bb3a2c2c877c7720229b254e75b28eea33b3715d1731c02ca85294ae ./test_api_correct_usage.py -b03dc1bbb091672e7da2b131850b17badac896b4fbba92fe9bce76c232c99be4 ./test_api_with_key_id.py -7d295c77d5bcd4c72c5673370af48cc89bbccf9292c3b82aad3a230d242547a9 ./test_args.py -f474ec88e6634decbf178da497443fa709096b174bb4a4320a07256f516b1044 ./test_asr_large_model.py -aa952524dd86f346740ffe555075b74adf2e60bb822bb04a943a51b1fd262445 ./test_birth_uuid.py -db87badad7948527325a528400d67a4eeef76abf8d13f5c4254c812e944e4e0c ./test_end_to_end.py -e191c98a82f7e089f7dccfc4c536244da2bf14339f982a3afef05d33332c3755 ./test_face_api_final.py -1b97c9aae2e1744aa7aefb192eaef86c64e6134efc8f08ffa9a274bff16a58d3 ./test_face_api_with_correct_key.py -f7e4078f31b1ca8494c18878219cf2f90c301f19fc851b9e7084657b71a5e150 ./test_face_api.py -9eafc49f8fa42b4cd58109e9b725b3aec3b06943ec426919b1788838ccf1ed92 ./test_face_db_fix.py -38bce82b167e0c97b257cc6b955fdc2e9ded581ce2d39eb0fd2c60249275394b ./test_face_direct.py -24e82bf0af82407e6c04361e9a671770cbfb0b05d92df589bd0d5a0118bb5a98 ./test_face_learning.py -8dcdb144c4253fbb466f220359b42c2a9579193865e320a56e682e384c2ae176 ./test_face_recognition_integration.py -b921e3256fdea176d4391116d1ead472c4f3ca8aac6999140367818818c35ec3 ./test_face_registration_api.py -9af6c6ff0c766b3de92185c3602f2b8b62b815bf88dcb0e3251c2676e61e0a48 ./test_face_tracker.py -4f70eadb6a8b80eb8febe32b17b77e58d1a4823cc5d598e5ea45555342d2d4cb 
./test_florence2_direct.py -0588be0acea540950d737943073f71e769b6301374eaa4ff7fdb96a80145c4e0 ./test_florence2_pipeline.py -694c15193616157ddae4bdb0a45feada2a8f8490f01d290a28aa77a4b24eabb2 ./test_florence2_stamps.py -2c281f698616a83e9eeccd610555d9f9ab657b2deac65ae9e3dbfba0b450d9b0 ./test_identity_db.py -7a73e8314ea7e91ca9dad3867a83b9c1101fdab09bdc0fdac0f798d0a7a204f3 ./test_llm_capabilities.py -68300f87b96a474f06a3071a833e6b3ae48d1db5fb8a7e5a3ec1834fd878d808 ./test_multilingual.py -c17cdd0f4ffb7a151a634add08d13cc576ba7a848bb20f54fb97d0c1d9d81cc0 ./test_object_search.py -d07bd363a2878259fbf4ffcba40e367f7f1bf4171b5a5dfdda97f7a53b450d0e ./test_ollama_feasibility.py -8421003b1f66cbd21c6fe5d3aff0a526897753e959b23905ca8f502f644f66a5 ./test_owl_vit_debug.py -6f9e8b7947229ea4aa0a62b59bda5fcec05bd74f6c00dc4a7b06d932bd1b730f ./test_owl_vit_stamps.py -da91a7c97466ce7f03cde13aa9bf6e691b3e482d2cac74519a2e1a61a2abb05a ./test_parent_chunk_generation.py -19d9f2492d3b04b7dafa008f106767d3107dd36b0c8e4601765dca30131027cd ./test_places365_scene.py -de44553023067362e8b2223f03e1bff55fcbd2f11ddf3d01060dc02c4675a744 ./test_probe_file.py -c0e987ba06a61cc0426ffbca8af1eb51a97bd79acab59b70453cfbb18eaee093 ./test_processor_performance.py -7b4b55e23dff35ba107b3da5b0560d03b1b41dfdea1d3a59eac777b4be4d4033 ./test_pyannote_audio.py -5cb8b42033ffba41f25e7ef74ef04cf352c0c277a9971e9eaef53fd673902712 ./test_pyannote_multilingual.py -8580e689ae148754e03d958419e108241040a012584ba49e8a90db114a9f8c13 ./test_scene_api.py -1194d450070b1f42e045d98e532f41205bb3e52fc48ba26e7c9b72a188fe1b2c ./test_segment_count.py -147bfffeac9561cfa407207b04a825862ac623ba97deecf5ed7c6257432dc62c ./test_speechbrain.py -22e4b865bc769329c1146c2f914395044a9bc84cd2a13acf68fb374a57fe1e3e ./test_v2_detailed.py -a616570a2a080b5b19f4bf783877147e714a014103b274143dd37984a946ca08 ./test_v2_model.py -7b83611f6b3028500c91c62197f774c0769e299136eca8dc4b612a7b5743e3d6 ./test_v2_with_text.py 
-1dd983c78074a61ceec26d7e3623d40772ca55fd6ee63ba368afe756c66ae091 ./test_with_real_image.py -1b738cc0d69d33e967cbb775def0a7f58dc02f1911404af56a5825bd60a5b75b ./text_semantic_analysis.py -a4221417ae00add76881c6c715ee4257c263e2dfd0a846a8887738682dfe8cda ./thumbnail_extractor.py -0d188a738a0df79ead10065d9f17c366fe159c862bd4bafa2860d0e6ba2640c3 ./tkg_builder.py -8b97f0fdfc0899460bf23d420dba0a51a34737c74ebad0519856909d198662bf ./tmdb_cast_fetcher.py -4858909a0beaf8397becf4103be17fcc350841217afcdc1d917c48c512a9041b ./tmdb_embed_extractor.py -54d8321dfe0f8caa669e4a9d1b48dc772a5b25817eab95b552944140c91f457d ./tmdb_identity_integration.py -2a84aa2dcfb83ac385d2c394f884926f306c81798e4277a26dbd1f3c5506be46 ./trace_face_aggregator.py -61d3b4b362722ce24326a204f1b72cc7b1dcc20cf3264a4f526d4ea343a8d33d ./transcribe.py -ede9a184fd51ef4c87eb3e2541f09b91739a49986cb588591a7c6fbb33433020 ./unified_synonym_processor.py -a408f294c3a71eb6a0eea80b9b586f73dedcefe286c62233f713a7428a9979be ./update_all_demographics.py -e6520bb10ae6835ceade487ceb5e3fa549ca6f06de35b2c785d649921ef443f4 ./update_fine_speakers.py -a2191daff2ad228725b6a66f0e472ec659a6b4fa8f2cbbd74d1bf9c35cca63eb ./update_person_demographics.py -60060753cfd2a6d1241e55bf40a0c74f1df15739656d0349e22e8543036b2424 ./update_speaker_assignments.py -fdc61009c351263e0018801b32ad90ffd8919af611a2a0580546be7fd62c99c4 ./update_terminology.py -0d337c821b36eb7761c0e439b63b8192ff54a542c539d1279efa6854f0b0cdc2 ./utils/body_action_decoder.py -3b384a8d88f6147d1953b14bd6b55672f4f161885e29bc241a466d4cfbd50e99 ./utils/face_trace_visualizer.py -52a7b79ade15f213841c70416565d3c5e46c145c9a72724ce545143c6e0bdea8 ./utils/face_tracker.py -ecd902a4a6f1084d8396af0b4d88079105c84fa6170e3a394720a6452ff3aa3b ./utils/pose_action_decoder.py -29dd3e0f802c0347cd9d9465123915b4604c990d7250048b7ae388af03cf5f36 ./utils/pose_analyzer.py -bc6184153096e5cd8d89d02fa3279c6587f60a49c6b3366b4d82cee722bbf352 ./utils/pose_transition_analyzer.py 
-d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc ./utils/test_mediapipe.py -4840c11964a59eabad26b97fe01033ccaf7903e2d24edd5e1035f6dd5fc995ea ./vectorize_4188.py -078979114c5f248d2bfd43aa8df55235fa03ab812f26998b984cd485a3d2cda8 ./vectorize_chunk_summaries.py -ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 ./vectorize_chunks.py -5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb ./video_comparison_statistics.py -0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 ./vision_agent.py -eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d ./visual_chunk_processor.py -c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5 ./visualize_stamp.py -6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7 ./voice_embedding_extractor.py -2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0 ./weather_sound_detector.py -206b61ebf3c91d7ce3f1488247b52aca6e955042d8aa979c59723e3ff10dd36a ./yolo_benchmark_runner.py -e8cb0963c90fbd1c2aa91141f80340edd3c9560d69780dd825d107c6ed14fa64 ./yolo_count_comparison.py -dad775ecdca0144bd14b7abaa7ec8fb213e8b9428e39906abce541e93db496b6 ./yolo_processor_contract_v1.py -74ff880e664ec514223a4f220b682fbc87089f8c0851c93ac68c97269b8a59b6 ./yolo_processor_mps.py -8af0a6db683b6626e07820b302135ac5960d38e3d4b3d187c640b23ce8a14f72 ./yolo_processor.py -e13cf22b9aeae96c7e28b4512dd2137743a25eb59027da446966c1aaaaf4ce71 ./zero_shot_combined_test.py -f4aaf017ff588999f06cd9ba1787517e06c6d6e6228a15a54d8aa4f54fde5eb3 ./zero_shot_gun_test.py -0a285b8ec33d7999e9d4ae8d43ce768c9f06ee1929e13a6809e98bdabe6357ce ./zero_shot_objects_test.py -5711c6d18acba76511a3f088d4d0f095b47c978a6c6ae3e086e2b7cbee7b9e55 ./backup_all.sh -c8860e3d55b99745265998abaae63efe28c83d7c1bfd91b30dfba54d146793d3 ./check_config.sh -6321793085bfb33b751b2848dddc41f13d9ead9763f6e581f9dcfceea9090f8b ./demo_identity_full_cycle.sh -77382d8671059ff99fd5ca3db42590de47ecf4e1555eea950bd3a7016b1547b0 
./deploy_package.sh -09bda12152917b969259ff3ca0bcda63f615bdf4873dbb8bb7f7ce5eec742a9f ./final_validation.sh -491e609bb43526b0c41d3dd060a3813bbeb3defc70fc88fe36f9fbbd2280e720 ./install_mongodb.sh -09e21960f0d7fdd00ff1d30334b753a8216ad17fc3644c9dbb129b4446ecc12c ./package_delivery.sh -0c2fe9288f9b51ad34aadf87093c1e1a423483ad7a972861ace811250e30204c ./package_file.sh -c233bb7b854dfd68e75808640fdea379af6952095a93cc8884d7e8b7ecbb4539 ./package_release.sh -02e85ba83e8d3da68bf9320ff25506714ce460736b8824309027a5ec375ea86b ./package_system.sh -7557f1999bde53ef397b78208713e8df8817171dfbc053d6bed130b57a229517 ./release_preflight_check.sh -091087dad7f38e8a0d98458b64fdeb0ac5770534f7dfebdbdf3b80d945ff39df ./security_check.sh -25711049adabfd179d4e19c2a4c4bd675ed9da8e8913ed1bdaac7519f6cde7ac ./setup_fresh_mac.sh -f6dae232edd5d2d111468be125609feb0dbd8db1895846f3d1c48f0e411e3a16 ./setup/01_postgresql.sh -8a405e2372ddb5958f7bfac15d330a2f189ffe2583ae37bba4c953ac45412c80 ./setup/check_momentry.sh -72dc22172a201a060a20f21b89af38c80ecb6399f594ecca81cafa8a918c764f ./setup/install_momentry.sh -5eccd14e8e4b3c91159b17756892dd03a7d26cb7bbc1961d783188ed10411770 ./setup/upgrade_momentry.sh -e48ab4673f71370dc7d4ce5c32d159bf9438e9e1dd7c9edd9c6053156fbdaa99 ./start_momentry.sh -ffe7e91a24fbfa826eb816f66cdb315097fe841a7b67a476865aec1ad7a4dda0 ./swift_processors/.build/checkouts/swift-argument-parser/Scripts/environment.sh -b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901 ./swift_processors/.build/checkouts/swift-argument-parser/Scripts/format.sh -9461213a77531fb3a5742fda0c9024304abe47988bb33852da55e96ae01a382a ./test_api_validation.sh -7cb98fb67007abe03bb57ef58a5e7499ae389693b33a14e015c9ef6061d6b0f5 ./test_face_recognition.sh -46bf67f794dbcd2c191f1933f1c05a1eef0ad3f5bb2e1d64e11e5f23a44ffc10 ./test_identity_agent.sh -7763d5bfbd83ede94e31eb8e44dd0d422fe2d1221b9e112d73fc637f29fdb7ea ./test_multilingual.sh -8a730fedf9252b7ed352b8447773c9c256f064fd64ca20efa05f9021766b09e5 
./test_search_modes_v2.sh -fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48 ./test_search_modes.sh -f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d ./troubleshoot.sh -992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05 ./verify_package.sh -b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed ./tmdb_agent.py +2bfe6a1c1263f35916d4a28981814515fc40cb473f7bbc801f84842904c888f6 add_yolo_to_chunks.py +f61f7126698018b346c8bafc45501708c17e3b45d9db54be5f0109afeee63176 age_benchmark.py +8efb13239db2a25a728abbdebd92affe685b69402a277cceb0d76e62ed9451ac analyze_asr_lip.py +432b3e3b30578e71ef973aca758bd1964102cbbb19530620df8ac02df00eefb8 analyze_video_faces.py +732609ef1882e14dc7ed60488697f6ae7e2607ec90b240a86ea9e585f052b9be apply_asr_corrections.py +790bd25424e93ca5a0743ea1a740a9a70f6ae6f8a9ca411012eb1e9b03907eb4 asr_benchmark_runner.py +18744dc3bebdce0d89ea7076b5e43febd35ad3c84064bb52adde4d128d50bc9f asr_face_stats.py +1577d055328a73561f9ccfaf0c54727532e3dddcd1bf0f33e3c38081415cced8 asr_model_benchmark.py +fcbb81639f53e9e08bee436853c84d918c0eeac09d985b34634d5ddc00055b61 asr_processor_base.py +25948a204e45ce844d43606b7e45c9532321d48df44887d261fc886748276b10 asr_processor_contract_v1.py +e9209cf028a11bdc45514124826374e58458ee06b054cfedffe8013d751735ea asr_processor_contract_v2.py +407dd0ec772027e0df27af0b66ea8130cb390595ccdeca4350e7bdc210acee6c asr_processor_debug.py +dcee1b80071b47c974bcffe3d27ec2f2269f4b8de7e7409ceaec7e6f271d31aa asr_processor_legacy_v2.py +10728a05a6ff2d56a70bb831abb51e05b03309e45bc5fa068c5a0702a4c73769 asr_processor_legacy.py +9106bfe07de9cfc920f4f4d2f821dc024df612f4c2a8f5f75d35f012d26440f0 asr_processor_simplified.py +7eabdcf7320302ee65c67e801f3ac7ca5801abc76165faa182348d30a8113e9f asr_processor_small_multilingual.py +2714f7be88f286635ea8465daf8fa969e6b27d2b2d1f73ac5e98f5e496139cad asr_processor_small.py +1089ff10b9b0a9f528cac79580aec25e33f8eeea485ac44b6aaf8c7c0cab5b42 asr_processor_v2.py 
+b9e826f23f080ae67f5961ad750ec2a6834cd18335955c3b3175b8cd06ebd6d3 asr_processor.py +5431b57d4369a841d51a6d6c5e1fb5e6c2932cb97cb4601f5e1b41ffe9f7ecaf asr_side_by_side_comparison.py +6c11efc3d40e559bfbeadcbf4f51eb353b744cc4f765bd8abc472a701e3f33cb asrx_processor_contract_v1.py +93501463af84d6541405057da3783d40492aec5e536b4210dcaffe460cdb5503 asrx_processor_custom.py +6adfbee842d134b9d180e2d1104694ed5cdc1fa4febcd0c502801b8f87b3ce66 asrx_processor_simplified.py +60fc3465f9c461583f8d0b888e85b3a6e04e1f252a1e1c21d036b52e1ce4b43c asrx_processor_v2_noalign.py +82d65b71bd86874e484870c40214d3fbd9343c39d5d635896fb4d257d13a410f asrx_processor_v2_transcribe.py +5a0c9905a2e10c847aa74f108e4054de4704bbafb2004589db15bf33833ea3c7 asrx_processor_v2.py +b16b00cf9e5de96abc512022af9bb81196405b10988f5a39dfd3a9b6471f1155 asrx_processor.py +f11b67ada6167540d2f95cb2af93d0e3a0de55bce659745baa37c4aa4805212e audio_taxonomy_processor_v2.py +ded810b81cda24e31e82de14ba9846770ee2b18d84d52b9d570de5877e9e2513 audio_taxonomy_processor.py +f7c53be5a031a8bff15c3165543586529932d81c4312521654d132b1f0ed6bc3 auto_identify_persons.py +5497a6f1f7ae267c796a398a9f020ea485aa45f980f2eca932b904ad61ce9b40 backfill_demographics.py +39a479ca4f8986f3255b0bcd0d9162a1f2ae339bb4dcf081f931ff9b304797a1 backfill_frame_data.py +77a98d9b7cb97eceae4c0fcf2c353933e0fb36ee7406b57d59b1e216b1a44601 build_docs.py +308c8e3f3d45ee273504f9f415eaf6c025f06aaf1cca33156a66431ed6e64f43 build_semantic_index_poc.py +4eb37768edd252d94f0d751f219c317e905bc093f414b2a6350efb8294131138 build_semantic_index.py +debbd058957d09c2397f3f4c028edaa0a658002921dcca95eae2a20070ba95fb caption_processor_contract_v1.py +7236cdb5deaeada266cc246ee11380248bb9f2255888c25a152b2f6ab1f981cc caption_processor.py +e73cbb688dade5c5b6fc4276f0c78b377903ff83f3830b63d8bcdacd8da8aecf check_all_stamps.py +7ecdbd4b1f94be8ebab9935ea210a868330e7030b6e19c73229c579c1189fd5c check_architecture_all.py +7179ed1a87241904af29542f9018398f8afd9b9dd89af7bb11909310ab7b49e0 
check_architecture_docs.py +7e6bd7d14582e494baf8b28354bbded3f79b43f0bd271ab33874da55b9086311 check_code_document_consistency.py +5ffca7c55edafad755e84499981553fcb48ce6056ca7b04130acafb9e6a9b1c3 check_frame_112_36.py +f49c7b0cfa53b657f69b2ad97a6e18393741cc2151b32c9d7dde2e078b75953f check_frame_91_59.py +d2cb7475262ee711a4b06e53559f0927242be4a924a56e7fe212225f318f4193 chinese_vector_test.py +ecde3d3df773916f62de4e34f8d8693feaedf112a3ef9955e22417c8421722bd chunk_statistics.py +2588ecf27c13020d894e46ba70a76de89f09556b475f555dae59db36da0b90a0 clean_sentence_text.py +98ab1129032f42fddc020f9b3492d1fc133851d1af33ddeb57e2385d88425af4 clip_logo_integration.py +bf6f74c09b8f8c7f25c5fffb9c36f16a8afb483a7b65903cfc75e2ea641bdf49 compare_asr_content.py +1f2caadcded724aa04a929018a35ace53dd79d172f5ee2720308fbd4581b0c6c compare_asr_models.py +1ed8a9530f40e304b556ff76c7cac40468c86a0cd32ff2a8bc7bf2a69669121d compare_models_gun_test.py +6bf790fe75a7a2a5220052ca14c31e90a97eabc4558cd5e9059280913862a81e compare_search.py +875e7a598982c8ad7222a51b7b147e91cd5e1a930f41214b3942107cb932fc5c compare_segmentation.py +e432b6f2364d5a9aaf207a1de0dca3fb14ab8d118c53ee34306abfe6fd211ba8 comprehensive_search_test.py +43df85cf860ac28e083de35b511bb2a7b91ed48f596757f52f19487768987500 coreml_embed_server.py +9149ccc8de5adfec69c6f3f2ec502ae7d5e7844518a228ba587af2e08cb38805 crop_opencv_stamp.py +fc36ecbb1455d959456945266e193b601a29c4210b4938a3f0d4a9aaf44b5cee crop_real_stamps.py +34a694624ce94d916b06a847bc4d41e7665985b85e55a626a4bc3a4370c21acf crop_stamp_112_36.py +27099dc9c8ee52a6949ce18c505089afef1720fe70858b90d0801972c3b43fff crop_stamp_closeup.py +01b5a3b091ebcffc0c1e2637b7af8192ba597239fa80d152738e3b8cfdf8174d crop_stamp.py +71b2a362b5395c6e4d70e62766820db92d94eaf140d98eecb2880bcd98d55be9 crop_top_candidates.py +60f18c5fa03ffbc80c209337cd1c8b6acd0b8471e600119340aa8cdfeef14f5b cut_benchmark_runner.py +deba86a1645ca5b1acf413dd9edfad77b93ff213897d739a32de1ba629bfce52 cut_processor_contract_v1.py 
+01024f947f0326c124293a30e4f2cdb859f21cfb2d4c07f9c1030e2934f7bc44 cut_processor.py +ff092ad2373b57321f87d1dd123fff8a99c8207057591e8526e56cb1424d47c6 dashboard.py +f184bf3e546db0253ffb71895e8d42aeb06588c71c4914c2fe656f42ef463c9a debug_face_registration.py +a9acce1ebd6ea821a8dc5009b8fc40586a98d31c23e93c97fd844bdadbda4ed2 deep_analysis_112_36.py +7767ee7455a956d14d286ad558c4c312c2ad3ccee1c73adc1bc8f761c96ad72a demo_dashboard.py +425290c12161c5cfcb0c505a737ba3951656b39e425e792919d4812e15b9b8e3 demo_face_learning.py +d7e3e27e6a65b1fa62530ee954c227dbb4f97593c5a5dcc48b39e5ebae4656e5 dense_scan_traces.py +df79b7fc7a03a8e754de5123a23bb33b1d5c23d832adc1886fb846ca517dd24d detect_language.py +f6f8047e24ebbec81ef27dd38f4242e63385f8ebe5be471cae156b8aa5fc4477 detect_objects_keyframes.py +e61d2ef5043bda3674a0050d83ba3bc6a70c47f54e456124a736b4328f0c0638 detect_stamp_shapes.py +f23a382113e9c7de2ec3b24e95160daef48f9336ae6d4ec9ee7a18f4bf529f6d download_places365_classes.py +a747e5e17960b972549714786bb9e28ea578e10e6c80788e298a0149c970bcc5 embed_faces.py +f1a2b3820e1a763eba6d8d905a5bb87f5a9b4a2f005e709e313bb7505ba7ddaa embeddinggemma_server.py +43c540c02c1be992e7d44ab4fc76a759815db3ed5f25bcbb594328b50ed7c73b export_file_package.py +19d23e4604d5532928412afe4d5d39ff49194ab4a046825286ae1be154326a1f export_file.py +5f10bab1dcb0b5fad233a74069f9e2f89043e7c848c9c38ae7e2806e6940c75d export_identities.py +2a1d0a1b853fd2c28f9a404871d33912f93521358576833be0999271bae02bcb export_person_thumbnails.py +a81bf1d6af78c052e638f5d5677b4edb512d0de5441025d86fd970d3e7993922 export_sqlite.py +2fe8c0131dde21382cae1483825d489fd467c2491a0cb91d5c1881df2e402e9f extract_face_embedding.py +8b5cc0ff437fb4dd0df28b7b20a78469cdca3621e2eeb4b6d46ad2391acb0596 extract_female_faces.py +bdecbaf0496bf536dce2ef4897f7090749820d15dcca03492d4d736ab0f8c6c5 face_benchmark_runner.py +22319a38bd684fb235fec681ddc60f45821e4bb2181f2b31fdf945f7ad9a1b85 face_clustering_processor.py 
+5adce4e444743331fa592e13d71e52f26554eadb9744d350a7654a449a8fb8a3 face_count_comparison.py +3574454c74eaf11021f9052f77d93044cca4ae0285d0f2630b4016c2ec0df783 face_cross_validate.py +4f09b3b66b14a5eefb14fcf915a1ad1e9147010f6ae7671731566679b1cae461 face_embedding_extractor.py +d05c65221cbe787e4e29a4de1966edb9e89fed47e9e89c9d065e1d5cb46cf178 face_landmark_qc.py +28776dfcc6ac40e9481c25467438745fed60fecdfd4fc19f9f4c7396397591a7 face_mediapipe_test.py +f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02 face_processor_contract_v1.py +802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e face_processor_mps.py +96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7 face_processor_optimized.py +4c3915a7465f524e706940c9813614ec4920cd6f8647602ef32e88fdbbaf8fc0 face_processor_v1.py +d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015 face_processor.py +8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9 face_recognition_processor.py +9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03 face_registration.py +72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b face_statistics_report.py +499f197a06f50839ebd5350af380fa56506ce08f073ba40c0e863b8e02b34133 fast_face_clustering_processor.py +0191781635b98d0675969fb87733af19525d7b5c148723346c5378c08a00fe33 fast_stamp_search.py +00e7e8ed06f6a0f2c46c84a47d7e7f5d366acee941d546a52c4b1b7885c71e08 filter_stamp_colors.py +5341fd648cffafc77568070313b06417636943d50ff3b4380a61381260acaafa final_face_validation.py +213793ab719f4ef42ec9b22f351dd86d4739211c17be486a46b76ba7e64fd8f1 find_blue_stamp_opencv.py +e1490317c0f56b895f73cfbb6f57c8e3ea5c65304bfdd7663f103f6b564e148c find_kids_pose.py +08d4cba0650f6a22fc134d07fd15fe8784c8472c3ba687b587e31e0b980e2b1c find_kids_refined.py +aecec0784ce5d0e98176c15798f05d4f67ab6a686f9ffafba71fbd82157027f8 find_magnifying_glass.py +620db08dd84f00af0c6d744dac54c68360548dd5b2cc26b12ddcefd936239b2e find_pink_stamp.py 
+1f4555b3578f4dc6bc08aa37e34eda1d91ea25d8134439771678d1a57bfdaeb9 find_realistic_stamp_opencv.py +277aa3b48eec2e739de3bb95ef501ffbd24104aa2a1bdef28c844ef44fd75013 find_small_stamp_opencv.py +fc73bbc9605938db495bd33ea74955e454e9384130531a16d42f25dbd9b515d8 find_stamp_in_hands.py +c6ed0f12e78c12df977ddca5d699f58edb174b47199f584e7a24dbdc3b7d02b1 find_stamp_in_magnifier_scene.py +ecf12e346619c27a985452e9f84ee262c2da25de9df0ff6e0b293279ccba559b find_stamp_opencv.py +4ff93cbcc781a5cff023f78006f1aebbe2d954405ae7d00a473fef6b41b2ebee fix_asr_text.py +4090cb892115843a909aa41426c0f39c5a53d8d88a5db69499ec8bafcb780d77 florence2_scan_stamps.py +e90e4447db3328b64a2062ca13ed41f6a045220d8fb640542dff5b790d3c4d3b gdino_comparison_test.py +7071a9999057c347e2275381f1f0c58e19aa8581d70a572d3170ed14a295a48d gdino_frame_api.py +891410310b415ff68a0f7ee0aa39e84eef7f2c75887487bdb88b8f4718d40e94 generate_asr1.py +24efe7db016387b40bd9caae449f0445a3d47eb878c00399803bb6e78e6dd5fc generate_benchmark_summary.py +dc956a78a3ed26686f45dd6d6d9cb42c023751fcd9b8789585450b6df63670a1 generate_chunk_summaries.py +8a0922d75fdc7c5994ebfb31881d765db4b105cbcddfcaa4b4c49d11950b8df4 generate_chunk_visual_stats.py +4860bfd00cc6c1c842c2f8e17e725eebca191d81067af3cb5a28661b45d74bd3 generate_parent_chunks_gemma4.py +e9fca223a8329ff6bdcb8552fecedb2d8b4607c6516c373c3023f29edfd42e06 generate_sentence_summaries.py +cbae7c3e85457274e8c284005196c39dc97f9d9200ed6b0e4ea266e48a381d3a generate_synonyms_llamacpp.py +57512cd7a5ec2f52813717fd3d81dec1aaa69dc9c91a9edbca847e7012b1c86f generate_synonyms_ollama.py +dc495cb8127858fa03a5f8b8bb4a772c5934ada1abecf97459bf71de80417672 gun_detector_scan.py +1a7cfb72723b3b94e3f4fe368477ba693ac3d20ac7af7351962bc548c700b451 head_shoulder_bench.py +b2fe8e4d8d7d1057ba928fc5e190f4a06cb60e83e2a02c5d7c423791596c11b8 head_shoulder_quick.py +ba5e67a97cb465e6a1a942c2f7342406031759ffcea2b897ae963bee4bc551c4 hybrid_stamp_search.py +f5847b6c8ed4c7c51290df9032d5a192317b5f03b5ff418ead1181a6e1b655f2 
identity_agent.py +12237fa6cc5f0d2dcdd05f26fd50c0a7bfd541d1c922a1640d131fa0c4d6f4fc identity_bind.py +046aa90eb4a4b830910912362a9865d1e6170f5bc176fae42be630f967f9d3ff import_file_package.py +7cc260d4411ab13559803686f8b645afa07738d652d9459830aecac268597fa7 import_file.py +071e3a5141d04cb9e6bd31489a835c778608785896b18ea7fa65e8db9f1547e5 insert_chunks.py +d3d53f44daa7f1526488677b141e90fbf4aa5625369b96a3ca275b802414802f integrate_face_asrx.py +4cb6a93ef8006cb69e8bdb1bc72899ee9bab1bf7eceaafe9896923bb7023bbd5 integrate_rule3_markers.py +75aa3e4bffc9f9cb8b9254db19095c93c3efb43d465fb5dcca8c7b9b730f5c59 integrated_body_action_decoder.py +f4dd2e21fb6b668bdf0c51cc56e214188b46937b96a2b4a10d13783e171d0472 language_router.py +bef426641645fcf7dcc68c87e3325a6edf3f70925febaf1df84f7c6ff87681e5 lip_analyzer.py +7f98b0cc8379b3759cc7e805dd56f736cc518093e83f43b2e5ecf559a19b95f0 lip_processor_cv.py +a1473eeba17fce25e4678234fe4e8793a132514e0566b03b36a0bec04eb93acb lip_processor_media.py +0df61396756ee22d35356776c189b354458661916c8baf85bcef97c9f8b62ec8 lip_processor_mp.py +3202aeca29e651ef1a54f47681c6b3b2d0680555fe3c6d318a932bb12b49e58c lip_processor_simple.py +fed15bafb5e09715cc03962f465b2ff618bf05ebeafdf932643690c9635c9840 lip_processor.py +b9532949bd145c0411876bdf3a8cbf1540b4233f7585465ce6389928e1bfd908 llm_metadata_enhancer.py +1773054e8d563b493865880d0d8bda105e3eb6fb536a25817517237b3bb76afe magnifying_glass_analyze.py +7d4d048c452bf273f4a6d96da13eb7bab6aa60ca9dd51de5ca0fb0a01e587b13 magnifying_glass_extract.py +8528bbf89d2770fa5a23f461274038898be251fb6e48c5d3adece5aab3bf976d magnifying_glass_owl.py +cb645f5e29ee5a36b2f97812039abfdaed7328386bcd25ad7b742af6a6b16399 map_speakers_v2.py +a90bd3fb729a05010c29a213134c60cc0bdd17769e27a7d3f1250919b7bf1613 match_face_identity.py +2d864dc831c2fd0142b19b8ad2cda169c2a05facd9662d31861d29bb710c4979 match_face_with_pose_filtering.py +889d4853707896885ed96ab945d4266acb213f4b122e2ba7c4563eb0e3e9e865 match_identities_to_tmdb.py 
+b34ec373bcf65139e08e41967f58a2fc8ebb67a59c361074d3590cd16541415a match_speakers_to_chunks.py +fe6260a94d01d8b43d0d3b59eb820cfd7b4711c907343a1261c69f9010ae990d mediapipe_holistic_processor.py +bb36844b4d13bba8edc1b7f0703f02081b62bea795535b8cd8dcbfdb4281f402 migrate_asr_to_children.py +819312cbfce6e68a0d8d731e02d283946f79de6044f207991ddf9a28ac853d79 migrate_face_results.py +c3d062aab67b5177ac7bf2c3ad2f0e578e12c9893e377f68339a17cc2783316c migrate_identity_files.py +c418f6e50054fa7eae1d0d879e28997b98f57437acec48b53ecb09f332728867 migrate_to_4188.py +6f60aa899e06f05e575cb5b461ea517481119cc32644566245d74c96eccde722 multi_stage_stamp_search.py +b24e2289c00f803c8339f59c34d44ed6c53a3c19dafc13e72c4b260d6bb312a6 music_segmentation_processor.py +da2546f84d0dbd711c8800ae4e32e59d9c38de9e62e1b423c4518fa1fda1dbea natural_language_top10.py +78c3d1a9302dbfacdf9b3655dab07348957fd9dbb4af94aae83eefecd5343a33 natural_language_vector_detailed.py +e924f04d68c9a8211ad373da811aa6671d2c5654281c1634dbf8b1e5e5b51533 natural_language_vector_test.py +df6ac92367b1afb50c0af958e362d87555fe569f608a8d213e0a593e2a43cde8 object_search_agent.py +fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd object_search.py +42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13 ocr_benchmark_runner.py +7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8 ocr_processor_contract_v1.py +271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a ocr_processor_mps.py +2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3 ocr_processor.py +62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80 opencv_stamp_search.py +b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077 paligemma_vs_gdino.py +1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc parent_chunk_5w1h.py +5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3 pipeline_checklist.py +8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f pipeline_status.py 
+94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c pose_processor_contract_v1.py +167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482 pose_processor_mps.py +a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c pose_processor.py +45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131 probe_file.py +01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626 quick_stamp_search.py +e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2 rebuild_parents.py +4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9 rebuild_story_content.py +090137a5872edfed1b89c97b537d13ad8aafda9a705ebb4c54f30352503e5e3a redis_publisher.py +750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d refine_search.py +0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06 regenerate_parent_5w1h.py +3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6 register_sample_faces.py +334782f0f66d0ad3818a51adf6343186a2de65467378ab68a81ade806e496af9 release_manager.py +9a44cdd155953778b52ac0cfb118504c56eb6b1141984365ffbb717e28f3e65b release_pack.py +3906b48f3a7764d19605def2bf8ef84a54a6afe64c9291a7cc0881a91472a826 render_face_heatmap.py +44e432c31a35211a37dd26695772b7e250487ac42ba4f16a56f843277c2fabbf render_offline_report.py +3fac1e6a4125042185a2ce82771f695c562b3137c7aa58a912bada00ad8ecf78 rescan_single_frame_traces.py +9c3212cb455c2a6230be918448560fee00c153a8956ffd04fcb62974d5e1abff resume_framework.py +7c95ec08daf4f980bd53233503b7a4fa01afc08660e8fe8cd031ea3613ead8f7 save_events_to_db.py +24795e1531fe05e33d515104e4fb2f9567b46d802ef1b5a38f11268cf105be76 scan_charade_stamps.py +cad2da5073577f851c5cb2abdbd7cab05b39caa0d1179ccc89c378a7df2736c8 scan_full_video_stamps.py +03ae71470331fe5b7f8e394f7f789eee08cad4ed5ec9196b46ab2c9dbefa7fec scan_handheld_objects.py +d3935ba498786cf260d9d5370ca60d3af7bc4fd438f6be33ce23cfd0b7bab593 scan_keyframes_opencv.py 
+12c9b35212f587f5adb37584bf3c3844804d2bc642ebfc5d82b86b44f46d2472 scan_keyframes.py +f386130ac203308c904ba7efea09ce0ca0d640d36762b113bf0cfedc24d7f885 scene_classifier.py +482edae04e5467a68c77729760db53d3653e8d7654fa49e5ec9a36f1f8f22616 search_blue_stamp.py +e3786422932138272d1096ad4c800594e62c9640952a286a9158372a1e5443e3 search_envelope.py +2df1e259c2e52d10d79b20856cb94ffff5a9bfdbe47cee587b1148b2f1c16101 search_objects_in_hands.py +9fd49be8ab16f94fd82efc5ae035c029372a7ddeb7fd779b557f1917cdc14592 search_vase.py +7a6d8e7c435368f6218db972c04a7be16d7d6680d8d4374f82c05b7162716b9d select_face_reference_vectors_v2.py +2bcf7c1b3c407b51a134a5ee4982713f0ea387cfd6df01ed75554c94603971a6 select_face_reference_vectors_v3.py +d52098fcf1f9f7ba14f31a9a90bc5b3bc933e1a5e5697e3d09eff389c153cb18 select_face_reference_vectors.py +a02cb37639275d86ae0b4504d21f50963b45aaf94630c59472ba30d07722e50c simple_api_test.py +02516ab1616c1756c4f8041f48ff12811cc5d672c53b34850b84ce682fefdff1 simple_face_stats.py +b024d9bfe244d0d058daae0acd314b9344d6f0912e4f3b02dbc618f9fe3e4949 simple_test.py +af8703506769f3cdb89ff7849b071c2421307717850596dd86d2fe0b053e7809 smart_stamp_v2.py +5e5f86d47ea2b75bcaa8662689f73af1963645149c0da688dc43482616aa4e76 sound_event_detector.py +bab7697e4b4b05e93babc116e0c5b13cbaf1f4d419a65acd5dc1de5bdfc510dc speaker_assign.py +381ff240ce806ead7d6463ee40c5b830035eb6252180b4b0901b3c8313fa4bbd speaker_bind_lip.py +5eede29fa0966974c1943792d7fcca2dd9179d4f23570cf1a3964dc97bc9ac1e specific_stamp_search.py +d5363d832272bdb3c1d6f6d93eee7b7894893b9164a3f5ad5fa08a4a0eaeeb47 split_asr_segments.py +8e1269f173f2c72de78857c2d83d3111b62ec89bd79f4fb00c3f57390986ae4f step3_asr_fine.py +7592df8be5dc58376b33960bfa7fc0003c51114b70ebc01f1589f39ee9568d3b store_traced_faces.py +7ac32c1e2146a19e6654ab3e4bbbfd42e1a6540fb8717d40d55c61e9f5d1bf71 story_embed.py +74cc24b328a075f48b1f44a465611157f44eadc8f5dabf6d95cd5cc5f80dd9dc story_pipeline_full.py 
+97628f0f1270825dabafdf0a69f10ef12c4ffe2be4ac12941315f06bfb084e7c story_processor_contract_v1.py +1b1f42fc4bbff26551f26f4ac1e8a995dfe3ff98b940a29c9e130410965d0fa0 story_processor.py +cdbc7ef88551e2b3a3771eac5be5e0360989e71fa009ac28c97e548507e08a5e sync_face_speaker_to_chunks.py +8b08e9a33f5917aad10e070d6aa48805f5e7c23f905ba8fff3b8697b2109d962 sync_to_mongodb.py +869b6c56fe16cbf8973826782a17503f02b5cd757ec025b944da693d38bdb4cb sync_users_from_sftpgo.py +f64cc6dcb72f54d3e97aa981b40591aef4804ca769e1f14628d901b98bc6aeac terminology_manager.py +455546b9bb3a2c2c877c7720229b254e75b28eea33b3715d1731c02ca85294ae test_api_correct_usage.py +b03dc1bbb091672e7da2b131850b17badac896b4fbba92fe9bce76c232c99be4 test_api_with_key_id.py +7d295c77d5bcd4c72c5673370af48cc89bbccf9292c3b82aad3a230d242547a9 test_args.py +f474ec88e6634decbf178da497443fa709096b174bb4a4320a07256f516b1044 test_asr_large_model.py +aa952524dd86f346740ffe555075b74adf2e60bb822bb04a943a51b1fd262445 test_birth_uuid.py +db87badad7948527325a528400d67a4eeef76abf8d13f5c4254c812e944e4e0c test_end_to_end.py +e191c98a82f7e089f7dccfc4c536244da2bf14339f982a3afef05d33332c3755 test_face_api_final.py +1b97c9aae2e1744aa7aefb192eaef86c64e6134efc8f08ffa9a274bff16a58d3 test_face_api_with_correct_key.py +f7e4078f31b1ca8494c18878219cf2f90c301f19fc851b9e7084657b71a5e150 test_face_api.py +9eafc49f8fa42b4cd58109e9b725b3aec3b06943ec426919b1788838ccf1ed92 test_face_db_fix.py +38bce82b167e0c97b257cc6b955fdc2e9ded581ce2d39eb0fd2c60249275394b test_face_direct.py +24e82bf0af82407e6c04361e9a671770cbfb0b05d92df589bd0d5a0118bb5a98 test_face_learning.py +8dcdb144c4253fbb466f220359b42c2a9579193865e320a56e682e384c2ae176 test_face_recognition_integration.py +b921e3256fdea176d4391116d1ead472c4f3ca8aac6999140367818818c35ec3 test_face_registration_api.py +9af6c6ff0c766b3de92185c3602f2b8b62b815bf88dcb0e3251c2676e61e0a48 test_face_tracker.py +4f70eadb6a8b80eb8febe32b17b77e58d1a4823cc5d598e5ea45555342d2d4cb test_florence2_direct.py 
+0588be0acea540950d737943073f71e769b6301374eaa4ff7fdb96a80145c4e0 test_florence2_pipeline.py +694c15193616157ddae4bdb0a45feada2a8f8490f01d290a28aa77a4b24eabb2 test_florence2_stamps.py +2c281f698616a83e9eeccd610555d9f9ab657b2deac65ae9e3dbfba0b450d9b0 test_identity_db.py +7a73e8314ea7e91ca9dad3867a83b9c1101fdab09bdc0fdac0f798d0a7a204f3 test_llm_capabilities.py +68300f87b96a474f06a3071a833e6b3ae48d1db5fb8a7e5a3ec1834fd878d808 test_multilingual.py +c17cdd0f4ffb7a151a634add08d13cc576ba7a848bb20f54fb97d0c1d9d81cc0 test_object_search.py +d07bd363a2878259fbf4ffcba40e367f7f1bf4171b5a5dfdda97f7a53b450d0e test_ollama_feasibility.py +8421003b1f66cbd21c6fe5d3aff0a526897753e959b23905ca8f502f644f66a5 test_owl_vit_debug.py +6f9e8b7947229ea4aa0a62b59bda5fcec05bd74f6c00dc4a7b06d932bd1b730f test_owl_vit_stamps.py +da91a7c97466ce7f03cde13aa9bf6e691b3e482d2cac74519a2e1a61a2abb05a test_parent_chunk_generation.py +19d9f2492d3b04b7dafa008f106767d3107dd36b0c8e4601765dca30131027cd test_places365_scene.py +de44553023067362e8b2223f03e1bff55fcbd2f11ddf3d01060dc02c4675a744 test_probe_file.py +c0e987ba06a61cc0426ffbca8af1eb51a97bd79acab59b70453cfbb18eaee093 test_processor_performance.py +7b4b55e23dff35ba107b3da5b0560d03b1b41dfdea1d3a59eac777b4be4d4033 test_pyannote_audio.py +5cb8b42033ffba41f25e7ef74ef04cf352c0c277a9971e9eaef53fd673902712 test_pyannote_multilingual.py +8580e689ae148754e03d958419e108241040a012584ba49e8a90db114a9f8c13 test_scene_api.py +1194d450070b1f42e045d98e532f41205bb3e52fc48ba26e7c9b72a188fe1b2c test_segment_count.py +147bfffeac9561cfa407207b04a825862ac623ba97deecf5ed7c6257432dc62c test_speechbrain.py +22e4b865bc769329c1146c2f914395044a9bc84cd2a13acf68fb374a57fe1e3e test_v2_detailed.py +a616570a2a080b5b19f4bf783877147e714a014103b274143dd37984a946ca08 test_v2_model.py +7b83611f6b3028500c91c62197f774c0769e299136eca8dc4b612a7b5743e3d6 test_v2_with_text.py +1dd983c78074a61ceec26d7e3623d40772ca55fd6ee63ba368afe756c66ae091 test_with_real_image.py 
+1b738cc0d69d33e967cbb775def0a7f58dc02f1911404af56a5825bd60a5b75b text_semantic_analysis.py +a4221417ae00add76881c6c715ee4257c263e2dfd0a846a8887738682dfe8cda thumbnail_extractor.py +0d188a738a0df79ead10065d9f17c366fe159c862bd4bafa2860d0e6ba2640c3 tkg_builder.py +a084d3b5840e920d552515febffa22b34943b9efa8b73adab9cd193372e71592 tmdb_agent.py +8b97f0fdfc0899460bf23d420dba0a51a34737c74ebad0519856909d198662bf tmdb_cast_fetcher.py +4858909a0beaf8397becf4103be17fcc350841217afcdc1d917c48c512a9041b tmdb_embed_extractor.py +54d8321dfe0f8caa669e4a9d1b48dc772a5b25817eab95b552944140c91f457d tmdb_identity_integration.py +2a84aa2dcfb83ac385d2c394f884926f306c81798e4277a26dbd1f3c5506be46 trace_face_aggregator.py +61d3b4b362722ce24326a204f1b72cc7b1dcc20cf3264a4f526d4ea343a8d33d transcribe.py +ede9a184fd51ef4c87eb3e2541f09b91739a49986cb588591a7c6fbb33433020 unified_synonym_processor.py +a408f294c3a71eb6a0eea80b9b586f73dedcefe286c62233f713a7428a9979be update_all_demographics.py +e6520bb10ae6835ceade487ceb5e3fa549ca6f06de35b2c785d649921ef443f4 update_fine_speakers.py +a2191daff2ad228725b6a66f0e472ec659a6b4fa8f2cbbd74d1bf9c35cca63eb update_person_demographics.py +1a7dddd1db467990ee1c685d61b971babfa30c3ae3a754b5df8f3b4c320f3ed1 update_qdrant_uuid.py +60060753cfd2a6d1241e55bf40a0c74f1df15739656d0349e22e8543036b2424 update_speaker_assignments.py +fdc61009c351263e0018801b32ad90ffd8919af611a2a0580546be7fd62c99c4 update_terminology.py +4840c11964a59eabad26b97fe01033ccaf7903e2d24edd5e1035f6dd5fc995ea vectorize_4188.py +078979114c5f248d2bfd43aa8df55235fa03ab812f26998b984cd485a3d2cda8 vectorize_chunk_summaries.py +ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2 vectorize_chunks.py +5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb video_comparison_statistics.py +0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1 vision_agent.py +eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d visual_chunk_processor.py 
# CoreML FaceNet package, located relative to this script's directory.
FACENET_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "..", "models", "facenet512.mlpackage"
)

# Side length (pixels) of the square crop handed to the model.
_FACENET_INPUT_SIZE = 160
# Boxes whose width or height is at or below this value are not embedded.
_MIN_FACE_SIDE = 10


def classify_pose(roll: float, yaw: float) -> str:
    """Bucket a head pose into a coarse label from its roll and yaw.

    Returns ``"frontal"`` when both ``|yaw|`` and ``|roll|`` are below 15,
    ``"profile_right"`` / ``"profile_left"`` when ``|yaw|`` exceeds 30
    (chosen by the sign of ``yaw``), and ``"three_quarter"`` otherwise.
    Units are whatever the detector supplies (presumably degrees -- confirm).
    """
    abs_yaw, abs_roll = abs(yaw), abs(roll)
    if abs_yaw < 15 and abs_roll < 15:
        return "frontal"
    if abs_yaw > 30:
        return "profile_right" if yaw > 0 else "profile_left"
    return "three_quarter"


def extract_embedding(coreml_model, face_img):
    """Run one face crop through the CoreML model and return its embedding.

    The crop is resized to 160x160, scaled as ``value / 127.5 - 1``, laid out
    channel-first with a leading batch axis and passed as the ``"input"``
    feature. The embedding is read from the first output whose name starts
    with ``"var_"`` and returned as a flat list of floats.

    Raises:
        KeyError: if the prediction contains no ``var_*`` output.
    """
    resized = cv2.resize(face_img, (_FACENET_INPUT_SIZE, _FACENET_INPUT_SIZE))
    # NOTE(review): frames from cv2.VideoCapture are BGR and no BGR->RGB swap
    # happens here; confirm the channel order this model expects.
    normalized = (resized.astype(np.float32) / 127.5) - 1.0
    input_array = np.expand_dims(np.transpose(normalized, (2, 0, 1)), axis=0)
    result = coreml_model.predict({"input": input_array})
    emb_key = next((k for k in result if k.startswith("var_")), None)
    if emb_key is None:
        raise KeyError(f"no 'var_*' output in model prediction: {sorted(result)}")
    return result[emb_key].flatten().tolist()


def _clamp_bbox(x, y, w, h, frame_width, frame_height):
    """Clip an (x, y, w, h) box to the frame; return (x1, y1, x2, y2) or None if empty."""
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(frame_width, x + w), min(frame_height, y + h)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def _face_record(face, x, y, w, h, emb):
    """Build the output entry for one embedded face from its detection dict."""
    pose_info = face.get("pose", {})
    roll = pose_info.get("roll", 0)
    yaw = pose_info.get("yaw", 0)
    return {
        "x": x, "y": y, "width": w, "height": h,
        "confidence": face.get("confidence", 0.5),
        "embedding": emb,
        "pose_angle": {
            "angle": classify_pose(roll, yaw),
            "roll": roll,
            "yaw": yaw,
            "pitch": pose_info.get("pitch", 0),
        },
        "landmarks": face.get("landmarks", []),
    }


def main():
    """CLI entry point: embed every detected face and write the result as JSON.

    Reads a detections JSON (``frames`` list of ``{"frame", "faces"}`` entries),
    crops each face from ``--video``, embeds it with the CoreML FaceNet model
    at ``FACENET_PATH`` and writes ``{"metadata", "frames"}`` to ``output_json``,
    where ``frames`` maps the frame number (as a string) to its face records.

    Exits with a non-zero status if the video cannot be opened.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Embed faces only")
    parser.add_argument("detections_json")
    parser.add_argument("output_json")
    parser.add_argument("--video", required=True)
    args = parser.parse_args()

    print(f"[EMBED] Loading detections: {args.detections_json}")
    with open(args.detections_json) as f:
        detection_data = json.load(f)

    print(f"[EMBED] Loading CoreML FaceNet: {FACENET_PATH}")
    coreml_model = ct.models.MLModel(FACENET_PATH)

    print(f"[EMBED] Opening video: {args.video}")
    video = cv2.VideoCapture(args.video)
    if not video.isOpened():
        # Without this check width/height read as 0, every face is clipped
        # away, and an empty result would be written as "completed".
        sys.exit(f"[EMBED] Cannot open video: {args.video}")

    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        face_data = {
            "metadata": {
                "video_path": os.path.abspath(args.video),
                "fps": fps, "width": width, "height": height,
                "sample_interval": detection_data.get("sample_interval", 3),
                "detection_method": "apple_vision",
                "embedding_method": "coreml_facenet",
                "total_frames": total_frames,
            },
            "frames": {}
        }

        frames = detection_data.get("frames", [])
        t0 = time.time()
        embed_count, total_face_count = 0, 0
        # Report progress roughly every 5% of the frames.
        batch_size = max(1, len(frames) // 20)

        for idx, frame_info in enumerate(frames):
            frame_num = frame_info["frame"]
            faces = []
            # Decode the frame lazily and at most once, however many faces it
            # holds; frames without any usable face are never decoded.
            frame = None
            read_failed = False

            for face in frame_info.get("faces", []):
                total_face_count += 1
                bb = face.get("bbox", face)
                x, y, w, h = bb["x"], bb["y"], bb["width"], bb["height"]
                if w <= _MIN_FACE_SIDE or h <= _MIN_FACE_SIDE:
                    continue

                if frame is None and not read_failed:
                    video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                    ret, frame = video.read()
                    if not ret:
                        frame, read_failed = None, True
                if frame is None:
                    continue

                box = _clamp_bbox(x, y, w, h, width, height)
                if box is None:
                    continue
                x1, y1, x2, y2 = box
                face_img = frame[y1:y2, x1:x2]
                if face_img.size == 0:
                    continue

                emb = extract_embedding(coreml_model, face_img)
                embed_count += 1
                faces.append(_face_record(face, x, y, w, h, emb))

            face_data["frames"][str(frame_num)] = faces

            if (idx + 1) % batch_size == 0:
                pct = (idx + 1) / len(frames) * 100
                elapsed = time.time() - t0
                eta = (elapsed / (idx + 1)) * (len(frames) - idx - 1) if idx > 0 else 0
                print(f"[EMBED] {pct:.0f}% | {idx+1}/{len(frames)} frames | "
                      f"{embed_count} embeddings | {elapsed:.0f}s elapsed | "
                      f"{eta:.0f}s ETA", flush=True)
    finally:
        # Release the capture even if embedding or decoding raises.
        video.release()

    face_data["metadata"]["status"] = "completed"

    print(f"[EMBED] Writing output: {args.output_json}")
    with open(args.output_json, "w") as f:
        json.dump(face_data, f, indent=2)

    elapsed = time.time() - t0
    print(f"[EMBED] Done: {len(frames)} frames, {embed_count}/{total_face_count} embeddings, {elapsed:.0f}s")


if __name__ == "__main__":
    main()
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
FACENET_PATH = os.path.join(SCRIPT_DIR, "..", "models", "facenet512.mlpackage")


def _to_embedding_list(raw):
    """Unwrap a model output (ndarray, or dict whose first value is one) into a JSON-friendly value."""
    if isinstance(raw, dict):
        raw = list(raw.values())[0]
    if isinstance(raw, np.ndarray):
        return raw.flatten().tolist()
    return raw


def extract_embeddings(face_json_path: str, video_path: str, output_path: str):
    """Add a FaceNet embedding to every face in a face.json and save the result.

    For each entry of ``frames`` the matching video frame is decoded, each face
    box (``x``, ``y``, ``width``, ``height``) is clipped to the frame, cropped,
    resized to 160x160, converted BGR->RGB and passed to the CoreML model at
    ``FACENET_PATH``. The embedding is stored under ``face['embedding']`` and
    the total is recorded in ``metadata['total_embeddings']``.

    Args:
        face_json_path: input JSON holding a ``frames`` list.
        video_path: video the detections were taken from.
        output_path: where the updated JSON is written.

    Returns:
        None. Returns early, writing nothing, when there are no frames, the
        model package is missing, or the video cannot be opened.
    """
    with open(face_json_path, 'r') as f:
        face_data = json.load(f)

    frames = face_data.get('frames', [])
    if not frames:
        print("No frames in face.json")
        return

    # Load CoreML FaceNet
    facenet = os.path.normpath(FACENET_PATH)
    if not os.path.exists(facenet):
        print(f"FaceNet model not found: {facenet}")
        return

    coreml_model = ct.models.MLModel(facenet)
    print(f"[EMB] CoreML FaceNet loaded: {facenet}")

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print(f"Cannot open video: {video_path}")
        return

    embed_count = 0
    processed_frames = 0

    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"[EMB] Video: {fps} fps, {total_frames} frames")

        for frame_entry in frames:
            frame_num = frame_entry.get('frame', 0)
            faces = frame_entry.get('faces', [])

            video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, img = video.read()
            if not ret:
                continue

            processed_frames += 1
            img_h, img_w = img.shape[:2]

            for face in faces:
                x = face.get('x', 0)
                y = face.get('y', 0)
                w = face.get('width', 0)
                h = face.get('height', 0)

                # Clip the box to the frame. An unclamped negative x/y would be
                # read by NumPy as an index from the far edge and crop the
                # wrong region; zero or negative sizes collapse to an empty box.
                x1, y1 = max(0, x), max(0, y)
                x2, y2 = min(img_w, x + w), min(img_h, y + h)
                if x2 <= x1 or y2 <= y1:
                    continue

                crop = img[y1:y2, x1:x2]
                if crop.size == 0:
                    continue

                # Resize to 160x160 (FaceNet input size), then BGR -> RGB
                crop_resized = cv2.resize(crop, (160, 160))
                crop_rgb = cv2.cvtColor(crop_resized, cv2.COLOR_BGR2RGB)

                # NOTE(review): scripts/embed_faces_only.py feeds the same
                # facenet512.mlpackage a normalised channel-first float array
                # under "input" and reads a "var_*" output, while this code
                # passes the RGB crop under 'image' and reads 'output' /
                # 'embeddings'. Confirm which matches the model spec; a
                # mismatch only shows up as the per-face failure message below.
                try:
                    output = coreml_model.predict({'image': crop_rgb})
                    emb = output.get('output', output.get('embeddings', None))
                    if emb is not None:
                        face['embedding'] = _to_embedding_list(emb)
                        embed_count += 1
                except Exception as e:
                    # Deliberate best effort: one bad face must not abort the run.
                    print(f"[EMB] Frame {frame_num} embedding failed: {e}")

            if processed_frames % 1000 == 0:
                print(f"[EMB] Processed {processed_frames} frames, {embed_count} embeddings")
    finally:
        # Release the capture even if decoding or resizing raises.
        video.release()

    # Tolerate inputs without a 'metadata' section instead of failing with
    # KeyError after all embeddings have been computed.
    face_data.setdefault('metadata', {})['total_embeddings'] = embed_count

    with open(output_path, 'w') as f:
        json.dump(face_data, f)

    print(f"[EMB] Done: {processed_frames} frames, {embed_count} embeddings")