chore: backup before migration to new repo

2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions
@@ -1,5 +1,10 @@
 DB_MAX_CONNECTIONS=50
 DB_ACQUIRE_TIMEOUT=30
+DATABASE_SCHEMA=dev
 QDRANT_URL=http://127.0.0.1:6333
 QDRANT_API_KEY=Test3200Test3200Test3200
-QDRANT_COLLECTION=momentry_rule1
+QDRANT_COLLECTION=momentry_rule1
+MONGODB_URL=mongodb://localhost:27017
+MONGODB_CACHE_ENABLED=false
+MOMENTRY_REDIS_PREFIX=momentry:
+REDIS_URL=redis://:accusys@localhost:6379
@@ -14,25 +14,27 @@ MOMENTRY_MAX_CONCURRENT=1
 MOMENTRY_POLL_INTERVAL=10
 MOMENTRY_WORKER_BATCH_SIZE=5

-# Database (same as production, but could use separate dev database)
+# Database (PostgreSQL) - Schema isolation
 DATABASE_URL=postgres://accusys@localhost:5432/momentry
+DATABASE_SCHEMA=dev

-# MongoDB
+# MongoDB - Database isolation
 MONGODB_URL=mongodb://localhost:27017
-MONGODB_DATABASE=momentry
+MONGODB_DATABASE=momentry_dev

-# Redis
+# Redis (already isolated via prefix)
 REDIS_URL=redis://:accusys@localhost:6379
 REDIS_PASSWORD=accusys

-# Qdrant Vector Database (same as production)
+# Qdrant Vector Database - Collection isolation
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=Test3200Test3200Test3200
-QDRANT_COLLECTION=momentry_rule1
+QDRANT_COLLECTION=momentry_dev_rule1

 # Paths
 MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev
 MOMENTRY_BACKUP_DIR=/Users/accusys/momentry/backup/momentry_dev
+MOMENTRY_SFTP_ROOT=/Users/accusys/momentry/var/sftpgo/data/demo/

 # Python (for processing scripts)
 MOMENTRY_PYTHON_PATH=/opt/homebrew/bin/python3.11
@@ -57,4 +59,12 @@ MONGODB_CACHE_TTL_SEARCH=300
 MONGODB_CACHE_TTL_HYBRID_SEARCH=600
 MONGODB_CACHE_TTL_VIDEO_META=3600
 REDIS_CACHE_TTL_HEALTH=30
-REDIS_CACHE_TTL_VIDEO_META=3600
+REDIS_CACHE_TTL_VIDEO_META=3600
+# 同義詞配置文件（可選）
+# 取消註釋並設置為您的同義詞JSON檔案路徑以啟用同義詞擴展
+# MOMENTRY_SYNONYM_FILE=/Users/accusys/momentry_core_0.1/docs/examples/custom_synonyms.json
+# 
+# 多個同義詞檔案（逗號分隔），會覆蓋 MOMENTRY_SYNONYM_FILE
+# MOMENTRY_SYNONYM_FILES=/path/to/first.json,/path/to/second.json
+#
+# 示例檔案：docs/examples/custom_synonyms.json
@@ -182,6 +182,15 @@ src/
 ### Server
 - `MOMENTRY_SERVER_PORT` - API server port (default: `3002` for production, `3003` for playground)
 - `MOMENTRY_REDIS_PREFIX` - Redis key prefix (default: `momentry:` for production, `momentry_dev:` for playground)
+- `MOMENTRY_API_KEY` - API key for Player online mode testing
+
+### Testing API Key
+```bash
+export MOMENTRY_API_KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
+
+# Test Player online mode
+cargo run --features player --bin momentry_player -- -o
+```

 ### Database
 - `DATABASE_URL` - PostgreSQL (default: `postgres://accusys@localhost:5432/momentry`)
@@ -201,6 +210,10 @@ src/
 - `MOMENTRY_CUT_TIMEOUT` - CUT timeout in seconds (default: 3600)
 - `MOMENTRY_DEFAULT_TIMEOUT` - Default timeout (default: 7200)

+### Synonym Expansion
+- `MOMENTRY_SYNONYM_FILES` - Comma-separated paths to synonym JSON files (e.g., `data/english_synonyms.json,data/llm_synonyms.json`)
+- `MOMENTRY_SYNONYM_FILE` - Single synonym JSON file path (deprecated; use `MOMENTRY_SYNONYM_FILES` instead, which takes precedence when both are set)
+
 ### Logging
 - `RUST_LOG` or `MOMENTRY_LOG_LEVEL` - Log level (default: `info`)

@@ -213,6 +226,23 @@ src/
 - PythonExecutor provides unified script execution with timeout support
 - Redis 1.x (`redis` crate) for improved performance

+### LLM Synonym Generation
+
+Generate synonym database using llama.cpp (Gemma4):
+
+```bash
+# Generate full database (162 entries, ~5 minutes)
+python3 scripts/generate_synonyms_llamacpp.py
+
+# Quick test
+python3 scripts/generate_synonyms_llamacpp.py --test
+
+# Resume from existing file
+python3 scripts/generate_synonyms_llamacpp.py --resume
+
+# Output: data/llm_synonyms.json (27 Chinese + 135 English words)
+```
+
 ## Task Management

 ### 使用 todowrite 追蹤任務
@@ -86,21 +86,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "anstream"
-version = "0.6.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
-dependencies = [
- "anstyle",
- "anstyle-parse 0.2.7",
- "anstyle-query",
- "anstyle-wincon",
- "colorchoice",
- "is_terminal_polyfill",
- "utf8parse",
-]
-
 [[package]]
 name = "anstream"
 version = "1.0.0"
@@ -108,7 +93,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
 dependencies = [
 "anstyle",
- "anstyle-parse 1.0.0",
+ "anstyle-parse",
 "anstyle-query",
 "anstyle-wincon",
 "colorchoice",
@@ -122,15 +107,6 @@ version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"

-[[package]]
-name = "anstyle-parse"
-version = "0.2.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
-dependencies = [
- "utf8parse",
-]
-
 [[package]]
 name = "anstyle-parse"
 version = "1.0.0"
@@ -177,9 +153,9 @@ dependencies = [

 [[package]]
 name = "arc-swap"
-version = "1.8.2"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5"
+checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
 dependencies = [
 "rustversion",
 ]
@@ -196,7 +172,7 @@ version = "3.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
 dependencies = [
- "event-listener",
+ "event-listener 5.4.1",
 "event-listener-strategy",
 "pin-project-lite",
 ]
@@ -560,7 +536,7 @@ version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
 dependencies = [
- "anstream 1.0.0",
+ "anstream",
 "anstyle",
 "clap_lex",
 "strsim 0.11.1",
@@ -1054,9 +1030,9 @@ dependencies = [

 [[package]]
 name = "env_filter"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f"
+checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
 dependencies = [
 "log",
 "regex",
@@ -1064,11 +1040,11 @@ dependencies = [

 [[package]]
 name = "env_logger"
-version = "0.11.9"
+version = "0.11.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d"
+checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
 dependencies = [
- "anstream 0.6.21",
+ "anstream",
 "anstyle",
 "env_filter",
 "jiff",
@@ -1102,6 +1078,12 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

+[[package]]
+name = "event-listener"
+version = "2.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
+
 [[package]]
 name = "event-listener"
 version = "5.4.1"
@@ -1119,7 +1101,7 @@ version = "0.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
 dependencies = [
- "event-listener",
+ "event-listener 5.4.1",
 "pin-project-lite",
 ]

@@ -1445,6 +1427,16 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"

+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@@ -1467,6 +1459,15 @@ dependencies = [
 "foldhash 0.2.0",
 ]

+[[package]]
+name = "hashlink"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
+dependencies = [
+ "hashbrown 0.14.5",
+]
+
 [[package]]
 name = "hashlink"
 version = "0.10.0"
@@ -1481,6 +1482,9 @@ name = "heck"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+dependencies = [
+ "unicode-segmentation",
+]

 [[package]]
 name = "heck"
@@ -1926,14 +1930,15 @@ dependencies = [

 [[package]]
 name = "ipconfig"
-version = "0.3.2"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f"
+checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222"
 dependencies = [
- "socket2 0.5.10",
+ "socket2 0.6.3",
 "widestring",
- "windows-sys 0.48.0",
- "winreg",
+ "windows-registry",
+ "windows-result",
+ "windows-sys 0.61.2",
 ]

 [[package]]
@@ -1944,9 +1949,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"

 [[package]]
 name = "iri-string"
-version = "0.7.10"
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
+checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20"
 dependencies = [
 "memchr",
 "serde",
@@ -2122,9 +2127,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"

 [[package]]
 name = "libredox"
-version = "0.1.14"
+version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
+checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
 dependencies = [
 "bitflags 2.11.0",
 "libc",
@@ -2256,6 +2261,12 @@ dependencies = [
 "unicase",
 ]

+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "miniz_oxide"
 version = "0.8.9"
@@ -2292,16 +2303,16 @@ dependencies = [

 [[package]]
 name = "moka"
-version = "0.12.14"
+version = "0.12.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b"
+checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046"
 dependencies = [
 "async-lock",
 "crossbeam-channel",
 "crossbeam-epoch",
 "crossbeam-utils",
 "equivalent",
- "event-listener",
+ "event-listener 5.4.1",
 "futures-util",
 "parking_lot",
 "portable-atomic",
@@ -2336,18 +2347,21 @@ dependencies = [
 "mongodb",
 "notify",
 "once_cell",
+ "pgvector",
 "qdrant-client",
 "ratatui",
 "redis",
 "reqwest",
+ "sdl2",
 "serde",
 "serde_json",
 "sha2",
- "sqlx",
+ "sqlx 0.8.6",
 "subtle",
 "thiserror 1.0.69",
 "tokio",
 "tower 0.4.13",
+ "tower-http 0.5.2",
 "tracing",
 "tracing-subscriber",
 "utoipa",
@@ -2436,6 +2450,16 @@ dependencies = [
 "tempfile",
 ]

+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "notify"
 version = "6.1.1"
@@ -2492,9 +2516,9 @@ dependencies = [

 [[package]]
 name = "num-conv"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
+checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"

 [[package]]
 name = "num-integer"
@@ -2669,6 +2693,15 @@ version = "2.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"

+[[package]]
+name = "pgvector"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed92bf218dbe236609222dca0345767408ee7d5c93876c7fe09fa9b03f7249f"
+dependencies = [
+ "sqlx 0.7.4",
+]
+
 [[package]]
 name = "phf"
 version = "0.13.1"
@@ -3130,12 +3163,13 @@ dependencies = [

 [[package]]
 name = "redis"
-version = "1.0.5"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b36964393906eb775b89b25b05b7b95685b8dd14062f1663a31ff93e75c452e5"
+checksum = "d76e41a79ae5cbb41257d84cf4cf0db0bb5a95b11bf05c62c351de4fe748620d"
 dependencies = [
 "arc-swap",
 "arcstr",
+ "async-lock",
 "backon",
 "bytes",
 "cfg-if",
@@ -3240,7 +3274,7 @@ dependencies = [
 "tokio-rustls 0.26.4",
 "tokio-util",
 "tower 0.5.3",
- "tower-http",
+ "tower-http 0.6.8",
 "tower-service",
 "url",
 "wasm-bindgen",
@@ -3332,9 +3366,9 @@ dependencies = [

 [[package]]
 name = "rustc-hash"
-version = "2.1.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"

 [[package]]
 name = "rustc_version"
@@ -3412,7 +3446,7 @@ dependencies = [
 "once_cell",
 "ring",
 "rustls-pki-types",
- "rustls-webpki 0.103.9",
+ "rustls-webpki 0.103.10",
 "subtle",
 "zeroize",
 ]
@@ -3469,9 +3503,9 @@ dependencies = [

 [[package]]
 name = "rustls-webpki"
-version = "0.103.9"
+version = "0.103.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
+checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
 dependencies = [
 "ring",
 "rustls-pki-types",
@@ -3524,6 +3558,29 @@ dependencies = [
 "untrusted",
 ]

+[[package]]
+name = "sdl2"
+version = "0.35.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7959277b623f1fb9e04aea73686c3ca52f01b2145f8ea16f4ff30d8b7623b1a"
+dependencies = [
+ "bitflags 1.3.2",
+ "lazy_static",
+ "libc",
+ "sdl2-sys",
+]
+
+[[package]]
+name = "sdl2-sys"
+version = "0.35.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3586be2cf6c0a8099a79a12b4084357aa9b3e0b0d7980e3b67aaf7a9d55f9f0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "version-compare",
+]
+
 [[package]]
 name = "security-framework"
 version = "3.7.0"
@@ -3773,9 +3830,9 @@ dependencies = [

 [[package]]
 name = "simd-adler32"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"

 [[package]]
 name = "siphasher"
@@ -3847,19 +3904,77 @@ dependencies = [
 "der",
 ]

+[[package]]
+name = "sqlformat"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790"
+dependencies = [
+ "nom",
+ "unicode_categories",
+]
+
+[[package]]
+name = "sqlx"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa"
+dependencies = [
+ "sqlx-core 0.7.4",
+ "sqlx-macros 0.7.4",
+ "sqlx-postgres 0.7.4",
+]
+
 [[package]]
 name = "sqlx"
 version = "0.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
 dependencies = [
- "sqlx-core",
- "sqlx-macros",
+ "sqlx-core 0.8.6",
+ "sqlx-macros 0.8.6",
 "sqlx-mysql",
- "sqlx-postgres",
+ "sqlx-postgres 0.8.6",
 "sqlx-sqlite",
 ]

+[[package]]
+name = "sqlx-core"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6"
+dependencies = [
+ "ahash",
+ "atoi",
+ "byteorder",
+ "bytes",
+ "crc",
+ "crossbeam-queue",
+ "either",
+ "event-listener 2.5.3",
+ "futures-channel",
+ "futures-core",
+ "futures-intrusive",
+ "futures-io",
+ "futures-util",
+ "hashlink 0.8.4",
+ "hex",
+ "indexmap 2.13.0",
+ "log",
+ "memchr",
+ "once_cell",
+ "paste",
+ "percent-encoding",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "sqlformat",
+ "thiserror 1.0.69",
+ "tracing",
+ "url",
+]
+
 [[package]]
 name = "sqlx-core"
 version = "0.8.6"
@@ -3872,13 +3987,13 @@ dependencies = [
 "crc",
 "crossbeam-queue",
 "either",
- "event-listener",
+ "event-listener 5.4.1",
 "futures-core",
 "futures-intrusive",
 "futures-io",
 "futures-util",
 "hashbrown 0.15.5",
- "hashlink",
+ "hashlink 0.10.0",
 "indexmap 2.13.0",
 "log",
 "memchr",
@@ -3893,6 +4008,20 @@ dependencies = [
 "tokio-stream",
 "tracing",
 "url",
+ "uuid",
+]
+
+[[package]]
+name = "sqlx-macros"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "sqlx-core 0.7.4",
+ "sqlx-macros-core 0.7.4",
+ "syn 1.0.109",
 ]

 [[package]]
@@ -3903,11 +4032,34 @@ checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
 dependencies = [
 "proc-macro2",
 "quote",
- "sqlx-core",
- "sqlx-macros-core",
+ "sqlx-core 0.8.6",
+ "sqlx-macros-core 0.8.6",
 "syn 2.0.117",
 ]

+[[package]]
+name = "sqlx-macros-core"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
+dependencies = [
+ "dotenvy",
+ "either",
+ "heck 0.4.1",
+ "hex",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "sha2",
+ "sqlx-core 0.7.4",
+ "sqlx-postgres 0.7.4",
+ "syn 1.0.109",
+ "tempfile",
+ "url",
+]
+
 [[package]]
 name = "sqlx-macros-core"
 version = "0.8.6"
@@ -3924,9 +4076,9 @@ dependencies = [
 "serde",
 "serde_json",
 "sha2",
- "sqlx-core",
+ "sqlx-core 0.8.6",
 "sqlx-mysql",
- "sqlx-postgres",
+ "sqlx-postgres 0.8.6",
 "sqlx-sqlite",
 "syn 2.0.117",
 "tokio",
@@ -3969,10 +4121,49 @@ dependencies = [
 "sha1",
 "sha2",
 "smallvec",
- "sqlx-core",
+ "sqlx-core 0.8.6",
 "stringprep",
 "thiserror 2.0.18",
 "tracing",
+ "uuid",
+ "whoami",
+]
+
+[[package]]
+name = "sqlx-postgres"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e"
+dependencies = [
+ "atoi",
+ "base64 0.21.7",
+ "bitflags 2.11.0",
+ "byteorder",
+ "crc",
+ "dotenvy",
+ "etcetera",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-util",
+ "hex",
+ "hkdf",
+ "hmac",
+ "home",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "rand 0.8.5",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "sqlx-core 0.7.4",
+ "stringprep",
+ "thiserror 1.0.69",
+ "tracing",
 "whoami",
 ]

@@ -4007,10 +4198,11 @@ dependencies = [
 "serde_json",
 "sha2",
 "smallvec",
- "sqlx-core",
+ "sqlx-core 0.8.6",
 "stringprep",
 "thiserror 2.0.18",
 "tracing",
+ "uuid",
 "whoami",
 ]

@@ -4033,10 +4225,11 @@ dependencies = [
 "percent-encoding",
 "serde",
 "serde_urlencoded",
- "sqlx-core",
+ "sqlx-core 0.8.6",
 "thiserror 2.0.18",
 "tracing",
 "url",
+ "uuid",
 ]

 [[package]]
@@ -4410,32 +4603,32 @@ dependencies = [

 [[package]]
 name = "toml_datetime"
-version = "1.0.1+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9"
+checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f"
 dependencies = [
 "serde_core",
 ]

 [[package]]
 name = "toml_edit"
-version = "0.25.5+spec-1.1.0"
+version = "0.25.8+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1"
+checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c"
 dependencies = [
 "indexmap 2.13.0",
- "toml_datetime 1.0.1+spec-1.1.0",
+ "toml_datetime 1.1.0+spec-1.1.0",
 "toml_parser",
- "winnow 1.0.0",
+ "winnow 1.0.1",
 ]

 [[package]]
 name = "toml_parser"
-version = "1.0.10+spec-1.1.0"
+version = "1.1.0+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420"
+checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011"
 dependencies = [
- "winnow 1.0.0",
+ "winnow 1.0.1",
 ]

 [[package]]
@@ -4514,6 +4707,22 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "tower-http"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
+dependencies = [
+ "bitflags 2.11.0",
+ "bytes",
+ "http",
+ "http-body",
+ "http-body-util",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+]
+
 [[package]]
 name = "tower-http"
 version = "0.6.8"
@@ -4705,9 +4914,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"

 [[package]]
 name = "unicode-segmentation"
-version = "1.12.0"
+version = "1.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"

 [[package]]
 name = "unicode-truncate"
@@ -4732,6 +4941,12 @@ version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"

+[[package]]
+name = "unicode_categories"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+
 [[package]]
 name = "universal-hash"
 version = "0.5.1"
@@ -4824,9 +5039,9 @@ dependencies = [

 [[package]]
 name = "uuid"
-version = "1.22.0"
+version = "1.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
+checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
 dependencies = [
 "getrandom 0.4.2",
 "js-sys",
@@ -4846,6 +5061,12 @@ version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

+[[package]]
+name = "version-compare"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29"
+
 [[package]]
 name = "version_check"
 version = "0.9.5"
@@ -5404,23 +5625,13 @@ checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"

 [[package]]
 name = "winnow"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8"
+checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5"
 dependencies = [
 "memchr",
 ]

-[[package]]
-name = "winreg"
-version = "0.50.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
-dependencies = [
- "cfg-if",
- "windows-sys 0.48.0",
-]
-
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
@@ -5555,18 +5766,18 @@ dependencies = [

 [[package]]
 name = "zerocopy"
-version = "0.8.42"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
 dependencies = [
 "zerocopy-derive",
 ]

 [[package]]
 name = "zerocopy-derive"
-version = "0.8.42"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -47,15 +47,17 @@ moka = { version = "0.12", features = ["future"] }

 # Database
 redis = { version = "1.0", features = ["tokio-comp", "connection-manager"] }
-sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono"] }
+sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono", "uuid"] }
 mongodb = { version = "2", features = ["tokio-runtime"] }
 bson = { version = "2", features = ["chrono-0_4"] }
 qdrant-client = "1.7"
 reqwest = { version = "0.12", features = ["json"] }
+pgvector = { version = "0.3", features = ["sqlx"] }

 # HTTP Server
 axum = { version = "0.7", features = ["multipart"] }
 tower = "0.4"
+tower-http = { version = "0.5", features = ["cors"] }

 # API Documentation
 utoipa = { version = "4", features = ["axum_extras", "chrono", "uuid"] }
@@ -85,7 +87,11 @@ path = "src/lib.rs"

 [features]
 default = []
-player = []
+player = ["sdl2"]
+
+[dependencies.sdl2]
+version = "0.35"
+optional = true

 [[bin]]
 name = "momentry"
@@ -111,5 +117,9 @@ path = "src/bin/migrate_chinese_text.rs"
 name = "test_bm25_simple"
 path = "src/bin/test_bm25_simple.rs"

+[[bin]]
+name = "integrated_player"
+path = "src/bin/integrated_player.rs"
+
 [build-dependencies]
 chrono = "0.4"
@@ -300,8 +300,8 @@ curl -X POST http://localhost:3002/api/v1/n8n/search \
    {
      "id": "sentence_0001",
      "vid": "a1b10138a6bbb0cd",
-      "start": 10.5,
-      "end": 15.2,
+      "start_time": 10.5,
+      "end_time": 15.2,
      "title": "Chunk sentence_0001",
      "text": "Found text matching query",
      "score": 0.85,
@@ -20,7 +20,7 @@
 #### API Key（用於 API 認證）

 ```
-X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69
+X-API-Key: muser_68600856036340bcafc01930eb4bd839
 ```

 #### SFTPGo（用於影片上傳）
@@ -160,12 +160,14 @@ n8n 專用搜尋（包含完整影片檔案路徑 file_path）
    {
      "id": "sentence_1471",
      "vid": "39567a0eb16f39fd",
-      "start": 5309.08,
-      "end": 5311.08,
-      "title": "Chunk sentence_1471",
+      "chunk_type": "sentence",
+      "start_frame": 318545,
+      "end_frame": 318665,
+      "fps": 59.94,
+      "start_time": 5314.31,
+      "end_time": 5316.32,
      "text": "influenced by a vital way,",
-      "score": 0.68,
-      "file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/video.mp4"
+      "score": 0.68
    }
  ]
 }
@@ -176,8 +178,8 @@ n8n 專用搜尋（包含完整影片檔案路徑 file_path）
 |------|-----------|----------------|
 | 影片 UUID | `uuid` | `vid` |
 | Chunk ID | `chunk_id` | `id` |
-| 開始時間 | `start_time` | `start` |
-| 結束時間 | `end_time` | `end` |
+| 開始時間 | `start_time` | `start_time` |
+| 結束時間 | `end_time` | `end_time` |
 | 相似度分數 | `score` | `score` |
 | **檔案路徑** | ❌ | ✅ `file_path` |

@@ -386,3 +388,4 @@ GET /api/v1/jobs/{uuid}
 | V1.2 | 2026-03-25 | 新增 Chunk 欄位說明、類型、播放方式 | OpenCode |
 | V1.3 | 2026-03-25 | 新增 Demo 測試帳號（SFTPGo）| OpenCode |
 | V1.4 | 2026-03-25 | 更新 n8n 搜尋回傳欄位說明 (media_url→file_path) | OpenCode |
+| V1.5 | 2026-04-17 | 修正 API Key 格式、統一 n8n/search 欄位名稱 (start/end → start_time/end_time) | OpenCode |
@@ -1,141 +0,0 @@
-# 場景識別 API 整合指南
-
-## 概述
-
-本文檔說明如何在 Playground (port 3003) 中使用場景識別功能。
-
-## API Endpoint
-
-### 場景識別
-
-**Endpoint**: `GET /api/v1/scene/:uuid`
-
-**描述**: 對指定影片執行場景識別
-
-**參數**:
- `uuid` (path): 影片 UUID
-
-**回應格式**:
-```json
-{
-  "video_uuid": "384b0ff44aaaa1f1",
-  "scenes": [
-    {
-      "start_time": 0.0,
-      "end_time": 156.0,
-      "scene_type": "office",
-      "scene_type_zh": "辦公室",
-      "confidence": 0.87,
-      "duration": 156.0
-    }
-  ],
-  "processing_time": 1.3
-}
-```
-
-## 使用方式
-
-### 1. 啟動 Playground 伺服器
-
-```bash
-# 使用 port 3003
-cargo run --bin momentry_playground -- server --host 0.0.0.0 --port 3003
-```
-
-### 2. 測試場景識別
-
-```bash
-# 使用測試腳本
-python3 scripts/test_scene_api.py <video_uuid>
-
-# 範例
-python3 scripts/test_scene_api.py 384b0ff44aaaa1f1
-```
-
-### 3. 直接使用 curl
-
-```bash
-curl -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
-  "http://localhost:3003/api/v1/scene/384b0ff44aaaa1f1"
-```
-
-## Python 整合範例
-
-```python
-import requests
-
-API_KEY = "muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
-BASE_URL = "http://localhost:3003"
-
-def classify_scene(video_uuid):
-    """執行場景識別"""
-    response = requests.get(
-        f"{BASE_URL}/api/v1/scene/{video_uuid}",
-        headers={"X-API-Key": API_KEY}
-    )
-    
-    if response.status_code == 200:
-        return response.json()
-    else:
-        raise Exception(f"API error: {response.status_code}")
-
-# 使用範例
-result = classify_scene("384b0ff44aaaa1f1")
-print(f"場景數量：{len(result['scenes'])}")
-for scene in result['scenes']:
-    print(f"  - {scene['scene_type']} ({scene['confidence']*100:.1f}%)")
-```
-
-## 目前狀態
-
-### 已完成 ✅
- ✅ 場景識別 Python 腳本 (`scripts/scene_classifier.py`)
- ✅ Places365 380 個場景類別
- ✅ API 測試腳本 (`scripts/test_scene_api.py`)
- ✅ Rust API handler 設計
-
-### 進行中 ⏳
- ⏳ Rust API endpoint 完整實作
- ⏳ 與資料庫整合
- ⏳ 錯誤處理優化
-
-### 已知限制
- Rust API endpoint 需要完整實作以支援資料庫查詢
- 目前建議使用 Python 腳本直接測試
-
-## 故障排除
-
-### 問題：API 回應 404
-
-**可能原因**:
- 影片 UUID 不存在
- Playground 伺服器未啟動
-
-**解決方案**:
-```bash
-# 檢查伺服器狀態
-curl http://localhost:3003/health
-
-# 檢查影片是否存在
-curl -H "X-API-Key: ..." "http://localhost:3003/api/v1/videos"
-```
-
-### 問題：處理時間過長
-
-**建議**:
- 減少取樣頻率 (`--sample-interval`)
- 增加最小場景持續時間 (`--min-scene-duration`)
- 使用 Places365 Core ML 模型（而非 PyTorch）
-
-## 相關文檔
-
- `docs_v1.0/IMPLEMENTATION/SCENE_CLASSIFICATION_MODULE.md` - 模組使用手冊
- `docs_v1.0/IMPLEMENTATION/PLACES365_INSTALLATION.md` - 模型安裝指南
- `docs_v1.0/TESTING/SCENE_CLASSIFICATION_TEST_REPORT_2026_04_01.md` - 測試報告
-
-## 下一步
-
-1. 完成 Rust API endpoint 實作
-2. 整合資料庫查詢
-3. 添加異步處理支援
-4. 優化效能和記憶體使用
@@ -13,8 +13,7 @@
    
    <key>ProgramArguments</key>
    <array>
-        <string>/opt/homebrew/opt/node@22/bin/node</string>
-        <string>/opt/homebrew/lib/node_modules/n8n/bin/n8n</string>
+        <string>/Users/accusys/momentry/scripts/start_n8n.sh</string>
        <string>start</string>
    </array>
    
@@ -16,8 +16,7 @@
    
    <key>ProgramArguments</key>
    <array>
-        <string>/opt/homebrew/opt/node@22/bin/node</string>
-        <string>/opt/homebrew/lib/node_modules/n8n/bin/n8n</string>
+        <string>/Users/accusys/momentry/scripts/start_n8n.sh</string>
        <string>worker</string>
    </array>
    
@@ -65,12 +65,20 @@ def run_asr(video_path, output_path, uuid: str = ""):
    if publisher:
        publisher.info("asr", "Loading Whisper model...")

-    model = WhisperModel("tiny", device="cpu", compute_type="int8")
+    # Use small model with CPU (MPS not supported by faster_whisper)
+    # small 模型在準確率和速度間取得最佳平衡
+    model = WhisperModel("small", device="cpu", compute_type="int8")

    if publisher:
        publisher.info("asr", f"Transcribing: {video_path}")

-    segments, info = model.transcribe(video_path, beam_size=5)
+    # Transcribe with VAD filter for better accuracy
+    segments, info = model.transcribe(
+        video_path,
+        beam_size=5,
+        vad_filter=True,
+        vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200),
+    )

    if publisher:
        publisher.info("asr", f"ASR_LANGUAGE:{info.language}")
@@ -22,6 +22,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):

    try:
        import whisperx
+        import torch
    except ImportError:
        if publisher:
            publisher.error("asrx", "whisperx not installed")
@@ -36,6 +37,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
        publisher.info("asrx", "ASRX_LOADING_MODEL")

    try:
+        # Fix for PyTorch 2.6+ compatibility
+        # Allow omegaconf types in torch.load
+        import omegaconf
+
+        torch.serialization.add_safe_globals(
+            [omegaconf.listconfig.ListConfig, omegaconf.dictconfig.DictConfig]
+        )
+
        # Load model - using faster-whisper for better performance
        # You can also use: "large-v3", "medium", "small", "base", "tiny"
        model = whisperx.load_model("base", device="cpu", compute_type="int8")
@@ -54,9 +63,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):

        # Diarization (speaker segmentation)
        try:
-            import whisperx
+            from whisperx.diarize import DiarizationPipeline

-            diarize_model = whisperx.DiarizationPipeline(use_auth_token=None)
+            # DiarizationPipeline parameters: model_name, token, device, cache_dir
+            diarize_model = DiarizationPipeline(
+                model_name="pyannote/speaker-diarization",
+                token=None,  # HuggingFace token (None for public models)
+                device="cpu",
+            )
            diarize_segments = diarize_model(video_path)

            # Assign speaker labels
@@ -1,7 +1,8 @@
 #!/opt/homebrew/bin/python3.11
 """
-Caption Processor - Generate image captions
-Uses AI vision models to analyze video frames and generate descriptions
+Caption Processor - Generate image captions (LOCAL ONLY)
+Uses Moondream2 (local VLM) for image captioning
+No cloud API calls - fully offline processing
 """

 import sys
@@ -18,7 +19,6 @@ from redis_publisher import RedisPublisher
 def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
    """Extract frames from video at regular intervals"""

-    # Get video duration
    cmd = [
        "ffprobe",
        "-v",
@@ -34,14 +34,13 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
            data = json.loads(result.stdout)
            duration = float(data.get("format", {}).get("duration", 0))
        else:
-            duration = 60  # Default fallback
+            duration = 60
    except Exception:
        duration = 60

    if duration <= 0:
        duration = 60

-    # Calculate frame interval
    interval = max(duration / max_frames, 1.0)

    frames = []
@@ -76,94 +75,73 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
    return frames


-def generate_caption_with_llava(
+def generate_caption_with_moondream(
    image_path: str, prompt: str = "Describe this image in detail."
 ) -> Optional[str]:
-    """Generate caption using LLaVA model"""
+    """Generate caption using Moondream2 (local VLM)"""
    try:
-        # Try to use transformers with LLaVA
-        from transformers import AutoProcessor, AutoModelForVision2Seq  # noqa: F401
-        import torch  # noqa: F401
-        from PIL import Image  # noqa: F401
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from PIL import Image
+        import torch

-        # Note: This requires llava-hf/llava-1.5-7b-hf or similar
-        # For now, return a placeholder
-        return f"[LLaVA caption for {os.path.basename(image_path)}]"
+        model_id = "vikhyatk/moondream2"
+        revision = "2025-01-09"
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, revision=revision, trust_remote_code=True
+        )
+        moondream = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            revision=revision,
+            trust_remote_code=True,
+            torch_dtype=torch.float16,
+        ).to("mps" if torch.backends.mps.is_available() else "cpu")
+
+        moondream.eval()
+
+        image = Image.open(image_path)
+        enc_image = moondream.encode_image(image)
+        caption = moondream.answer_question(enc_image, prompt, tokenizer)
+
+        return caption if caption else None
    except ImportError:
        return None
-
-
-def generate_caption_with_gpt4v(image_path: str, api_key: str = None) -> Optional[str]:
-    """Generate caption using GPT-4V via OpenAI API"""
-    import base64
-
-    if not api_key:
-        api_key = os.environ.get("OPENAI_API_KEY")
-
-    if not api_key:
-        return None
-
-    try:
-        from openai import OpenAI
-
-        client = OpenAI(api_key=api_key)
-
-        # Encode image
-        with open(image_path, "rb") as f:
-            img_data = base64.b64encode(f.read()).decode()
-
-        response = client.chat.completions.create(
-            model="gpt-4o",  # or gpt-4-turbo for vision
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{img_data}"},
-                        },
-                        {
-                            "type": "text",
-                            "text": "Describe what you see in this image in one sentence.",
-                        },
-                    ],
-                }
-            ],
-            max_tokens=100,
-        )
-
-        return response.choices[0].message.content
-    except Exception:
+    except Exception as e:
+        print(f"[CAPTION] Moondream error: {e}")
        return None


-def generate_caption_fallback(image_path: str, existing_data: Dict = None) -> str:
-    """Generate a basic caption using available metadata"""
+def generate_caption_from_metadata(image_path: str, existing_data: Dict = None) -> str:
+    """Generate caption using YOLO/OCR metadata (fallback)"""

    caption_parts = []

-    # Check YOLO data for objects
    if existing_data and existing_data.get("objects"):
        objects = list(set([o["class"] for o in existing_data["objects"]]))[:5]
        if objects:
-            caption_parts.append(f"Contains: {', '.join(objects)}")
+            caption_parts.append(f"Objects: {', '.join(objects)}")

-    # Check OCR data for text
    if existing_data and existing_data.get("texts"):
        texts = [t["text"] for t in existing_data["texts"] if t.get("text")]
        if texts:
-            caption_parts.append(f"On-screen text: {' '.join(texts[:3])}")
+            caption_parts.append(f"Text: {' '.join(texts[:3])}")
+
+    if existing_data and existing_data.get("scene_type"):
+        caption_parts.append(f"Scene: {existing_data['scene_type']}")

    if caption_parts:
        return " | ".join(caption_parts)

-    return "Video frame at timestamp"
+    return "Video frame"


 def process_frame(
-    frame_info: Dict, yolo_data: List = None, ocr_data: List = None
+    frame_info: Dict,
+    yolo_data: List = None,
+    ocr_data: List = None,
+    scene_data: Dict = None,
 ) -> Dict:
-    """Process a single frame and generate caption"""
+    """Process a single frame and generate caption (LOCAL ONLY)"""

    frame_path = frame_info["path"]
    timestamp = frame_info["timestamp"]
@@ -171,28 +149,34 @@ def process_frame(
    caption = None
    source = "unknown"

-    # Try GPT-4V first
-    caption = generate_caption_with_gpt4v(frame_path)
+    # Try Moondream2 (local VLM)
+    caption = generate_caption_with_moondream(frame_path)
    if caption:
-        source = "gpt-4v"
+        source = "moondream2"
    else:
-        # Try LLaVA
-        caption = generate_caption_with_llava(frame_path)
-        if caption:
-            source = "llava"
-        else:
-            # Use fallback with YOLO/OCR data
-            combined_data = {"objects": [], "texts": []}
-            if yolo_data:
-                combined_data["objects"] = [
-                    o for o in yolo_data if o.get("timestamp") == timestamp
-                ]
-            if ocr_data:
-                combined_data["texts"] = [
-                    t for t in ocr_data if t.get("timestamp") == timestamp
-                ]
-            caption = generate_caption_fallback(frame_path, combined_data)
-            source = "metadata"
+        # Fallback: Use metadata from YOLO/OCR/Scene
+        combined_data = {"objects": [], "texts": [], "scene_type": ""}
+
+        if yolo_data:
+            combined_data["objects"] = [
+                o for o in yolo_data if o.get("timestamp") == timestamp
+            ]
+
+        if ocr_data:
+            combined_data["texts"] = [
+                t for t in ocr_data if t.get("timestamp") == timestamp
+            ]
+
+        if scene_data:
+            for scene in scene_data.get("scenes", []):
+                if scene.get("start_time", 0) <= timestamp <= scene.get("end_time", 0):
+                    combined_data["scene_type"] = scene.get(
+                        "scene_type_zh"
+                    ) or scene.get("scene_type", "")
+                    break
+
+        caption = generate_caption_from_metadata(frame_path, combined_data)
+        source = "metadata"

    return {
        "index": frame_info["index"],
@@ -212,24 +196,22 @@ def run_caption(
    if publisher:
        publisher.info("caption", "Extracting frames from video...")

-    # Extract frames
    frames = extract_frames(video_path, max_frames)

    if publisher:
        publisher.info("caption", f"Extracted {len(frames)} frames")

-    # Load YOLO and OCR data for context
    base_path = os.path.dirname(output_path)
    uuid_name = os.path.basename(output_path).split(".")[0]

    yolo_objects = []
    ocr_texts = []
+    scene_info = {}

    yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
    if os.path.exists(yolo_path):
        with open(yolo_path) as f:
            yolo_data = json.load(f)
-            # Flatten objects from all frames
            for frame in yolo_data.get("frames", []):
                for obj in frame.get("objects", []):
                    obj["timestamp"] = frame.get("timestamp", 0)
@@ -244,7 +226,11 @@ def run_caption(
                    text["timestamp"] = frame.get("timestamp", 0)
                    ocr_texts.append(text)

-    # Process each frame
+    scene_path = os.path.join(base_path, f"{uuid_name}.scene.json")
+    if os.path.exists(scene_path):
+        with open(scene_path) as f:
+            scene_info = json.load(f)
+
    captions = []
    for i, frame in enumerate(frames):
        if publisher and i % 5 == 0:
@@ -252,16 +238,14 @@ def run_caption(
                "caption", i, len(frames), f"Frame {i + 1}/{len(frames)}"
            )

-        caption_data = process_frame(frame, yolo_objects, ocr_texts)
+        caption_data = process_frame(frame, yolo_objects, ocr_texts, scene_info)
        captions.append(caption_data)

-        # Cleanup temp frame
        try:
            os.remove(frame["path"])
        except Exception:
            pass

-    # Cleanup temp directory
    temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames")
    try:
        os.rmdir(temp_dir)
@@ -275,9 +259,11 @@ def run_caption(
        "summary": {
            "avg_caption_length": sum(len(c.get("caption", "")) for c in captions)
            / max(len(captions), 1),
-            "gpt4v_count": sum(1 for c in captions if c.get("source") == "gpt-4v"),
-            "llava_count": sum(1 for c in captions if c.get("source") == "llava"),
+            "moondream_count": sum(
+                1 for c in captions if c.get("source") == "moondream2"
+            ),
            "metadata_count": sum(1 for c in captions if c.get("source") == "metadata"),
+            "cloud_api_count": 0,
        },
    }

@@ -285,13 +271,13 @@ def run_caption(
        json.dump(result, f, indent=2, ensure_ascii=False)

    if publisher:
-        publisher.complete("caption", f"{len(captions)} frames captioned")
+        publisher.complete("caption", f"{len(captions)} frames captioned (LOCAL)")

    return result


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Video Caption Generator")
+    parser = argparse.ArgumentParser(description="Video Caption Generator (LOCAL ONLY)")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", help="UUID for progress tracking", default="")
@@ -302,4 +288,4 @@ if __name__ == "__main__":
    args = parser.parse_args()

    result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames)
-    print(f"Caption generated: {result['total_frames']} frames")
+    print(f"Caption generated: {result['total_frames']} frames (LOCAL)")
@@ -1,8 +1,8 @@
 #!/opt/homebrew/bin/python3.11
 """
-Face Processor - Face Detection
-Uses OpenCV Haar Cascade (local, no extra download needed)
-Alternative: MediaPipe (requires model download)
+Face Processor - Face Detection & Demographics
+Uses InsightFace for detection, age, and gender analysis.
+Falls back to OpenCV Haar Cascade if InsightFace fails.
 """

 import sys
@@ -15,7 +15,7 @@ from redis_publisher import RedisPublisher


 def process_face(video_path: str, output_path: str, uuid: str = ""):
-    """Process video for face detection"""
+    """Process video for face detection and demographics analysis"""

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
@@ -23,56 +23,82 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):

    try:
        import cv2
-    except ImportError:
+        import numpy as np
+        import insightface
+    except ImportError as e:
+        error_msg = f"Missing dependency: {e.name}"
        if publisher:
-            publisher.error("face", "opencv-python not installed")
+            publisher.error("face", error_msg)
        result = {"frame_count": 0, "fps": 0.0, "frames": []}
-        if publisher:
-            publisher.complete("face", "0 frames")
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)
        return result

-    if publisher:
-        publisher.info("face", "FACE_LOADING_CASCADE")
-
-    # Try to use OpenCV's built-in Haar Cascade
-    # This is included with OpenCV
-    face_cascade = cv2.CascadeClassifier(
-        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
-    )
-
-    if face_cascade.empty():
+    # 1. Initialize InsightFace
+    use_insightface = False
+    app = None
+    try:
        if publisher:
-            publisher.error("face", "Could not load Haar Cascade")
-        result = {"frame_count": 0, "fps": 0.0, "frames": []}
+            publisher.info("face", "LOADING_INSIGHTFACE")
+        # 'buffalo_l' is a robust model. det_size can be adjusted.
+        app = insightface.app.FaceAnalysis(
+            name="buffalo_l", providers=["CPUExecutionProvider"]
+        )
+        app.prepare(ctx_id=0, det_size=(320, 320))
+        use_insightface = True
        if publisher:
-            publisher.complete("face", "0 frames")
-        with open(output_path, "w") as f:
-            json.dump(result, f, indent=2)
-        return result
+            publisher.info("face", "INSIGHTFACE_LOADED")
+    except Exception as e:
+        print(f"[WARNING] InsightFace failed to load: {e}")
+        use_insightface = False
+
+    # 2. Fallback to Haar Cascade
+    face_cascade = None
+    if not use_insightface:
+        if publisher:
+            publisher.info("face", "LOADING_HAAR_CASCADE")
+        face_cascade = cv2.CascadeClassifier(
+            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
+        )
+        if face_cascade.empty():
+            if publisher:
+                publisher.error("face", "Could not load Haar Cascade")
+            result = {"frame_count": 0, "fps": 0.0, "frames": []}
+            with open(output_path, "w") as f:
+                json.dump(result, f, indent=2)
+            return result
+        if publisher:
+            publisher.info("face", "HAAR_CASCADE_LOADED")

    if publisher:
-        publisher.info("face", "FACE_CASCADE_LOADED")
+        publisher.info("face", "PROCESSING_VIDEO")

-    # Get video info
    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        if publisher:
+            publisher.error("face", "Could not open video")
+        result = {"frame_count": 0, "fps": 0.0, "frames": []}
+        with open(output_path, "w") as f:
+            json.dump(result, f, indent=2)
+        return result
+
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
+
+    # Optimization: Process every N frames to speed up analysis
+    # Since we just need attributes for the person identity, we don't need every single frame.
+    sample_interval = 30
+    if total_frames > 0:
+        estimated_samples = total_frames // sample_interval
+    else:
+        estimated_samples = 0
+
+    frame_count = 0
+    processed_count = 0
+    frames_data = []

    if publisher:
-        publisher.info("face", f"fps={fps}, frames={total_frames}")
-        publisher.progress("face", 0, total_frames, "Starting")
-
-    # Process every N frames to speed up
-    sample_interval = 30  # Process every 30 frames
-
-    frames = []
-    frame_count = 0
-    processed = 0
-
-    cap = cv2.VideoCapture(video_path)
+        publisher.progress("face", 0, estimated_samples, "Starting")

    while True:
        ret, frame = cap.read()
@@ -81,62 +107,92 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):

        frame_count += 1

-        # Sample frames
+        # Sampling
        if frame_count % sample_interval != 0:
            continue

-        processed += 1
+        processed_count += 1
        timestamp = (frame_count - 1) / fps if fps > 0 else 0

-        # Convert to grayscale
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-
-        # Detect faces
-        try:
-            faces = face_cascade.detectMultiScale(
-                gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
-            )
-        except Exception as e:
-            if publisher:
-                publisher.error("face", f"Frame {frame_count}: {e}")
-            faces = []
-
        face_list = []
-        for x, y, w, h in faces:
-            face_list.append(
-                {
-                    "face_id": None,
-                    "x": int(x),
-                    "y": int(y),
-                    "width": int(w),
-                    "height": int(h),
-                    "confidence": 0.8,  # Haar cascade doesn't provide confidence
-                }
-            )

-        # Only add frames with faces
+        try:
+            if use_insightface and app:
+                # InsightFace Detection & Analysis
+                faces = app.get(frame)
+                for face in faces:
+                    bbox = face.bbox.astype(int)
+                    bx, by, bw, bh = (
+                        bbox[0],
+                        bbox[1],
+                        bbox[2] - bbox[0],
+                        bbox[3] - bbox[1],
+                    )
+
+                    # Extract Attributes
+                    age = int(face.age) if hasattr(face, "age") else None
+                    gender_val = face.gender if hasattr(face, "gender") else None
+                    gender = (
+                        "female"
+                        if gender_val == 0
+                        else ("male" if gender_val == 1 else None)
+                    )
+
+                    face_list.append(
+                        {
+                            "x": int(bx),
+                            "y": int(by),
+                            "width": int(bw),
+                            "height": int(bh),
+                            "confidence": float(face.det_score)
+                            if hasattr(face, "det_score")
+                            else 0.9,
+                            "attributes": {"age": age, "gender": gender},
+                        }
+                    )
+            else:
+                # Haar Cascade Fallback (No Age/Gender)
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                faces = face_cascade.detectMultiScale(
+                    gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
+                )
+                for x, y, w, h in faces:
+                    face_list.append(
+                        {
+                            "x": int(x),
+                            "y": int(y),
+                            "width": int(w),
+                            "height": int(h),
+                            "confidence": 0.8,
+                            "attributes": {"age": None, "gender": None},
+                        }
+                    )
+        except Exception as e:
+            print(f"[ERROR] Frame processing error: {e}")
+
        if face_list:
-            frames.append(
+            frames_data.append(
                {
                    "frame": frame_count - 1,
                    "timestamp": round(timestamp, 3),
                    "faces": face_list,
                }
            )
+
            if publisher:
                publisher.progress(
                    "face",
-                    processed,
-                    total_frames // sample_interval,
+                    processed_count,
+                    estimated_samples,
                    f"Frame {frame_count}",
                )

    cap.release()

-    result = {"frame_count": total_frames, "fps": fps, "frames": frames}
+    result = {"frame_count": total_frames, "fps": fps, "frames": frames_data}

    if publisher:
-        publisher.complete("face", f"{len(frames)} frames with faces")
+        publisher.complete("face", f"{len(frames_data)} frames processed")

    with open(output_path, "w") as f:
        json.dump(result, f, indent=2)
@@ -145,7 +201,7 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Face Detection")
+    parser = argparse.ArgumentParser(description="Face Detection & Demographics")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
@@ -1,382 +1,367 @@
 {
-  "0": "airplane_cabin",
-  "1": "airport_terminal",
-  "2": "alley",
-  "3": "amphitheater",
-  "4": "amusement_park",
-  "5": "apartment_building_outdoor",
-  "6": "aquarium",
-  "7": "arcade",
-  "8": "arena_hockey",
-  "9": "arena_performance",
-  "10": "army_base",
-  "11": "art_gallery",
-  "12": "art_studio",
-  "13": "assembly_line",
-  "14": "athletic_field_outdoor",
-  "15": "atrium_public",
-  "16": "attic",
-  "17": "auditorium",
-  "18": "auto_factory",
-  "19": "backyard",
-  "20": "badminton_court_indoor",
-  "21": "baggage_claim",
-  "22": "bakery_shop",
-  "23": "balcony_exterior",
-  "24": "balcony_interior",
-  "25": "ball_pit",
-  "26": "ballroom",
-  "27": "bamboo_forest",
-  "28": "banquet_hall",
-  "29": "bar",
-  "30": "barn",
-  "31": "barndoor",
-  "32": "baseball_field",
-  "33": "basement",
-  "34": "basilica",
-  "35": "basketball_court_indoor",
-  "36": "basketball_court_outdoor",
-  "37": "bathroom",
-  "38": "bazaar_indoor",
-  "39": "bazaar_outdoor",
-  "40": "beach",
-  "41": "beauty_salon",
-  "42": "bedroom",
-  "43": "berth",
-  "44": "biology_laboratory",
-  "45": "boardwalk",
-  "46": "boat_deck",
-  "47": "boathouse",
-  "48": "bookstore",
-  "49": "booth_indoor",
-  "50": "botanical_garden",
-  "51": "bow_window_indoor",
-  "52": "bow_window_outdoor",
-  "53": "bowling_alley",
-  "54": "boxing_ring",
-  "55": "brewery_indoor",
-  "56": "bridge",
-  "57": "building_facade",
-  "58": "bullring",
-  "59": "burial_chamber",
-  "60": "bus_interior",
-  "61": "bus_station_indoor",
-  "62": "butchers_shop",
-  "63": "butte",
-  "64": "cabin_outdoor",
-  "65": "cafeteria",
-  "66": "campsite",
-  "67": "campus",
-  "68": "canal_natural",
-  "69": "canal_urban",
-  "70": "candy_store",
-  "71": "canyon",
-  "72": "car_interior",
-  "73": "carrousel",
-  "74": "castle",
-  "75": "catacomb",
-  "76": "cathedral_indoor",
-  "77": "cathedral_outdoor",
-  "78": "cavern_indoor",
-  "79": "cemetery",
-  "80": "chalet",
-  "81": "cheese_factory",
-  "82": "chemistry_lab",
-  "83": "chicken_coop_indoor",
-  "84": "chicken_coop_outdoor",
-  "85": "childs_room",
-  "86": "church_indoor",
-  "87": "church_outdoor",
-  "88": "classroom",
-  "89": "clean_room",
-  "90": "cliff",
-  "91": "cloister_indoor",
-  "92": "closet",
-  "93": "clothing_store",
-  "94": "coast",
-  "95": "cockpit",
-  "96": "coffee_shop",
-  "97": "computer_room",
-  "98": "conference_center",
-  "99": "conference_room",
-  "100": "construction_site",
-  "101": "control_room",
-  "102": "control_tower_outdoor",
-  "103": "corn_field",
-  "104": "corral",
-  "105": "corridor",
-  "106": "cottage_garden",
-  "107": "courthouse",
-  "108": "courtroom",
+  "0": "airfield",
+  "1": "airplane_cabin",
+  "2": "airport_terminal",
+  "3": "alcove",
+  "4": "alley",
+  "5": "amphitheater",
+  "6": "amusement_arcade",
+  "7": "amusement_park",
+  "8": "apartment_building_outdoor",
+  "9": "aquarium",
+  "10": "aqueduct",
+  "11": "arcade",
+  "12": "arch",
+  "13": "archaelogical_excavation",
+  "14": "archive",
+  "15": "arena_hockey",
+  "16": "arena_performance",
+  "17": "arena_rodeo",
+  "18": "army_base",
+  "19": "art_gallery",
+  "20": "art_school",
+  "21": "art_studio",
+  "22": "artists_loft",
+  "23": "assembly_line",
+  "24": "athletic_field_outdoor",
+  "25": "atrium_public",
+  "26": "attic",
+  "27": "auditorium",
+  "28": "auto_factory",
+  "29": "auto_showroom",
+  "30": "badlands",
+  "31": "bakery_shop",
+  "32": "balcony_exterior",
+  "33": "balcony_interior",
+  "34": "ball_pit",
+  "35": "ballroom",
+  "36": "bamboo_forest",
+  "37": "bank_vault",
+  "38": "banquet_hall",
+  "39": "bar",
+  "40": "barn",
+  "41": "barndoor",
+  "42": "baseball_field",
+  "43": "basement",
+  "44": "basketball_court_indoor",
+  "45": "bathroom",
+  "46": "bazaar_indoor",
+  "47": "bazaar_outdoor",
+  "48": "beach",
+  "49": "beach_house",
+  "50": "beauty_salon",
+  "51": "bedchamber",
+  "52": "bedroom",
+  "53": "beer_garden",
+  "54": "beer_hall",
+  "55": "berth",
+  "56": "biology_laboratory",
+  "57": "boardwalk",
+  "58": "boat_deck",
+  "59": "boathouse",
+  "60": "bookstore",
+  "61": "booth_indoor",
+  "62": "botanical_garden",
+  "63": "bow_window_indoor",
+  "64": "bowling_alley",
+  "65": "boxing_ring",
+  "66": "bridge",
+  "67": "building_facade",
+  "68": "bullring",
+  "69": "burial_chamber",
+  "70": "bus_interior",
+  "71": "bus_station_indoor",
+  "72": "butchers_shop",
+  "73": "butte",
+  "74": "cabin_outdoor",
+  "75": "cafeteria",
+  "76": "campsite",
+  "77": "campus",
+  "78": "canal_natural",
+  "79": "canal_urban",
+  "80": "candy_store",
+  "81": "canyon",
+  "82": "car_interior",
+  "83": "carrousel",
+  "84": "castle",
+  "85": "catacomb",
+  "86": "cemetery",
+  "87": "chalet",
+  "88": "chemistry_lab",
+  "89": "childs_room",
+  "90": "church_indoor",
+  "91": "church_outdoor",
+  "92": "classroom",
+  "93": "clean_room",
+  "94": "cliff",
+  "95": "closet",
+  "96": "clothing_store",
+  "97": "coast",
+  "98": "cockpit",
+  "99": "coffee_shop",
+  "100": "computer_room",
+  "101": "conference_center",
+  "102": "conference_room",
+  "103": "construction_site",
+  "104": "corn_field",
+  "105": "corral",
+  "106": "corridor",
+  "107": "cottage",
+  "108": "courthouse",
  "109": "courtyard",
-  "110": "covered_bridge_exterior",
-  "111": "creek",
-  "112": "crevasse",
-  "113": "crosswalk",
-  "114": "cubicle_office",
-  "115": "dam",
-  "116": "daycare_center",
-  "117": "delicatessen",
-  "118": "dentists_office",
-  "119": "desert_sand",
-  "120": "desert_vegetation",
-  "121": "diner_indoor",
-  "122": "diner_outdoor",
-  "123": "dinette_home",
-  "124": "dinette_vehicle",
-  "125": "dining_car",
-  "126": "dining_room",
-  "127": "discotheque",
-  "128": "dock",
-  "129": "doorway_indoor",
-  "130": "doorway_outdoor",
-  "131": "dorm_room",
-  "132": "driveway",
-  "133": "driving_range_outdoor",
-  "134": "drugstore",
-  "135": "electrical_substation",
-  "136": "elevator_door",
-  "137": "elevator_escalator",
-  "138": "elevator_interior",
-  "139": "engine_room",
-  "140": "escalator_indoor",
-  "141": "excavation",
-  "142": "factory_indoor",
-  "143": "fairway",
-  "144": "fastfood_restaurant",
-  "145": "field_cultivated",
-  "146": "field_wild",
-  "147": "fire_escape",
-  "148": "fire_station",
-  "149": "firing_range_indoor",
-  "150": "fishpond",
-  "151": "florist_shop_indoor",
-  "152": "food_court",
-  "153": "forest_broadleaf",
-  "154": "forest_needleleaf",
-  "155": "forest_path",
-  "156": "forest_road",
-  "157": "formal_garden",
-  "158": "fountain",
-  "159": "galley",
-  "160": "game_room",
-  "161": "garage_indoor",
-  "162": "garage_outdoor",
-  "163": "garbage_dump",
-  "164": "gas_station",
-  "165": "gazebo_exterior",
-  "166": "general_store_indoor",
-  "167": "general_store_outdoor",
-  "168": "gift_shop",
-  "169": "golf_course",
-  "170": "greenhouse_indoor",
-  "171": "greenhouse_outdoor",
-  "172": "gymnasium_indoor",
-  "173": "hangar_indoor",
-  "174": "hangar_outdoor",
-  "175": "harbor",
-  "176": "hardware_store",
-  "177": "hayfield",
-  "178": "heliport",
-  "179": "herb_garden",
-  "180": "highway",
-  "181": "hill",
-  "182": "home_office",
-  "183": "hospital",
-  "184": "hospital_room",
-  "185": "hot_spring",
-  "186": "hot_tub_outdoor",
-  "187": "hotel",
-  "188": "hotel_outdoor",
-  "189": "hotel_room",
-  "190": "house",
-  "191": "hunting_lodge_outdoor",
-  "192": "ice_cream_parlor",
-  "193": "ice_floe",
-  "194": "ice_shelf",
-  "195": "ice_skating_rink_indoor",
-  "196": "ice_skating_rink_outdoor",
-  "197": "iceberg",
-  "198": "igloo",
-  "199": "industrial_area",
-  "200": "inn_outdoor",
-  "201": "islet",
-  "202": "jacuzzi_indoor",
-  "203": "jail_cell",
-  "204": "jail_indoor",
-  "205": "jewelry_shop",
-  "206": "kasbah",
-  "207": "kennel_indoor",
-  "208": "kennel_outdoor",
-  "209": "kindergarden_classroom",
-  "210": "kitchen",
-  "211": "kitchenette",
-  "212": "labyrinth_outdoor",
-  "213": "lake_natural",
-  "214": "landfill",
-  "215": "landing_deck",
-  "216": "laundromat",
-  "217": "lecture_room",
-  "218": "library_indoor",
-  "219": "library_outdoor",
-  "220": "lido_deck_outdoor",
-  "221": "lift_bridge",
-  "222": "lighthouse",
-  "223": "limousine_interior",
-  "224": "living_room",
-  "225": "loading_dock",
-  "226": "lobby",
-  "227": "lock_chamber",
-  "228": "locker_room",
-  "229": "mansion",
-  "230": "manufactured_home",
-  "231": "market_indoor",
-  "232": "market_outdoor",
-  "233": "marsh",
-  "234": "martial_arts_gym",
-  "235": "mausoleum",
-  "236": "medina",
-  "237": "moat_water",
-  "238": "monastery_outdoor",
-  "239": "mosque_indoor",
-  "240": "mosque_outdoor",
-  "241": "motel",
-  "242": "mountain",
-  "243": "mountain_path",
-  "244": "mountain_snowy",
-  "245": "movie_theater_indoor",
-  "246": "museum_indoor",
-  "247": "museum_outdoor",
-  "248": "music_store",
-  "249": "music_studio",
-  "250": "nuclear_power_plant_outdoor",
-  "251": "nursery",
-  "252": "oast_house",
-  "253": "observatory_indoor",
-  "254": "observatory_outdoor",
-  "255": "ocean",
-  "256": "office",
-  "257": "office_building",
-  "258": "office_cubicles",
-  "259": "oil_refinery_outdoor",
-  "260": "oilrig",
-  "261": "operating_room",
-  "262": "orchard",
-  "263": "outhouse_outdoor",
-  "264": "pagoda",
-  "265": "palace",
-  "266": "pantry",
-  "267": "park",
-  "268": "parking_garage_indoor",
-  "269": "parking_garage_outdoor",
-  "270": "parking_lot",
-  "271": "parlor",
-  "272": "pasture",
-  "273": "patio",
-  "274": "pavilion",
-  "275": "pharmacy",
-  "276": "phone_booth",
-  "277": "physics_laboratory",
-  "278": "picnic_area",
-  "279": "pilothouse_indoor",
-  "280": "planetarium_indoor",
-  "281": "playground",
-  "282": "playroom",
-  "283": "plaza",
-  "284": "podium_indoor",
-  "285": "podium_outdoor",
-  "286": "pond",
-  "287": "poolroom_home",
-  "288": "poolroom_establishment",
-  "289": "power_plant_outdoor",
-  "290": "promenade_deck",
-  "291": "pub_indoor",
-  "292": "pulpit",
-  "293": "putting_green",
-  "294": "racecourse",
-  "295": "raceway",
-  "296": "raft",
-  "297": "railroad_track",
-  "298": "rainforest",
-  "299": "reception",
-  "300": "recreation_room",
-  "301": "residential_neighborhood",
-  "302": "restaurant",
-  "303": "restaurant_kitchen",
-  "304": "restaurant_patio",
-  "305": "rice_paddy",
-  "306": "riding_arena",
-  "307": "river",
-  "308": "rock_arch",
-  "309": "rope_bridge",
-  "310": "ruin",
-  "311": "runway",
-  "312": "sandbar",
-  "313": "sandbox",
-  "314": "sauna",
-  "315": "schoolhouse",
-  "316": "sea_cliff",
-  "317": "server_room",
-  "318": "shed",
-  "319": "shoe_shop",
-  "320": "shop_front",
-  "321": "shopping_mall_indoor",
-  "322": "shower",
-  "323": "skatepark",
-  "324": "ski_resort",
-  "325": "ski_slope",
-  "326": "sky",
-  "327": "skyscraper",
-  "328": "slum",
-  "329": "snowfield",
-  "330": "squash_court",
-  "331": "stable",
-  "332": "stadium_baseball",
-  "333": "stadium_football",
-  "334": "staircase",
-  "335": "street",
-  "336": "subway_interior",
-  "337": "subway_station_platform",
-  "338": "supermarket",
-  "339": "sushi_bar",
-  "340": "swamp",
-  "341": "swimming_hole",
-  "342": "swimming_pool_indoor",
-  "343": "swimming_pool_outdoor",
-  "344": "synagogue_indoor",
-  "345": "synagogue_outdoor",
-  "346": "television_room",
-  "347": "television_studio",
-  "348": "temple_asia",
-  "349": "temple_europe",
-  "350": "trench",
-  "351": "underwater_coral_reef",
-  "352": "utility_room",
-  "353": "valley",
-  "354": "van_interior",
-  "355": "vegetable_garden",
-  "356": "veranda",
-  "357": "veterinarians_office",
-  "358": "viaduct",
-  "359": "videostore",
-  "360": "village",
-  "361": "vineyard",
-  "362": "volcano",
-  "363": "volleyball_court_indoor",
-  "364": "volleyball_court_outdoor",
-  "365": "waiting_room",
-  "366": "warehouse_indoor",
-  "367": "water_tower",
-  "368": "waterfall_block",
-  "369": "waterfall_fan",
-  "370": "waterfall_plunge",
-  "371": "wetland",
-  "372": "wheat_field",
-  "373": "wind_farm",
-  "374": "windmill",
-  "375": "wine_cellar_barrel_storage",
-  "376": "wine_cellar_bottle_storage",
-  "377": "wrestling_ring_indoor",
-  "378": "yard",
-  "379": "youth_hostel"
+  "110": "creek",
+  "111": "crevasse",
+  "112": "crosswalk",
+  "113": "dam",
+  "114": "delicatessen",
+  "115": "department_store",
+  "116": "sand",
+  "117": "vegetation",
+  "118": "desert_road",
+  "119": "outdoor",
+  "120": "dining_hall",
+  "121": "dining_room",
+  "122": "discotheque",
+  "123": "outdoor",
+  "124": "dorm_room",
+  "125": "downtown",
+  "126": "dressing_room",
+  "127": "driveway",
+  "128": "drugstore",
+  "129": "door",
+  "130": "elevator_lobby",
+  "131": "elevator_shaft",
+  "132": "embassy",
+  "133": "engine_room",
+  "134": "entrance_hall",
+  "135": "indoor",
+  "136": "excavation",
+  "137": "fabric_store",
+  "138": "farm",
+  "139": "fastfood_restaurant",
+  "140": "cultivated",
+  "141": "wild",
+  "142": "field_road",
+  "143": "fire_escape",
+  "144": "fire_station",
+  "145": "fishpond",
+  "146": "indoor",
+  "147": "indoor",
+  "148": "food_court",
+  "149": "football_field",
+  "150": "broadleaf",
+  "151": "forest_path",
+  "152": "forest_road",
+  "153": "formal_garden",
+  "154": "fountain",
+  "155": "galley",
+  "156": "indoor",
+  "157": "outdoor",
+  "158": "gas_station",
+  "159": "exterior",
+  "160": "indoor",
+  "161": "outdoor",
+  "162": "gift_shop",
+  "163": "glacier",
+  "164": "golf_course",
+  "165": "indoor",
+  "166": "outdoor",
+  "167": "grotto",
+  "168": "indoor",
+  "169": "indoor",
+  "170": "outdoor",
+  "171": "harbor",
+  "172": "hardware_store",
+  "173": "hayfield",
+  "174": "heliport",
+  "175": "highway",
+  "176": "home_office",
+  "177": "home_theater",
+  "178": "hospital",
+  "179": "hospital_room",
+  "180": "hot_spring",
+  "181": "outdoor",
+  "182": "hotel_room",
+  "183": "house",
+  "184": "outdoor",
+  "185": "ice_cream_parlor",
+  "186": "ice_floe",
+  "187": "ice_shelf",
+  "188": "indoor",
+  "189": "outdoor",
+  "190": "iceberg",
+  "191": "igloo",
+  "192": "industrial_area",
+  "193": "outdoor",
+  "194": "islet",
+  "195": "indoor",
+  "196": "jail_cell",
+  "197": "japanese_garden",
+  "198": "jewelry_shop",
+  "199": "junkyard",
+  "200": "kasbah",
+  "201": "outdoor",
+  "202": "kindergarden_classroom",
+  "203": "kitchen",
+  "204": "lagoon",
+  "205": "natural",
+  "206": "landfill",
+  "207": "landing_deck",
+  "208": "laundromat",
+  "209": "lawn",
+  "210": "lecture_room",
+  "211": "legislative_chamber",
+  "212": "indoor",
+  "213": "outdoor",
+  "214": "lighthouse",
+  "215": "living_room",
+  "216": "loading_dock",
+  "217": "lobby",
+  "218": "lock_chamber",
+  "219": "locker_room",
+  "220": "mansion",
+  "221": "manufactured_home",
+  "222": "indoor",
+  "223": "outdoor",
+  "224": "marsh",
+  "225": "martial_arts_gym",
+  "226": "mausoleum",
+  "227": "medina",
+  "228": "mezzanine",
+  "229": "water",
+  "230": "outdoor",
+  "231": "motel",
+  "232": "mountain",
+  "233": "mountain_path",
+  "234": "mountain_snowy",
+  "235": "indoor",
+  "236": "indoor",
+  "237": "outdoor",
+  "238": "music_studio",
+  "239": "natural_history_museum",
+  "240": "nursery",
+  "241": "nursing_home",
+  "242": "oast_house",
+  "243": "ocean",
+  "244": "office",
+  "245": "office_building",
+  "246": "office_cubicles",
+  "247": "oilrig",
+  "248": "operating_room",
+  "249": "orchard",
+  "250": "orchestra_pit",
+  "251": "pagoda",
+  "252": "palace",
+  "253": "pantry",
+  "254": "park",
+  "255": "indoor",
+  "256": "outdoor",
+  "257": "parking_lot",
+  "258": "pasture",
+  "259": "patio",
+  "260": "pavilion",
+  "261": "pet_shop",
+  "262": "pharmacy",
+  "263": "phone_booth",
+  "264": "physics_laboratory",
+  "265": "picnic_area",
+  "266": "pier",
+  "267": "pizzeria",
+  "268": "playground",
+  "269": "playroom",
+  "270": "plaza",
+  "271": "pond",
+  "272": "porch",
+  "273": "promenade",
+  "274": "indoor",
+  "275": "racecourse",
+  "276": "raceway",
+  "277": "raft",
+  "278": "railroad_track",
+  "279": "rainforest",
+  "280": "reception",
+  "281": "recreation_room",
+  "282": "repair_shop",
+  "283": "residential_neighborhood",
+  "284": "restaurant",
+  "285": "restaurant_kitchen",
+  "286": "restaurant_patio",
+  "287": "rice_paddy",
+  "288": "river",
+  "289": "rock_arch",
+  "290": "roof_garden",
+  "291": "rope_bridge",
+  "292": "ruin",
+  "293": "runway",
+  "294": "sandbox",
+  "295": "sauna",
+  "296": "schoolhouse",
+  "297": "science_museum",
+  "298": "server_room",
+  "299": "shed",
+  "300": "shoe_shop",
+  "301": "shopfront",
+  "302": "indoor",
+  "303": "shower",
+  "304": "ski_resort",
+  "305": "ski_slope",
+  "306": "sky",
+  "307": "skyscraper",
+  "308": "slum",
+  "309": "snowfield",
+  "310": "soccer_field",
+  "311": "stable",
+  "312": "baseball",
+  "313": "football",
+  "314": "soccer",
+  "315": "indoor",
+  "316": "outdoor",
+  "317": "staircase",
+  "318": "storage_room",
+  "319": "street",
+  "320": "platform",
+  "321": "supermarket",
+  "322": "sushi_bar",
+  "323": "swamp",
+  "324": "swimming_hole",
+  "325": "indoor",
+  "326": "outdoor",
+  "327": "outdoor",
+  "328": "television_room",
+  "329": "television_studio",
+  "330": "asia",
+  "331": "throne_room",
+  "332": "ticket_booth",
+  "333": "topiary_garden",
+  "334": "tower",
+  "335": "toyshop",
+  "336": "train_interior",
+  "337": "platform",
+  "338": "tree_farm",
+  "339": "tree_house",
+  "340": "trench",
+  "341": "tundra",
+  "342": "ocean_deep",
+  "343": "utility_room",
+  "344": "valley",
+  "345": "vegetable_garden",
+  "346": "veterinarians_office",
+  "347": "viaduct",
+  "348": "village",
+  "349": "vineyard",
+  "350": "volcano",
+  "351": "outdoor",
+  "352": "waiting_room",
+  "353": "water_park",
+  "354": "water_tower",
+  "355": "waterfall",
+  "356": "watering_hole",
+  "357": "wave",
+  "358": "wet_bar",
+  "359": "wheat_field",
+  "360": "wind_farm",
+  "361": "windmill",
+  "362": "yard",
+  "363": "youth_hostel",
+  "364": "zen_garden"
 }
@@ -162,9 +162,13 @@ class SceneClassifier:
            model_path: Core ML 模型路徑 (可選)
        """
        self.model_path = model_path
+        self.places365_model_path = (
+            "/Users/accusys/momentry/models/resnet18_places365.pth.tar"
+        )
        self.model = None
        self.coreml_model = None
        self.transform = None
+        self.model_type = "unknown"

        # 圖像預處理
        self.transform = transforms.Compose(
@@ -189,23 +193,57 @@ class SceneClassifier:
            try:
                print(f"[SCENE] Loading Core ML model: {self.model_path}")
                self.coreml_model = ct.models.MLModel(self.model_path)
+                self.model_type = "coreml"
                print("[SCENE] Core ML model loaded successfully")
                return True
            except Exception as e:
                print(f"[SCENE] Warning: Failed to load Core ML model: {e}")

-        # 備案：使用 PyTorch + ResNet
+        # 備案：使用 PyTorch + Places365
        if HAS_TORCH:
            try:
                print(f"[SCENE] Loading PyTorch model on {DEVICE}")
-                # 使用預訓練的 ResNet18
-                self.model = models.resnet18(pretrained=True)
+
+                # 檢查 Places365 模型是否存在
+                if Path(self.places365_model_path).exists():
+                    print(
+                        f"[SCENE] Loading Places365 model: {self.places365_model_path}"
+                    )
+                    checkpoint = torch.load(
+                        self.places365_model_path, map_location=DEVICE
+                    )
+
+                    # 建立 ResNet18 模型 (Places365 有 365 個類別)
+                    self.model = models.resnet18(num_classes=365)
+
+                    # 移除 'module.' prefix (DataParallel training)
+                    state_dict = checkpoint["state_dict"]
+                    new_state_dict = {}
+                    for k, v in state_dict.items():
+                        if k.startswith("module."):
+                            new_state_dict[k[7:]] = v
+                        else:
+                            new_state_dict[k] = v
+
+                    self.model.load_state_dict(new_state_dict)
+                    self.model_type = "places365"
+                    print("[SCENE] Places365 model loaded successfully (365 classes)")
+                else:
+                    print(
+                        f"[SCENE] Places365 model not found, using ImageNet pretrained"
+                    )
+                    self.model = models.resnet18(pretrained=True)
+                    self.model_type = "imagenet"
+
                self.model.to(DEVICE)
                self.model.eval()
                print("[SCENE] PyTorch model loaded successfully")
                return True
            except Exception as e:
                print(f"[SCENE] Warning: Failed to load PyTorch model: {e}")
+                import traceback
+
+                traceback.print_exc()

        print("[SCENE] Error: No model available")
        return False
@@ -1,12 +1,8 @@
 #!/opt/homebrew/bin/python3.11
 """
 Story Processor - Generate parent-child chunk hierarchy for RAG
-Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks.
-
-Parent-Child Chunk Strategy:
- Parent chunks: Summarize multiple scenes/segments with narrative description
- Child chunks: Individual ASR segments, OCR texts, detected objects
- When embedding: Parent description + Child content for better retrieval
+Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks.
+NO cloud API calls - fully offline processing
 """

 import sys
@@ -47,57 +43,59 @@ def generate_parent_child_chunks(
    cut_data: Dict,
    yolo_data: Dict,
    ocr_data: Dict,
+    scene_data: Dict,
    parent_chunk_size: int = 5,
-) -> Dict[str, Any]:
+) -> Dict:
    """
-    Generate parent-child chunk hierarchy.
-
-    Parent chunks summarize multiple child chunks for better RAG retrieval.
-    Child chunks are individual segments from ASR, scenes from CUT, etc.
+    Generate parent-child chunk hierarchy using LOCAL data only.
+    No LLM/API calls - uses template-based narrative generation.
    """
-
    child_chunks = []
    parent_chunks = []

-    # Get source data
-    asr_segments = asr_data.get("segments", [])
-    cut_scenes = cut_data.get("scenes", [])
-    yolo_frames = yolo_data.get("frames", [])
-    _ocr_frames = ocr_data.get("frames", [])
-
-    # Create child chunks from ASR segments
-    asr_child_ids = []
-    for i, seg in enumerate(asr_segments):
-        child_chunk = {
-            "chunk_id": f"asr_{i:04d}",
-            "chunk_type": "sentence",
-            "source": "asr",
-            "start_time": seg.get("start", 0),
-            "end_time": seg.get("end", 0),
-            "text_content": seg.get("text", ""),
-            "content": seg,
-            "child_chunk_ids": [],
-            "parent_chunk_id": None,
-        }
-        child_chunks.append(child_chunk)
-        asr_child_ids.append(child_chunk["chunk_id"])
+    # Create child chunks from ASR
+    for seg in asr_data.get("segments", []):
+        child_chunks.append(
+            {
+                "chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}",
+                "chunk_type": "asr",
+                "source": "asr",
+                "start_time": seg.get("start", 0),
+                "end_time": seg.get("end", 0),
+                "text_content": seg.get("text", ""),
+                "content": {
+                    "text": seg.get("text", ""),
+                    "confidence": seg.get("confidence", 0),
+                },
+                "child_chunk_ids": [],
+                "parent_chunk_id": None,
+            }
+        )

    # Create child chunks from CUT scenes
-    cut_child_ids = []
-    for i, scene in enumerate(cut_scenes):
-        child_chunk = {
-            "chunk_id": f"cut_{i:04d}",
-            "chunk_type": "cut",
-            "source": "cut",
-            "start_time": scene.get("start_time", scene.get("start", 0)),
-            "end_time": scene.get("end_time", scene.get("end", 0)),
-            "text_content": None,
-            "content": scene,
-            "child_chunk_ids": [],
-            "parent_chunk_id": None,
-        }
-        child_chunks.append(child_chunk)
-        cut_child_ids.append(child_chunk["chunk_id"])
+    for scene in cut_data.get("scenes", []):
+        child_chunks.append(
+            {
+                "chunk_id": f"cut_{scene.get('scene_number', 0)}",
+                "chunk_type": "cut",
+                "source": "cut",
+                "start_time": scene.get("start_time", 0),
+                "end_time": scene.get("end_time", 0),
+                "text_content": f"Scene {scene.get('scene_number', 0)}",
+                "content": {
+                    "scene_number": scene.get("scene_number", 0),
+                    "duration": scene.get("duration", 0),
+                },
+                "child_chunk_ids": [],
+                "parent_chunk_id": None,
+            }
+        )
+
+    asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"]
+    cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"]
+
+    yolo_frames = yolo_data.get("frames", [])
+    ocr_frames = ocr_data.get("frames", [])

    # Group ASR segments into parent chunks
    for i in range(0, len(asr_child_ids), parent_chunk_size):
@@ -105,7 +103,6 @@ def generate_parent_child_chunks(
        if not batch:
            continue

-        # Collect text from child chunks
        batch_texts = []
        batch_objects = []
        batch_times = []
@@ -118,11 +115,16 @@ def generate_parent_child_chunks(
                    batch_times.append((child["start_time"], child["end_time"]))
                    break

-        # Create parent chunk with narrative description
        start_time = batch_times[0][0] if batch_times else 0
        end_time = batch_times[-1][1] if batch_times else 0

-        # Generate narrative description
+        # Find objects in this time range
+        for frame in yolo_frames[:50]:
+            ts = frame.get("timestamp", 0)
+            if start_time <= ts <= end_time:
+                for obj in frame.get("objects", []):
+                    batch_objects.append(obj.get("class_name", "unknown"))
+
        narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time)

        parent_chunk = {
@@ -136,13 +138,13 @@ def generate_parent_child_chunks(
                "description": narrative,
                "child_count": len(batch),
                "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
+                "detected_objects": list(set(batch_objects))[:5],
            },
            "child_chunk_ids": batch,
            "parent_chunk_id": None,
        }
        parent_chunks.append(parent_chunk)

-        # Update child chunks with parent reference
        for child_id in batch:
            for child in child_chunks:
                if child["chunk_id"] == child_id:
@@ -167,14 +169,12 @@ def generate_parent_child_chunks(
        start_time = batch_times[0][0] if batch_times else 0
        end_time = batch_times[-1][1] if batch_times else 0

-        # Find objects in this time range from YOLO
-        for frame in yolo_frames[:100]:  # Sample frames
+        for frame in yolo_frames[:50]:
            ts = frame.get("timestamp", 0)
            if start_time <= ts <= end_time:
                for obj in frame.get("objects", []):
                    batch_objects.append(obj.get("class_name", "unknown"))

-        # Generate scene narrative
        narrative = generate_scene_narrative(
            batch_objects, start_time, end_time, len(batch)
        )
@@ -190,14 +190,13 @@ def generate_parent_child_chunks(
                "description": narrative,
                "child_count": len(batch),
                "scenes": batch,
-                "detected_objects": list(set(batch_objects))[:10],
+                "detected_objects": list(set(batch_objects))[:5],
            },
            "child_chunk_ids": batch,
            "parent_chunk_id": None,
        }
        parent_chunks.append(parent_chunk)

-        # Update child chunks with parent reference
        for child_id in batch:
            for child in child_chunks:
                if child["chunk_id"] == child_id:
@@ -219,27 +218,33 @@ def generate_parent_child_chunks(
 def generate_narrative(
     texts: List[str], objects: List[str], start: float, end: float
 ) -> str:
-    """Generate narrative description from text snippets"""
-    if not texts:
+    """Build a one-line "[start-end] Speech: ... | Visuals: ..." summary from local text snippets and detected object names."""
+    if not texts and not objects:  # nothing to describe: fall back to a plain time-range label
         return f"Video segment from {start:.1f}s to {end:.1f}s"
 
-    # Combine and summarize
-    combined = " ".join(texts)
-    if len(combined) > 200:
-        combined = combined[:200] + "..."
+    parts = []
+    if texts:
+        combined = " ".join(texts[:5])  # only the first 5 snippets are used
+        if len(combined) > 150:  # cap the speech preview at 150 characters
+            combined = combined[:150] + "..."
+        parts.append(f"Speech: {combined}")
 
-    return f"[{start:.0f}s-{end:.0f}s] {combined}"
+    if objects:
+        unique_objs = list(dict.fromkeys(objects))[:5]  # order-preserving de-dup: first 5 distinct names, same result on every run (set() order is arbitrary)
+        parts.append(f"Visuals: {', '.join(unique_objs)}")
+
+    return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}"


 def generate_scene_narrative(
     objects: List[str], start: float, end: float, scene_count: int
 ) -> str:
-    """Generate scene narrative from detected objects"""
-    unique_objects = list(set(objects))[:5]
+    """Summarize a group of scenes as "[start-end] N scenes. Visuals: a, b." using only locally detected object names."""
+    unique_objects = list(dict.fromkeys(objects))[:5]  # order-preserving de-dup: first 5 distinct names, same result on every run (set() order is arbitrary)
 
     if unique_objects:
         obj_str = ", ".join(unique_objects)
-        return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}."
+        return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}."
     else:
         return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."

@@ -251,70 +256,45 @@ def run_story(
    if publisher:
        publisher.info("story", "STORY_START")

-    # Load existing JSON files
    base_path = os.path.dirname(output_path)
    uuid_name = os.path.basename(output_path).split(".")[0]

-    # Load analysis data
    asr_data = {"segments": []}
    cut_data = {"scenes": []}
    yolo_data = {"frames": []}
    ocr_data = {"frames": []}
+    scene_data = {"scenes": []}

-    # Load ASR
-    asr_path = os.path.join(base_path, f"{uuid_name}.asr.json")
-    if os.path.exists(asr_path):
-        with open(asr_path) as f:
-            asr_data = json.load(f)
-        if publisher:
-            publisher.info(
-                "story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments"
-            )
+    for name, data_var in [
+        ("asr", asr_data),
+        ("cut", cut_data),
+        ("yolo", yolo_data),
+        ("ocr", ocr_data),
+        ("scene", scene_data),
+    ]:
+        path = os.path.join(base_path, f"{uuid_name}.{name}.json")
+        if os.path.exists(path):
+            with open(path) as f:
+                data_var.update(json.load(f))

-    # Load CUT
-    cut_path = os.path.join(base_path, f"{uuid_name}.cut.json")
-    if os.path.exists(cut_path):
-        with open(cut_path) as f:
-            cut_data = json.load(f)
-        if publisher:
-            publisher.info(
-                "story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes"
-            )
-
-    # Load YOLO
-    yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
-    if os.path.exists(yolo_path):
-        with open(yolo_path) as f:
-            yolo_data = json.load(f)
-
-    # Load OCR
-    ocr_path = os.path.join(base_path, f"{uuid_name}.ocr.json")
-    if os.path.exists(ocr_path):
-        with open(ocr_path) as f:
-            ocr_data = json.load(f)
-
-    # Load metadata
-    metadata = extract_video_metadata(video_path)
-
-    if publisher:
-        publisher.info("story", "Generating parent-child chunks...")
-
-    # Generate parent-child hierarchy
    result = generate_parent_child_chunks(
-        asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size
+        asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size
    )

-    result["metadata"] = metadata
-    result["parent_chunk_size"] = parent_chunk_size
+    result["video_metadata"] = extract_video_metadata(video_path)
+    result["processing"] = {
+        "method": "local_aggregation",
+        "cloud_api_used": False,
+        "parent_chunk_size": parent_chunk_size,
+    }

    with open(output_path, "w") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    if publisher:
-        stats = result["stats"]
        publisher.complete(
            "story",
-            f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children",
+            f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)",
        )

    return result
@@ -322,7 +302,7 @@ def run_story(

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
-        description="Video Story Generator - Parent-Child Chunks"
+        description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)"
    )
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
@@ -331,7 +311,7 @@ if __name__ == "__main__":
        "--parent-chunk-size",
        type=int,
        default=5,
-        help="Number of child chunks per parent chunk",
+        help="Number of child chunks per parent",
    )

    args = parser.parse_args()
@@ -340,6 +320,6 @@ if __name__ == "__main__":
        args.video_path, args.output_path, args.uuid, args.parent_chunk_size
    )
    print(
-        f"Story generated: {result['stats']['total_parent_chunks']} parent chunks, "
-        f"{result['stats']['total_child_chunks']} child chunks"
+        f"Story generated: {result['stats']['total_parent_chunks']} parent, "
+        f"{result['stats']['total_child_chunks']} child chunks (LOCAL)"
    )
@@ -30,14 +30,20 @@ pub async fn api_key_validation(
    tracing::info!("[MIDDLEWARE] Path: {:?}", request.uri().path());

    let headers = request.headers();
-    tracing::info!(
-        "[MIDDLEWARE] Headers: {:?}",
-        headers.keys().collect::<Vec<_>>()
-    );
+    tracing::info!("[MIDDLEWARE] All headers: {:?}", headers);

    let api_key = match extract_api_key(headers) {
        Ok(key) => {
            tracing::info!("[MIDDLEWARE] API key extracted, length: {}", key.len());
+            if key.len() > 8 {
+                tracing::info!(
+                    "[MIDDLEWARE] Key value: {}...{}",
+                    &key[..4],
+                    &key[key.len() - 4..]
+                );
+            } else {
+                tracing::info!("[MIDDLEWARE] Key value: ****");
+            }
            key
        }
        Err(status) => {
@@ -59,7 +65,10 @@ pub async fn api_key_validation(
            r
        }
        Ok(None) => {
-            tracing::warn!("[MIDDLEWARE] API key not found in database");
+            tracing::warn!(
+                "[MIDDLEWARE] API key NOT FOUND in database for hash: {}",
+                &key_hash[..16]
+            );
            return Response::builder()
                .status(StatusCode::UNAUTHORIZED)
                .body(axum::body::Body::empty())
@@ -1,4 +1,13 @@
+pub mod face_recognition;
+pub mod identities;
+pub mod identity_binding;
 pub mod middleware;
+pub mod n8n_search;
+pub mod person_identity;
+pub mod search;
 pub mod server;
+pub mod universal_search;
+pub mod visual_chunk_search;
+pub mod who;

 pub use server::start_server;
@@ -10,6 +10,8 @@ pub const KEY_PREFIX_VIDEO: &str = "video:";
 pub const KEY_PREFIX_SEARCH: &str = "search:";
 pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:";
 pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:";
+pub const KEY_PREFIX_SEARCH_BM25: &str = "search:bm25:";
+pub const KEY_PREFIX_SEARCH_N8N_BM25: &str = "search:n8n:bm25:";
 pub const KEY_HEALTH: &str = "health:basic";

 pub fn videos_list(page: usize, limit: usize) -> String {
@@ -32,6 +34,14 @@ pub fn n8n_search(query_hash: &str) -> String {
    format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash)
 }

+pub fn bm25_search(query_hash: &str) -> String { // builds the key for a BM25 search entry
+    format!("{}{}", KEY_PREFIX_SEARCH_BM25, query_hash) // "search:bm25:" immediately followed by query_hash
+}
+
+pub fn n8n_bm25_search(query_hash: &str) -> String { // builds the key for an n8n BM25 search entry
+    format!("{}{}", KEY_PREFIX_SEARCH_N8N_BM25, query_hash) // "search:n8n:bm25:" immediately followed by query_hash
+}
+
 pub fn health() -> String {
    KEY_HEALTH.to_string()
 }
@@ -48,6 +58,17 @@ pub fn search_prefix() -> String {
    format!("^{}", KEY_PREFIX_SEARCH)
 }

+pub const KEY_PREFIX_VISUAL_SEARCH: &str = "search:visual:";
+pub const CATEGORY_VISUAL_SEARCH: &str = "visual_search";
+
+pub fn visual_search(uuid: &str, criteria_hash: &str) -> String { // builds the key for a visual search entry scoped to one uuid
+    format!("{}{}:{}", KEY_PREFIX_VISUAL_SEARCH, uuid, criteria_hash) // "search:visual:<uuid>:<criteria_hash>"
+}
+
+pub fn visual_search_prefix() -> String { // returns the visual-search key prefix with a leading '^'
+    format!("^{}", KEY_PREFIX_VISUAL_SEARCH) // yields "^search:visual:"; NOTE(review): '^' is presumably a regex start anchor for prefix matching - confirm against the caller
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -78,8 +99,28 @@ mod tests {
        assert_eq!(n8n_search("hash123"), "search:n8n:hash123");
    }

+    #[test]
+    fn test_bm25_search() { // the hash is appended directly after the "search:bm25:" prefix
+        assert_eq!(bm25_search("hash123"), "search:bm25:hash123");
+    }
+
+    #[test]
+    fn test_n8n_bm25_search() { // the hash is appended directly after the "search:n8n:bm25:" prefix
+        assert_eq!(n8n_bm25_search("hash123"), "search:n8n:bm25:hash123");
+    }
+
    #[test]
    fn test_health() {
        assert_eq!(health(), "health:basic");
    }
+
+    #[test]
+    fn test_visual_search() { // uuid and criteria hash are joined with ':' after the "search:visual:" prefix
+        assert_eq!(visual_search("abc123", "hash"), "search:visual:abc123:hash");
+    }
+
+    #[test]
+    fn test_visual_search_prefix() { // the prefix form is the key prefix with a leading '^'
+        assert_eq!(visual_search_prefix(), "^search:visual:");
+    }
 }
@@ -136,6 +136,10 @@ impl MongoCache {
        self.settings.ttl_video_meta
    }

+    pub fn ttl_visual_search(&self) -> u64 { // TTL for visual-search entries; NOTE(review): unit is presumably seconds - confirm against the settings definition
+        self.settings.ttl_search // Reuse search TTL (no separate visual-search setting is read here)
+    }
+
    pub async fn get<T: DeserializeOwned>(&self, key: &str) -> Result<Option<T>> {
        if !self.is_enabled() {
            return Ok(None);
@@ -1,5 +1,9 @@
+pub mod rule1_ingest;
+pub mod rule3_ingest;
 pub mod splitter;
 pub mod types;

+pub use rule1_ingest::ingest_rule1;
+pub use rule3_ingest::ingest_rule3;
 pub use splitter::{AsrSegment, ChunkSplitter};
 pub use types::{Chunk, ChunkType};
@@ -1,6 +1,7 @@
 use crate::core::time::FrameTime;
 use serde::{Deserialize, Serialize};

+// ==================== ChunkType ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkType {
@@ -8,7 +9,8 @@ pub enum ChunkType {
    Sentence,
    Cut,
    Trace,
-    Story, // Parent chunk from story analysis
+    Story,
+    Visual, // 視覺分片 (Phase 2.1)
 }

 impl ChunkType {
@@ -19,10 +21,12 @@ impl ChunkType {
            ChunkType::Cut => "cut",
            ChunkType::Trace => "trace",
            ChunkType::Story => "story",
+            ChunkType::Visual => "visual",
        }
    }
 }

+// ==================== ChunkRule ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkRule {
@@ -39,6 +43,73 @@ impl ChunkRule {
    }
 }

+// ==================== 視覺分片相關結構 (Phase 2.1) ====================
+/// Bounding box. NOTE(review): coordinate origin and units are not specified here - confirm against the detector output.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BoundingBox {
+    pub x: i32,
+    pub y: i32,
+    pub width: i32,
+    pub height: i32,
+}
+
+/// A detected object
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DetectedObject {
+    /// Object class name
+    pub class_name: String,
+    /// Object class ID
+    pub class_id: u32,
+    /// Confidence score (0.0-1.0)
+    pub confidence: f32,
+    /// Bounding box
+    pub bbox: Option<BoundingBox>,
+    /// Occurrence count (within the chunk)
+    pub occurrence: u32,
+}
+
+/// Object list for a keyframe
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds) - for reference only; frame_number is the primary marker
+    pub timestamp: f64,
+    /// Keyframe frame number - primary time marker
+    pub frame_number: u64,
+    /// Detected objects
+    pub objects: Vec<DetectedObject>,
+}
+
+/// Visual metadata
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualMetadata {
+    /// Total number of objects
+    pub object_count: u32,
+    /// List of unique object classes
+    pub unique_classes: Vec<String>,
+    /// Highest confidence value
+    pub max_confidence: f32,
+    /// Average confidence value
+    pub avg_confidence: f32,
+    /// Spatial density (average number of objects per frame)
+    pub spatial_density: f32,
+}
+
+/// Content of a visual chunk
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualChunkContent {
+    /// Per-keyframe object lists; each keyframe entry carries its frame_number
+    pub keyframe_objects: Vec<KeyframeObjects>,
+    /// Dominant object labels (objects that appear in most frames)
+    pub dominant_objects: Vec<String>,
+    /// Object relationships as (object1, relationship, object2) - optional
+    pub object_relationships: Vec<(String, String, String)>,
+    /// Scene description - optional
+    pub scene_description: Option<String>,
+    /// Visual metadata
+    pub metadata: VisualMetadata,
+}
+
+// ==================== Chunk 主結構 ====================
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Chunk {
    pub file_id: i32,
@@ -49,9 +120,9 @@ pub struct Chunk {
    pub rule: ChunkRule,
    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
    pub fps: f64,
-    /// Start frame (0-based)
+    /// Start frame (0-based) - 主要時間標示
    pub start_frame: i64,
-    /// End frame (exclusive)
+    /// End frame (exclusive) - 主要時間標示
    pub end_frame: i64,
    pub text_content: Option<String>,
    pub content: serde_json::Value,
@@ -61,17 +132,11 @@ pub struct Chunk {
    pub pre_chunk_ids: Vec<i32>,
    pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
    pub child_chunk_ids: Vec<String>,    // Child chunk IDs (for parent chunks)
+    pub visual_stats: Option<serde_json::Value>,
 }

 impl Chunk {
-    /// Creates a new chunk from frame counts.
-    ///
-    /// # Arguments
-    ///
-    /// * `start_frame` - Start frame (0-based)
-    /// * `end_frame` - End frame (exclusive)
-    /// * `fps` - Frames per second (can be fractional)
-    #[allow(clippy::too_many_arguments)]
+    /// Creates a new chunk from frame counts.
    pub fn new(
        file_id: i32,
        uuid: String,
@@ -83,11 +148,13 @@ impl Chunk {
        fps: f64,
        content: serde_json::Value,
    ) -> Self {
-        let chunk_id = format!("{}_{:04}", chunk_type.as_str(), chunk_index);
+        let frame_count = (end_frame - start_frame) as i32;
+        let chunk_id = format!("{}_{}", uuid, chunk_index);
+
        Self {
            file_id,
            uuid,
-            chunk_id: chunk_id.clone(),
+            chunk_id,
            chunk_index,
            chunk_type,
            rule,
@@ -98,17 +165,171 @@ impl Chunk {
            content,
            metadata: None,
            vector_id: None,
-            frame_count: 0,
+            frame_count,
            pre_chunk_ids: vec![],
            parent_chunk_id: None,
            child_chunk_ids: vec![],
+            visual_stats: None,
        }
    }

-    /// Creates a new chunk from seconds (legacy conversion).
+    /// Creates a visual chunk (Phase 2.1).
+    ///
+    /// `visual_content` is serialized to JSON and stored as the chunk content; the chunk is
+    /// created with `ChunkType::Visual` and `ChunkRule::Rule2`. If serialization fails, the
+    /// content is a JSON object carrying an `error` message instead.
+    pub fn new_visual(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        visual_content: VisualChunkContent,
+    ) -> Self {
+        let content = match serde_json::to_value(&visual_content) {
+            Ok(value) => value,
+            Err(_) => serde_json::json!({"error": "Failed to serialize visual content"}),
+        };
+
+        Self::new(
+            file_id,
+            uuid,
+            chunk_index,
+            ChunkType::Visual,
+            ChunkRule::Rule2,
+            start_frame,
+            end_frame,
+            fps,
+            content,
+        )
+    }
+
+    /// Builds a visual chunk from a run of YOLO detection frames (Phase 2.1).
+    ///
+    /// Every frame becomes a [`KeyframeObjects`] entry and every detection a
+    /// [`DetectedObject`] with `occurrence` fixed at 1. Aggregate statistics are stored in
+    /// [`VisualMetadata`]. Both `unique_classes` and `dominant_objects` are sorted
+    /// alphabetically so the serialized content is reproducible between runs.
+    ///
+    /// A class is "dominant" when it is detected in more than half of `yolo_frames`.
+    /// With no frames, every statistic is zero and both class lists are empty.
+    /// The chunk itself is created through `Self::new_visual`.
+    pub fn from_yolo_frames(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
+    ) -> Self {
+        let frame_total = yolo_frames.len();
+
+        // Convert each YOLO frame into a keyframe entry.
+        let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
+            .iter()
+            .map(|frame| KeyframeObjects {
+                timestamp: frame.timestamp,
+                frame_number: frame.frame,
+                objects: frame
+                    .objects
+                    .iter()
+                    .map(|obj| DetectedObject {
+                        class_name: obj.class_name.clone(),
+                        class_id: obj.class_id,
+                        confidence: obj.confidence,
+                        bbox: Some(BoundingBox {
+                            x: obj.x,
+                            y: obj.y,
+                            width: obj.width,
+                            height: obj.height,
+                        }),
+                        occurrence: 1,
+                    })
+                    .collect(),
+            })
+            .collect();
+
+        let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();
+
+        // A BTreeSet de-duplicates and yields the names in sorted order; a HashSet would
+        // make the order of `unique_classes` vary from run to run.
+        let unique_classes: Vec<String> = yolo_frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
+            .collect::<std::collections::BTreeSet<&str>>()
+            .into_iter()
+            .map(str::to_owned)
+            .collect();
+
+        // Confidence statistics over every detection in every frame.
+        let confidences: Vec<f32> = yolo_frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
+            .collect();
+        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
+        let avg_confidence = if confidences.is_empty() {
+            0.0
+        } else {
+            confidences.iter().sum::<f32>() / confidences.len() as f32
+        };
+
+        // Count, per class, the number of frames it appears in (at most once per frame).
+        let mut frames_per_class: std::collections::HashMap<&str, usize> =
+            std::collections::HashMap::new();
+        for frame in &yolo_frames {
+            let classes_in_frame: std::collections::HashSet<&str> =
+                frame.objects.iter().map(|o| o.class_name.as_str()).collect();
+            for class in classes_in_frame {
+                *frames_per_class.entry(class).or_default() += 1;
+            }
+        }
+
+        // Dominant = present in strictly more than half of the frames.
+        let mut dominant_objects: Vec<String> = frames_per_class
+            .into_iter()
+            .filter(|&(_, count)| count * 2 > frame_total)
+            .map(|(class, _)| class.to_owned())
+            .collect();
+        dominant_objects.sort_unstable();
+
+        let spatial_density = if frame_total == 0 {
+            0.0
+        } else {
+            total_objects as f32 / frame_total as f32
+        };
+
+        let visual_content = VisualChunkContent {
+            keyframe_objects,
+            dominant_objects,
+            object_relationships: vec![], // optional: relationship detection may be added later
+            scene_description: None,      // optional: LLM-generated description may be added later
+            metadata: VisualMetadata {
+                object_count: total_objects,
+                unique_classes,
+                max_confidence,
+                avg_confidence,
+                spatial_density,
+            },
+        };
+
+        Self::new_visual(
+            file_id,
+            uuid,
+            chunk_index,
+            start_frame,
+            end_frame,
+            fps,
+            visual_content,
+        )
+    }
+
+    /// Converts the chunk to a `FrameTime`, using its first frame as the reference point.
+    pub fn to_frame_time(&self) -> FrameTime {
+        self.start_time()
+    }
+
+    /// Returns `true` when this chunk is a parent, i.e. it lists at least one child chunk.
+    ///
+    /// A chunk that only has `parent_chunk_id` set is a child, not a parent, so that field
+    /// is deliberately not consulted here.
+    pub fn is_parent(&self) -> bool {
+        !self.child_chunk_ids.is_empty()
+    }
+
+    /// Creates a new chunk from seconds (legacy conversion).
    ///
-    /// This is useful for migrating from older systems that store time as seconds.
-    /// The frame counts are calculated by rounding `seconds * fps`.
+    /// This is useful for migrating from older systems that store time as seconds.
+    /// The frame counts are calculated by rounding `seconds * fps`.
    #[allow(clippy::too_many_arguments)]
    pub fn from_seconds(
        file_id: i32,
@@ -136,104 +357,197 @@ impl Chunk {
        )
    }

-    /// Returns the start time as a `FrameTime`.
+    /// Returns the start time as a `FrameTime` built from `start_frame` and `fps`.
    pub fn start_time(&self) -> FrameTime {
        FrameTime::from_frames(self.start_frame, self.fps)
    }

-    /// Returns the end time as a `FrameTime`.
+    /// Returns the end time as a `FrameTime` built from `end_frame` and `fps`.
    pub fn end_time(&self) -> FrameTime {
        FrameTime::from_frames(self.end_frame, self.fps)
    }

-    /// Returns the duration in frames.
+    /// Returns the duration in frames (`end_frame - start_frame`).
    pub fn duration_frames(&self) -> i64 {
        self.end_frame - self.start_frame
    }

-    /// Returns the duration in seconds.
+    /// Returns the duration in seconds: the frame count divided by `fps`.
+    ///
+    /// There is no guard against `fps == 0.0` here.
    pub fn duration_seconds(&self) -> f64 {
        self.duration_frames() as f64 / self.fps
    }

-    /// Formats the start time as "seconds.frame" (e.g., "123.04").
+    /// Formats the start time as "seconds.frame" (e.g., "123.04").
    pub fn format_start_sec_frame(&self) -> String {
        self.start_time().format_sec_frame()
    }

-    /// Formats the end time as "seconds.frame" (e.g., "456.15").
+    /// Formats the end time as "seconds.frame" (e.g., "456.15").
    pub fn format_end_sec_frame(&self) -> String {
        self.end_time().format_sec_frame()
    }

-    /// Formats the start time as "HH:MM:SS".
+    /// Formats the start time as "HH:MM:SS".
    pub fn format_start_hms(&self) -> String {
        self.start_time().format_hms()
    }

-    /// Formats the end time as "HH:MM:SS".
+    /// Formats the end time as "HH:MM:SS".
    pub fn format_end_hms(&self) -> String {
        self.end_time().format_hms()
    }

-    /// Formats the start time as "HH:MM:SS.FF".
+    /// Formats the start time as "HH:MM:SS.FF".
    pub fn format_start_hms_frame(&self) -> String {
        self.start_time().format_hms_frame()
    }

-    /// Formats the end time as "HH:MM:SS.FF".
+    /// Formats the end time as "HH:MM:SS.FF".
    pub fn format_end_hms_frame(&self) -> String {
        self.end_time().format_hms_frame()
    }

-    /// Returns a tuple of (start_seconds, end_seconds) for compatibility.
+    /// Returns a tuple of (start_seconds, end_seconds) for compatibility.
    ///
-    /// This is provided for backward compatibility during migration.
-    /// Prefer using `start_time()` and `end_time()` methods.
+    /// This is provided for backward compatibility during migration.
+    /// Prefer using the `start_time()` and `end_time()` methods.
    pub fn time_range_seconds(&self) -> (f64, f64) {
        (self.start_time().seconds(), self.end_time().seconds())
    }

+    /// Builder-style setter: stores `metadata` on the chunk and returns the chunk.
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = Some(metadata);
        self
    }

+    /// Builder-style setter: stores the vector ID on the chunk and returns the chunk.
    pub fn with_vector_id(mut self, vector_id: String) -> Self {
        self.vector_id = Some(vector_id);
        self
    }

+    /// Builder-style setter: stores the text content on the chunk and returns the chunk.
    pub fn with_text_content(mut self, text: String) -> Self {
        self.text_content = Some(text);
        self
    }

+    /// Builder-style setter: overwrites `frame_count` and returns the chunk.
    pub fn with_frame_count(mut self, count: i32) -> Self {
        self.frame_count = count;
        self
    }

+    /// Builder-style setter: replaces `pre_chunk_ids` with `ids` and returns the chunk.
    pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
        self.pre_chunk_ids = ids;
        self
    }

+    /// Builder-style setter: stores the parent chunk ID and returns the chunk.
    pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
        self.parent_chunk_id = Some(parent_id);
        self
    }

+    /// Builder-style setter: replaces `child_chunk_ids` with `child_ids` and returns the chunk.
    pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
        self.child_chunk_ids = child_ids;
        self
    }
+}

-    pub fn is_parent_chunk(&self) -> bool {
-        !self.child_chunk_ids.is_empty()
+// ==================== VisualChunkContent helper methods ====================
+impl VisualChunkContent {
+    /// Computes the similarity of two YOLO frames from the object classes they contain.
+    ///
+    /// The result is the Jaccard index of the two sets of class names. Two frames without
+    /// objects score 1.0; exactly one frame without objects scores 0.0.
+    pub fn frame_similarity(
+        frame1: &crate::core::processor::yolo::YoloFrame,
+        frame2: &crate::core::processor::yolo::YoloFrame,
+    ) -> f32 {
+        match (frame1.objects.is_empty(), frame2.objects.is_empty()) {
+            // Two empty frames are considered identical.
+            (true, true) => return 1.0,
+            // One empty and one non-empty frame share nothing.
+            (true, false) | (false, true) => return 0.0,
+            (false, false) => {}
+        }
+
+        // Distinct class names per frame.
+        let classes1: std::collections::HashSet<&str> =
+            frame1.objects.iter().map(|o| o.class_name.as_str()).collect();
+        let classes2: std::collections::HashSet<&str> =
+            frame2.objects.iter().map(|o| o.class_name.as_str()).collect();
+
+        // Jaccard index: |A ∩ B| / |A ∪ B|.
+        let shared = classes1.intersection(&classes2).count();
+        let combined = classes1.union(&classes2).count();
+
+        if combined == 0 {
+            0.0
+        } else {
+            shared as f32 / combined as f32
+        }
    }

-    pub fn is_child_chunk(&self) -> bool {
-        self.parent_chunk_id.is_some()
+    /// Produces a one-line summary of the visual chunk, based on keyframe `frame_number`s.
+    ///
+    /// Returns `"Empty visual chunk"` when there are no keyframes. The duration is derived
+    /// from the first and last keyframe numbers and `fps` (for reference only); a
+    /// non-positive `fps` yields a duration of 0.
+    pub fn summary(&self, fps: f64) -> String {
+        let (first_frame, last_frame) =
+            match (self.keyframe_objects.first(), self.keyframe_objects.last()) {
+                (Some(first), Some(last)) => (first.frame_number, last.frame_number),
+                _ => return "Empty visual chunk".to_string(),
+            };
+
+        // Frame number -> seconds, falling back to 0 when fps is unusable.
+        let to_seconds = |frame: u64| if fps > 0.0 { frame as f64 / fps } else { 0.0 };
+        let duration = to_seconds(last_frame) - to_seconds(first_frame);
+
+        let dominant = if self.dominant_objects.is_empty() {
+            "none".to_string()
+        } else {
+            self.dominant_objects.join(", ")
+        };
+
+        format!(
+            "Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
+            first_frame,
+            last_frame,
+            duration,
+            self.keyframe_objects.len(),
+            self.metadata.object_count,
+            self.metadata.unique_classes.len(),
+            dominant
+        )
+    }
+
+    /// Returns `true` if any keyframe contains an object whose class name equals `class_name`.
+    pub fn contains_object(&self, class_name: &str) -> bool {
+        self.keyframe_objects
+            .iter()
+            .flat_map(|keyframe| keyframe.objects.iter())
+            .any(|obj| obj.class_name == class_name)
+    }
+
+    /// Collects references to every object whose confidence is at least `threshold`,
+    /// in keyframe order.
+    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
+        let mut selected = Vec::new();
+        for keyframe in &self.keyframe_objects {
+            selected.extend(
+                keyframe
+                    .objects
+                    .iter()
+                    .filter(|obj| obj.confidence >= threshold),
+            );
+        }
+        selected
    }
 }
@@ -164,3 +164,29 @@ pub mod cache {
            .unwrap_or(3600)
    });
 }
+
+/// Settings for the LLM summary service, each read once from the environment on first use.
+pub mod llm {
+    use super::*;
+
+    /// Endpoint from `MOMENTRY_LLM_SUMMARY_URL`; defaults to a local chat-completions URL.
+    pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
+        match env::var("MOMENTRY_LLM_SUMMARY_URL") {
+            Ok(url) => url,
+            Err(_) => "http://127.0.0.1:8081/v1/chat/completions".to_string(),
+        }
+    });
+
+    /// Model name from `MOMENTRY_LLM_SUMMARY_MODEL`; defaults to `gemma4`.
+    pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
+        match env::var("MOMENTRY_LLM_SUMMARY_MODEL") {
+            Ok(model) => model,
+            Err(_) => "gemma4".to_string(),
+        }
+    });
+
+    /// Timeout in seconds from `MOMENTRY_LLM_SUMMARY_TIMEOUT`; 120 when unset or unparsable.
+    pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_SUMMARY_TIMEOUT")
+            .ok()
+            .and_then(|raw| raw.parse::<u64>().ok())
+            .unwrap_or(120)
+    });
+
+    /// Flag from `MOMENTRY_LLM_SUMMARY_ENABLED`: only the exact values `true` and `1`
+    /// enable it; any other value disables it; an unset variable enables it.
+    pub static SUMMARY_ENABLED: Lazy<bool> = Lazy::new(|| {
+        match env::var("MOMENTRY_LLM_SUMMARY_ENABLED") {
+            Ok(value) => matches!(value.as_str(), "true" | "1"),
+            Err(_) => true,
+        }
+    });
+}
@@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};

 pub struct MongoDb {
    base_url: String,
+    database: String,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -53,7 +54,8 @@ impl MongoDb {
    pub fn new() -> Self {
        let base_url =
            std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
-        Self { base_url }
+        let database = crate::core::config::MONGODB_DATABASE.clone();
+        Self { base_url, database }
    }
 }

@@ -68,7 +70,7 @@ impl MongoDb {
        let doc: ChunkDocument = chunk.clone().into();
        let client = reqwest::Client::new();

-        let url = format!("{}/momentry/chunks", self.base_url);
+        let url = format!("{}/{}/chunks", self.base_url, self.database);

        client
            .post(&url)
@@ -83,8 +85,8 @@ impl MongoDb {
    pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
        let url = format!(
-            "{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}",
-            self.base_url, uuid
+            "{}/{}/chunks?filter={{\"uuid\":\"{}\"}}",
+            self.base_url, self.database, uuid
        );

        let response = client
@@ -131,6 +133,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
@@ -141,8 +144,8 @@ impl MongoDb {
    pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
        let url = format!(
-            "{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
-            self.base_url, query
+            "{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
+            self.base_url, self.database, query
        );

        let response = client
@@ -189,6 +192,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
@@ -198,7 +202,7 @@ impl MongoDb {

    pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
-        let url = format!("{}/momentry/chunks", self.base_url);
+        let url = format!("{}/{}/chunks", self.base_url, self.database);

        let response = client
            .get(&url)
@@ -244,6 +248,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
@@ -128,7 +128,7 @@ impl QdrantDb {
        use std::hash::{Hash, Hasher};
        let mut hasher = DefaultHasher::new();
        point_id_str.hash(&mut hasher);
-        let point_id = hasher.finish() as u64;
+        let point_id = hasher.finish();

        let body = serde_json::json!({
            "points": [{
@@ -171,7 +171,7 @@ impl QdrantDb {
            ));
        }

-        tracing::debug!("Qdrant response: {}", response_text);
+        tracing::debug!("Qdrant upsert response status: {}", status);
        tracing::info!("Successfully upserted vector for chunk: {}", chunk_id);
        Ok(())
    }
@@ -257,6 +257,101 @@ impl QdrantDb {
        Ok(search_results)
    }

+    /// Searches several collections concurrently and merges the hits into one ranked list.
+    ///
+    /// Each collection is asked for `limit * 2` points so that enough hits remain after
+    /// de-duplication. The search is best-effort per collection: a request that fails,
+    /// returns a non-success status, or yields an unparsable body contributes no hits and
+    /// logs a warning; it never fails the whole call.
+    ///
+    /// The merged hits are sorted by descending score, de-duplicated on
+    /// `(chunk_id, uuid)` keeping the best-scoring occurrence, and truncated to `limit`.
+    /// Hits whose score is NaN are dropped because they cannot be ranked.
+    pub async fn search_collections(
+        &self,
+        query_vector: &[f32],
+        collections: &[&str],
+        limit: usize,
+    ) -> Result<Vec<SearchResult>> {
+        #[derive(Deserialize)]
+        struct QdrantSearchResult {
+            result: Vec<QdrantPoint>,
+        }
+        #[derive(Deserialize)]
+        struct QdrantPoint {
+            #[allow(dead_code)]
+            id: serde_json::Value,
+            score: f64,
+            payload: HashMap<String, serde_json::Value>,
+        }
+
+        /// Reads a string payload field, falling back to "unknown" when absent or not a string.
+        fn payload_str(payload: &HashMap<String, serde_json::Value>, key: &str) -> String {
+            payload
+                .get(key)
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
+                .to_string()
+        }
+
+        let body = serde_json::json!({
+            "vector": query_vector,
+            "limit": limit.saturating_mul(2), // Fetch more from each to account for overlaps
+            "with_payload": true
+        });
+
+        let mut requests = Vec::with_capacity(collections.len());
+        for &collection in collections {
+            let url = format!("{}/collections/{}/points/search", self.base_url, collection);
+            let client = self.client.clone();
+            let api_key = self.api_key.clone();
+            let body = body.clone();
+            requests.push(async move {
+                let response = match client
+                    .post(&url)
+                    .header("api-key", &api_key)
+                    .header("Content-Type", "application/json")
+                    .json(&body)
+                    .send()
+                    .await
+                {
+                    Ok(resp) => resp,
+                    Err(e) => {
+                        tracing::warn!("Qdrant search request to {} failed: {}", url, e);
+                        return Vec::new();
+                    }
+                };
+
+                let status = response.status();
+                if !status.is_success() {
+                    tracing::warn!("Qdrant search at {} returned status {}", url, status);
+                    return Vec::new();
+                }
+
+                let resp_text = match response.text().await {
+                    Ok(text) => text,
+                    Err(e) => {
+                        tracing::warn!("Failed to read Qdrant response from {}: {}", url, e);
+                        return Vec::new();
+                    }
+                };
+
+                match serde_json::from_str::<QdrantSearchResult>(&resp_text) {
+                    Ok(parsed) => parsed
+                        .result
+                        .into_iter()
+                        .map(|point| SearchResult {
+                            uuid: payload_str(&point.payload, "uuid"),
+                            chunk_id: payload_str(&point.payload, "chunk_id"),
+                            score: point.score as f32,
+                        })
+                        .collect::<Vec<SearchResult>>(),
+                    Err(e) => {
+                        tracing::warn!("Failed to parse Qdrant response from {}: {}", url, e);
+                        Vec::new()
+                    }
+                }
+            });
+        }
+
+        let mut merged: Vec<SearchResult> = futures_util::future::join_all(requests)
+            .await
+            .into_iter()
+            .flatten()
+            .filter(|r| !r.score.is_nan())
+            .collect();
+
+        // Sort by score descending; NaN was filtered out, so total_cmp is a plain numeric order.
+        merged.sort_by(|a, b| b.score.total_cmp(&a.score));
+
+        // Deduplicate by chunk_id + uuid. Duplicates are not adjacent after sorting by
+        // score, so track the keys already seen and keep the first (best-scoring) one.
+        let mut seen = std::collections::HashSet::new();
+        merged.retain(|r| seen.insert((r.chunk_id.clone(), r.uuid.clone())));
+
+        // Truncate to limit
+        merged.truncate(limit);
+
+        Ok(merged)
+    }
+
    pub async fn search_in_uuid(
        &self,
        query_vector: &[f32],
@@ -4,9 +4,15 @@ pub mod chunk;
 pub mod config;
 pub mod db;
 pub mod embedding;
+pub mod ingestion;
+pub mod llm;
 pub mod overlay;
+pub mod person_identity;
 pub mod probe;
 pub mod processor;
 pub mod storage;
+pub mod text;
 pub mod thumbnail;
 pub mod time;
+pub mod tmdb;
+pub mod worker;
@@ -28,16 +28,25 @@ pub async fn process_asrx(
    uuid: Option<&str>,
 ) -> Result<AsrxResult> {
    let executor = PythonExecutor::new()?;
-    let script_path = executor.script_path("asrx_processor.py");
+    let mut script_path = executor.script_path("asrx_processor_custom.py");

-    tracing::info!("[ASRX] Starting speaker diarization: {}", video_path);
+    tracing::info!(
+        "[ASRX] Starting speaker diarization (custom): {}",
+        video_path
+    );

    if !script_path.exists() {
-        tracing::warn!("[ASRX] Script not found, returning empty result");
-        return Ok(AsrxResult {
-            language: None,
-            segments: vec![],
-        });
+        tracing::warn!("[ASRX] Custom script not found, falling back to original");
+        let fallback_path = executor.script_path("asrx_processor.py");
+        if !fallback_path.exists() {
+            tracing::warn!("[ASRX] No script found, returning empty result");
+            return Ok(AsrxResult {
+                language: None,
+                segments: vec![],
+            });
+        }
+        // Actually switch to the fallback; otherwise the missing custom script would still be run.
+        script_path = fallback_path;
    }

    let mut cmd = Command::new(executor.python_path());
@@ -9,6 +9,7 @@ pub mod ocr;
 pub mod pose;
 pub mod scene_classification;
 pub mod story;
+pub mod visual_chunk;
 pub mod yolo;

 pub use asr::{process_asr, AsrResult, AsrSegment};
@@ -28,4 +29,5 @@ pub use scene_classification::{
    process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
 };
 pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
+pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
 pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
@@ -4,6 +4,8 @@ pub mod api;

 pub mod ui;

+pub mod watcher;
+
 pub mod worker;

 pub use core::cache::{keys, MongoCache, RedisCache};
@@ -13,6 +15,10 @@ pub use core::db::{
    VideoStatus,
 };
 pub use core::embedding::Embedder;
+pub use core::person_identity::{
+    ChunkPersonInfo, PersonAppearance, PersonIdentity, PersonIdentityResponse, PersonMatch,
+    PersonStatistics, PersonTimelineEntry, PersonTimelineResponse,
+};
 pub use core::probe::ProbeResult;
 pub use core::storage::file_manager::FileManager;
 pub use core::storage::output_dir::OutputDir;
@@ -1805,6 +1805,64 @@ async fn main() -> Result<()> {
                }
            };

+            // Read Pose JSON (optional)
+            let pose_path = format!("{}.pose.json", uuid);
+            let pose_result = match std::fs::read_to_string(&pose_path) {
+                Ok(pose_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::pose::PoseResult,
+                >(&pose_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded Pose: {} frames", result.frames.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
+                        momentry_core::core::processor::pose::PoseResult {
+                            frame_count: 0,
+                            fps: 0.0,
+                            frames: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: Pose file not found. Skipping Pose.");
+                    momentry_core::core::processor::pose::PoseResult {
+                        frame_count: 0,
+                        fps: 0.0,
+                        frames: vec![],
+                    }
+                }
+            };
+
+            // Read ASRX JSON (optional)
+            let asrx_path = format!("{}.asrx.json", uuid);
+            let asrx_result = match std::fs::read_to_string(&asrx_path) {
+                Ok(asrx_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::asrx::AsrxResult,
+                >(&asrx_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded ASRX: {} segments", result.segments.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
+                        momentry_core::core::processor::asrx::AsrxResult {
+                            language: None,
+                            segments: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: ASRX file not found. Skipping ASRX.");
+                    momentry_core::core::processor::asrx::AsrxResult {
+                        language: None,
+                        segments: vec![],
+                    }
+                }
+            };
+
            // ========== Store pre_chunks (from ASR, CUT) ==========

            println!("\nStoring pre_chunks...");
@@ -1922,12 +1980,21 @@ async fn main() -> Result<()> {
                face_by_frame.insert(frame.frame, frame.clone());
            }

-            // Store frames (merge data from YOLO, OCR, Face)
+            let mut pose_by_frame: std::collections::HashMap<
+                u64,
+                momentry_core::core::processor::pose::PoseFrame,
+            > = std::collections::HashMap::new();
+            for frame in &pose_result.frames {
+                pose_by_frame.insert(frame.frame, frame.clone());
+            }
+
+            // Store frames (merge data from YOLO, OCR, Face, Pose)
            let mut all_frames: Vec<u64> = frame_data
                .keys()
                .cloned()
                .chain(ocr_by_frame.keys().cloned())
                .chain(face_by_frame.keys().cloned())
+                .chain(pose_by_frame.keys().cloned())
                .collect();
            all_frames.sort();
            all_frames.dedup();
@@ -1937,6 +2004,7 @@ async fn main() -> Result<()> {
                let yolo_frame = frame_data.get(frame_num);
                let ocr_frame = ocr_by_frame.get(frame_num);
                let face_frame = face_by_frame.get(frame_num);
+                let pose_frame = pose_by_frame.get(frame_num);

                let frame = momentry_core::core::db::postgres_db::Frame {
                    id: 0,
@@ -1947,6 +2015,7 @@ async fn main() -> Result<()> {
                    yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
                    ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
                    face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
+                    pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
                    frame_path: None,
                    created_at: String::new(),
                };
@@ -1960,10 +2029,33 @@ async fn main() -> Result<()> {
            println!("\nCreating chunks...");

            // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
+            // Merge ASRX speaker_id by time overlap
            let mut sentence_chunks = Vec::new();
            for (i, seg) in asr_result.segments.iter().enumerate() {
                let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
-                let chunk = Chunk::from_seconds(
+
+                // Find matching ASRX segment by time overlap
+                let speaker_id = asrx_result
+                    .segments
+                    .iter()
+                    .find(|ax| {
+                        // Overlap: ASRX segment overlaps with ASR segment
+                        ax.start <= seg.end && ax.end >= seg.start
+                    })
+                    .and_then(|ax| ax.speaker_id.clone());
+
+                let content = if let Some(ref sid) = speaker_id {
+                    serde_json::json!({
+                        "text": seg.text,
+                        "speaker_id": sid,
+                    })
+                } else {
+                    serde_json::json!({
+                        "text": seg.text,
+                    })
+                };
+
+                let mut chunk = Chunk::from_seconds(
                    file_id as i32,
                    uuid.clone(),
                    i as u32,
@@ -1972,15 +2064,40 @@ async fn main() -> Result<()> {
                    seg.start,
                    seg.end,
                    fps,
-                    serde_json::json!({
-                        "text": seg.text,
-                    }),
+                    content,
                )
                .with_text_content(seg.text.clone())
                .with_pre_chunk_ids(vec![pre_chunk_id as i32]);
+
+                // Add ASRX metadata if available
+                if speaker_id.is_some() {
+                    chunk = chunk.with_metadata(serde_json::json!({
+                        "language": asr_result.language,
+                        "language_probability": asr_result.language_probability,
+                        "speaker_matched": true,
+                    }));
+                }
+
                sentence_chunks.push(chunk);
            }

+            if !asrx_result.segments.is_empty() {
+                let matched = sentence_chunks
+                    .iter()
+                    .filter(|c| {
+                        c.content
+                            .get("speaker_id")
+                            .and_then(|v| v.as_str())
+                            .is_some()
+                    })
+                    .count();
+                println!(
+                    "  ASRX merge: {}/{} sentence chunks matched to speakers",
+                    matched,
+                    sentence_chunks.len()
+                );
+            }
+
            // Rule 1: CUT chunks
            let mut cut_chunks = Vec::new();
            for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2235,7 +2352,7 @@ async fn main() -> Result<()> {
            // Get list of videos to process
            let videos_to_process = if uuid == "all" {
                // Get all videos
-                let videos = pg.list_videos().await?;
+                let videos = pg.list_videos(10000, 0).await?.0;
                videos.into_iter().map(|v| v.uuid).collect::<Vec<_>>()
            } else {
                // Process single video
@@ -2486,7 +2603,7 @@ async fn main() -> Result<()> {
                    .await?
                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2520,7 +2637,7 @@ async fn main() -> Result<()> {
                    .await?
                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");
@@ -5,6 +5,21 @@ use std::path::PathBuf;

 const DEFAULT_API_URL: &str = "http://localhost:3002";

+// Localhost base URL on port 3003 (DEFAULT_API_URL above uses port 3002).
+// NOTE(review): nothing in this hunk reads DEV_API_URL; get_api_url() only falls back to
+// DEFAULT_API_URL. Confirm whether it is referenced elsewhere or can be removed.
+const DEV_API_URL: &str = "http://localhost:3003";
+
+/// Resolves the base URL of the Momentry API from the environment.
+///
+/// Precedence:
+/// 1. `MOMENTRY_API_URL`, with surrounding whitespace and trailing `/` removed.
+/// 2. `MOMENTRY_SERVER_PORT`, expanded to `http://localhost:<port>`.
+/// 3. `DEFAULT_API_URL`.
+///
+/// A variable that is unset, not valid Unicode, or blank is skipped, so an empty
+/// `MOMENTRY_API_URL=` line in an env file no longer produces an empty base URL.
+fn get_api_url() -> String {
+    // Reads a variable and treats whitespace-only values the same as "unset".
+    let non_blank_env = |name: &str| {
+        std::env::var(name)
+            .ok()
+            .map(|value| value.trim().to_string())
+            .filter(|value| !value.is_empty())
+    };
+
+    non_blank_env("MOMENTRY_API_URL")
+        // Request paths are appended as "{base}/api/...", so a trailing slash on the
+        // base would produce "//api/...".
+        .map(|url| url.trim_end_matches('/').to_string())
+        .filter(|url| !url.is_empty())
+        .or_else(|| {
+            non_blank_env("MOMENTRY_SERVER_PORT").map(|port| format!("http://localhost:{}", port))
+        })
+        .unwrap_or_else(|| DEFAULT_API_URL.to_string())
+}
+
+/// Reads the API key from `MOMENTRY_API_KEY`.
+///
+/// Returns `None` when the variable is unset, not valid Unicode, or blank, so callers
+/// that attach the key as an `X-API-Key` header never send an empty header value.
+/// Surrounding whitespace (e.g. a stray newline from an env file) is trimmed.
+fn get_api_key() -> Option<String> {
+    std::env::var("MOMENTRY_API_KEY")
+        .ok()
+        .map(|key| key.trim().to_string())
+        .filter(|key| !key.is_empty())
+}
+
 #[derive(Debug, Clone)]
 pub struct ApiClient {
    client: Client,
@@ -83,7 +98,7 @@ pub struct VideosResponse {

 impl ApiClient {
    pub fn new() -> Self {
-        let url = std::env::var("MOMENTRY_API_URL").unwrap_or_else(|_| DEFAULT_API_URL.to_string());
+        let url = get_api_url();
        Self {
            client: Client::new(),
            base_url: url,
@@ -103,7 +118,11 @@ impl ApiClient {
        let request = RegisterRequest {
            path: path.to_string(),
        };
-        let response = self.client.post(&url).json(&request).send().await?;
+        let mut request_builder = self.client.post(&url).json(&request);
+        if let Some(key) = get_api_key() {
+            request_builder = request_builder.header("X-API-Key", key);
+        }
+        let response = request_builder.send().await?;
        let status = response.status();
        let result = response.json::<RegisterResponse>().await?;
        if !status.is_success() {
@@ -124,7 +143,11 @@ impl ApiClient {
            limit,
            uuid: uuid.map(|s| s.to_string()),
        };
-        let response = self.client.post(&url).json(&request).send().await?;
+        let mut request_builder = self.client.post(&url).json(&request);
+        if let Some(key) = get_api_key() {
+            request_builder = request_builder.header("X-API-Key", key);
+        }
+        let response = request_builder.send().await?;
        let status = response.status();
        let result = response.json::<SearchResponse>().await?;
        if !status.is_success() {
@@ -135,18 +158,30 @@ impl ApiClient {

    pub async fn lookup_video(&self, uuid: &str) -> Result<LookupResponse> {
+        // Looks up one video with GET {base_url}/api/v1/lookup?uuid=<uuid> and returns the
+        // decoded LookupResponse.
+        // NOTE(review): `uuid` is interpolated into the query string as-is (no
+        // percent-encoding); confirm callers only pass URL-safe values.
        let url = format!("{}/api/v1/lookup?uuid={}", self.base_url, uuid);
-        let response = self.client.get(&url).send().await?;
+        let mut request = self.client.get(&url);
+        // The X-API-Key header is attached only when get_api_key() yields a key.
+        if let Some(key) = get_api_key() {
+            request = request.header("X-API-Key", key);
+        }
+        let response = request.send().await?;
        let status = response.status();
-        let result = response.json::<LookupResponse>().await?;
-        if !status.is_success() {
+        // Only an exact 200 is treated as success; every other status, including other
+        // 2xx codes, goes to the error branch below without decoding the body.
+        if status == 200 {
+            let result = response.json::<LookupResponse>().await?;
+            // A 200 response with an empty `uuid` is reported as "video does not exist".
+            if result.uuid.is_empty() {
+                anyhow::bail!("影片不存在: {}", uuid);
+            }
+            Ok(result)
+        } else {
            anyhow::bail!("API request failed with status: {}", status);
        }
-        Ok(result)
    }

    pub async fn list_videos(&self) -> Result<Vec<VideoInfo>> {
        let url = format!("{}/api/v1/videos", self.base_url);
-        let response = self.client.get(&url).send().await?;
+        let mut request = self.client.get(&url);
+        if let Some(key) = get_api_key() {
+            request = request.header("X-API-Key", key);
+        }
+        let response = request.send().await?;
        let status = response.status();
        let result = response.json::<VideosResponse>().await?;
        if !status.is_success() {
@@ -397,6 +397,29 @@ fn format_time(seconds: f64) -> String {
    format!("{:02}:{:02}:{:02}.{:02}", hours, minutes, secs, millis)
 }

+/// Asks `ffprobe` for the duration of `video_path`, in seconds.
+///
+/// Returns `0.0` when `ffprobe` cannot be launched, exits unsuccessfully, or prints
+/// something that does not parse as an `f64`.
+// Only called from the SDL2 player, which is compiled behind the `player` feature, so the
+// function is unused in builds without that feature.
+#[allow(dead_code)]
+fn get_video_duration(video_path: &str) -> f64 {
+    // Print nothing but the bare duration value of the container.
+    const PROBE_ARGS: [&str; 6] = [
+        "-v",
+        "error",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "default=noprint_wrappers=1:nokey=1",
+    ];
+
+    std::process::Command::new("ffprobe")
+        .args(PROBE_ARGS)
+        .arg(video_path)
+        .output()
+        .ok()
+        .filter(|probe| probe.status.success())
+        .and_then(|probe| {
+            String::from_utf8_lossy(&probe.stdout)
+                .trim()
+                .parse::<f64>()
+                .ok()
+        })
+        .unwrap_or(0.0)
+}
+
 fn lookup_video_uuid(video_path: &str) -> Option<String> {
    use std::process::Command as StdCommand;

@@ -510,9 +533,714 @@ fn run_player(_video_path: &str, _video_uuid: Option<String>) -> Result<()> {
 }

 #[cfg(feature = "player")]
-fn run_player(_video_path: &str, _video_uuid: Option<String>) -> Result<()> {
-    println!("Player not available - SDL2 not configured");
-    println!("Playing: {} (UUID: {:?})", _video_path, _video_uuid);
+/// `run_player` variant compiled when the `player` feature is enabled: forwards both
+/// arguments unchanged to `run_player_with_sdl2` and returns its result.
+fn run_player(video_path: &str, video_uuid: Option<String>) -> Result<()> {
+    run_player_with_sdl2(video_path, video_uuid)
+}
+
+/// Plays `video_path` in a 1280x720 SDL2 window, decoding it with an external ffmpeg process.
+///
+/// ffmpeg scales/pads the video to the window size and writes raw RGB24 frames at 30 fps to
+/// its stdout. Each frame is read from that pipe, uploaded to a streaming texture and
+/// presented together with a progress bar. The playback clock is derived from the number of
+/// frames shown, not from container timestamps. Seeking restarts ffmpeg with `-ss`.
+///
+/// Keys: SPACE pause/resume, Q/ESC quit, Left/Right seek -/+10 s, Up/Down seek -/+60 s,
+/// A toggle ASR output (currently printed to the console, not drawn), F placeholder.
+///
+/// `video_uuid` is only printed.
+///
+/// # Errors
+/// Fails when SDL2 (context, window, canvas, texture, event pump) cannot be initialised,
+/// when ffmpeg cannot be started or restarted, or when a frame cannot be uploaded or drawn.
+/// The ffmpeg child is killed and reaped on every exit path.
+#[cfg(feature = "player")]
+fn run_player_with_sdl2(video_path: &str, video_uuid: Option<String>) -> Result<()> {
+    use sdl2::event::Event;
+    use sdl2::keyboard::Keycode;
+    use sdl2::pixels::{Color, PixelFormatEnum};
+    use sdl2::rect::Rect;
+    use std::io::{BufReader, Read};
+    use std::process::{Child, ChildStdout, Stdio};
+    use std::thread;
+    use std::time::{Duration, Instant};
+
+    /// A running ffmpeg decoder plus the pipe its raw frames arrive on.
+    struct Decoder {
+        child: Child,
+        frames: BufReader<ChildStdout>,
+    }
+
+    impl Drop for Decoder {
+        // Kill *and* wait: killing alone leaves a zombie process behind, and early returns
+        // via `?` would otherwise leave ffmpeg running.
+        fn drop(&mut self) {
+            let _ = self.child.kill();
+            let _ = self.child.wait();
+        }
+    }
+
+    const HOMEBREW_FFMPEG: &str = "/opt/homebrew/bin/ffmpeg";
+    const OUTPUT_FPS: u32 = 30;
+
+    println!("\n=== 🎬 SDL2 Video Player ===");
+    println!("File: {}", video_path);
+    println!("UUID: {:?}", video_uuid);
+
+    let sdl_context = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
+    let video_subsystem = sdl_context
+        .video()
+        .map_err(|e| anyhow::anyhow!("Video init failed: {}", e))?;
+
+    let width = 1280u32;
+    let height = 720u32;
+
+    let window = video_subsystem
+        .window("Momentry Player", width, height)
+        .position_centered()
+        .resizable()
+        .build()
+        .map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
+
+    let mut canvas = window
+        .into_canvas()
+        .build()
+        .map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
+
+    let texture_creator = canvas.texture_creator();
+    let mut texture = texture_creator
+        .create_texture_streaming(PixelFormatEnum::RGB24, width, height)
+        .map_err(|e| anyhow::anyhow!("Texture creation failed: {}", e))?;
+
+    // Prefer the Homebrew binary on macOS when it exists; otherwise resolve ffmpeg via PATH
+    // (covers Intel Macs and non-Homebrew installs).
+    let ffmpeg_path = if cfg!(target_os = "macos") && std::path::Path::new(HOMEBREW_FFMPEG).exists()
+    {
+        HOMEBREW_FFMPEG
+    } else {
+        "ffmpeg"
+    };
+
+    let scale_filter = format!(
+        "scale={}:{}:force_original_aspect_ratio=decrease,pad={}:{}:(ow-iw)/2:(oh-ih)/2",
+        width, height, width, height
+    );
+    let output_rate = OUTPUT_FPS.to_string();
+
+    // Starts ffmpeg writing raw frames to a pipe, optionally from `start_at` seconds.
+    let spawn_decoder = |start_at: Option<f64>| -> Result<Decoder> {
+        let mut command = std::process::Command::new(ffmpeg_path);
+        if let Some(position) = start_at {
+            // `-ss` before `-i` seeks on the input side.
+            command.arg("-ss").arg(format!("{:.2}", position));
+        }
+        command
+            .args([
+                "-i",
+                video_path,
+                "-vf",
+                scale_filter.as_str(),
+                "-pix_fmt",
+                "rgb24",
+                "-r",
+                output_rate.as_str(),
+                "-f",
+                "rawvideo",
+                "-",
+            ])
+            .stdout(Stdio::piped())
+            .stderr(Stdio::null());
+
+        let mut child = command.spawn().map_err(|e| {
+            if start_at.is_some() {
+                anyhow::anyhow!("Failed to restart ffmpeg: {}", e)
+            } else {
+                anyhow::anyhow!("Failed to start ffmpeg: {}", e)
+            }
+        })?;
+
+        let stdout = match child.stdout.take() {
+            Some(pipe) => pipe,
+            None => {
+                let _ = child.kill();
+                let _ = child.wait();
+                anyhow::bail!("Failed to capture stdout");
+            }
+        };
+
+        Ok(Decoder {
+            child,
+            frames: BufReader::new(stdout),
+        })
+    };
+
+    let mut decoder = spawn_decoder(None)?;
+
+    let frame_size = (width * height * 3) as usize;
+    let mut frame_buffer = vec![0u8; frame_size];
+
+    let mut event_pump = sdl_context
+        .event_pump()
+        .map_err(|e| anyhow::anyhow!("Event pump failed: {}", e))?;
+
+    let mut asr_overlay = asr_overlay::AsrOverlay::new();
+    // Best effort: playback continues without ASR data when loading fails.
+    let _ = asr_overlay.load_from_file(video_path);
+    println!("ASR Overlay initialized: {}", !asr_overlay.is_empty());
+
+    let video_duration = get_video_duration(video_path);
+    println!("Video duration: {:.1}s", video_duration);
+
+    let fps = f64::from(OUTPUT_FPS);
+    // Exactly 1/fps so wall-clock pacing matches the `current_time` increments
+    // (a rounded 33 ms would play roughly 1% fast).
+    let frame_duration = Duration::from_secs_f64(1.0 / fps);
+    let mut frame_count = 0u64;
+    let mut paused = false;
+    let mut quit = false;
+    let mut current_time = 0.0_f64;
+    let mut seek_request: Option<f64> = None;
+    let mut asr_overlay_visible = false;
+
+    // Keeps seek targets inside [0, duration]; the upper bound applies only when the
+    // duration is known (get_video_duration returns 0.0 on failure).
+    let clamp_seek = |target: f64| -> f64 {
+        let target = target.max(0.0);
+        if video_duration > 0.0 {
+            target.min(video_duration)
+        } else {
+            target
+        }
+    };
+
+    println!("Playing... (Press SPACE to pause, Q/ESC to quit, ←/→ to seek, A to toggle ASR, F for fullscreen)");
+
+    loop {
+        let frame_start = Instant::now();
+
+        // Handle seek by restarting ffmpeg at the requested position.
+        if let Some(seek_pos) = seek_request.take() {
+            println!("\n⏩ Seeking to {:.1}s...", seek_pos);
+
+            // Stop (kill + reap) the old decoder before starting its replacement.
+            drop(decoder);
+            decoder = spawn_decoder(Some(seek_pos))?;
+            current_time = seek_pos;
+            println!("▶ Resumed at {:.1}s", current_time);
+        }
+
+        for event in event_pump.poll_iter() {
+            match event {
+                Event::Quit { .. } => {
+                    println!("\n👋 Quitting player");
+                    quit = true;
+                    break;
+                }
+                Event::KeyDown {
+                    keycode: Some(key), ..
+                } => match key {
+                    Keycode::Q | Keycode::Escape => {
+                        println!("\n👋 Quitting player");
+                        quit = true;
+                        break;
+                    }
+                    Keycode::Space => {
+                        paused = !paused;
+                        println!("{}", if paused { "⏸ Paused" } else { "▶ Playing" });
+                    }
+                    Keycode::Left => {
+                        let new_time = clamp_seek(current_time - 10.0);
+                        seek_request = Some(new_time);
+                        println!("⏪ Seek to {:.1}s", new_time);
+                    }
+                    Keycode::Right => {
+                        let new_time = clamp_seek(current_time + 10.0);
+                        seek_request = Some(new_time);
+                        println!("⏩ Seek to {:.1}s", new_time);
+                    }
+                    Keycode::Up => {
+                        let new_time = clamp_seek(current_time - 60.0);
+                        seek_request = Some(new_time);
+                        println!("⏪ Seek to {:.1}s (1min)", new_time);
+                    }
+                    Keycode::Down => {
+                        let new_time = clamp_seek(current_time + 60.0);
+                        seek_request = Some(new_time);
+                        println!("⏩ Seek to {:.1}s (+1min)", new_time);
+                    }
+                    Keycode::A => {
+                        // Toggle ASR visibility
+                        asr_overlay_visible = !asr_overlay_visible;
+                        println!(
+                            "{}",
+                            if asr_overlay_visible {
+                                "🔊 ASR ON"
+                            } else {
+                                "🔇 ASR OFF"
+                            }
+                        );
+                    }
+                    Keycode::F => {
+                        println!("📺 Toggle fullscreen (not implemented in basic SDL2)");
+                    }
+                    _ => {}
+                },
+                _ => {}
+            }
+        }
+
+        // `break` above only leaves the event loop; leave the playback loop here.
+        if quit {
+            break;
+        }
+
+        if paused {
+            thread::sleep(Duration::from_millis(100));
+            continue;
+        }
+
+        // Update ASR text based on current time
+        if !asr_overlay.is_empty() {
+            asr_overlay.update(current_time);
+        }
+
+        match decoder.frames.read_exact(&mut frame_buffer) {
+            Ok(_) => {
+                texture
+                    .update(None, &frame_buffer, (width * 3) as usize)
+                    .map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;
+
+                canvas.clear();
+
+                canvas
+                    .copy(&texture, None, None)
+                    .map_err(|e| anyhow::anyhow!("Render failed: {}", e))?;
+
+                // Placeholder until text rendering (font context) is wired in: print the
+                // current ASR text to the console to verify timing.
+                if asr_overlay_visible && !asr_overlay.get_text().is_empty() {
+                    println!("[ASR] {:.1}s: {}", current_time, asr_overlay.get_text());
+                }
+
+                // Progress bar at the bottom: gray background, green progress.
+                let progress = if video_duration > 0.0 {
+                    (current_time / video_duration).min(1.0)
+                } else {
+                    0.0
+                };
+                let bar_width = ((width as f64) * progress) as u32;
+
+                canvas.set_draw_color(Color::RGB(50, 50, 50)); // Background
+                let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, width, 5));
+                if bar_width > 0 {
+                    canvas.set_draw_color(Color::RGB(0, 200, 0)); // Progress
+                    let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, bar_width, 5));
+                }
+                // Reset draw color to black for the next frame's clear().
+                canvas.set_draw_color(Color::RGB(0, 0, 0));
+
+                canvas.present();
+
+                frame_count += 1;
+                current_time += 1.0 / fps;
+
+                let elapsed = frame_start.elapsed();
+                if elapsed < frame_duration {
+                    thread::sleep(frame_duration - elapsed);
+                }
+            }
+            Err(err) => {
+                // A short read is the normal end of stream; anything else is worth reporting.
+                if err.kind() != std::io::ErrorKind::UnexpectedEof {
+                    eprintln!("\n⚠️ Failed to read frame from ffmpeg: {}", err);
+                }
+                println!(
+                    "\n📽️ End of video ({} frames, {:.1}s)",
+                    frame_count, current_time
+                );
+                break;
+            }
+        }
+    }
+
+    // Stop ffmpeg before reporting completion.
+    drop(decoder);
+    println!("✅ Playback finished (total: {:.1}s)", current_time);
+    Ok(())
+}
+
+/// Plays a local video file with the player named by `external_player`.
+///
+/// The video path is the first command-line argument (after the binary name) that does not
+/// start with `-`. When there is none, or it is empty, the path is read from stdin.
+/// NOTE(review): a flag that takes a separate value (e.g. `--player mpv`) would have that
+/// value picked up as the path; confirm against the CLI definition.
+///
+/// Players:
+/// - `"vlc"`: `open -a VLC <path>` on macOS, `vlc <path>` elsewhere
+/// - `"mpv"`: `mpv <path>`
+/// - `"ffplay"`: `ffplay -autoexit <path>`
+/// - `"sdl2"`: the built-in SDL2 player (requires the `player` feature)
+/// - anything else is run as a command with the path as its only argument
+///
+/// External players are spawned and not waited on.
+///
+/// # Errors
+/// Fails when no path is provided, the file does not exist, stdin/stdout I/O fails, or the
+/// player process cannot be spawned.
+fn run_local_mode(external_player: &str) -> Result<()> {
+    // First positional (non-flag) argument after the binary name, if any.
+    let cli_path = env::args()
+        .skip(1)
+        .find(|arg| !arg.starts_with('-'))
+        .filter(|path| !path.is_empty());
+
+    let video_path = match cli_path {
+        Some(path) => path,
+        None => {
+            println!("Local Mode - Play local video files");
+            println!("=====================================\n");
+            print!("Enter video file path: ");
+            // stdout is line-buffered: without a flush the prompt may only appear after
+            // the user has already typed the answer.
+            std::io::Write::flush(&mut std::io::stdout())?;
+
+            let mut input = String::new();
+            std::io::stdin().read_line(&mut input)?;
+            let path = input.trim().to_string();
+            if path.is_empty() {
+                anyhow::bail!("No video path provided");
+            }
+            path
+        }
+    };
+
+    if !Path::new(&video_path).exists() {
+        anyhow::bail!("File not found: {}", video_path);
+    }
+
+    println!("\nUsing external player: {}", external_player);
+    println!("Playing: {}", video_path);
+
+    match external_player {
+        "vlc" => {
+            // `open -a` exists only on macOS; elsewhere start the vlc binary from PATH.
+            if cfg!(target_os = "macos") {
+                std::process::Command::new("open")
+                    .arg("-a")
+                    .arg("VLC")
+                    .arg(&video_path)
+                    .spawn()?;
+            } else {
+                std::process::Command::new("vlc").arg(&video_path).spawn()?;
+            }
+            println!("✅ Opened with VLC");
+        }
+        "mpv" => {
+            std::process::Command::new("mpv").arg(&video_path).spawn()?;
+            println!("✅ Opened with mpv");
+        }
+        "ffplay" => {
+            std::process::Command::new("ffplay")
+                .arg("-autoexit")
+                .arg(&video_path)
+                .spawn()?;
+            println!("✅ Opened with ffplay");
+        }
+        "sdl2" => {
+            #[cfg(feature = "player")]
+            return run_player_with_sdl2(&video_path, None);
+            #[cfg(not(feature = "player"))]
+            {
+                println!("SDL2 player not enabled. Rebuild with --features player");
+            }
+        }
+        other => {
+            std::process::Command::new(other)
+                .arg(&video_path)
+                .spawn()?;
+            println!("✅ Opened with {}", other);
+        }
+    }
+
+    Ok(())
+}
+
+fn run_online_mode() -> Result<()> {
+    println!("\n===========================================");
+    println!("         🎬 Online Mode - Momentry");
+    println!("===========================================\n");
+
+    let client = ApiClient::new();
+    println!("Connected to API: {}", client.base_url());
+
+    let rt = tokio::runtime::Runtime::new()?;
+
+    loop {
+        println!("\n┌─────────────────────────────────────────┐");
+        println!("│         Online Mode Menu                │");
+        println!("├─────────────────────────────────────────┤");
+        println!("│  [1] List Videos  - 列出所有影片       │");
+        println!("│  [2] Search       - RAG 搜尋影片內容  │");
+        println!("│  [3] Play          - 播放影片           │");
+        println!("│  [4] Lookup        - 查詢影片資訊      │");
+        println!("│  [q] Quit          - 離開               │");
+        println!("└─────────────────────────────────────────┘");
+        print!("\n請選擇: ");
+
+        let mut input = String::new();
+        std::io::stdin().read_line(&mut input)?;
+        let choice = input.trim();
+
+        match choice {
+            "1" => {
+                println!("\n=== 📋 影片列表 ===");
+                match rt.block_on(client.list_videos()) {
+                    Ok(videos) => {
+                        if videos.is_empty() {
+                            println!("沒有找到任何影片");
+                        } else {
+                            println!("\n共 {} 部影片:\n", videos.len());
+                            for (i, v) in videos.iter().enumerate() {
+                                let duration = format!(
+                                    "{}:{:02}",
+                                    (v.duration / 60.0) as u32,
+                                    (v.duration % 60.0) as u32
+                                );
+                                println!(
+                                    "  [{}] {} | {} | {}x{} | {}",
+                                    i + 1,
+                                    v.file_name,
+                                    v.uuid.chars().take(8).collect::<String>(),
+                                    v.width,
+                                    v.height,
+                                    duration
+                                );
+                            }
+                        }
+                    }
+                    Err(e) => println!("取得影片列表失敗: {}", e),
+                }
+            }
+            "2" => {
+                println!("\n=== 🔍 RAG 搜尋 ===");
+                print!("輸入搜尋關鍵字: ");
+                input.clear();
+                std::io::stdin().read_line(&mut input)?;
+                let query = input.trim().to_string();
+                if query.is_empty() {
+                    println!("搜尋關鍵字不能為空");
+                    continue;
+                }
+
+                print!("限定特定影片？(y/N): ");
+                input.clear();
+                std::io::stdin().read_line(&mut input)?;
+                let limit_uuid = if input.trim().to_lowercase() == "y" {
+                    print!("輸入影片 UUID: ");
+                    input.clear();
+                    std::io::stdin().read_line(&mut input)?;
+                    Some(input.trim().to_string())
+                } else {
+                    None
+                };
+
+                println!("\n搜尋中...");
+                match rt.block_on(client.search_chunks(&query, limit_uuid.as_deref(), Some(10))) {
+                    Ok(response) => {
+                        if response.results.is_empty() {
+                            println!("沒有找到結果");
+                            continue;
+                        }
+                        println!("\n找到 {} 個結果:\n", response.results.len());
+                        for (i, r) in response.results.iter().enumerate() {
+                            let time_range = format!(
+                                "{:02}:{:02} - {:02}:{:02}",
+                                (r.start_time / 60.0) as u32,
+                                (r.start_time % 60.0) as u32,
+                                (r.end_time / 60.0) as u32,
+                                (r.end_time % 60.0) as u32
+                            );
+                            // The text may contain multi-byte UTF-8 (e.g. CJK). Slicing at a
+                            // fixed byte offset panics when it lands inside a character, so
+                            // back up to the nearest char boundary at or below 50 bytes.
+                            let text_preview = if r.text.len() > 50 {
+                                let cut = (0..=50)
+                                    .rev()
+                                    .find(|&idx| r.text.is_char_boundary(idx))
+                                    .unwrap_or(0);
+                                format!("{}...", &r.text[..cut])
+                            } else {
+                                r.text.clone()
+                            };
+                            println!(
+                                "  [{}] {} | {} | {:.2} | {}",
+                                i + 1,
+                                time_range,
+                                r.uuid.chars().take(8).collect::<String>(),
+                                r.score,
+                                text_preview
+                            );
+                        }
+
+                        let mut current_player: Option<std::process::Child> = None;
+
+                        loop {
+                            if let Some(ref mut child) = current_player {
+                                match child.try_wait() {
+                                    Ok(Some(_)) => {
+                                        println!("播放器已結束");
+                                        current_player = None;
+                                    }
+                                    Ok(None) => {
+                                        // 還在執行中
+                                    }
+                                    Err(e) => {
+                                        println!("檢查播放器狀態失敗：{}", e);
+                                        current_player = None;
+                                    }
+                                }
+                            }
+
+                            print!(
+                                "\n選擇播放 (1-{}) 或 q 離開 (kill player), L 重新顯示列表：",
+                                response.results.len()
+                            );
+                            input.clear();
+                            std::io::stdin().read_line(&mut input)?;
+                            let selection = input.trim();
+                            let selection_lower = selection.to_lowercase();
+                            if selection_lower == "q" {
+                                if let Some(ref mut child) = current_player {
+                                    let _ = child.kill();
+                                    let _ = child.wait();
+                                    println!("已終止播放器");
+                                    current_player = None;
+                                }
+                                break;
+                            }
+                            if selection_lower == "l" {
+                                println!("\n搜尋結果:");
+                                for (i, r) in response.results.iter().enumerate() {
+                                    let time_range = format!(
+                                        "{:02}:{:02} - {:02}:{:02}",
+                                        (r.start_time / 60.0) as u32,
+                                        (r.start_time % 60.0) as u32,
+                                        (r.end_time / 60.0) as u32,
+                                        (r.end_time % 60.0) as u32
+                                    );
+                                    // The text may contain multi-byte UTF-8 (e.g. CJK).
+                                    // Slicing at a fixed byte offset panics when it lands
+                                    // inside a character, so back up to the nearest char
+                                    // boundary at or below 50 bytes.
+                                    let text_preview = if r.text.len() > 50 {
+                                        let cut = (0..=50)
+                                            .rev()
+                                            .find(|&idx| r.text.is_char_boundary(idx))
+                                            .unwrap_or(0);
+                                        format!("{}...", &r.text[..cut])
+                                    } else {
+                                        r.text.clone()
+                                    };
+                                    println!(
+                                        "  [{}] {} | {} | {:.2} | {}",
+                                        i + 1,
+                                        time_range,
+                                        r.uuid.chars().take(8).collect::<String>(),
+                                        r.score,
+                                        text_preview
+                                    );
+                                }
+                                continue;
+                            }
+                            if let Ok(idx) = selection.parse::<usize>() {
+                                if idx > 0 && idx <= response.results.len() {
+                                    let selected = &response.results[idx - 1];
+                                    println!("\n播放：{} - {}", selected.uuid, selected.text);
+
+                                    if let Some(ref mut child) = current_player {
+                                        let _ = child.kill();
+                                        let _ = child.wait();
+                                        println!("已終止前一個播放器");
+                                    }
+
+                                    match rt.block_on(client.lookup_video(&selected.uuid)) {
+                                        Ok(info) => {
+                                            if let Some(path) = &info.file_path {
+                                                if std::path::Path::new(path).exists() {
+                                                    let start_sec =
+                                                        (selected.start_time as f64) - 2.0;
+                                                    let end_sec = (selected.end_time as f64) + 2.0;
+                                                    println!(
+                                                        "開啟：{} (從 {:.0} 到 {:.0} 秒，A-B 循環)",
+                                                        path, start_sec, end_sec
+                                                    );
+                                                    println!("提示：mpv 視窗中按 c/C 切換循環，q 離開，Space 暫停");
+                                                    current_player = Some(
+                                                        std::process::Command::new("mpv")
+                                                            .arg(format!(
+                                                                "--start={:.2}",
+                                                                start_sec.max(0.0)
+                                                            ))
+                                                            .arg(format!(
+                                                                "--ab-loop-a={:.2}",
+                                                                start_sec.max(0.0)
+                                                            ))
+                                                            .arg(format!("--ab-loop-b={:.2}", end_sec))
+                                                            .arg("--input-commands=bind c ab-loop; bind C ab-loop")
+                                                            .arg(path)
+                                                            .spawn()?
+                                                    );
+                                                } else {
+                                                    println!("錯誤：檔案不存在：{}", path);
+                                                }
+                                            }
+                                        }
+                                        Err(e) => println!("查詢失敗：{}", e),
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    Err(e) => println!("搜尋失敗：{}", e),
+                }
+            }
+            "4" => {
+                println!("\n=== 🔎 查詢影片 ===");
+                print!("輸入影片 UUID (直接 Enter 從列表選擇): ");
+                input.clear();
+                std::io::stdin().read_line(&mut input)?;
+                let uuid = input.trim();
+
+                if uuid.is_empty() {
+                    println!("載入影片列表...");
+                    match rt.block_on(client.list_videos()) {
+                        Ok(videos) => {
+                            if videos.is_empty() {
+                                println!("沒有影片");
+                                continue;
+                            }
+                            println!("\n選擇影片:");
+                            for (i, v) in videos.iter().enumerate() {
+                                println!("  [{}] {} ({})", i + 1, v.file_name, v.uuid);
+                            }
+                            print!("\n選擇編號：");
+                            input.clear();
+                            std::io::stdin().read_line(&mut input)?;
+                            if let Ok(idx) = input.trim().parse::<usize>() {
+                                if idx > 0 && idx <= videos.len() {
+                                    let selected = &videos[idx - 1];
+                                    println!("\n查詢中...");
+                                    match rt.block_on(client.lookup_video(&selected.uuid)) {
+                                        Ok(info) => {
+                                            println!("\n✓ 找到影片:");
+                                            println!("  UUID: {}", info.uuid);
+                                            if let Some(path) = &info.file_path {
+                                                println!("  路徑：{}", path);
+                                            }
+                                            if let Some(name) = &info.file_name {
+                                                println!("  名稱：{}", name);
+                                            }
+                                            if let Some(dur) = info.duration {
+                                                println!("  時長：{:.2}s", dur);
+                                            }
+                                        }
+                                        Err(e) => println!("查詢失敗：{}", e),
+                                    }
+                                }
+                            }
+                        }
+                        Err(e) => println!("取得影片列表失敗：{}", e),
+                    }
+                } else {
+                    println!("\n查詢中...");
+                    match rt.block_on(client.lookup_video(uuid)) {
+                        Ok(info) => {
+                            println!("\n✓ 找到影片:");
+                            println!("  UUID: {}", info.uuid);
+                            if let Some(path) = &info.file_path {
+                                println!("  路徑：{}", path);
+                            }
+                            if let Some(name) = &info.file_name {
+                                println!("  名稱：{}", name);
+                            }
+                            if let Some(dur) = info.duration {
+                                println!("  時長：{:.2}s", dur);
+                            }
+                        }
+                        Err(e) => println!("查詢失敗：{}", e),
+                    }
+                }
+            }
+            "3" => {
+                println!("\n=== ▶ 播放影片 ===");
+                print!("輸入影片 UUID (直接 Enter 從列表選擇): ");
+                input.clear();
+                std::io::stdin().read_line(&mut input)?;
+                let uuid = input.trim();
+
+                if uuid.is_empty() {
+                    println!("載入影片列表...");
+                    match rt.block_on(client.list_videos()) {
+                        Ok(videos) => {
+                            if videos.is_empty() {
+                                println!("沒有影片");
+                                continue;
+                            }
+                            println!("\n選擇影片:");
+                            for (i, v) in videos.iter().enumerate() {
+                                println!("  [{}] {} ({})", i + 1, v.file_name, v.uuid);
+                            }
+                            print!("\n選擇編號：");
+                            input.clear();
+                            std::io::stdin().read_line(&mut input)?;
+                            if let Ok(idx) = input.trim().parse::<usize>() {
+                                if idx > 0 && idx <= videos.len() {
+                                    let selected = &videos[idx - 1];
+                                    println!("\n播放: {}", selected.file_path);
+                                    if std::path::Path::new(&selected.file_path).exists() {
+                                        std::process::Command::new("mpv")
+                                            .arg(&selected.file_path)
+                                            .spawn()?;
+                                    } else {
+                                        println!("錯誤：檔案不存在：{}", selected.file_path);
+                                    }
+                                }
+                            }
+                        }
+                        Err(e) => println!("取得影片列表失敗：{}", e),
+                    }
+                } else {
+                    match rt.block_on(client.lookup_video(uuid)) {
+                        Ok(info) => {
+                            if let Some(path) = &info.file_path {
+                                println!("開啟: {}", path);
+                                if std::path::Path::new(path).exists() {
+                                    std::process::Command::new("mpv").arg(path).spawn()?;
+                                } else {
+                                    println!("錯誤：檔案不存在：{}", path);
+                                }
+                            }
+                        }
+                        Err(e) => println!("查詢失敗：{}", e),
+                    }
+                }
+            }
+            "q" | "Q" => {
+                println!("\n👋 再見!");
+                break;
+            }
+            _ => {
+                println!("無效選項");
+            }
+        }
+    }
+
    Ok(())
 }

@@ -523,17 +1251,37 @@ fn main() -> Result<()> {
    let should_download = args.iter().any(|a| a == "-d" || a == "--download");
    let show_selector = args.iter().any(|a| a == "-s" || a == "--selector");
    let test_api_mode = args.iter().any(|a| a == "-t" || a == "--test-api");
+    let local_mode = args.iter().any(|a| a == "-l" || a == "--local");
+    let online_mode = args.iter().any(|a| a == "-o" || a == "--online");
+
+    // Get external player choice
+    let external_player = args
+        .iter()
+        .position(|a| a == "-p" || a == "--player")
+        .and_then(|i| args.get(i + 1))
+        .cloned()
+        .unwrap_or_else(|| "vlc".to_string());

    // API Testing Mode
    if test_api_mode {
        return run_api_test_mode();
    }

-    // If --selector flag is provided, show video selector
+    // If --selector flag is provided, show video selector (online mode)
    if show_selector {
        return run_selector();
    }

+    // If --online or -o is provided, run online mode
+    if online_mode {
+        return run_online_mode();
+    }
+
+    // If --local or -l is provided, run local mode with external player
+    if local_mode {
+        return run_local_mode(&external_player);
+    }
+
    let video_path = if args.len() < 2 || (should_download && args.len() < 3) {
        println!("Video Player\n============\nEnter video path or YouTube URL:");
        let mut input = String::new();
@@ -4,6 +4,7 @@ use futures_util::StreamExt;
 use std::path::Path;
 use std::str;
 use std::sync::{Arc, Mutex};
+use tracing::{info, warn};

 use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
 use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
@@ -1813,6 +1814,64 @@ async fn main() -> Result<()> {
                }
            };

+            // Read Pose JSON (optional)
+            let pose_path = format!("{}.pose.json", uuid);
+            let pose_result = match std::fs::read_to_string(&pose_path) {
+                Ok(pose_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::pose::PoseResult,
+                >(&pose_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded Pose: {} frames", result.frames.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
+                        momentry_core::core::processor::pose::PoseResult {
+                            frame_count: 0,
+                            fps: 0.0,
+                            frames: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: Pose file not found. Skipping Pose.");
+                    momentry_core::core::processor::pose::PoseResult {
+                        frame_count: 0,
+                        fps: 0.0,
+                        frames: vec![],
+                    }
+                }
+            };
+
+            // Read ASRX JSON (optional)
+            let asrx_path = format!("{}.asrx.json", uuid);
+            let asrx_result = match std::fs::read_to_string(&asrx_path) {
+                Ok(asrx_json) => match serde_json::from_str::<
+                    momentry_core::core::processor::asrx::AsrxResult,
+                >(&asrx_json)
+                {
+                    Ok(result) => {
+                        println!("Loaded ASRX: {} segments", result.segments.len());
+                        result
+                    }
+                    Err(e) => {
+                        println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
+                        momentry_core::core::processor::asrx::AsrxResult {
+                            language: None,
+                            segments: vec![],
+                        }
+                    }
+                },
+                Err(_) => {
+                    println!("Warning: ASRX file not found. Skipping ASRX.");
+                    momentry_core::core::processor::asrx::AsrxResult {
+                        language: None,
+                        segments: vec![],
+                    }
+                }
+            };
+
            // ========== Store pre_chunks (from ASR, CUT) ==========

            println!("\nStoring pre_chunks...");
@@ -1930,12 +1989,21 @@ async fn main() -> Result<()> {
                face_by_frame.insert(frame.frame, frame.clone());
            }

-            // Store frames (merge data from YOLO, OCR, Face)
+            let mut pose_by_frame: std::collections::HashMap<
+                u64,
+                momentry_core::core::processor::pose::PoseFrame,
+            > = std::collections::HashMap::new();
+            for frame in &pose_result.frames {
+                pose_by_frame.insert(frame.frame, frame.clone());
+            }
+
+            // Store frames (merge data from YOLO, OCR, Face, Pose)
            let mut all_frames: Vec<u64> = frame_data
                .keys()
                .cloned()
                .chain(ocr_by_frame.keys().cloned())
                .chain(face_by_frame.keys().cloned())
+                .chain(pose_by_frame.keys().cloned())
                .collect();
            all_frames.sort();
            all_frames.dedup();
@@ -1945,6 +2013,7 @@ async fn main() -> Result<()> {
                let yolo_frame = frame_data.get(frame_num);
                let ocr_frame = ocr_by_frame.get(frame_num);
                let face_frame = face_by_frame.get(frame_num);
+                let pose_frame = pose_by_frame.get(frame_num);

                let frame = momentry_core::core::db::postgres_db::Frame {
                    id: 0,
@@ -1955,6 +2024,7 @@ async fn main() -> Result<()> {
                    yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
                    ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
                    face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
+                    pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
                    frame_path: None,
                    created_at: String::new(),
                };
@@ -1968,10 +2038,30 @@ async fn main() -> Result<()> {
            println!("\nCreating chunks...");

            // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
+            // Merge ASRX speaker_id by time overlap
            let mut sentence_chunks = Vec::new();
            for (i, seg) in asr_result.segments.iter().enumerate() {
                let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
-                let chunk = Chunk::from_seconds(
+
+                // Pick the ASRX segment that overlaps this ASR segment the most.
+                // Strict comparisons are used so that a diarization segment which only
+                // touches the sentence boundary (zero overlap) is never selected; with
+                // contiguous ASRX segments the previous first-match lookup attributed a
+                // sentence starting exactly on a boundary to the preceding speaker.
+                let speaker_id = asrx_result
+                    .segments
+                    .iter()
+                    .filter(|ax| ax.start < seg.end && ax.end > seg.start)
+                    .max_by(|a, b| {
+                        let overlap_a = a.end.min(seg.end) - a.start.max(seg.start);
+                        let overlap_b = b.end.min(seg.end) - b.start.max(seg.start);
+                        // Incomparable values (NaN timestamps) are treated as ties.
+                        overlap_a
+                            .partial_cmp(&overlap_b)
+                            .unwrap_or(std::cmp::Ordering::Equal)
+                    })
+                    .and_then(|ax| ax.speaker_id.clone());
+
+                let content = if let Some(ref sid) = speaker_id {
+                    serde_json::json!({
+                        "text": seg.text,
+                        "speaker_id": sid,
+                    })
+                } else {
+                    serde_json::json!({
+                        "text": seg.text,
+                    })
+                };
+
+                let mut chunk = Chunk::from_seconds(
                    file_id as i32,
                    uuid.clone(),
                    i as u32,
@@ -1980,15 +2070,39 @@ async fn main() -> Result<()> {
                    seg.start,
                    seg.end,
                    fps,
-                    serde_json::json!({
-                        "text": seg.text,
-                    }),
+                    content,
                )
                .with_text_content(seg.text.clone())
                .with_pre_chunk_ids(vec![pre_chunk_id as i32]);
+
+                if speaker_id.is_some() {
+                    chunk = chunk.with_metadata(serde_json::json!({
+                        "language": asr_result.language,
+                        "language_probability": asr_result.language_probability,
+                        "speaker_matched": true,
+                    }));
+                }
+
                sentence_chunks.push(chunk);
            }

+            if !asrx_result.segments.is_empty() {
+                let matched = sentence_chunks
+                    .iter()
+                    .filter(|c| {
+                        c.content
+                            .get("speaker_id")
+                            .and_then(|v| v.as_str())
+                            .is_some()
+                    })
+                    .count();
+                println!(
+                    "  ASRX merge: {}/{} sentence chunks matched to speakers",
+                    matched,
+                    sentence_chunks.len()
+                );
+            }
+
            // Rule 1: CUT chunks
            let mut cut_chunks = Vec::new();
            for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2405,6 +2519,20 @@ async fn main() -> Result<()> {
            Ok(())
        }
        Commands::Server { host, port } => {
+            // Start Auto-Ingest Watcher
+            info!("Starting Auto-Ingest Watcher...");
+            let _watcher = match momentry_core::watcher::run_watcher().await {
+                Ok(w) => {
+                    info!("Auto-Ingest Watcher started successfully.");
+                    Some(w)
+                }
+                Err(e) => {
+                    warn!("Failed to start Auto-Ingest Watcher: {}", e);
+                    None
+                }
+            };
+            // NOTE: `run_watcher()` returns `Result<()>` and spawns its poller as a detached
+            // tokio task, so `_watcher` holds no handle and does not control the poller's lifetime.
+
            let port = port.unwrap_or_else(|| *momentry_core::core::config::SERVER_PORT);
            momentry_core::api::start_server(&host, port).await?;
            Ok(())
@@ -2461,13 +2589,13 @@ async fn main() -> Result<()> {
        Commands::Thumbnails { uuid, count } => {
            let db = PostgresDb::init().await?;

-            let videos = if let Some(ref uuid) = uuid {
+            let videos = if let Some(ref u) = uuid {
                vec![db
-                    .get_video_by_uuid(uuid)
+                    .get_video_by_uuid(u)
                    .await?
-                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?]
+                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2484,12 +2612,10 @@ async fn main() -> Result<()> {
                        println!("  Generated {} thumbnails", result.count);
                    }
                    Err(e) => {
-                        println!("  Error: {}", e);
+                        eprintln!("  Failed to generate thumbnails: {}", e);
                    }
                }
            }
-
-            println!("\nThumbnails generated successfully!");
            Ok(())
        }
        Commands::Status { uuid } => {
@@ -2501,7 +2627,7 @@ async fn main() -> Result<()> {
                    .await?
                    .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
            } else {
-                db.list_videos().await?
+                db.list_videos(10000, 0).await?.0
            };

            println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");
@@ -2513,6 +2639,22 @@ async fn main() -> Result<()> {
                "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║",
                "Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec"
            );
+            println!(
+                "╠{:33}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╣",
+                str::repeat("─", 32),
+                str::repeat("─", 8),
+                str::repeat("─", 8),
+                str::repeat("─", 8),
+                str::repeat("─", 8),
+                str::repeat("─", 8),
+                str::repeat("─", 8),
+                str::repeat("─", 8)
+            );
+            println!("╠══════════════════════════════════════════════════════════════════════════════════╣");
+            println!(
+                "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║",
+                "Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec"
+            );
            println!(
                "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║",
                "", "Video", "JSON", "Chunk", "Chunk", "Chunk", "Chunk", "Chunk"
@@ -1,3 +1,3 @@
 pub mod watcher;

-pub use watcher::{watch_directories, WatcherConfig};
+pub use watcher::{run_watcher, WatcherConfig};
@@ -1,8 +1,11 @@
 use anyhow::Result;
-use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
 use std::path::Path;
 use std::sync::Arc;
-use tokio::sync::mpsc;
+use tokio::time;
+use tracing::{error, info, warn};
+
+use crate::core::db::{Database, PostgresDb};
+use crate::core::ingestion::IngestionService;

 pub struct WatcherConfig {
    pub directories: Vec<String>,
@@ -11,31 +14,94 @@ pub struct WatcherConfig {

 impl Default for WatcherConfig {
+    /// Builds the default watcher configuration: one watch directory read from the
+    /// `MOMENTRY_SFTP_ROOT` environment variable (falling back to a hard-coded demo path
+    /// when the variable cannot be read) and a poll interval of 60 000 milliseconds.
    fn default() -> Self {
+        // Default to SFTP demo directory if not specified
+        let default_dir = std::env::var("MOMENTRY_SFTP_ROOT")
+            .unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo/".to_string());
+
        Self {
-            directories: vec![],
-            poll_interval_ms: 5000,
+            directories: vec![default_dir],
+            poll_interval_ms: 60000, // 60 seconds polling interval
        }
    }
 }

-pub async fn watch_directories(config: WatcherConfig, tx: mpsc::Sender<String>) -> Result<()> {
-    // TODO: Implement directory watcher
-    //
-    // Options:
-    // 1. Use notify crate for file system events
-    // 2. Use polling as fallback
-    //
-    // When new video file is detected:
-    // - Send job to Redis queue
-    // - Trigger registration process
+/// Starts the auto-ingest poller as a background task.
+///
+/// Builds a `WatcherConfig` from its `Default` impl, initialises a `PostgresDb`-backed
+/// `IngestionService`, then spawns a tokio task that runs `scan_and_ingest` once right
+/// away and again every `poll_interval_ms` milliseconds. The task's join handle is
+/// dropped, so the poller is detached and this function returns as soon as it is spawned.
+///
+/// # Errors
+///
+/// Returns an error when no directories are configured or when `PostgresDb::init()` fails.
+pub async fn run_watcher() -> Result<()> {
+    let config = WatcherConfig::default();
+    let dirs = config.directories.clone();

-    println!("Watching directories: {:?}", config.directories);
-
-    for dir in &config.directories {
-        if Path::new(dir).exists() {
-            println!("Directory exists: {}", dir);
-        }
+    if dirs.is_empty() {
+        warn!("No directories configured for watching.");
+        return Err(anyhow::anyhow!("No watch directories"));
    }

+    info!("Initializing Database for Watcher...");
+    // PostgresDb::init() provides the database handle owned by the ingestion service.
+    let db = PostgresDb::init().await?;
+    let service = Arc::new(IngestionService::new(db));
+
+    info!("Starting Ingestion Poller for: {:?}", dirs);
+
+    // Spawn background task; the JoinHandle is intentionally dropped (detached task).
+    tokio::spawn(async move {
+        let mut interval = time::interval(time::Duration::from_millis(config.poll_interval_ms));
+        // A scan that outlasts the period pushes the next scan back instead of
+        // triggering a burst of catch-up scans.
+        interval.set_missed_tick_behavior(time::MissedTickBehavior::Delay);
+
+        loop {
+            // The first tick of a tokio interval completes immediately, which already
+            // provides the startup scan for existing files. An additional explicit scan
+            // before the loop would run the scan twice back-to-back on startup.
+            interval.tick().await;
+            scan_and_ingest(&dirs, &service).await;
+        }
+    });
+
    Ok(())
 }
+
+/// Scans each configured directory (top level only) and hands every video file to the
+/// ingestion service.
+///
+/// A file counts as a video when its extension is `mp4`, `mov` or `mkv`, compared without
+/// regard to ASCII case. Problems with a single directory or file are logged and skipped
+/// so that one bad entry never aborts the rest of the scan.
+///
+/// NOTE(review): the `std::fs` calls below are blocking and run on the async executor.
+async fn scan_and_ingest(directories: &[String], service: &Arc<IngestionService>) {
+    // Extensions accepted as video files.
+    const ALLOWED_EXTENSIONS: [&str; 3] = ["mp4", "mov", "mkv"];
+
+    info!("Scanning directories for new videos...");
+
+    for dir in directories {
+        let path = Path::new(dir);
+        if !path.exists() {
+            warn!("Directory does not exist, skipping: {}", dir);
+            continue;
+        }
+
+        let entries = match std::fs::read_dir(path) {
+            Ok(entries) => entries,
+            Err(e) => {
+                // Surface the failure: a permission error or a path that is not a
+                // directory would otherwise make the poller silently do nothing.
+                warn!("Failed to read directory {}: {}", dir, e);
+                continue;
+            }
+        };
+
+        for entry in entries.flatten() {
+            let file_path = entry.path();
+            if !file_path.is_file() {
+                continue;
+            }
+
+            let is_video = file_path
+                .extension()
+                .and_then(|e| e.to_str())
+                .map_or(false, |ext| {
+                    ALLOWED_EXTENSIONS
+                        .iter()
+                        .any(|allowed| ext.eq_ignore_ascii_case(allowed))
+                });
+            if !is_video {
+                continue;
+            }
+
+            let p_str = match file_path.to_str() {
+                Some(s) => s,
+                None => {
+                    // `ingest` takes a `&str`, so a non-UTF-8 path cannot be passed on.
+                    warn!(
+                        "Skipping video with non-UTF-8 path: {}",
+                        file_path.display()
+                    );
+                    continue;
+                }
+            };
+
+            // Try to ingest; `Ok(None)` is treated as "already registered".
+            match service.ingest(p_str).await {
+                Ok(Some(uuid)) => {
+                    info!("Auto-registered: {} -> {}", file_path.display(), uuid);
+                }
+                Ok(None) => {}
+                Err(e) => {
+                    error!("Failed to ingest {}: {}", file_path.display(), e);
+                }
+            }
+        }
+    }
+}
@@ -5,6 +5,7 @@ use std::time::Duration;
 use tokio::time::sleep;
 use tracing::{error, info, warn};

+use crate::core::chunk::{rule1_ingest, rule3_ingest};
 use crate::core::db::{
    MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus,
 };
@@ -210,12 +211,58 @@ impl JobWorker {
            .map(|r| r.processor_type.as_str().to_string())
            .collect();

+        // Check prerequisites for Rule 1 Chunking BEFORE moving arrays
+        let has_asr = completed_processors.iter().any(|p| p == "asr");
+        let has_asrx = completed_processors.iter().any(|p| p == "asrx");
+        let has_cut = completed_processors.iter().any(|p| p == "cut");
+
        // Update processor arrays in job record
        self.db
            .update_job_processors_arrays(job_id, completed_processors, failed_processors)
            .await?;

        if all_completed && !any_failed {
+            // 🚀 P1 Trigger: Rule 1 Chunking
+            if has_asr && has_asrx {
+                info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
+                let db_clone = self.db.clone();
+                let uuid_clone = uuid.to_string();
+                tokio::spawn(async move {
+                    match db_clone.get_video_by_uuid(&uuid_clone).await {
+                        Ok(Some(video)) => {
+                            let fps = video.fps;
+                            match rule1_ingest::ingest_rule1(db_clone.pool(), &uuid_clone, fps)
+                                .await
+                            {
+                                Ok(count) => info!(
+                                    "✅ Rule 1 Ingestion completed: {} chunks inserted.",
+                                    count
+                                ),
+                                Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
+                            }
+                        }
+                        Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
+                        Err(e) => error!("Failed to get video info for chunking: {}", e),
+                    }
+                });
+            }
+
+            // 🚀 P1 Trigger: Rule 3 Scene Chunking
+            if has_cut && has_asr {
+                info!("📝 Prerequisites met for Rule 3 Scene Chunking. Starting ingestion...");
+                let db_clone = self.db.clone();
+                let uuid_clone = uuid.to_string();
+                tokio::spawn(async move {
+                    match rule3_ingest::ingest_rule3(db_clone.pool(), &uuid_clone).await {
+                        Ok(count) => info!(
+                            "✅ Rule 3 Scene Ingestion completed: {} scenes processed.",
+                            count
+                        ),
+                        Err(e) => error!("❌ Rule 3 Scene Ingestion failed: {}", e),
+                    }
+                });
+            }
+
            self.db
                .update_job_status(job_id, MonitorJobStatus::Completed)
                .await?;
@@ -16,6 +16,7 @@ use crate::core::processor::cut::CutResult;
 use crate::core::processor::face::FaceResult;
 use crate::core::processor::ocr::OcrResult;
 use crate::core::processor::pose::PoseResult;
+use crate::core::processor::visual_chunk::VisualChunkResult;
 use crate::core::processor::yolo::YoloResult;

 #[derive(Debug, Clone)]
@@ -302,6 +303,24 @@ impl ProcessorPool {
                }
                Ok(serde_json::to_value(result)?)
            }
+            ProcessorType::VisualChunk => {
+                let result = processor::process_visual_chunk_advanced(
+                    video_path,
+                    output_path.to_str().unwrap(),
+                    uuid,
+                )
+                .await?;
+                // Store VisualChunk chunks in database
+                tracing::info!(
+                    "VisualChunk completed, storing {} chunks for {}",
+                    result.chunk_count,
+                    job.uuid
+                );
+                if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
+                    tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
+                }
+                Ok(serde_json::to_value(result)?)
+            }
        }
    }

@@ -605,6 +624,13 @@ impl ProcessorPool {
            // Override chunk_id to include processor prefix for uniqueness
            chunk.chunk_id = format!("trace_yolo_{:04}", i);

+            // Populate text_content for BM25 search
+            let object_names: Vec<String> =
+                frame.objects.iter().map(|o| o.class_name.clone()).collect();
+            if !object_names.is_empty() {
+                chunk = chunk.with_text_content(object_names.join(" "));
+            }
+
            match db.store_chunk(&chunk).await {
                Ok(_) => {
                    tracing::info!(
@@ -660,6 +686,12 @@ impl ProcessorPool {
            // Override chunk_id to include processor prefix for uniqueness
            chunk.chunk_id = format!("trace_ocr_{:04}", i);

+            // Populate text_content for BM25 search
+            let texts: Vec<String> = frame.texts.iter().map(|t| t.text.clone()).collect();
+            if !texts.is_empty() {
+                chunk = chunk.with_text_content(texts.join(" "));
+            }
+
            match db.store_chunk(&chunk).await {
                Ok(_) => {
                    tracing::info!(
@@ -715,6 +747,16 @@ impl ProcessorPool {
            // Override chunk_id to include processor prefix for uniqueness
            chunk.chunk_id = format!("trace_face_{:04}", i);

+            // Populate text_content for BM25 search (face IDs)
+            let face_ids: Vec<String> = frame
+                .faces
+                .iter()
+                .filter_map(|f| f.face_id.clone())
+                .collect();
+            if !face_ids.is_empty() {
+                chunk = chunk.with_text_content(face_ids.join(" "));
+            }
+
            match db.store_chunk(&chunk).await {
                Ok(_) => {
                    tracing::info!(
@@ -770,6 +812,16 @@ impl ProcessorPool {
            // Override chunk_id to include processor prefix for uniqueness
            chunk.chunk_id = format!("trace_pose_{:04}", i);

+            // Populate text_content for BM25 search (person count indicator): emit the
+            // token "person" three times per detected person, capped at 10 persons.
+            // The tokens are joined with single spaces; repeating the bare phrase would
+            // glue consecutive copies together ("... personperson ...") and create
+            // tokens that never match the term "person".
+            let person_count = frame.persons.len();
+            if person_count > 0 {
+                let text = vec!["person"; 3 * person_count.min(10)].join(" ");
+                chunk = chunk.with_text_content(text);
+            }
+
            match db.store_chunk(&chunk).await {
                Ok(_) => {
                    tracing::info!(
@@ -825,6 +877,16 @@ impl ProcessorPool {
            // Override chunk_id to include processor prefix for uniqueness
            chunk.chunk_id = format!("trace_asrx_{:04}", i);

+            // Populate text_content for BM25 search (already has text)
+            chunk = chunk.with_text_content(segment.text.clone());
+
+            // Also store speaker_id in content
+            chunk.content = serde_json::json!({
+                "text": segment.text,
+                "speaker_id": segment.speaker_id,
+                "timestamp": segment.start,
+            });
+
            match db.store_chunk(&chunk).await {
                Ok(_) => {
                    tracing::info!("Stored ASRX chunk {} for video {}", i, uuid);
@@ -837,6 +899,24 @@ impl ProcessorPool {
        Ok(())
    }

+    /// Persists every chunk of a VisualChunk result through `db.store_chunk`.
+    ///
+    /// Storage is best-effort: a failure for one chunk is logged and the loop moves on
+    /// to the next chunk, so this function always returns `Ok(())`.
+    pub async fn store_visual_chunk_chunks(
+        db: &PostgresDb,
+        uuid: &str,
+        visual_chunk_result: &VisualChunkResult,
+    ) -> Result<()> {
+        for (index, item) in visual_chunk_result.chunks.iter().enumerate() {
+            let outcome = db.store_chunk(item).await;
+            if let Err(err) = outcome {
+                tracing::error!("Failed to store VisualChunk chunk {}: {}", index, err);
+            } else {
+                tracing::info!("Stored VisualChunk chunk {} for video {}", index, uuid);
+            }
+        }
+        Ok(())
+    }
+
    pub async fn get_running_count(&self) -> usize {
        *self.running_count.read().await
    }