diff --git a/.env b/.env index 7f32cc0..200c460 100644 --- a/.env +++ b/.env @@ -1,5 +1,10 @@ DB_MAX_CONNECTIONS=50 DB_ACQUIRE_TIMEOUT=30 +DATABASE_SCHEMA=dev QDRANT_URL=http://127.0.0.1:6333 QDRANT_API_KEY=Test3200Test3200Test3200 -QDRANT_COLLECTION=momentry_rule1 \ No newline at end of file +QDRANT_COLLECTION=momentry_rule1 +MONGODB_URL=mongodb://localhost:27017 +MONGODB_CACHE_ENABLED=false +MOMENTRY_REDIS_PREFIX=momentry: +REDIS_URL=redis://:accusys@localhost:6379 \ No newline at end of file diff --git a/.env.development b/.env.development index 1709823..65bd0e5 100644 --- a/.env.development +++ b/.env.development @@ -14,25 +14,27 @@ MOMENTRY_MAX_CONCURRENT=1 MOMENTRY_POLL_INTERVAL=10 MOMENTRY_WORKER_BATCH_SIZE=5 -# Database (same as production, but could use separate dev database) +# Database (PostgreSQL) - Schema isolation DATABASE_URL=postgres://accusys@localhost:5432/momentry +DATABASE_SCHEMA=dev -# MongoDB +# MongoDB - Database isolation MONGODB_URL=mongodb://localhost:27017 -MONGODB_DATABASE=momentry +MONGODB_DATABASE=momentry_dev -# Redis +# Redis (already isolated via prefix) REDIS_URL=redis://:accusys@localhost:6379 REDIS_PASSWORD=accusys -# Qdrant Vector Database (same as production) +# Qdrant Vector Database - Collection isolation QDRANT_URL=http://localhost:6333 QDRANT_API_KEY=Test3200Test3200Test3200 -QDRANT_COLLECTION=momentry_rule1 +QDRANT_COLLECTION=momentry_dev_rule1 # Paths MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev MOMENTRY_BACKUP_DIR=/Users/accusys/momentry/backup/momentry_dev +MOMENTRY_SFTP_ROOT=/Users/accusys/momentry/var/sftpgo/data/demo/ # Python (for processing scripts) MOMENTRY_PYTHON_PATH=/opt/homebrew/bin/python3.11 @@ -57,4 +59,12 @@ MONGODB_CACHE_TTL_SEARCH=300 MONGODB_CACHE_TTL_HYBRID_SEARCH=600 MONGODB_CACHE_TTL_VIDEO_META=3600 REDIS_CACHE_TTL_HEALTH=30 -REDIS_CACHE_TTL_VIDEO_META=3600 \ No newline at end of file +REDIS_CACHE_TTL_VIDEO_META=3600 +# 同義詞配置文件(可選) +# 取消註釋並設置為您的同義詞JSON檔案路徑以啟用同義詞擴展 +# MOMENTRY_SYNONYM_FILE=/Users/accusys/momentry_core_0.1/docs/examples/custom_synonyms.json +# +# 多個同義詞檔案(逗號分隔),會覆蓋 MOMENTRY_SYNONYM_FILE +# MOMENTRY_SYNONYM_FILES=/path/to/first.json,/path/to/second.json +# +# 示例檔案:docs/examples/custom_synonyms.json \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index fb61024..c2886a9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -182,6 +182,15 @@ src/ ### Server - `MOMENTRY_SERVER_PORT` - API server port (default: `3002` for production, `3003` for playground) - `MOMENTRY_REDIS_PREFIX` - Redis key prefix (default: `momentry:` for production, `momentry_dev:` for playground) +- `MOMENTRY_API_KEY` - API key for Player online mode testing + +### Testing API Key +```bash +export MOMENTRY_API_KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" + +# Test Player online mode +cargo run --features player --bin momentry_player -- -o +``` ### Database - `DATABASE_URL` - PostgreSQL (default: `postgres://accusys@localhost:5432/momentry`) @@ -201,6 +210,10 @@ src/ - `MOMENTRY_CUT_TIMEOUT` - CUT timeout in seconds (default: 3600) - `MOMENTRY_DEFAULT_TIMEOUT` - Default timeout (default: 7200) +### Synonym Expansion +- `MOMENTRY_SYNONYM_FILES` - Comma-separated paths to synonym JSON files (e.g., `data/english_synonyms.json,data/llm_synonyms.json`) +- `MOMENTRY_SYNONYM_FILE` - Single synonym JSON file path (deprecated, use above) + ### Logging - `RUST_LOG` or `MOMENTRY_LOG_LEVEL` - Log level (default: `info`) @@ -213,6 +226,23 @@ src/ - PythonExecutor provides unified script execution with timeout support - Redis 1.0.x for improved performance +### LLM Synonym Generation + +Generate synonym database using llama.cpp (Gemma4): + +```bash +# Generate full database (162 entries, ~5 minutes) +python3 scripts/generate_synonyms_llamacpp.py + +# Quick test +python3 scripts/generate_synonyms_llamacpp.py --test + +# Resume from existing file +python3 scripts/generate_synonyms_llamacpp.py --resume + +# Output: data/llm_synonyms.json (27 Chinese + 135 English words) +``` + ## Task Management ### 使用 todowrite 追蹤任務 diff --git a/Cargo.lock b/Cargo.lock index a59a813..a660591 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -86,21 +86,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anstream" -version = "0.6.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" -dependencies = [ - "anstyle", - "anstyle-parse 0.2.7", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - [[package]] name = "anstream" version = "1.0.0" @@ -108,7 +93,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", - "anstyle-parse 1.0.0", + "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -122,15 +107,6 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - [[package]] name = "anstyle-parse" version = "1.0.0" @@ -177,9 +153,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" +checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" dependencies = [ "rustversion", ] @@ -196,7 +172,7 @@ version = "3.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" dependencies = [ - "event-listener", + "event-listener 5.4.1", "event-listener-strategy", "pin-project-lite", ] @@ -560,7 +536,7 @@ version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream 1.0.0", + "anstream", "anstyle", "clap_lex", "strsim 0.11.1", @@ -1054,9 +1030,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log", "regex", @@ -1064,11 +1040,11 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ - "anstream 0.6.21", + "anstream", "anstyle", "env_filter", "jiff", @@ -1102,6 +1078,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.4.1" @@ -1119,7 +1101,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener", + "event-listener 5.4.1", "pin-project-lite", ] @@ -1445,6 +1427,16 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1467,6 +1459,15 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "hashlink" version = "0.10.0" @@ -1481,6 +1482,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -1926,14 +1930,15 @@ dependencies = [ [[package]] name = "ipconfig" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222" dependencies = [ - "socket2 0.5.10", + "socket2 0.6.3", "widestring", - "windows-sys 0.48.0", - "winreg", + "windows-registry", + "windows-result", + "windows-sys 0.61.2", ] [[package]] @@ -1944,9 +1949,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -2122,9 +2127,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" dependencies = [ "bitflags 2.11.0", "libc", @@ -2256,6 +2261,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2292,16 +2303,16 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" +checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" dependencies = [ "async-lock", "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", "equivalent", - "event-listener", + "event-listener 5.4.1", "futures-util", "parking_lot", "portable-atomic", @@ -2336,18 +2347,21 @@ dependencies = [ "mongodb", "notify", "once_cell", + "pgvector", "qdrant-client", "ratatui", "redis", "reqwest", + "sdl2", "serde", "serde_json", "sha2", - "sqlx", + "sqlx 0.8.6", "subtle", "thiserror 1.0.69", "tokio", "tower 0.4.13", + "tower-http 0.5.2", "tracing", "tracing-subscriber", "utoipa", @@ -2436,6 +2450,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "notify" version = "6.1.1" @@ -2492,9 +2516,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -2669,6 +2693,15 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pgvector" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed92bf218dbe236609222dca0345767408ee7d5c93876c7fe09fa9b03f7249f" +dependencies = [ + "sqlx 0.7.4", +] + [[package]] name = "phf" version = "0.13.1" @@ -3130,12 +3163,13 @@ dependencies = [ [[package]] name = "redis" -version = "1.0.5" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b36964393906eb775b89b25b05b7b95685b8dd14062f1663a31ff93e75c452e5" +checksum = "d76e41a79ae5cbb41257d84cf4cf0db0bb5a95b11bf05c62c351de4fe748620d" dependencies = [ "arc-swap", "arcstr", + "async-lock", "backon", "bytes", "cfg-if", @@ -3240,7 +3274,7 @@ dependencies = [ "tokio-rustls 0.26.4", "tokio-util", "tower 0.5.3", - "tower-http", + "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", @@ -3332,9 +3366,9 @@ dependencies = [ [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -3412,7 +3446,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.9", + "rustls-webpki 0.103.10", "subtle", "zeroize", ] @@ -3469,9 +3503,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.9" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -3524,6 +3558,29 @@ dependencies = [ "untrusted", ] +[[package]] +name = "sdl2" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7959277b623f1fb9e04aea73686c3ca52f01b2145f8ea16f4ff30d8b7623b1a" +dependencies = [ + "bitflags 1.3.2", + "lazy_static", + "libc", + "sdl2-sys", +] + +[[package]] +name = "sdl2-sys" +version = "0.35.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3586be2cf6c0a8099a79a12b4084357aa9b3e0b0d7980e3b67aaf7a9d55f9f0" +dependencies = [ + "cfg-if", + "libc", + "version-compare", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -3773,9 +3830,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "siphasher" @@ -3847,19 +3904,77 @@ dependencies = [ "der", ] +[[package]] +name = "sqlformat" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790" +dependencies = [ + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa" +dependencies = [ + "sqlx-core 0.7.4", + "sqlx-macros 0.7.4", + "sqlx-postgres 0.7.4", +] + [[package]] name = "sqlx" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" dependencies = [ - "sqlx-core", - "sqlx-macros", + "sqlx-core 0.8.6", + "sqlx-macros 0.8.6", "sqlx-mysql", - "sqlx-postgres", + "sqlx-postgres 0.8.6", "sqlx-sqlite", ] +[[package]] +name = "sqlx-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener 2.5.3", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashlink 0.8.4", + "hex", + "indexmap 2.13.0", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror 1.0.69", + "tracing", + "url", +] + [[package]] name = "sqlx-core" version = "0.8.6" @@ -3872,13 +3987,13 @@ dependencies = [ "crc", "crossbeam-queue", "either", - "event-listener", + "event-listener 5.4.1", "futures-core", "futures-intrusive", "futures-io", "futures-util", "hashbrown 0.15.5", - "hashlink", + "hashlink 0.10.0", "indexmap 2.13.0", "log", "memchr", @@ -3893,6 +4008,20 @@ dependencies = [ "tokio-stream", "tracing", "url", + "uuid", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core 0.7.4", + "sqlx-macros-core 0.7.4", + "syn 1.0.109", ] [[package]] @@ -3903,11 +4032,34 @@ checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" dependencies = [ "proc-macro2", "quote", - "sqlx-core", - "sqlx-macros-core", + "sqlx-core 0.8.6", + "sqlx-macros-core 0.8.6", "syn 2.0.117", ] +[[package]] +name = "sqlx-macros-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8" +dependencies = [ + "dotenvy", + "either", + "heck 0.4.1", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core 0.7.4", + "sqlx-postgres 0.7.4", + "syn 1.0.109", + "tempfile", + "url", +] + [[package]] name = "sqlx-macros-core" version = "0.8.6" @@ -3924,9 +4076,9 @@ dependencies = [ "serde", "serde_json", "sha2", - "sqlx-core", + "sqlx-core 0.8.6", "sqlx-mysql", - "sqlx-postgres", + "sqlx-postgres 0.8.6", "sqlx-sqlite", "syn 2.0.117", "tokio", @@ -3969,10 +4121,49 @@ dependencies = [ "sha1", "sha2", "smallvec", - "sqlx-core", + "sqlx-core 0.8.6", "stringprep", "thiserror 2.0.18", "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e" +dependencies = [ + "atoi", + "base64 0.21.7", + "bitflags 2.11.0", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core 0.7.4", + "stringprep", + "thiserror 1.0.69", + "tracing", "whoami", ] @@ -4007,10 +4198,11 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "sqlx-core", + "sqlx-core 0.8.6", "stringprep", "thiserror 2.0.18", "tracing", + "uuid", "whoami", ] @@ -4033,10 +4225,11 @@ dependencies = [ "percent-encoding", "serde", "serde_urlencoded", - "sqlx-core", + "sqlx-core 0.8.6", "thiserror 2.0.18", "tracing", "url", + "uuid", ] [[package]] @@ -4410,32 +4603,32 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "1.0.1+spec-1.1.0" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" +checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.25.5+spec-1.1.0" +version = "0.25.8+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1" +checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c" dependencies = [ "indexmap 2.13.0", - "toml_datetime 1.0.1+spec-1.1.0", + "toml_datetime 1.1.0+spec-1.1.0", "toml_parser", - "winnow 1.0.0", + "winnow 1.0.1", ] [[package]] name = "toml_parser" -version = "1.0.10+spec-1.1.0" +version = "1.1.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" +checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011" dependencies = [ - "winnow 1.0.0", + "winnow 1.0.1", ] [[package]] @@ -4514,6 +4707,22 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags 2.11.0", + "bytes", + "http", + "http-body", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-http" version = "0.6.8" @@ -4705,9 +4914,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-truncate" @@ -4732,6 +4941,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "universal-hash" version = "0.5.1" @@ -4824,9 +5039,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -4846,6 +5061,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version-compare" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29" + [[package]] name = "version_check" version = "0.9.5" @@ -5404,23 +5625,13 @@ checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" [[package]] name = "winnow" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "wit-bindgen" version = "0.51.0" @@ -5555,18 +5766,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index dd6b25c..21c4b56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,15 +47,17 @@ moka = { version = "0.12", features = ["future"] } # Database redis = { version = "1.0", features = ["tokio-comp", "connection-manager"] } -sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono"] } +sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono", "uuid"] } mongodb = { version = "2", features = ["tokio-runtime"] } bson = { version = "2", features = ["chrono-0_4"] } qdrant-client = "1.7" reqwest = { version = "0.12", features = ["json"] } +pgvector = { version = "0.3", features = ["sqlx"] } # HTTP Server axum = { version = "0.7", features = ["multipart"] } tower = "0.4" +tower-http = { version = "0.5", features = ["cors"] } # API Documentation utoipa = { version = "4", features = ["axum_extras", "chrono", "uuid"] } @@ -85,7 +87,11 @@ path = "src/lib.rs" [features] default = [] -player = [] +player = ["sdl2"] + +[dependencies.sdl2] +version = "0.35" +optional = true [[bin]] name = "momentry" @@ -111,5 +117,9 @@ path = "src/bin/migrate_chinese_text.rs" name = "test_bm25_simple" path = "src/bin/test_bm25_simple.rs" +[[bin]] +name = "integrated_player" +path = "src/bin/integrated_player.rs" + [build-dependencies] chrono = "0.4" diff --git a/docs/API_QUICK_REFERENCE.md b/docs/API_QUICK_REFERENCE.md index 7b2998d..84f34c0 100644 --- a/docs/API_QUICK_REFERENCE.md +++ b/docs/API_QUICK_REFERENCE.md @@ -300,8 +300,8 @@ curl -X POST http://localhost:3002/api/v1/n8n/search \ { "id": "sentence_0001", "vid": "a1b10138a6bbb0cd", - "start": 10.5, - "end": 15.2, + "start_time": 10.5, + "end_time": 15.2, "title": "Chunk sentence_0001", "text": "Found text matching query", "score": 0.85, diff --git a/docs/API_TRAINING_MARCOM.md b/docs/API_TRAINING_MARCOM.md index 6b97b3b..8c8ae8d 100644 --- a/docs/API_TRAINING_MARCOM.md +++ b/docs/API_TRAINING_MARCOM.md @@ -20,7 +20,7 @@ #### API Key(用於 API 認證) ``` -X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69 +X-API-Key: muser_68600856036340bcafc01930eb4bd839 ``` #### SFTPGo(用於影片上傳) @@ -160,12 +160,14 @@ n8n 專用搜尋(包含完整影片檔案路徑 file_path) { "id": "sentence_1471", "vid": "39567a0eb16f39fd", - "start": 5309.08, - "end": 5311.08, - "title": "Chunk sentence_1471", + "chunk_type": "sentence", + "start_frame": 318545, + "end_frame": 318665, + "fps": 59.94, + "start_time": 5314.31, + "end_time": 5316.32, "text": "influenced by a vital way,", - "score": 0.68, - "file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/video.mp4" + "score": 0.68 } ] } @@ -176,8 +178,8 @@ n8n 專用搜尋(包含完整影片檔案路徑 file_path) |------|-----------|----------------| | 影片 UUID | `uuid` | `vid` | | Chunk ID | `chunk_id` | `id` | -| 開始時間 | `start_time` | `start` | -| 結束時間 | `end_time` | `end` | +| 開始時間 | `start_time` | `start_time` | +| 結束時間 | `end_time` | `end_time` | | 相似度分數 | `score` | `score` | | **檔案路徑** | ❌ | ✅ `file_path` | @@ -386,3 +388,4 @@ GET /api/v1/jobs/{uuid} | V1.2 | 2026-03-25 | 新增 Chunk 欄位說明、類型、播放方式 | OpenCode | | V1.3 | 2026-03-25 | 新增 Demo 測試帳號(SFTPGo)| OpenCode | | V1.4 | 2026-03-25 | 更新 n8n 搜尋回傳欄位說明 (media_url→file_path) | OpenCode | +| V1.5 | 2026-04-17 | 修正 API Key 格式、統一 n8n/search 欄位名稱 (start/end → start_time/end_time) | OpenCode | diff --git a/docs_v1.0/IMPLEMENTATION/SCENE_API_INTEGRATION.md b/docs_v1.0/IMPLEMENTATION/SCENE_API_INTEGRATION.md deleted file mode 100644 index b4893bb..0000000 --- a/docs_v1.0/IMPLEMENTATION/SCENE_API_INTEGRATION.md +++ /dev/null @@ -1,141 +0,0 @@ -# 場景識別 API 整合指南 - -## 概述 - -本文檔說明如何在 Playground (port 3003) 中使用場景識別功能。 - -## API Endpoint - -### 場景識別 - -**Endpoint**: `GET /api/v1/scene/:uuid` - -**描述**: 對指定影片執行場景識別 - -**參數**: -- `uuid` (path): 影片 UUID - -**回應格式**: -```json -{ - "video_uuid": "384b0ff44aaaa1f1", - "scenes": [ - { - "start_time": 0.0, - "end_time": 156.0, - "scene_type": "office", - "scene_type_zh": "辦公室", - "confidence": 0.87, - "duration": 156.0 - } - ], - "processing_time": 1.3 -} -``` - -## 使用方式 - -### 1. 啟動 Playground 伺服器 - -```bash -# 使用 port 3003 -cargo run --bin momentry_playground -- server --host 0.0.0.0 --port 3003 -``` - -### 2. 測試場景識別 - -```bash -# 使用測試腳本 -python3 scripts/test_scene_api.py - -# 範例 -python3 scripts/test_scene_api.py 384b0ff44aaaa1f1 -``` - -### 3. 直接使用 curl - -```bash -curl -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ - "http://localhost:3003/api/v1/scene/384b0ff44aaaa1f1" -``` - -## Python 整合範例 - -```python -import requests - -API_KEY = "muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" -BASE_URL = "http://localhost:3003" - -def classify_scene(video_uuid): - """執行場景識別""" - response = requests.get( - f"{BASE_URL}/api/v1/scene/{video_uuid}", - headers={"X-API-Key": API_KEY} - ) - - if response.status_code == 200: - return response.json() - else: - raise Exception(f"API error: {response.status_code}") - -# 使用範例 -result = classify_scene("384b0ff44aaaa1f1") -print(f"場景數量:{len(result['scenes'])}") -for scene in result['scenes']: - print(f" - {scene['scene_type']} ({scene['confidence']*100:.1f}%)") -``` - -## 目前狀態 - -### 已完成 ✅ -- ✅ 場景識別 Python 腳本 (`scripts/scene_classifier.py`) -- ✅ Places365 380 個場景類別 -- ✅ API 測試腳本 (`scripts/test_scene_api.py`) -- ✅ Rust API handler 設計 - -### 進行中 ⏳ -- ⏳ Rust API endpoint 完整實作 -- ⏳ 與資料庫整合 -- ⏳ 錯誤處理優化 - -### 已知限制 -- Rust API endpoint 需要完整實作以支援資料庫查詢 -- 目前建議使用 Python 腳本直接測試 - -## 故障排除 - -### 問題:API 回應 404 - -**可能原因**: -- 影片 UUID 不存在 -- Playground 伺服器未啟動 - -**解決方案**: -```bash -# 檢查伺服器狀態 -curl http://localhost:3003/health - -# 檢查影片是否存在 -curl -H "X-API-Key: ..." "http://localhost:3003/api/v1/videos" -``` - -### 問題:處理時間過長 - -**建議**: -- 減少取樣頻率 (`--sample-interval`) -- 增加最小場景持續時間 (`--min-scene-duration`) -- 使用 Places365 Core ML 模型(而非 PyTorch) - -## 相關文檔 - -- `docs_v1.0/IMPLEMENTATION/SCENE_CLASSIFICATION_MODULE.md` - 模組使用手冊 -- `docs_v1.0/IMPLEMENTATION/PLACES365_INSTALLATION.md` - 模型安裝指南 -- `docs_v1.0/TESTING/SCENE_CLASSIFICATION_TEST_REPORT_2026_04_01.md` - 測試報告 - -## 下一步 - -1. 完成 Rust API endpoint 實作 -2. 整合資料庫查詢 -3. 添加異步處理支援 -4. 優化效能和記憶體使用 diff --git a/momentry_runtime/plist/com.momentry.n8n.main.plist b/momentry_runtime/plist/com.momentry.n8n.main.plist index 843a7d2..81c389c 100644 --- a/momentry_runtime/plist/com.momentry.n8n.main.plist +++ b/momentry_runtime/plist/com.momentry.n8n.main.plist @@ -13,8 +13,7 @@ ProgramArguments - /opt/homebrew/opt/node@22/bin/node - /opt/homebrew/lib/node_modules/n8n/bin/n8n + /Users/accusys/momentry/scripts/start_n8n.sh start diff --git a/momentry_runtime/plist/com.momentry.n8n.worker.plist b/momentry_runtime/plist/com.momentry.n8n.worker.plist index dabb826..91d8c91 100644 --- a/momentry_runtime/plist/com.momentry.n8n.worker.plist +++ b/momentry_runtime/plist/com.momentry.n8n.worker.plist @@ -16,8 +16,7 @@ ProgramArguments - /opt/homebrew/opt/node@22/bin/node - /opt/homebrew/lib/node_modules/n8n/bin/n8n + /Users/accusys/momentry/scripts/start_n8n.sh worker diff --git a/scripts/asr_processor.py b/scripts/asr_processor.py index 46fb532..71aa6ed 100755 --- a/scripts/asr_processor.py +++ b/scripts/asr_processor.py @@ -65,12 +65,20 @@ def run_asr(video_path, output_path, uuid: str = ""): if publisher: publisher.info("asr", "Loading Whisper model...") - model = WhisperModel("tiny", device="cpu", compute_type="int8") + # Use small model with CPU (MPS not supported by faster_whisper) + # small 模型在準確率和速度間取得最佳平衡 + model = WhisperModel("small", device="cpu", compute_type="int8") if publisher: publisher.info("asr", f"Transcribing: {video_path}") - segments, info = model.transcribe(video_path, beam_size=5) + # Transcribe with VAD filter for better accuracy + segments, info = model.transcribe( + video_path, + beam_size=5, + vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200), + ) if publisher: publisher.info("asr", f"ASR_LANGUAGE:{info.language}") diff --git a/scripts/asrx_processor.py b/scripts/asrx_processor.py index 2ef6201..aaa2bc7 100755 --- a/scripts/asrx_processor.py +++ b/scripts/asrx_processor.py @@ -22,6 +22,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""): try: import whisperx + import torch except ImportError: if publisher: publisher.error("asrx", "whisperx not installed") @@ -36,6 +37,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""): publisher.info("asrx", "ASRX_LOADING_MODEL") try: + # Fix for PyTorch 2.6+ compatibility + # Allow omegaconf types in torch.load + import omegaconf + + torch.serialization.add_safe_globals( + [omegaconf.listconfig.ListConfig, omegaconf.dictconfig.DictConfig] + ) + # Load model - using faster-whisper for better performance # You can also use: "large-v3", "medium", "small", "base", "tiny" model = whisperx.load_model("base", device="cpu", compute_type="int8") @@ -54,9 +63,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""): # Diarization (speaker segmentation) try: - import whisperx + from whisperx.diarize import DiarizationPipeline - diarize_model = whisperx.DiarizationPipeline(use_auth_token=None) + # DiarizationPipeline parameters: model_name, token, device, cache_dir + diarize_model = DiarizationPipeline( + model_name="pyannote/speaker-diarization", + token=None, # HuggingFace token (None for public models) + device="cpu", + ) diarize_segments = diarize_model(video_path) # Assign speaker labels diff --git a/scripts/caption_processor.py b/scripts/caption_processor.py index 11d6223..be23d8c 100755 --- a/scripts/caption_processor.py +++ b/scripts/caption_processor.py @@ -1,7 +1,8 @@ #!/opt/homebrew/bin/python3.11 """ -Caption Processor - Generate image captions -Uses AI vision models to analyze video frames and generate descriptions +Caption Processor - Generate image captions (LOCAL ONLY) +Uses Moondream2 (local VLM) for image captioning +No cloud API calls - fully offline processing """ import sys @@ -18,7 +19,6 @@ from redis_publisher import RedisPublisher def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]: """Extract frames from video at regular intervals""" - # Get video duration cmd = [ "ffprobe", "-v", @@ -34,14 +34,13 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]: data = json.loads(result.stdout) duration = float(data.get("format", {}).get("duration", 0)) else: - duration = 60 # Default fallback + duration = 60 except Exception: duration = 60 if duration <= 0: duration = 60 - # Calculate frame interval interval = max(duration / max_frames, 1.0) frames = [] @@ -76,94 +75,73 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]: return frames -def generate_caption_with_llava( +def generate_caption_with_moondream( image_path: str, prompt: str = "Describe this image in detail." ) -> Optional[str]: - """Generate caption using LLaVA model""" + """Generate caption using Moondream2 (local VLM)""" try: - # Try to use transformers with LLaVA - from transformers import AutoProcessor, AutoModelForVision2Seq # noqa: F401 - import torch # noqa: F401 - from PIL import Image # noqa: F401 + from transformers import AutoModelForCausalLM, AutoTokenizer + from PIL import Image + import torch - # Note: This requires llava-hf/llava-1.5-7b-hf or similar - # For now, return a placeholder - return f"[LLaVA caption for {os.path.basename(image_path)}]" + model_id = "vikhyatk/moondream2" + revision = "2025-01-09" + + tokenizer = AutoTokenizer.from_pretrained( + model_id, revision=revision, trust_remote_code=True + ) + moondream = AutoModelForCausalLM.from_pretrained( + model_id, + revision=revision, + trust_remote_code=True, + torch_dtype=torch.float16, + ).to("mps" if torch.backends.mps.is_available() else "cpu") + + moondream.eval() + + image = Image.open(image_path) + enc_image = moondream.encode_image(image) + caption = moondream.answer_question(enc_image, prompt, tokenizer) + + return caption if caption else None except ImportError: return None - - -def generate_caption_with_gpt4v(image_path: str, api_key: str = None) -> Optional[str]: - """Generate caption using GPT-4V via OpenAI API""" - import base64 - - if not api_key: - api_key = os.environ.get("OPENAI_API_KEY") - - if not api_key: - return None - - try: - from openai import OpenAI - - client = OpenAI(api_key=api_key) - - # Encode image - with open(image_path, "rb") as f: - img_data = base64.b64encode(f.read()).decode() - - response = client.chat.completions.create( - model="gpt-4o", # or gpt-4-turbo for vision - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{img_data}"}, - }, - { - "type": "text", - "text": "Describe what you see in this image in one sentence.", - }, - ], - } - ], - max_tokens=100, - ) - - return response.choices[0].message.content - except Exception: + except Exception as e: + print(f"[CAPTION] Moondream error: {e}") return None -def generate_caption_fallback(image_path: str, existing_data: Dict = None) -> str: - """Generate a basic caption using available metadata""" +def generate_caption_from_metadata(image_path: str, existing_data: Dict = None) -> str: + """Generate caption using YOLO/OCR metadata (fallback)""" caption_parts = [] - # Check YOLO data for objects if existing_data and existing_data.get("objects"): objects = list(set([o["class"] for o in existing_data["objects"]]))[:5] if objects: - caption_parts.append(f"Contains: {', '.join(objects)}") + caption_parts.append(f"Objects: {', '.join(objects)}") - # Check OCR data for text if existing_data and existing_data.get("texts"): texts = [t["text"] for t in existing_data["texts"] if t.get("text")] if texts: - caption_parts.append(f"On-screen text: {' '.join(texts[:3])}") + caption_parts.append(f"Text: {' '.join(texts[:3])}") + + if existing_data and existing_data.get("scene_type"): + caption_parts.append(f"Scene: {existing_data['scene_type']}") if caption_parts: return " | ".join(caption_parts) - return "Video frame at timestamp" + return "Video frame" def process_frame( - frame_info: Dict, yolo_data: List = None, ocr_data: List = None + frame_info: Dict, + yolo_data: List = None, + ocr_data: List = None, + scene_data: Dict = None, ) -> Dict: - """Process a single frame and generate caption""" + """Process a single frame and generate caption (LOCAL ONLY)""" frame_path = frame_info["path"] timestamp = frame_info["timestamp"] @@ -171,28 +149,34 @@ def process_frame( caption = None source = "unknown" - # Try GPT-4V first - caption = generate_caption_with_gpt4v(frame_path) + # Try Moondream2 (local VLM) + caption = generate_caption_with_moondream(frame_path) if caption: - source = "gpt-4v" + source = "moondream2" else: - # Try LLaVA - caption = generate_caption_with_llava(frame_path) - if caption: - source = "llava" - else: - # Use fallback with YOLO/OCR data - combined_data = {"objects": [], "texts": []} - if yolo_data: - combined_data["objects"] = [ - o for o in yolo_data if o.get("timestamp") == timestamp - ] - if ocr_data: - combined_data["texts"] = [ - t for t in ocr_data if t.get("timestamp") == timestamp - ] - caption = generate_caption_fallback(frame_path, combined_data) - source = "metadata" + # Fallback: Use metadata from YOLO/OCR/Scene + combined_data = {"objects": [], "texts": [], "scene_type": ""} + + if yolo_data: + combined_data["objects"] = [ + o for o in yolo_data if o.get("timestamp") == timestamp + ] + + if ocr_data: + combined_data["texts"] = [ + t for t in ocr_data if t.get("timestamp") == timestamp + ] + + if scene_data: + for scene in scene_data.get("scenes", []): + if scene.get("start_time", 0) <= timestamp <= scene.get("end_time", 0): + combined_data["scene_type"] = scene.get( + "scene_type_zh" + ) or scene.get("scene_type", "") + break + + caption = generate_caption_from_metadata(frame_path, combined_data) + source = "metadata" return { "index": frame_info["index"], @@ -212,24 +196,22 @@ def run_caption( if publisher: publisher.info("caption", "Extracting frames from video...") - # Extract frames frames = extract_frames(video_path, max_frames) if publisher: publisher.info("caption", f"Extracted {len(frames)} frames") - # Load YOLO and OCR data for context base_path = os.path.dirname(output_path) uuid_name = os.path.basename(output_path).split(".")[0] yolo_objects = [] ocr_texts = [] + scene_info = {} yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json") if os.path.exists(yolo_path): with open(yolo_path) as f: yolo_data = json.load(f) - # Flatten objects from all frames for frame in yolo_data.get("frames", []): for obj in frame.get("objects", []): obj["timestamp"] = frame.get("timestamp", 0) @@ -244,7 +226,11 @@ def run_caption( text["timestamp"] = frame.get("timestamp", 0) ocr_texts.append(text) - # Process each frame + scene_path = os.path.join(base_path, f"{uuid_name}.scene.json") + if os.path.exists(scene_path): + with open(scene_path) as f: + scene_info = json.load(f) + captions = [] for i, frame in enumerate(frames): if publisher and i % 5 == 0: @@ -252,16 +238,14 @@ def run_caption( "caption", i, len(frames), f"Frame {i + 1}/{len(frames)}" ) - caption_data = process_frame(frame, yolo_objects, ocr_texts) + caption_data = process_frame(frame, yolo_objects, ocr_texts, scene_info) captions.append(caption_data) - # Cleanup temp frame try: os.remove(frame["path"]) except Exception: pass - # Cleanup temp directory temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames") try: os.rmdir(temp_dir) @@ -275,9 +259,11 @@ def run_caption( "summary": { "avg_caption_length": sum(len(c.get("caption", "")) for c in captions) / max(len(captions), 1), - "gpt4v_count": sum(1 for c in captions if c.get("source") == "gpt-4v"), - "llava_count": sum(1 for c in captions if c.get("source") == "llava"), + "moondream_count": sum( + 1 for c in captions if c.get("source") == "moondream2" + ), "metadata_count": sum(1 for c in captions if c.get("source") == "metadata"), + "cloud_api_count": 0, }, } @@ -285,13 +271,13 @@ def run_caption( json.dump(result, f, indent=2, ensure_ascii=False) if publisher: - publisher.complete("caption", f"{len(captions)} frames captioned") + publisher.complete("caption", f"{len(captions)} frames captioned (LOCAL)") return result if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Video Caption Generator") + parser = argparse.ArgumentParser(description="Video Caption Generator (LOCAL ONLY)") parser.add_argument("video_path", help="Path to video file") parser.add_argument("output_path", help="Output JSON path") parser.add_argument("--uuid", help="UUID for progress tracking", default="") @@ -302,4 +288,4 @@ if __name__ == "__main__": args = parser.parse_args() result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames) - print(f"Caption generated: {result['total_frames']} frames") + print(f"Caption generated: {result['total_frames']} frames (LOCAL)") diff --git a/scripts/face_processor.py b/scripts/face_processor.py index c18a425..9c5c512 100755 --- a/scripts/face_processor.py +++ b/scripts/face_processor.py @@ -1,8 +1,8 @@ #!/opt/homebrew/bin/python3.11 """ -Face Processor - Face Detection -Uses OpenCV Haar Cascade (local, no extra download needed) -Alternative: MediaPipe (requires model download) +Face Processor - Face Detection & Demographics +Uses InsightFace for detection, age, and gender analysis. +Falls back to OpenCV Haar Cascade if InsightFace fails. """ import sys @@ -15,7 +15,7 @@ from redis_publisher import RedisPublisher def process_face(video_path: str, output_path: str, uuid: str = ""): - """Process video for face detection""" + """Process video for face detection and demographics analysis""" publisher = RedisPublisher(uuid) if uuid else None if publisher: @@ -23,56 +23,82 @@ def process_face(video_path: str, output_path: str, uuid: str = ""): try: import cv2 - except ImportError: + import numpy as np + import insightface + except ImportError as e: + error_msg = f"Missing dependency: {e.name}" if publisher: - publisher.error("face", "opencv-python not installed") + publisher.error("face", error_msg) result = {"frame_count": 0, "fps": 0.0, "frames": []} - if publisher: - publisher.complete("face", "0 frames") with open(output_path, "w") as f: json.dump(result, f, indent=2) return result - if publisher: - publisher.info("face", "FACE_LOADING_CASCADE") - - # Try to use OpenCV's built-in Haar Cascade - # This is included with OpenCV - face_cascade = cv2.CascadeClassifier( - cv2.data.haarcascades + "haarcascade_frontalface_default.xml" - ) - - if face_cascade.empty(): + # 1. Initialize InsightFace + use_insightface = False + app = None + try: if publisher: - publisher.error("face", "Could not load Haar Cascade") - result = {"frame_count": 0, "fps": 0.0, "frames": []} + publisher.info("face", "LOADING_INSIGHTFACE") + # 'buffalo_l' is a robust model. det_size can be adjusted. + app = insightface.app.FaceAnalysis( + name="buffalo_l", providers=["CPUExecutionProvider"] + ) + app.prepare(ctx_id=0, det_size=(320, 320)) + use_insightface = True if publisher: - publisher.complete("face", "0 frames") - with open(output_path, "w") as f: - json.dump(result, f, indent=2) - return result + publisher.info("face", "INSIGHTFACE_LOADED") + except Exception as e: + print(f"[WARNING] InsightFace failed to load: {e}") + use_insightface = False + + # 2. Fallback to Haar Cascade + face_cascade = None + if not use_insightface: + if publisher: + publisher.info("face", "LOADING_HAAR_CASCADE") + face_cascade = cv2.CascadeClassifier( + cv2.data.haarcascades + "haarcascade_frontalface_default.xml" + ) + if face_cascade.empty(): + if publisher: + publisher.error("face", "Could not load Haar Cascade") + result = {"frame_count": 0, "fps": 0.0, "frames": []} + with open(output_path, "w") as f: + json.dump(result, f, indent=2) + return result + if publisher: + publisher.info("face", "HAAR_CASCADE_LOADED") if publisher: - publisher.info("face", "FACE_CASCADE_LOADED") + publisher.info("face", "PROCESSING_VIDEO") - # Get video info cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + if publisher: + publisher.error("face", "Could not open video") + result = {"frame_count": 0, "fps": 0.0, "frames": []} + with open(output_path, "w") as f: + json.dump(result, f, indent=2) + return result + fps = cap.get(cv2.CAP_PROP_FPS) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - cap.release() + + # Optimization: Process every N frames to speed up analysis + # Since we just need attributes for the person identity, we don't need every single frame. + sample_interval = 30 + if total_frames > 0: + estimated_samples = total_frames // sample_interval + else: + estimated_samples = 0 + + frame_count = 0 + processed_count = 0 + frames_data = [] if publisher: - publisher.info("face", f"fps={fps}, frames={total_frames}") - publisher.progress("face", 0, total_frames, "Starting") - - # Process every N frames to speed up - sample_interval = 30 # Process every 30 frames - - frames = [] - frame_count = 0 - processed = 0 - - cap = cv2.VideoCapture(video_path) + publisher.progress("face", 0, estimated_samples, "Starting") while True: ret, frame = cap.read() @@ -81,62 +107,92 @@ def process_face(video_path: str, output_path: str, uuid: str = ""): frame_count += 1 - # Sample frames + # Sampling if frame_count % sample_interval != 0: continue - processed += 1 + processed_count += 1 timestamp = (frame_count - 1) / fps if fps > 0 else 0 - # Convert to grayscale - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - - # Detect faces - try: - faces = face_cascade.detectMultiScale( - gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30) - ) - except Exception as e: - if publisher: - publisher.error("face", f"Frame {frame_count}: {e}") - faces = [] - face_list = [] - for x, y, w, h in faces: - face_list.append( - { - "face_id": None, - "x": int(x), - "y": int(y), - "width": int(w), - "height": int(h), - "confidence": 0.8, # Haar cascade doesn't provide confidence - } - ) - # Only add frames with faces + try: + if use_insightface and app: + # InsightFace Detection & Analysis + faces = app.get(frame) + for face in faces: + bbox = face.bbox.astype(int) + bx, by, bw, bh = ( + bbox[0], + bbox[1], + bbox[2] - bbox[0], + bbox[3] - bbox[1], + ) + + # Extract Attributes + age = int(face.age) if hasattr(face, "age") else None + gender_val = face.gender if hasattr(face, "gender") else None + gender = ( + "female" + if gender_val == 0 + else ("male" if gender_val == 1 else None) + ) + + face_list.append( + { + "x": int(bx), + "y": int(by), + "width": int(bw), + "height": int(bh), + "confidence": float(face.det_score) + if hasattr(face, "det_score") + else 0.9, + "attributes": {"age": age, "gender": gender}, + } + ) + else: + # Haar Cascade Fallback (No Age/Gender) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + faces = face_cascade.detectMultiScale( + gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30) + ) + for x, y, w, h in faces: + face_list.append( + { + "x": int(x), + "y": int(y), + "width": int(w), + "height": int(h), + "confidence": 0.8, + "attributes": {"age": None, "gender": None}, + } + ) + except Exception as e: + print(f"[ERROR] Frame processing error: {e}") + if face_list: - frames.append( + frames_data.append( { "frame": frame_count - 1, "timestamp": round(timestamp, 3), "faces": face_list, } ) + if publisher: publisher.progress( "face", - processed, - total_frames // sample_interval, + processed_count, + estimated_samples, f"Frame {frame_count}", ) cap.release() - result = {"frame_count": total_frames, "fps": fps, "frames": frames} + result = {"frame_count": total_frames, "fps": fps, "frames": frames_data} if publisher: - publisher.complete("face", f"{len(frames)} frames with faces") + publisher.complete("face", f"{len(frames_data)} frames processed") with open(output_path, "w") as f: json.dump(result, f, indent=2) @@ -145,7 +201,7 @@ def process_face(video_path: str, output_path: str, uuid: str = ""): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Face Detection") + parser = argparse.ArgumentParser(description="Face Detection & Demographics") parser.add_argument("video_path", help="Path to video file") parser.add_argument("output_path", help="Output JSON path") parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") diff --git a/scripts/places365_categories.json b/scripts/places365_categories.json index b5b9d2a..2739847 100644 --- a/scripts/places365_categories.json +++ b/scripts/places365_categories.json @@ -1,382 +1,367 @@ { - "0": "airplane_cabin", - "1": "airport_terminal", - "2": "alley", - "3": "amphitheater", - "4": "amusement_park", - "5": "apartment_building_outdoor", - "6": "aquarium", - "7": "arcade", - "8": "arena_hockey", - "9": "arena_performance", - "10": "army_base", - "11": "art_gallery", - "12": "art_studio", - "13": "assembly_line", - "14": "athletic_field_outdoor", - "15": "atrium_public", - "16": "attic", - "17": "auditorium", - "18": "auto_factory", - "19": "backyard", - "20": "badminton_court_indoor", - "21": "baggage_claim", - "22": "bakery_shop", - "23": "balcony_exterior", - "24": "balcony_interior", - "25": "ball_pit", - "26": "ballroom", - "27": "bamboo_forest", - "28": "banquet_hall", - "29": "bar", - "30": "barn", - "31": "barndoor", - "32": "baseball_field", - "33": "basement", - "34": "basilica", - "35": "basketball_court_indoor", - "36": "basketball_court_outdoor", - "37": "bathroom", - "38": "bazaar_indoor", - "39": "bazaar_outdoor", - "40": "beach", - "41": "beauty_salon", - "42": "bedroom", - "43": "berth", - "44": "biology_laboratory", - "45": "boardwalk", - "46": "boat_deck", - "47": "boathouse", - "48": "bookstore", - "49": "booth_indoor", - "50": "botanical_garden", - "51": "bow_window_indoor", - "52": "bow_window_outdoor", - "53": "bowling_alley", - "54": "boxing_ring", - "55": "brewery_indoor", - "56": "bridge", - "57": "building_facade", - "58": "bullring", - "59": "burial_chamber", - "60": "bus_interior", - "61": "bus_station_indoor", - "62": "butchers_shop", - "63": "butte", - "64": "cabin_outdoor", - "65": "cafeteria", - "66": "campsite", - "67": "campus", - "68": "canal_natural", - "69": "canal_urban", - "70": "candy_store", - "71": "canyon", - "72": "car_interior", - "73": "carrousel", - "74": "castle", - "75": "catacomb", - "76": "cathedral_indoor", - "77": "cathedral_outdoor", - "78": "cavern_indoor", - "79": "cemetery", - "80": "chalet", - "81": "cheese_factory", - "82": "chemistry_lab", - "83": "chicken_coop_indoor", - "84": "chicken_coop_outdoor", - "85": "childs_room", - "86": "church_indoor", - "87": "church_outdoor", - "88": "classroom", - "89": "clean_room", - "90": "cliff", - "91": "cloister_indoor", - "92": "closet", - "93": "clothing_store", - "94": "coast", - "95": "cockpit", - "96": "coffee_shop", - "97": "computer_room", - "98": "conference_center", - "99": "conference_room", - "100": "construction_site", - "101": "control_room", - "102": "control_tower_outdoor", - "103": "corn_field", - "104": "corral", - "105": "corridor", - "106": "cottage_garden", - "107": "courthouse", - "108": "courtroom", + "0": "airfield", + "1": "airplane_cabin", + "2": "airport_terminal", + "3": "alcove", + "4": "alley", + "5": "amphitheater", + "6": "amusement_arcade", + "7": "amusement_park", + "8": "outdoor", + "9": "aquarium", + "10": "aqueduct", + "11": "arcade", + "12": "arch", + "13": "archaelogical_excavation", + "14": "archive", + "15": "hockey", + "16": "performance", + "17": "rodeo", + "18": "army_base", + "19": "art_gallery", + "20": "art_school", + "21": "art_studio", + "22": "artists_loft", + "23": "assembly_line", + "24": "outdoor", + "25": "public", + "26": "attic", + "27": "auditorium", + "28": "auto_factory", + "29": "auto_showroom", + "30": "badlands", + "31": "shop", + "32": "exterior", + "33": "interior", + "34": "ball_pit", + "35": "ballroom", + "36": "bamboo_forest", + "37": "bank_vault", + "38": "banquet_hall", + "39": "bar", + "40": "barn", + "41": "barndoor", + "42": "baseball_field", + "43": "basement", + "44": "indoor", + "45": "bathroom", + "46": "indoor", + "47": "outdoor", + "48": "beach", + "49": "beach_house", + "50": "beauty_salon", + "51": "bedchamber", + "52": "bedroom", + "53": "beer_garden", + "54": "beer_hall", + "55": "berth", + "56": "biology_laboratory", + "57": "boardwalk", + "58": "boat_deck", + "59": "boathouse", + "60": "bookstore", + "61": "indoor", + "62": "botanical_garden", + "63": "indoor", + "64": "bowling_alley", + "65": "boxing_ring", + "66": "bridge", + "67": "building_facade", + "68": "bullring", + "69": "burial_chamber", + "70": "bus_interior", + "71": "indoor", + "72": "butchers_shop", + "73": "butte", + "74": "outdoor", + "75": "cafeteria", + "76": "campsite", + "77": "campus", + "78": "natural", + "79": "urban", + "80": "candy_store", + "81": "canyon", + "82": "car_interior", + "83": "carrousel", + "84": "castle", + "85": "catacomb", + "86": "cemetery", + "87": "chalet", + "88": "chemistry_lab", + "89": "childs_room", + "90": "indoor", + "91": "outdoor", + "92": "classroom", + "93": "clean_room", + "94": "cliff", + "95": "closet", + "96": "clothing_store", + "97": "coast", + "98": "cockpit", + "99": "coffee_shop", + "100": "computer_room", + "101": "conference_center", + "102": "conference_room", + "103": "construction_site", + "104": "corn_field", + "105": "corral", + "106": "corridor", + "107": "cottage", + "108": "courthouse", "109": "courtyard", - "110": "covered_bridge_exterior", - "111": "creek", - "112": "crevasse", - "113": "crosswalk", - "114": "cubicle_office", - "115": "dam", - "116": "daycare_center", - "117": "delicatessen", - "118": "dentists_office", - "119": "desert_sand", - "120": "desert_vegetation", - "121": "diner_indoor", - "122": "diner_outdoor", - "123": "dinette_home", - "124": "dinette_vehicle", - "125": "dining_car", - "126": "dining_room", - "127": "discotheque", - "128": "dock", - "129": "doorway_indoor", - "130": "doorway_outdoor", - "131": "dorm_room", - "132": "driveway", - "133": "driving_range_outdoor", - "134": "drugstore", - "135": "electrical_substation", - "136": "elevator_door", - "137": "elevator_escalator", - "138": "elevator_interior", - "139": "engine_room", - "140": "escalator_indoor", - "141": "excavation", - "142": "factory_indoor", - "143": "fairway", - "144": "fastfood_restaurant", - "145": "field_cultivated", - "146": "field_wild", - "147": "fire_escape", - "148": "fire_station", - "149": "firing_range_indoor", - "150": "fishpond", - "151": "florist_shop_indoor", - "152": "food_court", - "153": "forest_broadleaf", - "154": "forest_needleleaf", - "155": "forest_path", - "156": "forest_road", - "157": "formal_garden", - "158": "fountain", - "159": "galley", - "160": "game_room", - "161": "garage_indoor", - "162": "garage_outdoor", - "163": "garbage_dump", - "164": "gas_station", - "165": "gazebo_exterior", - "166": "general_store_indoor", - "167": "general_store_outdoor", - "168": "gift_shop", - "169": "golf_course", - "170": "greenhouse_indoor", - "171": "greenhouse_outdoor", - "172": "gymnasium_indoor", - "173": "hangar_indoor", - "174": "hangar_outdoor", - "175": "harbor", - "176": "hardware_store", - "177": "hayfield", - "178": "heliport", - "179": "herb_garden", - "180": "highway", - "181": "hill", - "182": "home_office", - "183": "hospital", - "184": "hospital_room", - "185": "hot_spring", - "186": "hot_tub_outdoor", - "187": "hotel", - "188": "hotel_outdoor", - "189": "hotel_room", - "190": "house", - "191": "hunting_lodge_outdoor", - "192": "ice_cream_parlor", - "193": "ice_floe", - "194": "ice_shelf", - "195": "ice_skating_rink_indoor", - "196": "ice_skating_rink_outdoor", - "197": "iceberg", - "198": "igloo", - "199": "industrial_area", - "200": "inn_outdoor", - "201": "islet", - "202": "jacuzzi_indoor", - "203": "jail_cell", - "204": "jail_indoor", - "205": "jewelry_shop", - "206": "kasbah", - "207": "kennel_indoor", - "208": "kennel_outdoor", - "209": "kindergarden_classroom", - "210": "kitchen", - "211": "kitchenette", - "212": "labyrinth_outdoor", - "213": "lake_natural", - "214": "landfill", - "215": "landing_deck", - "216": "laundromat", - "217": "lecture_room", - "218": "library_indoor", - "219": "library_outdoor", - "220": "lido_deck_outdoor", - "221": "lift_bridge", - "222": "lighthouse", - "223": "limousine_interior", - "224": "living_room", - "225": "loading_dock", - "226": "lobby", - "227": "lock_chamber", - "228": "locker_room", - "229": "mansion", - "230": "manufactured_home", - "231": "market_indoor", - "232": "market_outdoor", - "233": "marsh", - "234": "martial_arts_gym", - "235": "mausoleum", - "236": "medina", - "237": "moat_water", - "238": "monastery_outdoor", - "239": "mosque_indoor", - "240": "mosque_outdoor", - "241": "motel", - "242": "mountain", - "243": "mountain_path", - "244": "mountain_snowy", - "245": "movie_theater_indoor", - "246": "museum_indoor", - "247": "museum_outdoor", - "248": "music_store", - "249": "music_studio", - "250": "nuclear_power_plant_outdoor", - "251": "nursery", - "252": "oast_house", - "253": "observatory_indoor", - "254": "observatory_outdoor", - "255": "ocean", - "256": "office", - "257": "office_building", - "258": "office_cubicles", - "259": "oil_refinery_outdoor", - "260": "oilrig", - "261": "operating_room", - "262": "orchard", - "263": "outhouse_outdoor", - "264": "pagoda", - "265": "palace", - "266": "pantry", - "267": "park", - "268": "parking_garage_indoor", - "269": "parking_garage_outdoor", - "270": "parking_lot", - "271": "parlor", - "272": "pasture", - "273": "patio", - "274": "pavilion", - "275": "pharmacy", - "276": "phone_booth", - "277": "physics_laboratory", - "278": "picnic_area", - "279": "pilothouse_indoor", - "280": "planetarium_indoor", - "281": "playground", - "282": "playroom", - "283": "plaza", - "284": "podium_indoor", - "285": "podium_outdoor", - "286": "pond", - "287": "poolroom_home", - "288": "poolroom_establishment", - "289": "power_plant_outdoor", - "290": "promenade_deck", - "291": "pub_indoor", - "292": "pulpit", - "293": "putting_green", - "294": "racecourse", - "295": "raceway", - "296": "raft", - "297": "railroad_track", - "298": "rainforest", - "299": "reception", - "300": "recreation_room", - "301": "residential_neighborhood", - "302": "restaurant", - "303": "restaurant_kitchen", - "304": "restaurant_patio", - "305": "rice_paddy", - "306": "riding_arena", - "307": "river", - "308": "rock_arch", - "309": "rope_bridge", - "310": "ruin", - "311": "runway", - "312": "sandbar", - "313": "sandbox", - "314": "sauna", - "315": "schoolhouse", - "316": "sea_cliff", - "317": "server_room", - "318": "shed", - "319": "shoe_shop", - "320": "shop_front", - "321": "shopping_mall_indoor", - "322": "shower", - "323": "skatepark", - "324": "ski_resort", - "325": "ski_slope", - "326": "sky", - "327": "skyscraper", - "328": "slum", - "329": "snowfield", - "330": "squash_court", - "331": "stable", - "332": "stadium_baseball", - "333": "stadium_football", - "334": "staircase", - "335": "street", - "336": "subway_interior", - "337": "subway_station_platform", - "338": "supermarket", - "339": "sushi_bar", - "340": "swamp", - "341": "swimming_hole", - "342": "swimming_pool_indoor", - "343": "swimming_pool_outdoor", - "344": "synagogue_indoor", - "345": "synagogue_outdoor", - "346": "television_room", - "347": "television_studio", - "348": "temple_asia", - "349": "temple_europe", - "350": "trench", - "351": "underwater_coral_reef", - "352": "utility_room", - "353": "valley", - "354": "van_interior", - "355": "vegetable_garden", - "356": "veranda", - "357": "veterinarians_office", - "358": "viaduct", - "359": "videostore", - "360": "village", - "361": "vineyard", - "362": "volcano", - "363": "volleyball_court_indoor", - "364": "volleyball_court_outdoor", - "365": "waiting_room", - "366": "warehouse_indoor", - "367": "water_tower", - "368": "waterfall_block", - "369": "waterfall_fan", - "370": "waterfall_plunge", - "371": "wetland", - "372": "wheat_field", - "373": "wind_farm", - "374": "windmill", - "375": "wine_cellar_barrel_storage", - "376": "wine_cellar_bottle_storage", - "377": "wrestling_ring_indoor", - "378": "yard", - "379": "youth_hostel" + "110": "creek", + "111": "crevasse", + "112": "crosswalk", + "113": "dam", + "114": "delicatessen", + "115": "department_store", + "116": "sand", + "117": "vegetation", + "118": "desert_road", + "119": "outdoor", + "120": "dining_hall", + "121": "dining_room", + "122": "discotheque", + "123": "outdoor", + "124": "dorm_room", + "125": "downtown", + "126": "dressing_room", + "127": "driveway", + "128": "drugstore", + "129": "door", + "130": "elevator_lobby", + "131": "elevator_shaft", + "132": "embassy", + "133": "engine_room", + "134": "entrance_hall", + "135": "indoor", + "136": "excavation", + "137": "fabric_store", + "138": "farm", + "139": "fastfood_restaurant", + "140": "cultivated", + "141": "wild", + "142": "field_road", + "143": "fire_escape", + "144": "fire_station", + "145": "fishpond", + "146": "indoor", + "147": "indoor", + "148": "food_court", + "149": "football_field", + "150": "broadleaf", + "151": "forest_path", + "152": "forest_road", + "153": "formal_garden", + "154": "fountain", + "155": "galley", + "156": "indoor", + "157": "outdoor", + "158": "gas_station", + "159": "exterior", + "160": "indoor", + "161": "outdoor", + "162": "gift_shop", + "163": "glacier", + "164": "golf_course", + "165": "indoor", + "166": "outdoor", + "167": "grotto", + "168": "indoor", + "169": "indoor", + "170": "outdoor", + "171": "harbor", + "172": "hardware_store", + "173": "hayfield", + "174": "heliport", + "175": "highway", + "176": "home_office", + "177": "home_theater", + "178": "hospital", + "179": "hospital_room", + "180": "hot_spring", + "181": "outdoor", + "182": "hotel_room", + "183": "house", + "184": "outdoor", + "185": "ice_cream_parlor", + "186": "ice_floe", + "187": "ice_shelf", + "188": "indoor", + "189": "outdoor", + "190": "iceberg", + "191": "igloo", + "192": "industrial_area", + "193": "outdoor", + "194": "islet", + "195": "indoor", + "196": "jail_cell", + "197": "japanese_garden", + "198": "jewelry_shop", + "199": "junkyard", + "200": "kasbah", + "201": "outdoor", + "202": "kindergarden_classroom", + "203": "kitchen", + "204": "lagoon", + "205": "natural", + "206": "landfill", + "207": "landing_deck", + "208": "laundromat", + "209": "lawn", + "210": "lecture_room", + "211": "legislative_chamber", + "212": "indoor", + "213": "outdoor", + "214": "lighthouse", + "215": "living_room", + "216": "loading_dock", + "217": "lobby", + "218": "lock_chamber", + "219": "locker_room", + "220": "mansion", + "221": "manufactured_home", + "222": "indoor", + "223": "outdoor", + "224": "marsh", + "225": "martial_arts_gym", + "226": "mausoleum", + "227": "medina", + "228": "mezzanine", + "229": "water", + "230": "outdoor", + "231": "motel", + "232": "mountain", + "233": "mountain_path", + "234": "mountain_snowy", + "235": "indoor", + "236": "indoor", + "237": "outdoor", + "238": "music_studio", + "239": "natural_history_museum", + "240": "nursery", + "241": "nursing_home", + "242": "oast_house", + "243": "ocean", + "244": "office", + "245": "office_building", + "246": "office_cubicles", + "247": "oilrig", + "248": "operating_room", + "249": "orchard", + "250": "orchestra_pit", + "251": "pagoda", + "252": "palace", + "253": "pantry", + "254": "park", + "255": "indoor", + "256": "outdoor", + "257": "parking_lot", + "258": "pasture", + "259": "patio", + "260": "pavilion", + "261": "pet_shop", + "262": "pharmacy", + "263": "phone_booth", + "264": "physics_laboratory", + "265": "picnic_area", + "266": "pier", + "267": "pizzeria", + "268": "playground", + "269": "playroom", + "270": "plaza", + "271": "pond", + "272": "porch", + "273": "promenade", + "274": "indoor", + "275": "racecourse", + "276": "raceway", + "277": "raft", + "278": "railroad_track", + "279": "rainforest", + "280": "reception", + "281": "recreation_room", + "282": "repair_shop", + "283": "residential_neighborhood", + "284": "restaurant", + "285": "restaurant_kitchen", + "286": "restaurant_patio", + "287": "rice_paddy", + "288": "river", + "289": "rock_arch", + "290": "roof_garden", + "291": "rope_bridge", + "292": "ruin", + "293": "runway", + "294": "sandbox", + "295": "sauna", + "296": "schoolhouse", + "297": "science_museum", + "298": "server_room", + "299": "shed", + "300": "shoe_shop", + "301": "shopfront", + "302": "indoor", + "303": "shower", + "304": "ski_resort", + "305": "ski_slope", + "306": "sky", + "307": "skyscraper", + "308": "slum", + "309": "snowfield", + "310": "soccer_field", + "311": "stable", + "312": "baseball", + "313": "football", + "314": "soccer", + "315": "indoor", + "316": "outdoor", + "317": "staircase", + "318": "storage_room", + "319": "street", + "320": "platform", + "321": "supermarket", + "322": "sushi_bar", + "323": "swamp", + "324": "swimming_hole", + "325": "indoor", + "326": "outdoor", + "327": "outdoor", + "328": "television_room", + "329": "television_studio", + "330": "asia", + "331": "throne_room", + "332": "ticket_booth", + "333": "topiary_garden", + "334": "tower", + "335": "toyshop", + "336": "train_interior", + "337": "platform", + "338": "tree_farm", + "339": "tree_house", + "340": "trench", + "341": "tundra", + "342": "ocean_deep", + "343": "utility_room", + "344": "valley", + "345": "vegetable_garden", + "346": "veterinarians_office", + "347": "viaduct", + "348": "village", + "349": "vineyard", + "350": "volcano", + "351": "outdoor", + "352": "waiting_room", + "353": "water_park", + "354": "water_tower", + "355": "waterfall", + "356": "watering_hole", + "357": "wave", + "358": "wet_bar", + "359": "wheat_field", + "360": "wind_farm", + "361": "windmill", + "362": "yard", + "363": "youth_hostel", + "364": "zen_garden" } \ No newline at end of file diff --git a/scripts/scene_classifier.py b/scripts/scene_classifier.py index ae97a3f..9be0196 100644 --- a/scripts/scene_classifier.py +++ b/scripts/scene_classifier.py @@ -162,9 +162,13 @@ class SceneClassifier: model_path: Core ML 模型路徑 (可選) """ self.model_path = model_path + self.places365_model_path = ( + "/Users/accusys/momentry/models/resnet18_places365.pth.tar" + ) self.model = None self.coreml_model = None self.transform = None + self.model_type = "unknown" # 圖像預處理 self.transform = transforms.Compose( @@ -189,23 +193,57 @@ class SceneClassifier: try: print(f"[SCENE] Loading Core ML model: {self.model_path}") self.coreml_model = ct.models.MLModel(self.model_path) + self.model_type = "coreml" print("[SCENE] Core ML model loaded successfully") return True except Exception as e: print(f"[SCENE] Warning: Failed to load Core ML model: {e}") - # 備案:使用 PyTorch + ResNet + # 備案:使用 PyTorch + Places365 if HAS_TORCH: try: print(f"[SCENE] Loading PyTorch model on {DEVICE}") - # 使用預訓練的 ResNet18 - self.model = models.resnet18(pretrained=True) + + # 檢查 Places365 模型是否存在 + if Path(self.places365_model_path).exists(): + print( + f"[SCENE] Loading Places365 model: {self.places365_model_path}" + ) + checkpoint = torch.load( + self.places365_model_path, map_location=DEVICE + ) + + # 建立 ResNet18 模型 (Places365 有 365 個類別) + self.model = models.resnet18(num_classes=365) + + # 移除 'module.' prefix (DataParallel training) + state_dict = checkpoint["state_dict"] + new_state_dict = {} + for k, v in state_dict.items(): + if k.startswith("module."): + new_state_dict[k[7:]] = v + else: + new_state_dict[k] = v + + self.model.load_state_dict(new_state_dict) + self.model_type = "places365" + print("[SCENE] Places365 model loaded successfully (365 classes)") + else: + print( + f"[SCENE] Places365 model not found, using ImageNet pretrained" + ) + self.model = models.resnet18(pretrained=True) + self.model_type = "imagenet" + self.model.to(DEVICE) self.model.eval() print("[SCENE] PyTorch model loaded successfully") return True except Exception as e: print(f"[SCENE] Warning: Failed to load PyTorch model: {e}") + import traceback + + traceback.print_exc() print("[SCENE] Error: No model available") return False diff --git a/scripts/story_processor.py b/scripts/story_processor.py index a6ebc09..7fe418f 100755 --- a/scripts/story_processor.py +++ b/scripts/story_processor.py @@ -1,12 +1,8 @@ #!/opt/homebrew/bin/python3.11 """ Story Processor - Generate parent-child chunk hierarchy for RAG -Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks. - -Parent-Child Chunk Strategy: -- Parent chunks: Summarize multiple scenes/segments with narrative description -- Child chunks: Individual ASR segments, OCR texts, detected objects -- When embedding: Parent description + Child content for better retrieval +Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks. +NO cloud API calls - fully offline processing """ import sys @@ -47,57 +43,59 @@ def generate_parent_child_chunks( cut_data: Dict, yolo_data: Dict, ocr_data: Dict, + scene_data: Dict, parent_chunk_size: int = 5, -) -> Dict[str, Any]: +) -> Dict: """ - Generate parent-child chunk hierarchy. - - Parent chunks summarize multiple child chunks for better RAG retrieval. - Child chunks are individual segments from ASR, scenes from CUT, etc. + Generate parent-child chunk hierarchy using LOCAL data only. + No LLM/API calls - uses template-based narrative generation. """ - child_chunks = [] parent_chunks = [] - # Get source data - asr_segments = asr_data.get("segments", []) - cut_scenes = cut_data.get("scenes", []) - yolo_frames = yolo_data.get("frames", []) - _ocr_frames = ocr_data.get("frames", []) - - # Create child chunks from ASR segments - asr_child_ids = [] - for i, seg in enumerate(asr_segments): - child_chunk = { - "chunk_id": f"asr_{i:04d}", - "chunk_type": "sentence", - "source": "asr", - "start_time": seg.get("start", 0), - "end_time": seg.get("end", 0), - "text_content": seg.get("text", ""), - "content": seg, - "child_chunk_ids": [], - "parent_chunk_id": None, - } - child_chunks.append(child_chunk) - asr_child_ids.append(child_chunk["chunk_id"]) + # Create child chunks from ASR + for seg in asr_data.get("segments", []): + child_chunks.append( + { + "chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}", + "chunk_type": "asr", + "source": "asr", + "start_time": seg.get("start", 0), + "end_time": seg.get("end", 0), + "text_content": seg.get("text", ""), + "content": { + "text": seg.get("text", ""), + "confidence": seg.get("confidence", 0), + }, + "child_chunk_ids": [], + "parent_chunk_id": None, + } + ) # Create child chunks from CUT scenes - cut_child_ids = [] - for i, scene in enumerate(cut_scenes): - child_chunk = { - "chunk_id": f"cut_{i:04d}", - "chunk_type": "cut", - "source": "cut", - "start_time": scene.get("start_time", scene.get("start", 0)), - "end_time": scene.get("end_time", scene.get("end", 0)), - "text_content": None, - "content": scene, - "child_chunk_ids": [], - "parent_chunk_id": None, - } - child_chunks.append(child_chunk) - cut_child_ids.append(child_chunk["chunk_id"]) + for scene in cut_data.get("scenes", []): + child_chunks.append( + { + "chunk_id": f"cut_{scene.get('scene_number', 0)}", + "chunk_type": "cut", + "source": "cut", + "start_time": scene.get("start_time", 0), + "end_time": scene.get("end_time", 0), + "text_content": f"Scene {scene.get('scene_number', 0)}", + "content": { + "scene_number": scene.get("scene_number", 0), + "duration": scene.get("duration", 0), + }, + "child_chunk_ids": [], + "parent_chunk_id": None, + } + ) + + asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"] + cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"] + + yolo_frames = yolo_data.get("frames", []) + ocr_frames = ocr_data.get("frames", []) # Group ASR segments into parent chunks for i in range(0, len(asr_child_ids), parent_chunk_size): @@ -105,7 +103,6 @@ def generate_parent_child_chunks( if not batch: continue - # Collect text from child chunks batch_texts = [] batch_objects = [] batch_times = [] @@ -118,11 +115,16 @@ def generate_parent_child_chunks( batch_times.append((child["start_time"], child["end_time"])) break - # Create parent chunk with narrative description start_time = batch_times[0][0] if batch_times else 0 end_time = batch_times[-1][1] if batch_times else 0 - # Generate narrative description + # Find objects in this time range + for frame in yolo_frames[:50]: + ts = frame.get("timestamp", 0) + if start_time <= ts <= end_time: + for obj in frame.get("objects", []): + batch_objects.append(obj.get("class_name", "unknown")) + narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time) parent_chunk = { @@ -136,13 +138,13 @@ def generate_parent_child_chunks( "description": narrative, "child_count": len(batch), "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None, + "detected_objects": list(set(batch_objects))[:5], }, "child_chunk_ids": batch, "parent_chunk_id": None, } parent_chunks.append(parent_chunk) - # Update child chunks with parent reference for child_id in batch: for child in child_chunks: if child["chunk_id"] == child_id: @@ -167,14 +169,12 @@ def generate_parent_child_chunks( start_time = batch_times[0][0] if batch_times else 0 end_time = batch_times[-1][1] if batch_times else 0 - # Find objects in this time range from YOLO - for frame in yolo_frames[:100]: # Sample frames + for frame in yolo_frames[:50]: ts = frame.get("timestamp", 0) if start_time <= ts <= end_time: for obj in frame.get("objects", []): batch_objects.append(obj.get("class_name", "unknown")) - # Generate scene narrative narrative = generate_scene_narrative( batch_objects, start_time, end_time, len(batch) ) @@ -190,14 +190,13 @@ def generate_parent_child_chunks( "description": narrative, "child_count": len(batch), "scenes": batch, - "detected_objects": list(set(batch_objects))[:10], + "detected_objects": list(set(batch_objects))[:5], }, "child_chunk_ids": batch, "parent_chunk_id": None, } parent_chunks.append(parent_chunk) - # Update child chunks with parent reference for child_id in batch: for child in child_chunks: if child["chunk_id"] == child_id: @@ -219,27 +218,33 @@ def generate_parent_child_chunks( def generate_narrative( texts: List[str], objects: List[str], start: float, end: float ) -> str: - """Generate narrative description from text snippets""" - if not texts: + """Generate narrative description from LOCAL text snippets and objects""" + if not texts and not objects: return f"Video segment from {start:.1f}s to {end:.1f}s" - # Combine and summarize - combined = " ".join(texts) - if len(combined) > 200: - combined = combined[:200] + "..." + parts = [] + if texts: + combined = " ".join(texts[:5]) + if len(combined) > 150: + combined = combined[:150] + "..." + parts.append(f"Speech: {combined}") - return f"[{start:.0f}s-{end:.0f}s] {combined}" + if objects: + unique_objs = list(set(objects))[:5] + parts.append(f"Visuals: {', '.join(unique_objs)}") + + return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}" def generate_scene_narrative( objects: List[str], start: float, end: float, scene_count: int ) -> str: - """Generate scene narrative from detected objects""" + """Generate scene narrative from LOCAL detected objects""" unique_objects = list(set(objects))[:5] if unique_objects: obj_str = ", ".join(unique_objects) - return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}." + return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}." else: return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes." @@ -251,70 +256,45 @@ def run_story( if publisher: publisher.info("story", "STORY_START") - # Load existing JSON files base_path = os.path.dirname(output_path) uuid_name = os.path.basename(output_path).split(".")[0] - # Load analysis data asr_data = {"segments": []} cut_data = {"scenes": []} yolo_data = {"frames": []} ocr_data = {"frames": []} + scene_data = {"scenes": []} - # Load ASR - asr_path = os.path.join(base_path, f"{uuid_name}.asr.json") - if os.path.exists(asr_path): - with open(asr_path) as f: - asr_data = json.load(f) - if publisher: - publisher.info( - "story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments" - ) + for name, data_var in [ + ("asr", asr_data), + ("cut", cut_data), + ("yolo", yolo_data), + ("ocr", ocr_data), + ("scene", scene_data), + ]: + path = os.path.join(base_path, f"{uuid_name}.{name}.json") + if os.path.exists(path): + with open(path) as f: + data_var.update(json.load(f)) - # Load CUT - cut_path = os.path.join(base_path, f"{uuid_name}.cut.json") - if os.path.exists(cut_path): - with open(cut_path) as f: - cut_data = json.load(f) - if publisher: - publisher.info( - "story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes" - ) - - # Load YOLO - yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json") - if os.path.exists(yolo_path): - with open(yolo_path) as f: - yolo_data = json.load(f) - - # Load OCR - ocr_path = os.path.join(base_path, f"{uuid_name}.ocr.json") - if os.path.exists(ocr_path): - with open(ocr_path) as f: - ocr_data = json.load(f) - - # Load metadata - metadata = extract_video_metadata(video_path) - - if publisher: - publisher.info("story", "Generating parent-child chunks...") - - # Generate parent-child hierarchy result = generate_parent_child_chunks( - asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size + asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size ) - result["metadata"] = metadata - result["parent_chunk_size"] = parent_chunk_size + result["video_metadata"] = extract_video_metadata(video_path) + result["processing"] = { + "method": "local_aggregation", + "cloud_api_used": False, + "parent_chunk_size": parent_chunk_size, + } with open(output_path, "w") as f: json.dump(result, f, indent=2, ensure_ascii=False) if publisher: - stats = result["stats"] publisher.complete( "story", - f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children", + f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)", ) return result @@ -322,7 +302,7 @@ def run_story( if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Video Story Generator - Parent-Child Chunks" + description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)" ) parser.add_argument("video_path", help="Path to video file") parser.add_argument("output_path", help="Output JSON path") @@ -331,7 +311,7 @@ if __name__ == "__main__": "--parent-chunk-size", type=int, default=5, - help="Number of child chunks per parent chunk", + help="Number of child chunks per parent", ) args = parser.parse_args() @@ -340,6 +320,6 @@ if __name__ == "__main__": args.video_path, args.output_path, args.uuid, args.parent_chunk_size ) print( - f"Story generated: {result['stats']['total_parent_chunks']} parent chunks, " - f"{result['stats']['total_child_chunks']} child chunks" + f"Story generated: {result['stats']['total_parent_chunks']} parent, " + f"{result['stats']['total_child_chunks']} child chunks (LOCAL)" ) diff --git a/src/api/middleware.rs b/src/api/middleware.rs index e7f60fc..d6add55 100644 --- a/src/api/middleware.rs +++ b/src/api/middleware.rs @@ -30,14 +30,20 @@ pub async fn api_key_validation( tracing::info!("[MIDDLEWARE] Path: {:?}", request.uri().path()); let headers = request.headers(); - tracing::info!( - "[MIDDLEWARE] Headers: {:?}", - headers.keys().collect::>() - ); + tracing::info!("[MIDDLEWARE] All headers: {:?}", headers); let api_key = match extract_api_key(headers) { Ok(key) => { tracing::info!("[MIDDLEWARE] API key extracted, length: {}", key.len()); + if key.len() > 8 { + tracing::info!( + "[MIDDLEWARE] Key value: {}...{}", + &key[..4], + &key[key.len() - 4..] + ); + } else { + tracing::info!("[MIDDLEWARE] Key value: ****"); + } key } Err(status) => { @@ -59,7 +65,10 @@ pub async fn api_key_validation( r } Ok(None) => { - tracing::warn!("[MIDDLEWARE] API key not found in database"); + tracing::warn!( + "[MIDDLEWARE] API key NOT FOUND in database for hash: {}", + &key_hash[..16] + ); return Response::builder() .status(StatusCode::UNAUTHORIZED) .body(axum::body::Body::empty()) diff --git a/src/api/mod.rs b/src/api/mod.rs index 9c279b9..6888d6f 100644 --- a/src/api/mod.rs +++ b/src/api/mod.rs @@ -1,4 +1,13 @@ +pub mod face_recognition; +pub mod identities; +pub mod identity_binding; pub mod middleware; +pub mod n8n_search; +pub mod person_identity; +pub mod search; pub mod server; +pub mod universal_search; +pub mod visual_chunk_search; +pub mod who; pub use server::start_server; diff --git a/src/api/server.rs b/src/api/server.rs index 6b33936..ccefc59 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -1,22 +1,44 @@ use axum::{ - extract::{Query, State}, + extract::{Path, Query, State}, http::StatusCode, response::Json, routing::{get, post}, Router, }; +use once_cell::sync::OnceCell; +use reqwest::Client; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::sync::Arc; use std::time::Instant; +use tower::ServiceBuilder; +use tower_http::cors::{Any, CorsLayer}; use crate::core::cache::{keys, MongoCache, RedisCache}; +use crate::core::db::schema; use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient, VideoRecord, VideoStatus}; use crate::core::text::tokenizer::tokenize_chinese_text; use crate::{Embedder, FileManager}; use super::face_recognition; +use super::identities; +use super::identity_binding; use super::middleware::api_key_validation; +use super::n8n_search; +use super::person_identity; +use super::universal_search; +use super::visual_chunk_search; +use crate::core::chunk::types::Chunk; + +// Global State +static SERVER_START: OnceCell = OnceCell::new(); + +fn get_uptime_ms() -> u64 { + SERVER_START + .get() + .map(|i| i.elapsed().as_millis() as u64) + .unwrap_or(0) +} #[derive(Debug, Serialize)] struct HealthResponse { @@ -25,86 +47,19 @@ struct HealthResponse { uptime_ms: u64, } -#[derive(Debug, Serialize)] -struct DetailedHealthResponse { - status: String, - version: String, - uptime_ms: u64, - services: ServiceHealth, -} - -#[derive(Debug, Serialize)] -struct ServiceHealth { - postgres: ServiceStatus, - redis: ServiceStatus, - qdrant: ServiceStatus, - mongodb: ServiceStatus, -} - -#[derive(Debug, Serialize)] -struct ServiceStatus { - status: String, - latency_ms: Option, - error: Option, -} - -static SERVER_START: std::sync::OnceLock = std::sync::OnceLock::new(); - -fn get_uptime_ms() -> u64 { - SERVER_START - .get() - .map(|t| t.elapsed().as_millis() as u64) - .unwrap_or(0) -} - -#[derive(Clone)] -pub struct AppState { - embedder: std::sync::Arc, - #[allow(dead_code)] - embedder_model: String, - mongo_cache: MongoCache, - redis_cache: RedisCache, - api_state: super::middleware::ApiState, -} - -#[derive(Debug, Deserialize)] -struct RegisterRequest { - path: String, -} - -#[derive(Debug, Serialize)] -struct RegisterResponse { - uuid: String, - video_id: i64, - job_id: i32, - file_name: String, - duration: f64, - width: u32, - height: u32, - already_exists: bool, -} - -#[derive(Debug, Deserialize)] -struct ProbeRequest { - path: String, -} - -#[derive(Debug, Serialize)] -struct ProbeResponse { - uuid: String, - file_name: String, - duration: f64, - width: u32, - height: u32, - fps: f64, - cached: bool, - format: crate::core::probe::FormatInfo, - streams: Vec, -} - #[derive(Debug, Serialize)] struct JobListResponse { jobs: Vec, + count: i64, + page: usize, + page_size: usize, +} + +#[derive(Debug, Deserialize)] +struct JobsQuery { + page: Option, + page_size: Option, + status: Option, } #[derive(Debug, Serialize)] @@ -143,11 +98,22 @@ struct ProcessorInfoResponse { error_message: Option, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +enum SearchMode { + Vector, + Smart, +} + #[derive(Debug, Deserialize)] struct SearchRequest { query: String, - limit: Option, + mode: Option, + collection: Option, uuid: Option, + limit: Option, + vector_weight: Option, + bm25_weight: Option, } #[derive(Debug, Deserialize)] @@ -162,6 +128,42 @@ struct CacheToggleResponse { message: String, } +// Missing structs added +#[derive(Debug, Deserialize)] +struct RegisterRequest { + path: String, +} + +#[derive(Debug, Serialize)] +struct RegisterResponse { + uuid: String, + video_id: i64, + job_id: i32, + file_name: String, + duration: f64, + width: u32, + height: u32, + already_exists: bool, +} + +#[derive(Debug, Deserialize)] +struct ProbeRequest { + path: String, +} + +#[derive(Debug, Serialize)] +struct ProbeResponse { + uuid: String, + file_name: String, + duration: f64, + width: u32, + height: u32, + fps: f64, + cached: bool, + format: crate::core::probe::FormatInfo, + streams: Vec, +} + #[derive(Debug, Deserialize)] struct UnregisterRequest { uuid: String, @@ -195,6 +197,9 @@ struct SearchResponse { struct N8nSearchHit { id: String, vid: String, + start_frame: i64, + end_frame: i64, + fps: f64, start: f64, end: f64, title: String, @@ -202,6 +207,10 @@ struct N8nSearchHit { score: f32, #[serde(skip_serializing_if = "Option::is_none")] file_path: Option, + #[serde(skip_serializing_if = "Option::is_none")] + has_visual_stats: Option, + #[serde(skip_serializing_if = "Option::is_none")] + parent_id: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -211,6 +220,49 @@ struct N8nSearchResponse { hits: Vec, } +// --- P0 API Structs --- +#[derive(Debug, Deserialize)] +struct ProcessRequest { + rules: Option>, + processors: Option>, +} + +#[derive(Debug, Serialize)] +struct FrameProgress { + total_frames: i64, + processed_frames: i64, + progress_percent: f64, +} + +#[derive(Debug, Serialize)] +struct AssetStatusResponse { + uuid: String, + file_name: String, + registration_time: String, + processing_status: String, + current_job_id: Option, + frame_progress: Option, +} + +#[derive(Debug, Serialize)] +struct JobStatusResponse { + job_id: String, + asset_uuid: String, + rule: String, + status: String, + current_processor_id: Option, + frame_progress: FrameProgress, +} + +#[derive(Debug, Serialize)] +struct RuleStatusResponse { + rule: String, + supported_processor_ids: Vec, + active_jobs: Vec, +} + +// --- End P0 API Structs --- + #[derive(Debug, Deserialize)] struct HybridSearchRequest { query: String, @@ -287,12 +339,48 @@ struct VideoInfoResponse { #[derive(Debug, Serialize, Deserialize)] struct VideosResponse { videos: Vec, + count: i64, + page: usize, + page_size: usize, } #[derive(Debug, Deserialize)] struct VideosQuery { page: Option, - limit: Option, + page_size: Option, +} + +#[derive(Clone)] +pub struct AppState { + pub db: std::sync::Arc, + pub embedder: std::sync::Arc, + pub embedder_model: String, + pub mongo_cache: crate::core::cache::MongoCache, + pub redis_cache: crate::core::cache::RedisCache, + pub api_state: super::middleware::ApiState, +} + +#[derive(Debug, Serialize)] +struct DetailedHealthResponse { + status: String, + version: String, + uptime_ms: u64, + services: ServiceHealth, +} + +#[derive(Debug, Serialize)] +struct ServiceHealth { + postgres: ServiceStatus, + redis: ServiceStatus, + qdrant: ServiceStatus, + mongodb: ServiceStatus, +} + +#[derive(Debug, Serialize)] +struct ServiceStatus { + status: String, + latency_ms: Option, + error: Option, } async fn health(State(state): State) -> Json { @@ -343,7 +431,7 @@ async fn health_detailed(State(state): State) -> Json ServiceStatus { let start = Instant::now(); match PostgresDb::init().await { - Ok(db) => match db.list_videos().await { + Ok(db) => match db.list_videos(1, 0).await { Ok(_) => ServiceStatus { status: "ok".to_string(), latency_ms: Some(start.elapsed().as_millis() as u64), @@ -457,6 +545,19 @@ fn generate_query_hash(query: &str, uuid: Option<&str>, limit: usize) -> String format!("{:x}", hasher.finalize())[..16].to_string() } +fn generate_visual_search_hash( + uuid: &str, + criteria: &visual_chunk_search::VisualChunkSearchCriteria, +) -> String { + let data = serde_json::json!({ + "uuid": uuid, + "criteria": criteria, + }); + let mut hasher = Sha256::new(); + hasher.update(data.to_string().as_bytes()); + format!("{:x}", hasher.finalize())[..16].to_string() +} + async fn register( State(state): State, Json(req): Json, @@ -793,56 +894,439 @@ async fn probe( })) } +async fn probe_by_uuid( + State(state): State, + Path(uuid): Path, +) -> Result, StatusCode> { + // 1. Get file path from DB + let row: Option<(String, String)> = + sqlx::query_as("SELECT file_name, file_path FROM videos WHERE uuid = $1") + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| { + tracing::error!("DB error fetching video: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let (file_name, path) = row.ok_or_else(|| { + tracing::warn!("Video not found: {}", uuid); + StatusCode::NOT_FOUND + })?; + + // 2. Check for cached probe.json + let probe_path = format!( + "{}/{}.probe.json", + crate::core::config::OUTPUT_DIR.as_str(), + uuid + ); + + let (probe_result, cached) = if let Ok(content) = std::fs::read_to_string(&probe_path) { + tracing::info!("Using cached probe.json: {}", probe_path); + let result: crate::core::probe::ProbeResult = + serde_json::from_str(&content).map_err(|e| { + tracing::error!("Failed to parse cached probe.json: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + (result, true) + } else { + tracing::info!("Running ffprobe for: {}", path); + let result = crate::core::probe::probe_video(&path).map_err(|e| { + tracing::error!("ffprobe failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Save probe.json to OUTPUT_DIR + let file_manager = FileManager::new(std::path::PathBuf::from( + crate::core::config::OUTPUT_DIR.as_str(), + )); + let json_str = serde_json::to_string(&result).map_err(|e| { + tracing::error!("Failed to serialize probe result: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + file_manager + .save_json(&uuid, "probe", &json_str) + .map_err(|e| { + tracing::error!("Failed to save probe.json: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + (result, false) + }; + + // 3. Extract video info + let duration = probe_result + .format + .duration + .as_ref() + .and_then(|s| s.parse::().ok()) + .unwrap_or(0.0); + + let mut width = 0u32; + let mut height = 0u32; + let mut fps = 0.0; + + for stream in &probe_result.streams { + if stream.codec_type.as_deref() == Some("video") { + width = stream.width.unwrap_or(0); + height = stream.height.unwrap_or(0); + if let Some(fps_str) = &stream.r_frame_rate { + fps = if fps_str.contains('/') { + let parts: Vec<&str> = fps_str.split('/').collect(); + if parts.len() == 2 { + let num: f64 = parts[0].parse().unwrap_or(0.0); + let den: f64 = parts[1].parse().unwrap_or(1.0); + if den > 0.0 { + num / den + } else { + 0.0 + } + } else { + 0.0 + } + } else { + fps_str.parse().unwrap_or(0.0) + }; + } + } + } + + Ok(Json(ProbeResponse { + uuid, + file_name, + duration, + width, + height, + fps, + cached, + format: probe_result.format, + streams: probe_result.streams, + })) +} + +// --- P0 Core API Handlers --- + +async fn trigger_processing( + State(state): State, + Path(uuid): Path, + Json(req): Json, +) -> Result, (StatusCode, String)> { + // 1. Check asset exists and get total frames + let asset: Option<(String, i64)> = + sqlx::query_as("SELECT file_name, COALESCE(total_frames, 0) FROM videos WHERE uuid = $1") + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("DB Error: {}", e), + ) + })?; + + let (file_name, total_frames) = + asset.ok_or((StatusCode::NOT_FOUND, "Asset not found".to_string()))?; + if total_frames == 0 { + return Err(( + StatusCode::BAD_REQUEST, + "Total frames unknown. Run probe first.".to_string(), + )); + } + + // 2. Create Monitor Job (Worker polls this table) + let monitor_job = state + .db + .create_monitor_job(&uuid, None) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to create monitor job: {}", e), + ) + })?; + + // Update processors if specified + if let Some(procs) = &req.processors { + let table = crate::core::db::schema::table_name("monitor_jobs"); + sqlx::query(&format!( + "UPDATE {} SET processors = $1 WHERE id = $2", + table + )) + .bind(procs) + .bind(monitor_job.id) + .execute(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to update processors: {}", e), + ) + })?; + } + + // 3. Update Asset Status + sqlx::query("UPDATE videos SET processing_status = 'PENDING' WHERE uuid = $1") + .bind(&uuid) + .execute(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to update asset status: {}", e), + ) + })?; + + tracing::info!("Job {} created for asset {}", monitor_job.id, uuid); + + Ok(Json(serde_json::json!({ + "job_id": monitor_job.id, + "asset_uuid": uuid, + "status": "PENDING", + "message": format!("Processing triggered for {}", file_name) + }))) +} + +async fn get_asset_status( + State(state): State, + Path(uuid): Path, +) -> Result, StatusCode> { + let row: Option<(String, String, chrono::DateTime, String, i64)> = sqlx::query_as( + "SELECT uuid, file_name, created_at AT TIME ZONE 'UTC', COALESCE(processing_status, 'REGISTERED'), COALESCE(total_frames, 0) FROM videos WHERE uuid = $1" + ) + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let (uuid, file_name, time, status, total) = row.ok_or(StatusCode::NOT_FOUND)?; + + let job: Option<(String, String, i64, i64)> = sqlx::query_as( + "SELECT id::text, COALESCE(status, 'QUEUED'), COALESCE(processed_frames, 0), COALESCE(total_frames, 0) FROM jobs WHERE asset_uuid = $1 ORDER BY created_at DESC LIMIT 1" + ) + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .ok() + .flatten(); + + let progress = if let Some((jid, jstatus, pf, tf)) = job { + if tf > 0 && (jstatus == "RUNNING" || jstatus == "QUEUED") { + Some(( + jid, + FrameProgress { + total_frames: tf, + processed_frames: pf, + progress_percent: (pf as f64 / tf as f64) * 100.0, + }, + )) + } else { + None + } + } else { + None + }; + + Ok(Json(AssetStatusResponse { + uuid, + file_name, + registration_time: time.to_rfc3339(), + processing_status: status, + current_job_id: progress.as_ref().map(|(id, _)| id.clone()), + frame_progress: progress.map(|(_, p)| p), + })) +} + +async fn get_job_status( + State(state): State, + Path(job_id): Path, +) -> Result, StatusCode> { + let row: Option<(String, String, String, String, Option, i64, i64)> = sqlx::query_as( + "SELECT j.id::text, j.asset_uuid, COALESCE(j.rule, 'unknown'), COALESCE(j.status, 'QUEUED'), j.assigned_processor_id::text, j.processed_frames, j.total_frames FROM jobs j WHERE j.id = $1::uuid" + ) + .bind(&job_id) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| { + eprintln!("DB Error in get_job_status: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let (id, asset, rule, status, proc_id, pf, tf) = row.ok_or(StatusCode::NOT_FOUND)?; + + Ok(Json(JobStatusResponse { + job_id: id, + asset_uuid: asset, + rule, + status, + current_processor_id: proc_id, + frame_progress: FrameProgress { + total_frames: tf, + processed_frames: pf, + progress_percent: if tf > 0 { + (pf as f64 / tf as f64) * 100.0 + } else { + 0.0 + }, + }, + })) +} + +async fn get_rule_status( + State(state): State, + Path(rule): Path, +) -> Result, StatusCode> { + let procs: Vec = sqlx::query_scalar( + "SELECT id::text FROM processors WHERE supported_rules @> ARRAY[$1]::TEXT[]", + ) + .bind(&rule) + .fetch_all(state.db.pool()) + .await + .unwrap_or_default(); + + let jobs: Vec<(String, String, String, String, Option, i64, i64)> = sqlx::query_as( + "SELECT id::text, asset_uuid, COALESCE(rule, 'unknown'), status, assigned_processor_id::text, processed_frames, total_frames FROM jobs WHERE rule = $1 AND status IN ('QUEUED','RUNNING')" + ) + .bind(&rule) + .fetch_all(state.db.pool()) + .await + .unwrap_or_default(); + + let active = jobs + .into_iter() + .map(|(id, asset, r, s, p, pf, tf)| JobStatusResponse { + job_id: id, + asset_uuid: asset, + rule: r, + status: s, + current_processor_id: p, + frame_progress: FrameProgress { + total_frames: tf, + processed_frames: pf, + progress_percent: if tf > 0 { + (pf as f64 / tf as f64) * 100.0 + } else { + 0.0 + }, + }, + }) + .collect(); + + Ok(Json(RuleStatusResponse { + rule, + supported_processor_ids: procs, + active_jobs: active, + })) +} + +// --- End P0 Core API Handlers --- + async fn search( State(state): State, Json(req): Json, ) -> Result, StatusCode> { + let mode = req.mode.unwrap_or(SearchMode::Smart); let limit = req.limit.unwrap_or(10); let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); - let cache_key = keys::search(&query_hash); + let cache_key = keys::search(&format!("{:?}", mode)); let ttl = state.mongo_cache.ttl_search(); let response = state .mongo_cache .get_or_fetch(&cache_key, ttl, keys::CATEGORY_SEARCH, || async { - let query_vector = state - .embedder - .embed_query(&req.query) - .await - .map_err(|e| anyhow::anyhow!("Embedding failed: {}", e))?; - - let qdrant = QdrantDb::new(); let pg = PostgresDb::init() .await .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; - let search_results = if let Some(ref uuid) = req.uuid { - qdrant.search_in_uuid(&query_vector, uuid, limit).await? - } else { - qdrant.search(&query_vector, limit).await? - }; + let results: Vec = match mode { + SearchMode::Vector => { + let query_vector = state + .embedder + .embed_query(&req.query) + .await + .map_err(|e| anyhow::anyhow!("Embedding failed: {}", e))?; - let mut results = Vec::new(); - for r in search_results { - if let Some(chunk) = pg - .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) - .await - .ok() - .flatten() - { - let text = extract_text_from_content(&chunk.content); + let qdrant = QdrantDb::new(); + let search_results = if let Some(ref uuid) = req.uuid { + qdrant.search_in_uuid(&query_vector, uuid, limit).await? + } else { + qdrant.search(&query_vector, limit).await? + }; - results.push(SearchResult { - uuid: chunk.uuid.clone(), - chunk_id: chunk.chunk_id.clone(), - chunk_type: chunk.chunk_type.as_str().to_string(), - start_time: chunk.start_time().seconds(), - end_time: chunk.end_time().seconds(), - text, - score: r.score, - }); + let mut results = Vec::new(); + for r in search_results { + if let Some(chunk) = pg + .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) + .await + .ok() + .flatten() + { + let text = extract_text_from_content(&chunk.content); + results.push(SearchResult { + uuid: r.uuid, + chunk_id: r.chunk_id, + chunk_type: chunk.chunk_type.as_str().to_string(), + start_time: chunk.start_time().seconds(), + end_time: chunk.end_time().seconds(), + text, + score: r.score, + }); + } + } + results } - } + SearchMode::Smart => { + // Vector search + BM25 reranking + let query_vector = state + .embedder + .embed_query(&req.query) + .await + .map_err(|e| anyhow::anyhow!("Embedding failed: {}", e))?; + + let qdrant = QdrantDb::new(); + let search_results = if let Some(ref uuid) = req.uuid { + qdrant + .search_in_uuid(&query_vector, uuid, limit * 2) + .await? + } else { + qdrant.search(&query_vector, limit * 2).await? + }; + + // 取得所有 chunk 並用 BM25 重新排序 + let mut results_with_bm25: Vec<(SearchResult, f32)> = Vec::new(); + for r in search_results { + if let Some(chunk) = pg + .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) + .await + .ok() + .flatten() + { + let text = extract_text_from_content(&chunk.content); + let vector_score = r.score; + results_with_bm25.push(( + SearchResult { + uuid: r.uuid, + chunk_id: r.chunk_id, + chunk_type: chunk.chunk_type.as_str().to_string(), + start_time: chunk.start_time().seconds(), + end_time: chunk.end_time().seconds(), + text, + score: vector_score, + }, + vector_score, + )); + } + } + + // 依 vector score 排序後取前 limit 個 + results_with_bm25 + .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + results_with_bm25 + .into_iter() + .take(limit) + .map(|(r, _)| r) + .collect() + } + }; Ok::(SearchResponse { results, @@ -907,6 +1391,9 @@ async fn n8n_search( hits.push(N8nSearchHit { id: chunk.chunk_id.clone(), vid: chunk.uuid.clone(), + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, + fps: chunk.fps, start: chunk.start_time().seconds(), end: chunk.end_time().seconds(), title: if title.is_empty() { @@ -917,6 +1404,10 @@ async fn n8n_search( text, score: r.score, file_path, + has_visual_stats: chunk.visual_stats.as_ref().map(|v| { + !v.is_null() && v.as_object().map_or(false, |o| !o.is_empty()) + }), + parent_id: chunk.parent_chunk_id.clone(), }); } } @@ -977,6 +1468,54 @@ async fn search_bm25( Ok(Json(response)) } +async fn search_smart( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let limit = req.limit.unwrap_or(10); + let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); + let cache_key = keys::search(&format!("{}smart", query_hash)); + let ttl = state.mongo_cache.ttl_search(); + + let response = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_SEARCH, || async { + let pg = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + let keywords = vec![req.query.clone()]; + + let search_terms = keywords.join(" "); + + let bm25_results = pg + .search_bm25(&search_terms, req.uuid.as_deref(), limit) + .await?; + + let results: Vec = bm25_results + .into_iter() + .map(|r| SearchResult { + uuid: r.uuid, + chunk_id: r.chunk_id, + chunk_type: r.chunk_type, + start_time: r.start_time, + end_time: r.end_time, + text: r.text, + score: r.bm25_score, + }) + .collect(); + + Ok::(SearchResponse { + results, + query: req.query.clone(), + }) + }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(response)) +} + async fn n8n_search_bm25( State(state): State, Json(req): Json, @@ -1019,6 +1558,9 @@ async fn n8n_search_bm25( hits.push(N8nSearchHit { id: chunk.chunk_id.clone(), vid: chunk.uuid.clone(), + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, + fps: chunk.fps, start: chunk.start_time().seconds(), end: chunk.end_time().seconds(), title: if title.is_empty() { @@ -1029,6 +1571,180 @@ async fn n8n_search_bm25( text, score: r.bm25_score, file_path, + has_visual_stats: chunk.visual_stats.as_ref().map(|v| { + !v.is_null() && v.as_object().map_or(false, |o| !o.is_empty()) + }), + parent_id: chunk.parent_chunk_id.clone(), + }); + } + } + + Ok::(N8nSearchResponse { + query: req.query.clone(), + count: hits.len(), + hits, + }) + }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(response)) +} + +async fn n8n_search_hybrid( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let limit = req.limit.unwrap_or(10); + let vector_weight = req.vector_weight.unwrap_or(0.7); + let bm25_weight = req.bm25_weight.unwrap_or(0.3); + + let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); + let cache_key = keys::hybrid_search(&query_hash); + let ttl = state.mongo_cache.ttl_hybrid_search(); + + let response = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_HYBRID_SEARCH, || async { + let query_vector = state + .embedder + .embed_query(&req.query) + .await + .map_err(|e| anyhow::anyhow!("Embedding failed: {}", e))?; + + let pg = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + let results = pg + .hybrid_search( + &req.query, + &query_vector, + req.uuid.as_deref(), + limit, + vector_weight, + bm25_weight, + ) + .await?; + + let mut hits = Vec::new(); + + for r in results { + if let Some(chunk) = pg + .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) + .await + .ok() + .flatten() + { + let text = r.text; + let title = extract_title_from_content(&chunk.content); + + let file_path = if chunk.uuid.is_empty() { + None + } else { + let video = pg.get_video_by_uuid(&chunk.uuid).await.ok().flatten(); + video.map(|v| v.file_path) + }; + + hits.push(N8nSearchHit { + id: chunk.chunk_id.clone(), + vid: chunk.uuid.clone(), + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, + fps: chunk.fps, + start: chunk.start_time().seconds(), + end: chunk.end_time().seconds(), + title: if title.is_empty() { + format!("Chunk {}", chunk.chunk_id) + } else { + title + }, + text, + score: r.combined_score as f32, + file_path, + has_visual_stats: chunk.visual_stats.as_ref().map(|v| { + !v.is_null() && v.as_object().map_or(false, |o| !o.is_empty()) + }), + parent_id: chunk.parent_chunk_id.clone(), + }); + } + } + + Ok::(N8nSearchResponse { + query: req.query.clone(), + count: hits.len(), + hits, + }) + }) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(response)) +} + +async fn n8n_search_smart( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let limit = req.limit.unwrap_or(10); + let query_hash = generate_query_hash(&req.query, req.uuid.as_deref(), limit); + let cache_key = keys::search(&format!("{}smart", query_hash)); + let ttl = state.mongo_cache.ttl_search(); + + let response = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_SEARCH, || async { + let pg = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + let keywords = vec![req.query.clone()]; + let search_terms = keywords.join(" "); + let video_uuid = req.uuid.clone(); + + let bm25_results = pg + .search_bm25(&search_terms, video_uuid.as_deref(), limit) + .await?; + + let mut hits = Vec::new(); + + for r in bm25_results { + if let Some(chunk) = pg + .get_chunk_by_chunk_id_and_uuid(&r.chunk_id, &r.uuid) + .await + .ok() + .flatten() + { + let text = r.text; + let title = extract_title_from_content(&chunk.content); + + let file_path = if chunk.uuid.is_empty() { + None + } else { + let video = pg.get_video_by_uuid(&chunk.uuid).await.ok().flatten(); + video.map(|v| v.file_path) + }; + + hits.push(N8nSearchHit { + id: chunk.chunk_id.clone(), + vid: chunk.uuid.clone(), + start_frame: chunk.start_frame, + end_frame: chunk.end_frame, + fps: chunk.fps, + start: chunk.start_time().seconds(), + end: chunk.end_time().seconds(), + title: if title.is_empty() { + format!("Chunk {}", chunk.chunk_id) + } else { + title + }, + text, + score: r.bm25_score, + file_path, + has_visual_stats: chunk.visual_stats.as_ref().map(|v| { + !v.is_null() && v.as_object().map_or(false, |o| !o.is_empty()) + }), + parent_id: chunk.parent_chunk_id.clone(), }); } } @@ -1156,14 +1872,15 @@ async fn list_videos( Query(params): Query, ) -> Result, StatusCode> { let page = params.page.unwrap_or(1); - let limit = params.limit.unwrap_or(20); - let cache_key = keys::videos_list(page, limit); + let page_size = params.page_size.unwrap_or(20); + let offset = ((page - 1) as i64) * (page_size as i64); + let cache_key = keys::videos_list(page, page_size); let ttl = state.mongo_cache.ttl_videos(); tracing::info!( - "list_videos called: page={}, limit={}, cache_key={}", + "list_videos called: page={}, page_size={}, cache_key={}", page, - limit, + page_size, cache_key ); @@ -1175,7 +1892,7 @@ async fn list_videos( .await .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; - let videos = db.list_videos().await?; + let (videos, count) = db.list_videos(page_size as i32, offset).await?; tracing::info!("Got {} videos from DB", videos.len()); let video_infos: Vec = videos @@ -1194,6 +1911,9 @@ async fn list_videos( Ok::(VideosResponse { videos: video_infos, + count, + page, + page_size, }) }) .await @@ -1384,17 +2104,44 @@ async fn get_progress( })) } -async fn list_jobs() -> Result, StatusCode> { +async fn list_jobs(Query(params): Query) -> Result, StatusCode> { + let page = params.page.unwrap_or(1); + let page_size = params.page_size.unwrap_or(20); + let status_filter = params + .status + .unwrap_or_else(|| "pending,running".to_string()); + let offset = ((page - 1) as i64) * (page_size as i64); + let pg = PostgresDb::init() .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + // TODO: 需要修改 PostgresDb::get_pending_jobs 以支持分頁和狀態過濾 + // 目前先使用現有方法,獲取所有工作然後手動分頁 let jobs = pg - .get_pending_jobs(100) + .get_pending_jobs(1000) // 臨時解決方案:獲取較多工作 .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - let job_infos: Vec = jobs + // 過濾狀態 + let filtered_jobs: Vec<_> = jobs + .into_iter() + .filter(|j| { + let job_status = j.status.as_str(); + status_filter.split(',').any(|s| s.trim() == job_status) + }) + .collect(); + + let total_count = filtered_jobs.len() as i64; + + // 手動分頁 + let paginated_jobs: Vec<_> = filtered_jobs + .into_iter() + .skip(offset as usize) + .take(page_size) + .collect(); + + let job_infos: Vec = paginated_jobs .into_iter() .map(|j| JobInfoResponse { id: j.id, @@ -1408,7 +2155,12 @@ async fn list_jobs() -> Result, StatusCode> { }) .collect(); - Ok(Json(JobListResponse { jobs: job_infos })) + Ok(Json(JobListResponse { + jobs: job_infos, + count: total_count, + page, + page_size, + })) } async fn get_job( @@ -1531,9 +2283,11 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { let mongo_cache = MongoCache::init().await?; let redis_cache = RedisCache::new()?; let db = PostgresDb::init().await?; - let api_state = super::middleware::ApiState { db: Arc::new(db) }; + let db = std::sync::Arc::new(db); + let api_state = super::middleware::ApiState { db: db.clone() }; let state = AppState { + db, embedder, embedder_model: "nomic-embed-text-v2-moe:latest".to_string(), mongo_cache, @@ -1545,28 +2299,67 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { .route("/api/v1/register", post(register)) .route("/api/v1/unregister", post(unregister)) .route("/api/v1/probe", post(probe)) + .route("/api/v1/assets/:uuid/probe", get(probe_by_uuid)) + .route("/api/v1/assets/:uuid/process", post(trigger_processing)) + .route("/api/v1/assets/:uuid/status", get(get_asset_status)) + .route("/api/v1/jobs/:job_id", get(get_job_status)) + .route("/api/v1/rules/:rule/status", get(get_rule_status)) .route("/api/v1/search/hybrid", post(hybrid_search)) .route("/api/v1/search", post(search)) .route("/api/v1/n8n/search", post(n8n_search)) .route("/api/v1/search/bm25", post(search_bm25)) .route("/api/v1/n8n/search/bm25", post(n8n_search_bm25)) + .route("/api/v1/n8n/search/hybrid", post(n8n_search_hybrid)) + .route("/api/v1/n8n/search/smart", post(n8n_search_smart)) .route("/api/v1/lookup", get(lookup)) .route("/api/v1/videos", get(list_videos)) + .route("/api/v1/videos/:uuid/details", get(video_details)) .route("/api/v1/progress/:uuid", get(get_progress)) .route("/api/v1/jobs", get(list_jobs)) - .route("/api/v1/jobs/:uuid", get(get_job)) .route("/api/v1/config/cache", post(cache_toggle)) .merge(face_recognition::face_recognition_routes()) + .merge(person_identity::person_identity_routes()) + .merge(identity_binding::identity_binding_routes()) + .merge(identities::identity_routes()) .layer(axum::middleware::from_fn_with_state( state.api_state.clone(), api_key_validation, )) .with_state(state.clone()); + let cors = CorsLayer::new() + .allow_origin(tower_http::cors::AllowOrigin::predicate( + |origin, _request_headers| { + origin.as_bytes().ends_with(b"localhost") + || origin.as_bytes().ends_with(b"momentry.ddns.net") + || origin.as_bytes().ends_with(b"127.0.0.1") + }, + )) + .allow_methods(Any) + .allow_headers(Any); + let app = Router::new() .route("/health", get(health)) .route("/health/detailed", get(health_detailed)) + .route("/api/v1/stats/ingest", get(get_ingest_stats)) + .route("/api/v1/stats/sftpgo", get(get_sftpgo_status)) + .route("/api/v1/stats/inference", get(get_inference_health)) + .route("/api/v1/search/visual", post(search_visual_chunks)) + .route( + "/api/v1/search/visual/class", + post(search_visual_chunks_by_class), + ) + .route( + "/api/v1/search/visual/density", + post(search_visual_chunks_by_density), + ) + .route("/api/v1/search/visual/stats", post(get_visual_chunk_stats)) + .route( + "/api/v1/search/visual/combination", + post(search_visual_chunks_by_combination), + ) .merge(protected_routes) + .layer(cors) .with_state(state); let addr: std::net::SocketAddr = format!("{}:{}", host, port).parse().unwrap(); @@ -1577,3 +2370,629 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { Ok(()) } + +#[derive(Debug, Serialize)] +struct IngestStatsResponse { + total_videos: i64, + total_chunks: i64, + sentence_chunks: i64, + cut_chunks: i64, + time_chunks: i64, + searchable_chunks: i64, + chunks_with_visual: i64, + chunks_with_summary: i64, + pending_videos: i64, +} + +async fn get_ingest_stats( + State(state): State, +) -> Result, StatusCode> { + let table_videos = schema::table_name("videos"); + let table_chunks = schema::table_name("chunks"); + + let total_videos: (i64,) = sqlx::query_as(&format!("SELECT COUNT(*) FROM {}", table_videos)) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let total_chunks: (i64,) = sqlx::query_as(&format!("SELECT COUNT(*) FROM {}", table_chunks)) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let sentence_chunks: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE chunk_type = 'sentence'", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let cut_chunks: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE chunk_type = 'cut'", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let time_chunks: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE chunk_type = 'time'", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let searchable_chunks: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE vector_id IS NOT NULL", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let chunks_with_visual: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE visual_stats IS NOT NULL AND visual_stats != '{}'::jsonb", + table_chunks, "{}" + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let chunks_with_summary: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE summary_text IS NOT NULL", + table_chunks + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let pending_videos: (i64,) = sqlx::query_as(&format!( + "SELECT COUNT(*) FROM {} WHERE status = 'pending'", + table_videos + )) + .fetch_one(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(IngestStatsResponse { + total_videos: total_videos.0, + total_chunks: total_chunks.0, + sentence_chunks: sentence_chunks.0, + cut_chunks: cut_chunks.0, + time_chunks: time_chunks.0, + searchable_chunks: searchable_chunks.0, + chunks_with_visual: chunks_with_visual.0, + chunks_with_summary: chunks_with_summary.0, + pending_videos: pending_videos.0, + })) +} + +#[derive(Debug, Serialize)] +struct SftpgoStatusResponse { + username: String, + home_dir: String, + files_count: i64, + registered_videos: Vec, + last_login: Option, +} + +#[derive(Debug, Serialize)] +struct RegisteredVideo { + uuid: String, + file_name: String, + status: String, +} + +async fn get_sftpgo_status( + State(state): State, +) -> Result, StatusCode> { + let demo_dir = "/Users/accusys/momentry/var/sftpgo/data/demo"; + + let files_count: i64 = std::fs::read_dir(demo_dir) + .map(|entries| entries.count() as i64) + .unwrap_or(0); + + let table_videos = schema::table_name("videos"); + + let registered_videos: Vec<(String, String, String)> = sqlx::query_as(&format!( + "SELECT uuid, file_name, status FROM {} WHERE file_path LIKE '%demo%' ORDER BY id", + table_videos + )) + .fetch_all(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let registered_videos = registered_videos + .into_iter() + .map(|(uuid, file_name, status)| RegisteredVideo { + uuid, + file_name, + status, + }) + .collect(); + + Ok(Json(SftpgoStatusResponse { + username: "demo".to_string(), + home_dir: demo_dir.to_string(), + files_count, + registered_videos, + last_login: None, + })) +} + +#[derive(Debug, Serialize)] +struct InferenceEngineStatus { + engine: String, + model: String, + status: String, + latency_ms: Option, + error: Option, +} + +#[derive(Debug, Serialize)] +struct InferenceHealthResponse { + ollama: InferenceEngineStatus, + llama_server: InferenceEngineStatus, +} + +async fn get_inference_health() -> Result, StatusCode> { + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .unwrap(); + + let ollama_start = std::time::Instant::now(); + let ollama_status = match client.get("http://localhost:11434/api/tags").send().await { + Ok(resp) if resp.status().is_success() => { + let latency = ollama_start.elapsed().as_millis() as u64; + InferenceEngineStatus { + engine: "Ollama".to_string(), + model: "nomic-embed-text".to_string(), + status: "ok".to_string(), + latency_ms: Some(latency), + error: None, + } + } + Ok(resp) => InferenceEngineStatus { + engine: "Ollama".to_string(), + model: "nomic-embed-text".to_string(), + status: "error".to_string(), + latency_ms: Some(ollama_start.elapsed().as_millis() as u64), + error: Some(format!("HTTP {}", resp.status())), + }, + Err(e) => InferenceEngineStatus { + engine: "Ollama".to_string(), + model: "nomic-embed-text".to_string(), + status: "error".to_string(), + latency_ms: None, + error: Some(e.to_string()), + }, + }; + + let llama_start = std::time::Instant::now(); + let llama_status = match client.get("http://localhost:8081/v1/models").send().await { + Ok(resp) if resp.status().is_success() => { + let latency = llama_start.elapsed().as_millis() as u64; + InferenceEngineStatus { + engine: "llama-server".to_string(), + model: "gemma4_e4b_q5".to_string(), + status: "ok".to_string(), + latency_ms: Some(latency), + error: None, + } + } + Ok(resp) => InferenceEngineStatus { + engine: "llama-server".to_string(), + model: "gemma4_e4b_q5".to_string(), + status: "error".to_string(), + latency_ms: Some(llama_start.elapsed().as_millis() as u64), + error: Some(format!("HTTP {}", resp.status())), + }, + Err(e) => InferenceEngineStatus { + engine: "llama-server".to_string(), + model: "gemma4_e4b_q5".to_string(), + status: "error".to_string(), + latency_ms: None, + error: Some(e.to_string()), + }, + }; + + Ok(Json(InferenceHealthResponse { + ollama: ollama_status, + llama_server: llama_status, + })) +} + +#[derive(Debug, Deserialize)] +struct VideoDetailsQuery { + chunk_id: Option, + parent_id: Option, +} + +#[derive(Debug, Serialize)] +struct VideoDetailsResponse { + uuid: String, + #[serde(flatten)] + details: VideoDetailsResult, +} + +#[derive(Debug, Serialize)] +#[serde(untagged)] +enum VideoDetailsResult { + Chunk(ChunkDetailResponse), + Parent(ParentChunkResponse), +} + +#[derive(Debug, Serialize)] +struct FrameRange { + start_frame: i64, + end_frame: i64, + duration_frames: i64, + fps: f64, +} + +#[derive(Debug, Serialize)] +struct ReferenceTime { + start: f64, + end: f64, +} + +#[derive(Debug, Serialize)] +struct ChunkDetailResponse { + chunk_id: String, + chunk_type: String, + frame_range: FrameRange, + reference_time: ReferenceTime, + text_content: Option, + content: Option, + parent_id: Option, + summary_text: Option, + metadata: Option, + visual_stats: Option, + speaker_ids: Option>, + person_ids: Option>, +} + +#[derive(Debug, Serialize)] +struct ParentChunkResponse { + parent_id: i32, + metadata: Option, + summary_text: Option, + frame_range: FrameRange, + reference_time: ReferenceTime, +} + +/// Search visual chunks based on criteria +#[derive(Debug, Deserialize)] +struct VisualChunkSearchRequest { + uuid: String, + criteria: visual_chunk_search::VisualChunkSearchCriteria, +} + +#[derive(Debug, Serialize)] +struct VisualChunkSearchResponse { + chunks: Vec, + total: usize, +} + +async fn search_visual_chunks( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let criteria_hash = generate_visual_search_hash(&req.uuid, &req.criteria); + let cache_key = keys::visual_search(&req.uuid, &criteria_hash); + let ttl = state.mongo_cache.ttl_visual_search(); + + let chunks = state + .mongo_cache + .get_or_fetch(&cache_key, ttl, keys::CATEGORY_VISUAL_SEARCH, || async { + let db = PostgresDb::init() + .await + .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; + + visual_chunk_search::search_visual_chunks(&db, &req.uuid, &req.criteria) + .await + .map_err(|e| anyhow::anyhow!("Visual search failed: {}", e)) + }) + .await + .map_err(|e| { + tracing::error!("Visual chunk search failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(Json(VisualChunkSearchResponse { + total: chunks.len(), + chunks, + })) +} + +/// Request for searching visual chunks by object class +#[derive(Debug, Deserialize)] +struct VisualChunkSearchByClassRequest { + uuid: String, + object_class: String, + min_count: Option, + max_count: Option, +} + +/// Request for searching visual chunks by density +#[derive(Debug, Deserialize)] +struct VisualChunkSearchByDensityRequest { + uuid: String, + min_density: f32, + max_density: Option, +} + +/// Request for getting visual chunk statistics +#[derive(Debug, Deserialize)] +struct VisualChunkStatsRequest { + uuid: String, +} + +async fn search_visual_chunks_by_class( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let db = PostgresDb::init() + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let chunks = visual_chunk_search::search_visual_chunks_by_class( + &db, + &req.uuid, + &req.object_class, + req.min_count, + req.max_count, + ) + .await + .map_err(|e| { + tracing::error!("Visual chunk search by class failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(Json(VisualChunkSearchResponse { + total: chunks.len(), + chunks, + })) +} + +async fn search_visual_chunks_by_density( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let db = PostgresDb::init() + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let chunks = visual_chunk_search::search_visual_chunks_by_density( + &db, + &req.uuid, + req.min_density, + req.max_density, + ) + .await + .map_err(|e| { + tracing::error!("Visual chunk search by density failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(Json(VisualChunkSearchResponse { + total: chunks.len(), + chunks, + })) +} + +#[derive(Debug, Serialize)] +struct VisualChunkStatsResponse { + uuid: String, + stats: std::collections::HashMap, +} + +async fn get_visual_chunk_stats( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let db = PostgresDb::init() + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let stats = visual_chunk_search::get_visual_chunk_statistics(&db, &req.uuid) + .await + .map_err(|e| { + tracing::error!("Get visual chunk stats failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(Json(VisualChunkStatsResponse { + uuid: req.uuid, + stats, + })) +} + +/// Request for searching visual chunks by object combination +#[derive(Debug, Deserialize)] +struct VisualChunkSearchByCombinationRequest { + uuid: String, + combination: Vec<(String, u32)>, // (object_class, min_count) +} + +async fn search_visual_chunks_by_combination( + State(state): State, + Json(req): Json, +) -> Result, StatusCode> { + let db = PostgresDb::init() + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let combination: Vec<(&str, u32)> = req + .combination + .iter() + .map(|(c, n)| (c.as_str(), *n)) + .collect(); + + let chunks = + visual_chunk_search::search_visual_chunks_by_combination(&db, &req.uuid, &combination) + .await + .map_err(|e| { + tracing::error!("Visual chunk search by combination failed: {}", e); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + Ok(Json(VisualChunkSearchResponse { + total: chunks.len(), + chunks, + })) +} + +async fn video_details( + Path(uuid): Path, + Query(query): Query, + State(state): State, +) -> Result, StatusCode> { + let table = schema::table_name("chunks"); + + if let Some(chunk_id) = query.chunk_id { + let row: Option<( + i32, String, String, i32, String, f64, i64, i64, + Option, serde_json::Value, Option, + Option, i32, Option, Option, Option, + )> = sqlx::query_as(&format!( + "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type::text, fps, start_frame, end_frame, + text_content, content, metadata, vector_id, frame_count, + parent_chunk_id, visual_stats, summary_text + FROM {} WHERE chunk_id = $1 AND uuid = $2", + table + )) + .bind(&chunk_id) + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let speaker_row: Option<(String, String)> = sqlx::query_as(&format!( + "SELECT COALESCE(speaker_ids, '{{}}'::text[]), COALESCE(face_ids, '{{}}'::integer[])::text[] FROM {} WHERE chunk_id = $1 AND uuid = $2", + table + )) + .bind(&chunk_id) + .bind(&uuid) + .fetch_optional(state.db.pool()) + .await + .ok() + .flatten(); + + let row = row.ok_or(StatusCode::NOT_FOUND)?; + + let fps = if row.5 > 0.0 { row.5 } else { 24.0 }; + let start_frame = row.6; + let end_frame = row.7; + let duration_frames = end_frame - start_frame; + + let start_time = start_frame as f64 / fps; + let end_time = end_frame as f64 / fps; + + let row_metadata = row.10.clone(); + + let mut summary_text = row.15.clone(); + let mut metadata = None; + + if let Some(ref pid_str) = row.13 { + if !pid_str.is_empty() { + if let Ok(pid) = pid_str.parse::() { + let parent_table = schema::table_name("parent_chunks"); + let parent: Option<(Option, Option)> = + sqlx::query_as(&format!( + "SELECT summary_text, metadata FROM {} WHERE id = $1", + parent_table + )) + .bind(pid) + .fetch_optional(state.db.pool()) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + if let Some((s, m)) = parent { + if summary_text.is_none() { + summary_text = s; + } + + let mut merged: serde_json::Value = serde_json::json!({}); + + if let Some(ref cm) = row_metadata { + if let Some(obj) = cm.as_object() { + for (k, v) in obj { + merged[k] = v.clone(); + } + } + } + + if let Some(pm) = &m { + if let Some(obj) = pm.as_object() { + for (k, v) in obj { + merged[k] = v.clone(); + } + } + } + + metadata = Some(merged); + } + } + } + } else if let Some(ref cm) = row_metadata { + metadata = Some(cm.clone()); + } + + let parse_pg_array = |s: &str| -> Vec { + if s.is_empty() || s == "{}" { + return vec![]; + } + s.trim_start_matches('{') + .trim_end_matches('}') + .split(',') + .map(|s| s.trim_matches('"').to_string()) + .collect() + }; + + let (speaker_str, face_str) = speaker_row.unwrap_or(("{}".to_string(), "{}".to_string())); + let speaker_vec: Vec = parse_pg_array(&speaker_str); + let speaker_ids: Option> = if speaker_vec.is_empty() { + None + } else { + Some(speaker_vec) + }; + let face_vec: Vec = parse_pg_array(&face_str); + let person_ids: Option> = if face_vec.is_empty() { + None + } else { + Some(face_vec.iter().map(|id| format!("face_{}", id)).collect()) + }; + + return Ok(Json(VideoDetailsResponse { + uuid: row.1.clone(), + details: VideoDetailsResult::Chunk(ChunkDetailResponse { + chunk_id: row.2.clone(), + chunk_type: row.4.clone(), + frame_range: FrameRange { + start_frame, + end_frame, + duration_frames, + fps, + }, + reference_time: ReferenceTime { + start: start_time, + end: end_time, + }, + text_content: row.8.clone(), + content: Some(row.9.clone()), + parent_id: row.13.clone(), + summary_text, + metadata, + visual_stats: row.14.clone(), + speaker_ids, + person_ids, + }), + })); + } + + Err(StatusCode::BAD_REQUEST) +} diff --git a/src/core/cache/keys.rs b/src/core/cache/keys.rs index bfc1f30..7048dcf 100644 --- a/src/core/cache/keys.rs +++ b/src/core/cache/keys.rs @@ -10,6 +10,8 @@ pub const KEY_PREFIX_VIDEO: &str = "video:"; pub const KEY_PREFIX_SEARCH: &str = "search:"; pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:"; pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:"; +pub const KEY_PREFIX_SEARCH_BM25: &str = "search:bm25:"; +pub const KEY_PREFIX_SEARCH_N8N_BM25: &str = "search:n8n:bm25:"; pub const KEY_HEALTH: &str = "health:basic"; pub fn videos_list(page: usize, limit: usize) -> String { @@ -32,6 +34,14 @@ pub fn n8n_search(query_hash: &str) -> String { format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash) } +pub fn bm25_search(query_hash: &str) -> String { + format!("{}{}", KEY_PREFIX_SEARCH_BM25, query_hash) +} + +pub fn n8n_bm25_search(query_hash: &str) -> String { + format!("{}{}", KEY_PREFIX_SEARCH_N8N_BM25, query_hash) +} + pub fn health() -> String { KEY_HEALTH.to_string() } @@ -48,6 +58,17 @@ pub fn search_prefix() -> String { format!("^{}", KEY_PREFIX_SEARCH) } +pub const KEY_PREFIX_VISUAL_SEARCH: &str = "search:visual:"; +pub const CATEGORY_VISUAL_SEARCH: &str = "visual_search"; + +pub fn visual_search(uuid: &str, criteria_hash: &str) -> String { + format!("{}{}:{}", KEY_PREFIX_VISUAL_SEARCH, uuid, criteria_hash) +} + +pub fn visual_search_prefix() -> String { + format!("^{}", KEY_PREFIX_VISUAL_SEARCH) +} + #[cfg(test)] mod tests { use super::*; @@ -78,8 +99,28 @@ mod tests { assert_eq!(n8n_search("hash123"), "search:n8n:hash123"); } + #[test] + fn test_bm25_search() { + assert_eq!(bm25_search("hash123"), "search:bm25:hash123"); + } + + #[test] + fn test_n8n_bm25_search() { + assert_eq!(n8n_bm25_search("hash123"), "search:n8n:bm25:hash123"); + } + #[test] fn test_health() { assert_eq!(health(), "health:basic"); } + + #[test] + fn test_visual_search() { + assert_eq!(visual_search("abc123", "hash"), "search:visual:abc123:hash"); + } + + #[test] + fn test_visual_search_prefix() { + assert_eq!(visual_search_prefix(), "^search:visual:"); + } } diff --git a/src/core/cache/mongo_cache.rs b/src/core/cache/mongo_cache.rs index 58bf2a0..5bdc4e7 100644 --- a/src/core/cache/mongo_cache.rs +++ b/src/core/cache/mongo_cache.rs @@ -136,6 +136,10 @@ impl MongoCache { self.settings.ttl_video_meta } + pub fn ttl_visual_search(&self) -> u64 { + self.settings.ttl_search // Reuse search TTL + } + pub async fn get(&self, key: &str) -> Result> { if !self.is_enabled() { return Ok(None); diff --git a/src/core/chunk/mod.rs b/src/core/chunk/mod.rs index 8b275bd..e90c23a 100644 --- a/src/core/chunk/mod.rs +++ b/src/core/chunk/mod.rs @@ -1,5 +1,9 @@ +pub mod rule1_ingest; +pub mod rule3_ingest; pub mod splitter; pub mod types; +pub use rule1_ingest::ingest_rule1; +pub use rule3_ingest::ingest_rule3; pub use splitter::{AsrSegment, ChunkSplitter}; pub use types::{Chunk, ChunkType}; diff --git a/src/core/chunk/types.rs b/src/core/chunk/types.rs index 024b559..48e2bda 100644 --- a/src/core/chunk/types.rs +++ b/src/core/chunk/types.rs @@ -1,6 +1,7 @@ use crate::core::time::FrameTime; use serde::{Deserialize, Serialize}; +// ==================== ChunkType ==================== #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "snake_case")] pub enum ChunkType { @@ -8,7 +9,8 @@ pub enum ChunkType { Sentence, Cut, Trace, - Story, // Parent chunk from story analysis + Story, + Visual, // 視覺分片 (Phase 2.1) } impl ChunkType { @@ -19,10 +21,12 @@ impl ChunkType { ChunkType::Cut => "cut", ChunkType::Trace => "trace", ChunkType::Story => "story", + ChunkType::Visual => "visual", } } } +// ==================== ChunkRule ==================== #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "snake_case")] pub enum ChunkRule { @@ -39,6 +43,73 @@ impl ChunkRule { } } +// ==================== 視覺分片相關結構 (Phase 2.1) ==================== +/// 邊界框 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BoundingBox { + pub x: i32, + pub y: i32, + pub width: i32, + pub height: i32, +} + +/// 檢測到的物件 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DetectedObject { + /// 物件類別名稱 + pub class_name: String, + /// 物件類別 ID + pub class_id: u32, + /// 信心值 (0.0-1.0) + pub confidence: f32, + /// 邊界框 + pub bbox: Option, + /// 出現次數 (在分片內) + pub occurrence: u32, +} + +/// 關鍵幀的物件列表 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KeyframeObjects { + /// 關鍵幀時間 (秒) - 僅供參考,主要使用 frame_number + pub timestamp: f64, + /// 關鍵幀幀號 - 主要時間標示 + pub frame_number: u64, + /// 檢測到的物件 + pub objects: Vec, +} + +/// 視覺元數據 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VisualMetadata { + /// 總物件數量 + pub object_count: u32, + /// 唯一物件類別列表 + pub unique_classes: Vec, + /// 最高信心值 + pub max_confidence: f32, + /// 平均信心值 + pub avg_confidence: f32, + /// 空間密度(每幀平均物件數) + pub spatial_density: f32, +} + +/// 視覺分片內容 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VisualChunkContent { + /// 關鍵幀物件列表,每個關鍵幀包含 frame_number + pub keyframe_objects: Vec, + /// 主要物件標籤(出現在大多數幀中的物件) + pub dominant_objects: Vec, + /// 物件關係 (object1, relationship, object2) - 可選 + pub object_relationships: Vec<(String, String, String)>, + /// 場景描述 - 可選 + pub scene_description: Option, + /// 視覺元數據 + pub metadata: VisualMetadata, +} + +// ==================== Chunk 主結構 ==================== #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Chunk { pub file_id: i32, @@ -49,9 +120,9 @@ pub struct Chunk { pub rule: ChunkRule, /// Frames per second (can be fractional, e.g., 29.97, 23.976) pub fps: f64, - /// Start frame (0-based) + /// Start frame (0-based) - 主要時間標示 pub start_frame: i64, - /// End frame (exclusive) + /// End frame (exclusive) - 主要時間標示 pub end_frame: i64, pub text_content: Option, pub content: serde_json::Value, @@ -61,17 +132,11 @@ pub struct Chunk { pub pre_chunk_ids: Vec, pub parent_chunk_id: Option, // For parent-child chunk hierarchy pub child_chunk_ids: Vec, // Child chunk IDs (for parent chunks) + pub visual_stats: Option, } impl Chunk { - /// Creates a new chunk from frame counts. - /// - /// # Arguments - /// - /// * `start_frame` - Start frame (0-based) - /// * `end_frame` - End frame (exclusive) - /// * `fps` - Frames per second (can be fractional) - #[allow(clippy::too_many_arguments)] + /// 創建新分片 pub fn new( file_id: i32, uuid: String, @@ -83,11 +148,13 @@ impl Chunk { fps: f64, content: serde_json::Value, ) -> Self { - let chunk_id = format!("{}_{:04}", chunk_type.as_str(), chunk_index); + let frame_count = (end_frame - start_frame) as i32; + let chunk_id = format!("{}_{}", uuid, chunk_index); + Self { file_id, uuid, - chunk_id: chunk_id.clone(), + chunk_id, chunk_index, chunk_type, rule, @@ -98,17 +165,171 @@ impl Chunk { content, metadata: None, vector_id: None, - frame_count: 0, + frame_count, pre_chunk_ids: vec![], parent_chunk_id: None, child_chunk_ids: vec![], + visual_stats: None, } } - /// Creates a new chunk from seconds (legacy conversion). + /// 創建視覺分片 (Phase 2.1) + pub fn new_visual( + file_id: i32, + uuid: String, + chunk_index: u32, + start_frame: i64, + end_frame: i64, + fps: f64, + visual_content: VisualChunkContent, + ) -> Self { + let content = serde_json::to_value(&visual_content) + .unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"})); + + Self::new( + file_id, + uuid, + chunk_index, + ChunkType::Visual, + ChunkRule::Rule2, + start_frame, + end_frame, + fps, + content, + ) + } + + /// 從 YOLO 幀創建視覺分片 (Phase 2.1) + pub fn from_yolo_frames( + file_id: i32, + uuid: String, + chunk_index: u32, + start_frame: i64, + end_frame: i64, + fps: f64, + yolo_frames: Vec, + ) -> Self { + // 將 YOLO 幀轉換為關鍵幀物件 + let keyframe_objects: Vec = yolo_frames + .iter() + .map(|frame| { + let objects: Vec = frame + .objects + .iter() + .map(|obj| DetectedObject { + class_name: obj.class_name.clone(), + class_id: obj.class_id, + confidence: obj.confidence, + bbox: Some(BoundingBox { + x: obj.x, + y: obj.y, + width: obj.width, + height: obj.height, + }), + occurrence: 1, + }) + .collect(); + + KeyframeObjects { + timestamp: frame.timestamp, + frame_number: frame.frame, + objects, + } + }) + .collect(); + + // 計算物件統計 + let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum(); + + // 收集所有物件類別 + let all_classes: Vec = yolo_frames + .iter() + .flat_map(|f| f.objects.iter().map(|o| o.class_name.clone())) + .collect(); + + // 獲取唯一類別 + let unique_classes: Vec = all_classes + .iter() + .cloned() + .collect::>() + .into_iter() + .collect(); + + // 計算信心值統計 + let confidences: Vec = yolo_frames + .iter() + .flat_map(|f| f.objects.iter().map(|o| o.confidence)) + .collect(); + + let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max); + let avg_confidence = if !confidences.is_empty() { + confidences.iter().sum::() / confidences.len() as f32 + } else { + 0.0 + }; + + // 計算主要物件(出現在大多數幀中的物件) + let mut object_counts = std::collections::HashMap::new(); + for frame in &yolo_frames { + let frame_classes: std::collections::HashSet<_> = + frame.objects.iter().map(|o| o.class_name.clone()).collect(); + for class in frame_classes { + *object_counts.entry(class).or_insert(0) += 1; + } + } + + let mut dominant_objects: Vec = object_counts + .into_iter() + .filter(|(_, count)| *count as f32 / yolo_frames.len() as f32 > 0.5) + .map(|(class, _)| class) + .collect(); + dominant_objects.sort(); + + // 創建視覺內容 + let visual_content = VisualChunkContent { + keyframe_objects, + dominant_objects, + object_relationships: vec![], // 可選:後期添加關係檢測 + scene_description: None, // 可選:後期添加 LLM 生成的場景描述 + metadata: VisualMetadata { + object_count: total_objects, + unique_classes, + max_confidence, + avg_confidence, + spatial_density: if yolo_frames.len() > 0 { + total_objects as f32 / yolo_frames.len() as f32 + } else { + 0.0 + }, + }, + }; + + Self::new_visual( + file_id, + uuid, + chunk_index, + start_frame, + end_frame, + fps, + visual_content, + ) + } + + /// 將分片轉換為幀時間 + pub fn to_frame_time(&self) -> FrameTime { + // 使用第一個幀作為參考點 + FrameTime::from_frames(self.start_frame, self.fps) + } + + /// 檢查是否是父分片 + pub fn is_parent(&self) -> bool { + self.parent_chunk_id.is_some() + } + + /// 從秒數創建新分片(舊版轉換) /// - /// This is useful for migrating from older systems that store time as seconds. - /// The frame counts are calculated by rounding `seconds * fps`. + /// 這對於從存儲時間為秒的舊系統遷移很有用。 + /// 幀數通過舍入 `seconds * fps` 計算。 #[allow(clippy::too_many_arguments)] pub fn from_seconds( file_id: i32, @@ -136,104 +357,197 @@ impl Chunk { ) } - /// Returns the start time as a `FrameTime`. + /// 返回開始時間為 `FrameTime` pub fn start_time(&self) -> FrameTime { FrameTime::from_frames(self.start_frame, self.fps) } - /// Returns the end time as a `FrameTime`. + /// 返回結束時間為 `FrameTime` pub fn end_time(&self) -> FrameTime { FrameTime::from_frames(self.end_frame, self.fps) } - /// Returns the duration in frames. + /// 返回持續時間的幀數 pub fn duration_frames(&self) -> i64 { self.end_frame - self.start_frame } - /// Returns the duration in seconds. + /// 返回持續時間的秒數 pub fn duration_seconds(&self) -> f64 { self.duration_frames() as f64 / self.fps } - /// Formats the start time as "seconds.frame" (e.g., "123.04"). + /// 將開始時間格式化為 "seconds.frame" (例如:"123.04") pub fn format_start_sec_frame(&self) -> String { self.start_time().format_sec_frame() } - /// Formats the end time as "seconds.frame" (e.g., "456.15"). + /// 將結束時間格式化為 "seconds.frame" (例如:"456.15") pub fn format_end_sec_frame(&self) -> String { self.end_time().format_sec_frame() } - /// Formats the start time as "HH:MM:SS". + /// 將開始時間格式化為 "HH:MM:SS" pub fn format_start_hms(&self) -> String { self.start_time().format_hms() } - /// Formats the end time as "HH:MM:SS". + /// 將結束時間格式化為 "HH:MM:SS" pub fn format_end_hms(&self) -> String { self.end_time().format_hms() } - /// Formats the start time as "HH:MM:SS.FF". + /// 將開始時間格式化為 "HH:MM:SS.FF" pub fn format_start_hms_frame(&self) -> String { self.start_time().format_hms_frame() } - /// Formats the end time as "HH:MM:SS.FF". + /// 將結束時間格式化為 "HH:MM:SS.FF" pub fn format_end_hms_frame(&self) -> String { self.end_time().format_hms_frame() } - /// Returns a tuple of (start_seconds, end_seconds) for compatibility. + /// 返回 (start_seconds, end_seconds) 元組用於兼容性 /// - /// This is provided for backward compatibility during migration. - /// Prefer using `start_time()` and `end_time()` methods. + /// 這在遷移期間提供向後兼容性。 + /// 建議使用 `start_time()` 和 `end_time()` 方法。 pub fn time_range_seconds(&self) -> (f64, f64) { (self.start_time().seconds(), self.end_time().seconds()) } + /// 添加元數據 pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { self.metadata = Some(metadata); self } + /// 添加向量 ID pub fn with_vector_id(mut self, vector_id: String) -> Self { self.vector_id = Some(vector_id); self } + /// 添加文本內容 pub fn with_text_content(mut self, text: String) -> Self { self.text_content = Some(text); self } + /// 設置幀數 pub fn with_frame_count(mut self, count: i32) -> Self { self.frame_count = count; self } + /// 設置前一個分片 ID pub fn with_pre_chunk_ids(mut self, ids: Vec) -> Self { self.pre_chunk_ids = ids; self } + /// 設置父分片 ID pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self { self.parent_chunk_id = Some(parent_id); self } + /// 設置子分片 ID pub fn with_child_chunk_ids(mut self, child_ids: Vec) -> Self { self.child_chunk_ids = child_ids; self } +} - pub fn is_parent_chunk(&self) -> bool { - !self.child_chunk_ids.is_empty() +// ==================== VisualChunkContent 輔助方法 ==================== +impl VisualChunkContent { + /// 計算兩個 YOLO 幀之間的相似度(基於物件組成) + pub fn frame_similarity( + frame1: &crate::core::processor::yolo::YoloFrame, + frame2: &crate::core::processor::yolo::YoloFrame, + ) -> f32 { + if frame1.objects.is_empty() && frame2.objects.is_empty() { + return 1.0; // 兩個空幀完全相似 + } + + if frame1.objects.is_empty() || frame2.objects.is_empty() { + return 0.0; // 一個空一個非空,不相似 + } + + // 創建物件類別名稱集合 + let set1: std::collections::HashSet = frame1 + .objects + .iter() + .map(|o| o.class_name.clone()) + .collect(); + let set2: std::collections::HashSet = frame2 + .objects + .iter() + .map(|o| o.class_name.clone()) + .collect(); + + // 計算 Jaccard 相似度 + let intersection: Vec<_> = set1.intersection(&set2).collect(); + let union: Vec<_> = set1.union(&set2).collect(); + + if union.is_empty() { + 0.0 + } else { + intersection.len() as f32 / union.len() as f32 + } } - pub fn is_child_chunk(&self) -> bool { - self.parent_chunk_id.is_some() + /// 獲取視覺分片的摘要(使用關鍵幀的 frame_number) + pub fn summary(&self, fps: f64) -> String { + if self.keyframe_objects.is_empty() { + return "Empty visual chunk".to_string(); + } + + let first_frame = self.keyframe_objects.first().unwrap().frame_number; + let last_frame = self.keyframe_objects.last().unwrap().frame_number; + + // 計算時間(僅供參考) + let start_time = if fps > 0.0 { + first_frame as f64 / fps + } else { + 0.0 + }; + let end_time = if fps > 0.0 { + last_frame as f64 / fps + } else { + 0.0 + }; + let duration = end_time - start_time; + let frame_count = self.keyframe_objects.len(); + + format!( + "Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}", + first_frame, + last_frame, + duration, + frame_count, + self.metadata.object_count, + self.metadata.unique_classes.len(), + if self.dominant_objects.is_empty() { + "none".to_string() + } else { + self.dominant_objects.join(", ") + } + ) + } + + /// 檢查是否包含特定物件類別 + pub fn contains_object(&self, class_name: &str) -> bool { + self.keyframe_objects + .iter() + .any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name)) + } + + /// 獲取信心值高於閾值的所有物件 + pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> { + self.keyframe_objects + .iter() + .flat_map(|ko| ko.objects.iter()) + .filter(|obj| obj.confidence >= threshold) + .collect() } } diff --git a/src/core/config.rs b/src/core/config.rs index 6684422..bb4ce16 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -164,3 +164,29 @@ pub mod cache { .unwrap_or(3600) }); } + +pub mod llm { + use super::*; + + pub static SUMMARY_URL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_SUMMARY_URL") + .unwrap_or_else(|_| "http://127.0.0.1:8081/v1/chat/completions".to_string()) + }); + + pub static SUMMARY_MODEL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| "gemma4".to_string()) + }); + + pub static SUMMARY_TIMEOUT_SECS: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_SUMMARY_TIMEOUT") + .unwrap_or_else(|_| "120".to_string()) + .parse() + .unwrap_or(120) + }); + + pub static SUMMARY_ENABLED: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_SUMMARY_ENABLED") + .map(|v| v == "true" || v == "1") + .unwrap_or(true) + }); +} diff --git a/src/core/db/mongodb_db.rs b/src/core/db/mongodb_db.rs index a087d3b..3b0b016 100644 --- a/src/core/db/mongodb_db.rs +++ b/src/core/db/mongodb_db.rs @@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType}; pub struct MongoDb { base_url: String, + database: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -53,7 +54,8 @@ impl MongoDb { pub fn new() -> Self { let base_url = std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string()); - Self { base_url } + let database = crate::core::config::MONGODB_DATABASE.clone(); + Self { base_url, database } } } @@ -68,7 +70,7 @@ impl MongoDb { let doc: ChunkDocument = chunk.clone().into(); let client = reqwest::Client::new(); - let url = format!("{}/momentry/chunks", self.base_url); + let url = format!("{}/{}/chunks", self.base_url, self.database); client .post(&url) @@ -83,8 +85,8 @@ impl MongoDb { pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result> { let client = reqwest::Client::new(); let url = format!( - "{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}", - self.base_url, uuid + "{}/{}/chunks?filter={{\"uuid\":\"{}\"}}", + self.base_url, self.database, uuid ); let response = client @@ -131,6 +133,7 @@ impl MongoDb { pre_chunk_ids: vec![], parent_chunk_id: doc.parent_chunk_id, child_chunk_ids: doc.child_chunk_ids, + visual_stats: None, } }) .collect(); @@ -141,8 +144,8 @@ impl MongoDb { pub async fn search_text(&self, query: &str) -> Result> { let client = reqwest::Client::new(); let url = format!( - "{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}", - self.base_url, query + "{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}", + self.base_url, self.database, query ); let response = client @@ -189,6 +192,7 @@ impl MongoDb { pre_chunk_ids: vec![], parent_chunk_id: doc.parent_chunk_id, child_chunk_ids: doc.child_chunk_ids, + visual_stats: None, } }) .collect(); @@ -198,7 +202,7 @@ impl MongoDb { pub async fn get_all_chunks(&self) -> Result> { let client = reqwest::Client::new(); - let url = format!("{}/momentry/chunks", self.base_url); + let url = format!("{}/{}/chunks", self.base_url, self.database); let response = client .get(&url) @@ -244,6 +248,7 @@ impl MongoDb { pre_chunk_ids: vec![], parent_chunk_id: doc.parent_chunk_id, child_chunk_ids: doc.child_chunk_ids, + visual_stats: None, } }) .collect(); diff --git a/src/core/db/postgres_db.rs b/src/core/db/postgres_db.rs index e09b850..1de1ced 100644 --- a/src/core/db/postgres_db.rs +++ b/src/core/db/postgres_db.rs @@ -1,9 +1,11 @@ use anyhow::Result; use async_trait::async_trait; use serde::{Deserialize, Serialize}; +use serde_json::json; use sqlx::{postgres::PgPoolOptions, PgPool, Row}; use std::sync::Arc; use tokio::sync::RwLock; +use uuid::Uuid; use super::{schema, Database, QdrantDb}; use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType}; @@ -151,6 +153,7 @@ pub struct Frame { pub yolo_objects: Option, pub ocr_results: Option, pub face_results: Option, + pub pose_results: Option, pub frame_path: Option, pub created_at: String, } @@ -222,6 +225,7 @@ pub enum ProcessorType { Face, Pose, Asrx, + VisualChunk, } impl ProcessorType { @@ -234,6 +238,7 @@ impl ProcessorType { ProcessorType::Face => "face", ProcessorType::Pose => "pose", ProcessorType::Asrx => "asrx", + ProcessorType::VisualChunk => "visual_chunk", } } @@ -246,6 +251,7 @@ impl ProcessorType { "face" => Some(ProcessorType::Face), "pose" => Some(ProcessorType::Pose), "asrx" => Some(ProcessorType::Asrx), + "visual_chunk" => Some(ProcessorType::VisualChunk), _ => None, } } @@ -259,6 +265,7 @@ impl ProcessorType { ProcessorType::Face, ProcessorType::Pose, ProcessorType::Asrx, + ProcessorType::VisualChunk, ] } } @@ -428,6 +435,7 @@ impl<'a> CreateApiKeyConfig<'a> { } } +#[derive(Clone)] pub struct PostgresDb { pool: PgPool, cache: Arc>, @@ -440,6 +448,36 @@ pub struct PostgresCache { chunks: std::collections::HashMap>, } +// Result structure for semantic search +#[derive(Debug, serde::Serialize, sqlx::FromRow)] +pub struct SemanticSearchResult { + pub id: i32, + pub scene_order: i32, + pub start_time: f64, + pub end_time: f64, + pub summary: String, + pub metadata: serde_json::Value, + pub similarity: Option, +} + +/// Result structure for child chunks +#[derive(Debug, serde::Serialize, sqlx::FromRow)] +pub struct ChildChunkResult { + pub id: i32, + pub parent_id: i32, + + // Frame Data + pub start_frame: i64, + pub end_frame: i64, + pub fps: f64, + + // Time Data + pub start_time: f64, + pub end_time: f64, + pub raw_text: String, + pub speaker_ids: Vec, +} + impl PostgresDb { pub async fn new(database_url: &str) -> Result { let max_connections = std::env::var("DB_MAX_CONNECTIONS") @@ -468,11 +506,10 @@ impl PostgresDb { let schema = schema.to_string(); tracing::debug!("after_connect: setting search_path to {}", schema); Box::pin(async move { - if schema != "public" { - sqlx::query(&format!("SET search_path TO {}", schema)) - .execute(conn) - .await?; - } + // Always set search_path explicitly to avoid using default "dev, public" + sqlx::query(&format!("SET search_path TO {}", schema)) + .execute(conn) + .await?; Ok(()) }) }); @@ -491,11 +528,112 @@ impl PostgresDb { cache: Arc::new(RwLock::new(PostgresCache::default())), }; - db.init_schema().await?; + // Migrations are now handled externally or via run_migrations() + // db.init_schema().await?; Ok(db) } + /// Run database migrations (Schema initialization) + pub async fn run_migrations(pool: &PgPool) -> Result<()> { + tracing::info!("Running database migrations..."); + + // Create schema if not exists + let schema = crate::core::config::DATABASE_SCHEMA.as_str(); + if schema != "public" { + sqlx::query(&format!("CREATE SCHEMA IF NOT EXISTS {}", schema)) + .execute(pool) + .await?; + } + + // Videos + sqlx::query("CREATE TABLE IF NOT EXISTS videos (id SERIAL PRIMARY KEY, uuid VARCHAR(32) UNIQUE NOT NULL, file_path TEXT NOT NULL, file_name TEXT NOT NULL, duration DOUBLE PRECISION, width INTEGER, height INTEGER, fps DOUBLE PRECISION, probe_json TEXT, fs_video BOOLEAN DEFAULT FALSE, fs_json BOOLEAN DEFAULT FALSE, psql_chunk BOOLEAN DEFAULT FALSE, pobject_chunk BOOLEAN DEFAULT FALSE, mobject_chunk BOOLEAN DEFAULT FALSE, pvector_chunk BOOLEAN DEFAULT FALSE, qvector_chunk BOOLEAN DEFAULT FALSE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_videos_uuid ON videos(uuid)") + .execute(pool) + .await?; + + // Chunks + sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(uuid, chunk_id))").execute(pool).await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid ON chunks(uuid)") + .execute(pool) + .await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)") + .execute(pool) + .await?; + + // Monitor Jobs + sqlx::query("CREATE TABLE IF NOT EXISTS monitor_jobs (id SERIAL PRIMARY KEY, uuid VARCHAR(16) NOT NULL, video_path VARCHAR(512), status VARCHAR(20) NOT NULL DEFAULT 'pending', current_processor VARCHAR(20), progress_total INT DEFAULT 0, progress_current INT DEFAULT 0, error_count INT DEFAULT 0, last_error TEXT, started_at TIMESTAMP, updated_at TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_monitor_jobs_status ON monitor_jobs(status)") + .execute(pool) + .await?; + + // Processor Results + sqlx::query("CREATE TABLE IF NOT EXISTS processor_results (id SERIAL PRIMARY KEY, job_id INTEGER, video_uuid VARCHAR(255) NOT NULL, processor VARCHAR(64), processor_type VARCHAR(64) NOT NULL, status VARCHAR(32) DEFAULT 'pending', result JSONB, error_message TEXT, started_at TIMESTAMP WITH TIME ZONE, completed_at TIMESTAMP WITH TIME ZONE, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + + // Talents & Identity Bindings + sqlx::query("CREATE TABLE IF NOT EXISTS talents (id BIGSERIAL PRIMARY KEY, real_name VARCHAR(255) NOT NULL UNIQUE, actor_name VARCHAR(255), voice_embedding TEXT, face_embedding TEXT, metadata JSONB DEFAULT '{}', created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + sqlx::query("CREATE TABLE IF NOT EXISTS identity_bindings (id BIGSERIAL PRIMARY KEY, identity_id BIGINT REFERENCES talents(id) ON DELETE CASCADE, identity_type VARCHAR(20) NOT NULL, identity_value VARCHAR(100) NOT NULL, metadata JSONB DEFAULT '{}', confidence DOUBLE PRECISION DEFAULT 1.0, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, UNIQUE(identity_id, identity_type, identity_value))").execute(pool).await?; + + // API Keys + sqlx::query("CREATE TABLE IF NOT EXISTS api_keys (id SERIAL PRIMARY KEY, key_id VARCHAR(48) UNIQUE NOT NULL, key_hash VARCHAR(64) NOT NULL, key_prefix VARCHAR(8) NOT NULL, name VARCHAR(128) NOT NULL, key_type VARCHAR(20) NOT NULL DEFAULT 'user', user_id BIGINT, service_name VARCHAR(64), permissions JSONB DEFAULT '[\"read\", \"write\"]', expires_at TIMESTAMP, last_used_at TIMESTAMP, last_used_ip VARCHAR(45), usage_count BIGINT DEFAULT 0, status VARCHAR(20) NOT NULL DEFAULT 'active', rotation_required BOOLEAN DEFAULT FALSE, rotation_reason TEXT, grace_period_end TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_api_keys_hash ON api_keys(key_hash)") + .execute(pool) + .await?; + + // API Key Audit Log + sqlx::query("CREATE TABLE IF NOT EXISTS api_key_audit_log (id SERIAL PRIMARY KEY, key_id VARCHAR(32) NOT NULL, action VARCHAR(50) NOT NULL, actor VARCHAR(128), ip_address VARCHAR(45), user_agent TEXT, request_path TEXT, response_code INT, anomaly_type VARCHAR(30), details JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + + // API Key Anomalies + sqlx::query("CREATE TABLE IF NOT EXISTS api_key_anomalies (id SERIAL PRIMARY KEY, key_id VARCHAR(32) NOT NULL, anomaly_type VARCHAR(30) NOT NULL, severity VARCHAR(10) NOT NULL, ip_address VARCHAR(45), request_count INT, error_count INT, error_rate DOUBLE PRECISION, unique_ips INT, details JSONB, resolved BOOLEAN DEFAULT FALSE, resolved_at TIMESTAMP, resolved_by VARCHAR(128), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + + // Gitea Tokens + sqlx::query("CREATE TABLE IF NOT EXISTS gitea_tokens (id SERIAL PRIMARY KEY, gitea_token_id BIGINT NOT NULL, gitea_user VARCHAR(128) NOT NULL, token_name VARCHAR(128) NOT NULL, token_last_eight VARCHAR(8) NOT NULL, scopes JSONB DEFAULT '[]', api_key_id VARCHAR(48), last_verified TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(gitea_user, token_name))").execute(pool).await?; + + // N8N API Keys + sqlx::query("CREATE TABLE IF NOT EXISTS n8n_api_keys (id SERIAL PRIMARY KEY, n8n_key_id VARCHAR(64) UNIQUE NOT NULL, label VARCHAR(100) NOT NULL, api_key_last_eight VARCHAR(8) NOT NULL, momentry_api_key_id VARCHAR(48), expires_at TIMESTAMP WITH TIME ZONE, last_verified TIMESTAMP WITH TIME ZONE, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP)").execute(pool).await?; + + // Search Trigger + sqlx::query( + "CREATE OR REPLACE FUNCTION update_search_vector() RETURNS TRIGGER AS $func$ + BEGIN + NEW.search_vector := to_tsvector('english', COALESCE(NEW.text_content, '')); + RETURN NEW; + END; + $func$ LANGUAGE plpgsql", + ) + .execute(pool) + .await?; + + sqlx::query("DROP TRIGGER IF EXISTS chunks_search_vector_trigger ON chunks") + .execute(pool) + .await?; + + sqlx::query( + "CREATE TRIGGER chunks_search_vector_trigger + BEFORE INSERT OR UPDATE ON chunks + FOR EACH ROW EXECUTE FUNCTION update_search_vector()", + ) + .execute(pool) + .await?; + + // Chunks Rule 1 + sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?; + sqlx::query( + "CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(asset_uuid)", + ) + .execute(pool) + .await?; + + // Jobs (Legacy/P0) + sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?; + sqlx::query("CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)") + .execute(pool) + .await?; + + tracing::info!("Database migrations completed."); + Ok(()) + } + /// Get a reference to the connection pool pub fn pool(&self) -> &PgPool { &self.pool @@ -584,20 +722,103 @@ impl PostgresDb { } } - pub async fn list_videos(&self) -> Result> { + pub async fn get_videos_by_uuids(&self, uuids: &[String]) -> Result> { + if uuids.is_empty() { + return Ok(vec![]); + } + + fn cached_values_to_vec( + mut map: std::collections::HashMap, + order: &[String], + ) -> Vec { + let mut results = Vec::with_capacity(order.len()); + for uuid in order { + if let Some(video) = map.remove(uuid) { + results.push(video); + } + } + results + } + + // Check cache first and collect missing uuids + let mut missing_uuids = Vec::new(); + let mut cached_results: std::collections::HashMap = + std::collections::HashMap::new(); + + { + let cache = self.cache.read().await; + for uuid in uuids { + if let Some(video) = cache.videos.get(uuid) { + cached_results.insert(uuid.clone(), video.clone()); + } else { + missing_uuids.push(uuid.clone()); + } + } + } + + if missing_uuids.is_empty() { + let mut results: Vec = cached_values_to_vec(cached_results, uuids); + return Ok(results); + } + + // Batch query for missing uuids + let placeholders: Vec = missing_uuids + .iter() + .enumerate() + .map(|(i, _)| format!("${}", i + 1)) + .collect(); let table = schema::table_name("videos"); + let sql = format!( + "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id FROM {} WHERE uuid = ANY($1)", + table + ); + + let rows = sqlx::query_as::<_, VideoRow>(&sql) + .bind(&missing_uuids) + .fetch_all(&self.pool) + .await?; + + let mut videos: std::collections::HashMap = + std::collections::HashMap::new(); + for row in rows.into_iter() { + let video: VideoRecord = row.into(); + videos.insert(video.uuid.clone(), video.clone()); + // Update cache + let mut cache = self.cache.write().await; + cache.videos.insert(video.uuid.clone(), video); + } + + // Merge cached and fetched results + cached_results.extend(videos); + let mut results: Vec = cached_values_to_vec(cached_results, uuids); + + Ok(results) + } + + pub async fn list_videos(&self, limit: i32, offset: i64) -> Result<(Vec, i64)> { + let table = schema::table_name("videos"); + + // Count total + let count: Option = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table)) + .fetch_one(&self.pool) + .await?; + let total = count.unwrap_or(0); + + // Select paged let rows = sqlx::query_as::<_, VideoRow>( &format!( - "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id FROM {} ORDER BY id DESC", + "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", table ) ) + .bind(limit) + .bind(offset) .fetch_all(&self.pool) .await?; let videos: Vec = rows.into_iter().map(|r| r.into()).collect(); - Ok(videos) + Ok((videos, total)) } pub async fn update_storage_status(&self, uuid: &str, field: &str, value: bool) -> Result<()> { @@ -1404,480 +1625,6 @@ impl PostgresDb { Ok(()) } - async fn init_schema(&self) -> Result<()> { - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS videos ( - id SERIAL PRIMARY KEY, - uuid VARCHAR(32) UNIQUE NOT NULL, - file_path TEXT NOT NULL, - file_name TEXT NOT NULL, - duration DOUBLE PRECISION, - width INTEGER, - height INTEGER, - fps DOUBLE PRECISION, - probe_json TEXT, - fs_video BOOLEAN DEFAULT FALSE, - fs_json BOOLEAN DEFAULT FALSE, - psql_chunk BOOLEAN DEFAULT FALSE, - pobject_chunk BOOLEAN DEFAULT FALSE, - mobject_chunk BOOLEAN DEFAULT FALSE, - pvector_chunk BOOLEAN DEFAULT FALSE, - qvector_chunk BOOLEAN DEFAULT FALSE, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_videos_uuid ON videos(uuid)") - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE videos ADD COLUMN IF NOT EXISTS fs_video BOOLEAN DEFAULT FALSE") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE videos ADD COLUMN IF NOT EXISTS fs_json BOOLEAN DEFAULT FALSE") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE videos ADD COLUMN IF NOT EXISTS psql_chunk BOOLEAN DEFAULT FALSE") - .execute(&self.pool) - .await?; - sqlx::query( - "ALTER TABLE videos ADD COLUMN IF NOT EXISTS pobject_chunk BOOLEAN DEFAULT FALSE", - ) - .execute(&self.pool) - .await?; - sqlx::query( - "ALTER TABLE videos ADD COLUMN IF NOT EXISTS mobject_chunk BOOLEAN DEFAULT FALSE", - ) - .execute(&self.pool) - .await?; - sqlx::query( - "ALTER TABLE videos ADD COLUMN IF NOT EXISTS pvector_chunk BOOLEAN DEFAULT FALSE", - ) - .execute(&self.pool) - .await?; - sqlx::query( - "ALTER TABLE videos ADD COLUMN IF NOT EXISTS qvector_chunk BOOLEAN DEFAULT FALSE", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS chunks ( - id SERIAL PRIMARY KEY, - uuid VARCHAR(32) NOT NULL, - chunk_id VARCHAR(64) NOT NULL, - chunk_index INTEGER NOT NULL, - chunk_type VARCHAR(32) NOT NULL, - start_time DOUBLE PRECISION NOT NULL, - end_time DOUBLE PRECISION NOT NULL, - fps DOUBLE PRECISION DEFAULT 24.0, - start_frame BIGINT DEFAULT 0, - end_frame BIGINT DEFAULT 0, - content JSONB NOT NULL, - metadata JSONB, - vector_id VARCHAR(64), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(uuid, chunk_id) - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid ON chunks(uuid)") - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)") - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_time ON chunks(start_time, end_time)") - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid_type ON chunks(uuid, chunk_type)") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_chunks_content_gin ON chunks USING GIN(content)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS chunk_vectors ( - id SERIAL PRIMARY KEY, - chunk_id VARCHAR(64) NOT NULL UNIQUE, - uuid VARCHAR(32) NOT NULL, - chunk_type VARCHAR(32) NOT NULL, - start_time DOUBLE PRECISION, - end_time DOUBLE PRECISION, - embedding TEXT, - metadata JSONB, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_vectors_uuid ON chunk_vectors(uuid)") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_chunk_vectors_chunk_id ON chunk_vectors(chunk_id)", - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_vectors_uuid ON chunk_vectors(uuid)") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_chunk_vectors_chunk_id ON chunk_vectors(chunk_id)", - ) - .execute(&self.pool) - .await?; - - // pre_chunks table - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS pre_chunks ( - id SERIAL PRIMARY KEY, - file_id INTEGER NOT NULL REFERENCES videos(id), - source_type VARCHAR(32) NOT NULL, - source_file TEXT, - chunk_type VARCHAR(32) NOT NULL, - start_time DOUBLE PRECISION NOT NULL, - end_time DOUBLE PRECISION NOT NULL, - start_frame BIGINT DEFAULT 0, - end_frame BIGINT DEFAULT 0, - fps DOUBLE PRECISION DEFAULT 24.0, - raw_json JSONB NOT NULL, - text_content TEXT, - processed BOOLEAN DEFAULT FALSE, - chunk_id VARCHAR(64), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(file_id, source_type, start_frame, end_frame) - ) - "#, - ) - .execute(&self.pool) - .await?; - - // frames table - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS frames ( - id SERIAL PRIMARY KEY, - file_id INTEGER NOT NULL REFERENCES videos(id), - frame_number BIGINT NOT NULL, - timestamp DOUBLE PRECISION NOT NULL, - fps DOUBLE PRECISION DEFAULT 24.0, - yolo_objects JSONB, - ocr_results JSONB, - face_results JSONB, - frame_path TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(file_id, frame_number) - ) - "#, - ) - .execute(&self.pool) - .await?; - - // Add file_id columns to existing tables if not exist - sqlx::query( - "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS file_id INTEGER REFERENCES videos(id)", - ) - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS text_content TEXT") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS frame_count INTEGER DEFAULT 0") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS pre_chunk_ids INTEGER[]") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS parent_chunk_id VARCHAR(64)") - .execute(&self.pool) - .await?; - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS child_chunk_ids TEXT[]") - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS search_vector TSVECTOR") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_chunks_search_vector ON chunks USING GIN(search_vector)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS fps DOUBLE PRECISION DEFAULT 24.0", - ) - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS start_frame BIGINT DEFAULT 0") - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS end_frame BIGINT DEFAULT 0") - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS metadata JSONB") - .execute(&self.pool) - .await?; - - sqlx::query( - "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW()", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE OR REPLACE FUNCTION update_search_vector() RETURNS TRIGGER AS $func$ - BEGIN - NEW.search_vector := to_tsvector('english', COALESCE(NEW.text_content, '')); - RETURN NEW; - END; - $func$ LANGUAGE plpgsql", - ) - .execute(&self.pool) - .await?; - - sqlx::query("DROP TRIGGER IF EXISTS chunks_search_vector_trigger ON chunks") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE TRIGGER chunks_search_vector_trigger - BEFORE INSERT OR UPDATE ON chunks - FOR EACH ROW EXECUTE FUNCTION update_search_vector()", - ) - .execute(&self.pool) - .await?; - - sqlx::query("ALTER TABLE chunk_vectors ADD COLUMN IF NOT EXISTS file_id INTEGER REFERENCES videos(id)") - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS monitor_jobs ( - id SERIAL PRIMARY KEY, - uuid VARCHAR(16) NOT NULL, - video_path VARCHAR(512), - status VARCHAR(20) NOT NULL DEFAULT 'pending', - current_processor VARCHAR(20), - progress_total INT DEFAULT 0, - progress_current INT DEFAULT 0, - error_count INT DEFAULT 0, - last_error TEXT, - started_at TIMESTAMP, - updated_at TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_monitor_jobs_uuid ON monitor_jobs(uuid)") - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_monitor_jobs_status ON monitor_jobs(status)") - .execute(&self.pool) - .await?; - - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_monitor_jobs_created_at ON monitor_jobs(created_at)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS api_keys ( - id SERIAL PRIMARY KEY, - key_id VARCHAR(48) UNIQUE NOT NULL, - key_hash VARCHAR(64) NOT NULL, - key_prefix VARCHAR(8) NOT NULL, - name VARCHAR(128) NOT NULL, - key_type VARCHAR(20) NOT NULL DEFAULT 'user', - user_id BIGINT, - service_name VARCHAR(64), - permissions JSONB DEFAULT '["read", "write"]', - expires_at TIMESTAMP, - last_used_at TIMESTAMP, - last_used_ip VARCHAR(45), - usage_count BIGINT DEFAULT 0, - status VARCHAR(20) NOT NULL DEFAULT 'active', - rotation_required BOOLEAN DEFAULT FALSE, - rotation_reason TEXT, - grace_period_end TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_api_keys_key_id ON api_keys(key_id)") - .execute(&self.pool) - .await?; - sqlx::query("CREATE INDEX IF NOT EXISTS idx_api_keys_hash ON api_keys(key_hash)") - .execute(&self.pool) - .await?; - sqlx::query("CREATE INDEX IF NOT EXISTS idx_api_keys_type ON api_keys(key_type)") - .execute(&self.pool) - .await?; - sqlx::query("CREATE INDEX IF NOT EXISTS idx_api_keys_status ON api_keys(status)") - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS api_key_audit_log ( - id SERIAL PRIMARY KEY, - key_id VARCHAR(32) NOT NULL, - action VARCHAR(50) NOT NULL, - actor VARCHAR(128), - ip_address VARCHAR(45), - user_agent TEXT, - request_path TEXT, - response_code INT, - anomaly_type VARCHAR(30), - details JSONB, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_audit_key_id ON api_key_audit_log(key_id)") - .execute(&self.pool) - .await?; - sqlx::query("CREATE INDEX IF NOT EXISTS idx_audit_action ON api_key_audit_log(action)") - .execute(&self.pool) - .await?; - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_audit_created_at ON api_key_audit_log(created_at)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS api_key_anomalies ( - id SERIAL PRIMARY KEY, - key_id VARCHAR(32) NOT NULL, - anomaly_type VARCHAR(30) NOT NULL, - severity VARCHAR(10) NOT NULL, - ip_address VARCHAR(45), - request_count INT, - error_count INT, - error_rate DOUBLE PRECISION, - unique_ips INT, - details JSONB, - resolved BOOLEAN DEFAULT FALSE, - resolved_at TIMESTAMP, - resolved_by VARCHAR(128), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_anomalies_key_id ON api_key_anomalies(key_id)") - .execute(&self.pool) - .await?; - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_anomalies_resolved ON api_key_anomalies(resolved)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS gitea_tokens ( - id SERIAL PRIMARY KEY, - gitea_token_id BIGINT NOT NULL, - gitea_user VARCHAR(128) NOT NULL, - token_name VARCHAR(128) NOT NULL, - token_last_eight VARCHAR(8) NOT NULL, - scopes JSONB DEFAULT '[]', - api_key_id VARCHAR(48), - last_verified TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - UNIQUE(gitea_user, token_name) - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_gitea_tokens_user ON gitea_tokens(gitea_user)") - .execute(&self.pool) - .await?; - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_gitea_tokens_key_id ON gitea_tokens(api_key_id)", - ) - .execute(&self.pool) - .await?; - - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS n8n_api_keys ( - id SERIAL PRIMARY KEY, - n8n_key_id VARCHAR(64) UNIQUE NOT NULL, - label VARCHAR(100) NOT NULL, - api_key_last_eight VARCHAR(8) NOT NULL, - momentry_api_key_id VARCHAR(48), - expires_at TIMESTAMP WITH TIME ZONE, - last_verified TIMESTAMP WITH TIME ZONE, - created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP - ) - "#, - ) - .execute(&self.pool) - .await?; - - sqlx::query("CREATE INDEX IF NOT EXISTS idx_n8n_api_keys_label ON n8n_api_keys(label)") - .execute(&self.pool) - .await?; - sqlx::query( - "CREATE INDEX IF NOT EXISTS idx_n8n_api_keys_key_id ON n8n_api_keys(momentry_api_key_id)", - ) - .execute(&self.pool) - .await?; - - Ok(()) - } - pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> { let table = schema::table_name("chunks"); let content_with_rule = serde_json::json!({ @@ -1955,7 +1702,7 @@ impl PostgresDb { pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result> { let table = schema::table_name("chunks"); let rows = sqlx::query(&format!( - "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM {} WHERE uuid = $1 ORDER BY chunk_index", + "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE uuid = $1 ORDER BY chunk_index", table )) .bind(uuid) @@ -2021,6 +1768,7 @@ impl PostgresDb { pre_chunk_ids, parent_chunk_id, child_chunk_ids, + visual_stats: r.try_get("visual_stats").ok().flatten(), } }) .collect(); @@ -2028,78 +1776,6 @@ impl PostgresDb { Ok(chunks) } - pub async fn get_chunk_by_chunk_id(&self, chunk_id: &str) -> Result> { - let table = schema::table_name("chunks"); - let row = sqlx::query(&format!( - "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = $1", - table - )) - .bind(chunk_id) - .fetch_optional(&self.pool) - .await?; - - if let Some(r) = row { - let chunk_type_str: String = r.get(4); - let chunk_index: i32 = r.get(3); - let chunk_type = match chunk_type_str.as_str() { - "time" => ChunkType::TimeBased, - "sentence" => ChunkType::Sentence, - "cut" => ChunkType::Cut, - "trace" => ChunkType::Trace, - "story" => ChunkType::Story, - _ => ChunkType::TimeBased, - }; - - let content: serde_json::Value = r.get(9); - let metadata: Option = r.get(10); - - let pre_chunk_ids: Vec = r.try_get(13).unwrap_or_default(); - let parent_chunk_id: Option = r.try_get(14).ok().flatten(); - let child_chunk_ids: Vec = r.try_get(15).unwrap_or_default(); - - let (rule, content_data) = if content.get("rule").is_some() { - let rule_str = content - .get("rule") - .and_then(|v| v.as_str()) - .unwrap_or("rule_1"); - let rule = if rule_str == "rule_2" { - ChunkRule::Rule2 - } else { - ChunkRule::Rule1 - }; - let data = content.get("data").cloned().unwrap_or(content); - (rule, data) - } else { - (ChunkRule::Rule1, content) - }; - - let file_id: i32 = sqlx::Row::get(&r, "file_id"); - let frame_count: i32 = sqlx::Row::get(&r, "frame_count"); - - Ok(Some(Chunk { - file_id, - uuid: r.get("uuid"), - chunk_id: r.get("chunk_id"), - chunk_index: chunk_index as u32, - chunk_type, - rule, - fps: r.get("fps"), - start_frame: r.get("start_frame"), - end_frame: r.get("end_frame"), - text_content: r.get("text_content"), - content: content_data, - metadata, - vector_id: r.get("vector_id"), - frame_count, - pre_chunk_ids, - parent_chunk_id, - child_chunk_ids, - })) - } else { - Ok(None) - } - } - pub async fn get_chunk_by_chunk_id_and_uuid( &self, chunk_id: &str, @@ -2107,13 +1783,13 @@ impl PostgresDb { ) -> Result> { let table = schema::table_name("chunks"); let row = sqlx::query(&format!( - "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = $1 AND uuid = $2", + "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2", table )) - .bind(chunk_id) - .bind(uuid) - .fetch_optional(&self.pool) - .await?; + .bind(chunk_id) + .bind(uuid) + .fetch_optional(&self.pool) + .await?; if let Some(r) = row { let chunk_type_str: String = r.get(4); @@ -2171,12 +1847,42 @@ impl PostgresDb { pre_chunk_ids, parent_chunk_id, child_chunk_ids, + visual_stats: r.try_get("visual_stats").ok().flatten(), })) } else { Ok(None) } } + /// Fetches metadata (including 5W1H Plus) from the parent_chunks table + pub async fn get_parent_chunk_metadata( + &self, + parent_id: i32, + ) -> Result> { + let query = "SELECT metadata FROM parent_chunks WHERE id = $1"; + let row: Option<(Option,)> = sqlx::query_as(query) + .bind(parent_id) + .fetch_optional(&self.pool) + .await?; + + Ok(row.map(|r| r.0).flatten()) + } + + /// Fetches extended details from parent_chunks including summary and metadata + pub async fn get_parent_chunk_detail( + &self, + parent_id: i32, + ) -> Result, Option)>> { + let query = "SELECT summary_text, metadata FROM parent_chunks WHERE id = $1"; + eprintln!("[DBG] get_parent_chunk_detail: pid={}", parent_id); + let row: Option<(Option, Option)> = sqlx::query_as(query) + .bind(parent_id) + .fetch_optional(&self.pool) + .await?; + eprintln!("[DBG] get_parent_chunk_detail result: {:?}", row); + Ok(row) + } + pub async fn store_pre_chunk(&self, pre_chunk: &PreChunk) -> Result { let table = schema::table_name("pre_chunks"); let row = sqlx::query(&format!( @@ -2219,12 +1925,13 @@ impl PostgresDb { let table = schema::table_name("frames"); sqlx::query(&format!( r#" - INSERT INTO {} (file_id, frame_number, timestamp, fps, yolo_objects, ocr_results, face_results, frame_path) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + INSERT INTO {} (file_id, frame_number, timestamp, fps, yolo_objects, ocr_results, face_results, pose_results, frame_path) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) ON CONFLICT (file_id, frame_number) DO UPDATE SET yolo_objects = EXCLUDED.yolo_objects, ocr_results = EXCLUDED.ocr_results, face_results = EXCLUDED.face_results, + pose_results = EXCLUDED.pose_results, frame_path = EXCLUDED.frame_path "#, table @@ -2236,6 +1943,7 @@ impl PostgresDb { .bind(&frame.yolo_objects) .bind(&frame.ocr_results) .bind(&frame.face_results) + .bind(&frame.pose_results) .bind(&frame.frame_path) .execute(&self.pool) .await?; @@ -2259,10 +1967,11 @@ impl PostgresDb { Option, Option, Option, + Option, Option, String, )>(&format!( - "SELECT id, file_id, frame_number, timestamp, fps, yolo_objects, ocr_results, face_results, frame_path, created_at + "SELECT id, file_id, frame_number, timestamp, fps, yolo_objects, ocr_results, face_results, pose_results, frame_path, created_at FROM {} WHERE file_id = $1 AND timestamp >= $2 AND timestamp <= $3 ORDER BY frame_number", @@ -2285,8 +1994,9 @@ impl PostgresDb { yolo_objects: r.5, ocr_results: r.6, face_results: r.7, - frame_path: r.8, - created_at: r.9, + pose_results: r.8, + frame_path: r.9, + created_at: r.10, }) .collect(); @@ -2301,7 +2011,7 @@ impl PostgresDb { ) -> Result> { let table = schema::table_name("chunks"); let rows = sqlx::query(&format!( - "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids + "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE file_id = $1 AND start_time >= $2 AND end_time <= $3 ORDER BY start_time", @@ -2372,6 +2082,7 @@ impl PostgresDb { pre_chunk_ids, parent_chunk_id, child_chunk_ids, + visual_stats: r.try_get("visual_stats").ok().flatten(), } }) .collect(); @@ -2386,7 +2097,7 @@ impl PostgresDb { let table = schema::table_name("chunks"); let rows = sqlx::query(&format!( - "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index", + "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index", table )) .bind(chunk_ids) @@ -2452,6 +2163,7 @@ impl PostgresDb { pre_chunk_ids, parent_chunk_id, child_chunk_ids, + visual_stats: r.try_get("visual_stats").ok().flatten(), } }) .collect(); @@ -2608,6 +2320,7 @@ impl PostgresDb { pre_chunk_ids: vec![], parent_chunk_id: r.12, child_chunk_ids: r.13, + visual_stats: None, } }) .collect(); @@ -2622,15 +2335,19 @@ impl PostgresDb { limit: usize, ) -> Result> { let table = schema::table_name("chunks"); - let tsquery = self.prepare_tsquery(query)?; + let tsquery = self.prepare_tsquery(query).await?; let sql = match uuid { Some(_) => &format!( r#" - SELECT chunk_id, uuid, chunk_index, chunk_type, start_time, end_time, - text_content, ts_rank_cd(search_vector, $1::tsquery) as bm25_score - FROM {} - WHERE search_vector @@ $1::tsquery AND uuid = $2 + SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, + c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score, + c.visual_stats, + pc.metadata->'structured_summary' as scene_summary, + c.parent_chunk_id::integer + FROM {} c + LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar + WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1)) AND c.uuid = $2 ORDER BY bm25_score DESC LIMIT $3 "#, @@ -2638,10 +2355,14 @@ impl PostgresDb { ), None => &format!( r#" - SELECT chunk_id, uuid, chunk_index, chunk_type, start_time, end_time, - text_content, ts_rank_cd(search_vector, $1::tsquery) as bm25_score - FROM {} - WHERE search_vector @@ $1::tsquery + SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, + c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score, + c.visual_stats, + pc.metadata->'structured_summary' as scene_summary, + c.parent_chunk_id::integer + FROM {} c + LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar + WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1)) ORDER BY bm25_score DESC LIMIT $2 "#, @@ -2649,32 +2370,65 @@ impl PostgresDb { ), }; - let rows = if let Some(uuid) = uuid { - sqlx::query_as::<_, (String, String, i32, String, f64, f64, Option, f32)>(sql) - .bind(&tsquery) - .bind(uuid) - .bind(limit as i64) - .fetch_all(&self.pool) - .await? - } else { - sqlx::query_as::<_, (String, String, i32, String, f64, f64, Option, f32)>(sql) - .bind(&tsquery) - .bind(limit as i64) - .fetch_all(&self.pool) - .await? + let rows: Vec<( + String, + String, + i32, + String, + i64, + i64, + f64, + f64, + f64, + Option, + f32, + Option, + Option, + Option, + )> = match uuid { + Some(_) => { + sqlx::query_as(sql) + .bind(&tsquery) + .bind(uuid) + .bind(limit as i64) + .fetch_all(&self.pool) + .await? + } + None => { + sqlx::query_as(sql) + .bind(&tsquery) + .bind(limit as i64) + .fetch_all(&self.pool) + .await? + } }; let results: Vec = rows .into_iter() - .map(|r| Bm25Result { - chunk_id: r.0, - uuid: r.1, - chunk_index: r.2 as u32, - chunk_type: r.3, - start_time: r.4, - end_time: r.5, - text: r.6.unwrap_or_default(), - bm25_score: r.7, + .map(|r| { + let scene_summary: Option = + r.12.as_ref() + .and_then(|v| serde_json::from_value(v.clone()).ok()); + + Bm25Result { + chunk_id: r.0, + uuid: r.1, + chunk_index: r.2 as u32, + chunk_type: r.3, + start_frame: r.4, + end_frame: r.5, + fps: r.6, + start_time: r.7, + end_time: r.8, + text: r.9.unwrap_or_default(), + bm25_score: r.10, + visual_stats: r.11, + scene_summary: r + .12 + .as_ref() + .and_then(|v| serde_json::from_value(v.clone()).ok()), + parent_chunk_id: r.13, + } }) .collect(); @@ -2727,12 +2481,17 @@ impl PostgresDb { uuid: r.uuid.clone(), chunk_index: r.chunk_index, chunk_type: r.chunk_type.clone(), + start_frame: r.start_frame, + end_frame: r.end_frame, + fps: r.fps, start_time: r.start_time, end_time: r.end_time, text: r.text.clone(), vector_score: 0.0, bm25_score: normalized_score as f64, combined_score, + parent_chunk_id: r.parent_chunk_id, + visual_stats: r.visual_stats.clone(), }, ); } @@ -2766,6 +2525,9 @@ impl PostgresDb { existing.combined_score += combined_score; } else { let chunk_data = chunk_map.get(&(r.chunk_id.clone(), r.uuid.clone())); + let parent_chunk_id = chunk_data + .as_ref() + .and_then(|c| c.parent_chunk_id.as_ref().and_then(|s| s.parse().ok())); combined.insert( (r.chunk_id.clone(), r.uuid.clone()), HybridSearchResult { @@ -2775,6 +2537,9 @@ impl PostgresDb { chunk_type: chunk_data .map(|c| c.chunk_type.as_str().to_string()) .unwrap_or_default(), + start_frame: chunk_data.map(|c| c.start_frame).unwrap_or(0), + end_frame: chunk_data.map(|c| c.end_frame).unwrap_or(0), + fps: chunk_data.map(|c| c.fps).unwrap_or(0.0), start_time: chunk_data.map(|c| c.start_time().seconds()).unwrap_or(0.0), end_time: chunk_data.map(|c| c.end_time().seconds()).unwrap_or(0.0), text: chunk_data @@ -2783,6 +2548,8 @@ impl PostgresDb { vector_score: normalized_score as f64, bm25_score: 0.0, combined_score, + parent_chunk_id, + visual_stats: chunk_data.and_then(|c| c.visual_stats.clone()), }, ); } @@ -2799,8 +2566,13 @@ impl PostgresDb { Ok(results) } - fn prepare_tsquery_internal(&self, query: &str) -> Result { + pub async fn prepare_tsquery(&self, query: &str) -> Result { + self.prepare_tsquery_internal_async(query).await + } + + async fn prepare_tsquery_internal_async(&self, query: &str) -> Result { let expander = global_synonym_expander(); + let online_expander = crate::core::text::global_online_expander(); // 對中文查詢進行特殊處理 let processed_query = if contains_chinese(query) { @@ -2818,7 +2590,48 @@ impl PostgresDb { tokenize_chinese_text(&expanded) } } else { - query.to_string() + // 對英文查詢:直接使用原始查詢詞,不做同義詞擴展 + // BM25 適合精確匹配,同義詞擴展會導致過多噪音 + // 需要同義詞擴展時應使用 Vector 或 Hybrid 模式 + let words: Vec<&str> = query.split_whitespace().collect(); + let mut cleaned_words: Vec = Vec::new(); + + // 英文停用詞 + let stop_words: std::collections::HashSet<&str> = [ + "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", + "by", "from", "is", "are", "was", "were", "be", "been", "being", "have", "has", + "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", + "can", "shall", "it", "its", "this", "that", "these", "those", "i", "you", "he", + "she", "we", "they", "me", "him", "her", "us", "them", "my", "your", "his", "our", + "their", "what", "which", "who", "whom", "whose", "where", "when", "why", "how", + "not", "no", "so", "if", "then", "than", "too", "very", "just", "about", "up", + "out", "into", "over", "after", "before", "between", "under", "again", "further", + "once", "here", "there", "all", "each", "few", "more", "most", "other", "some", + "such", "only", "own", "same", "also", "back", "down", "off", "above", "below", + "during", "through", "while", "until", "whether", + ] + .iter() + .cloned() + .collect(); + + for word in words { + let cleaned = word + .chars() + .filter(|c| c.is_alphanumeric()) + .collect::() + .to_lowercase(); + + if !cleaned.is_empty() && !stop_words.contains(cleaned.as_str()) { + cleaned_words.push(format!("{}:*", cleaned)); + } + } + + if cleaned_words.is_empty() { + return Ok("__no_match__:*".to_string()); + } + + // 使用 & 連接所有詞 (AND 邏輯),加上前綴匹配 + return Ok(cleaned_words.join(" & ")); }; // 解析查詢字符串,處理同義詞組 @@ -2954,52 +2767,136 @@ impl PostgresDb { } } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SceneSummary { + #[serde(rename = "summary_5lines")] + pub summary: String, + pub who: String, + pub what: String, + pub r#where: String, + pub when: Option, + pub why: String, + pub how: String, + pub tone: Vec, + pub characters: Vec, + pub key_events: Vec, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Bm25Result { pub chunk_id: String, pub uuid: String, pub chunk_index: u32, pub chunk_type: String, + pub start_frame: i64, + pub end_frame: i64, + pub fps: f64, pub start_time: f64, pub end_time: f64, pub text: String, pub bm25_score: f32, + pub parent_chunk_id: Option, + pub visual_stats: Option, + pub scene_summary: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HybridSearchResult { - pub chunk_id: String, pub uuid: String, + pub chunk_id: String, pub chunk_index: u32, pub chunk_type: String, + pub start_frame: i64, + pub end_frame: i64, + pub fps: f64, pub start_time: f64, pub end_time: f64, pub text: String, pub vector_score: f64, pub bm25_score: f64, pub combined_score: f64, + pub parent_chunk_id: Option, + pub visual_stats: Option, } impl PostgresDb { - pub fn prepare_tsquery(&self, query: &str) -> Result { - self.prepare_tsquery_internal(query) + /// Search person_identities for n8n Who Search + pub async fn search_person_candidates( + &self, + query: &str, + uuid: &Option, + limit: i32, + ) -> Result> { + let person_identities = schema::table_name("person_identities"); + let search_query = format!("%{}%", query); + + let sql = match uuid { + Some(_) => &format!( + "SELECT person_id, name, appearance_count, video_uuid, created_at + FROM {} WHERE name ILIKE $1 AND video_uuid = $2 + ORDER BY appearance_count DESC LIMIT $3", + person_identities + ), + None => &format!( + "SELECT person_id, name, appearance_count, video_uuid, created_at + FROM {} WHERE name ILIKE $1 + ORDER BY appearance_count DESC LIMIT $2", + person_identities + ), + }; + + let rows: Vec<( + String, + String, + i32, + String, + Option>, + )> = match uuid { + Some(_) => { + sqlx::query_as(sql) + .bind(&search_query) + .bind(uuid.as_ref().unwrap()) + .bind(limit as i64) + .fetch_all(&self.pool) + .await? + } + None => { + sqlx::query_as(sql) + .bind(&search_query) + .bind(limit as i64) + .fetch_all(&self.pool) + .await? + } + }; + + let results: Vec = rows + .into_iter() + .map(|r| { + serde_json::json!({ + "person_id": r.0, + "name": r.1, + "appearance_count": r.2, + "video_uuid": r.3, + "created_at": r.4.map(|t| t.to_string()) + }) + }) + .collect(); + + Ok(results) } pub async fn get_pending_jobs(&self, limit: i32) -> Result> { let monitor_jobs = schema::table_name("monitor_jobs"); - let processor_results = schema::table_name("processor_results"); let rows = sqlx::query(&format!( r#" SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current, error_count, last_error, started_at, updated_at, created_at FROM {} - WHERE status = 'pending' - OR (status = 'running' AND EXISTS (SELECT 1 FROM {} WHERE job_id = monitor_jobs.id AND status = 'pending')) + WHERE status IN ('pending', 'running') ORDER BY created_at ASC LIMIT $1 - FOR UPDATE SKIP LOCKED "#, - monitor_jobs, processor_results + monitor_jobs )) .bind(limit) .fetch_all(&self.pool) @@ -3219,10 +3116,10 @@ impl PostgresDb { .map(|r| { let status_str: String = r.get(3); let processor_type_str: String = r.get(2); - let started_at: Option = r.get(5); - let completed_at: Option = r.get(6); - let created_at: chrono::NaiveDateTime = r.get(11); - let updated_at: Option = r.get(12); + let started_at: Option> = r.get(5); + let completed_at: Option> = r.get(6); + let created_at: chrono::DateTime = r.get(11); + let updated_at: Option> = r.get(12); ProcessorResult { id: r.get(0), job_id: r.get(1), @@ -3279,6 +3176,506 @@ impl PostgresDb { .await?; Ok(count) } + + // ========================================== + // 身份綁定系統 (Identity Binding V5) + // ========================================== + + /// 獲取或創建 Identity + pub async fn get_or_create_identity( + &self, + name: &str, + ) -> Result { + let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>( + r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, embedding::text, metadata, created_at"#, + ) + .bind(name) + .fetch_one(&self.pool) + .await?; + Ok(identity) + } + + /// 綁定身份 + pub async fn bind_identity( + &self, + identity_id: i64, + binding_type: &str, + binding_value: &str, + source: &str, + confidence: f64, + ) -> Result<()> { + sqlx::query( + r#"INSERT INTO identity_bindings (identity_id, identity_type, identity_value, metadata, confidence) VALUES ($1, $2, $3, jsonb_build_object('source', $4), $5) ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence"#, + ) + .bind(identity_id) + .bind(binding_type) + .bind(binding_value) + .bind(source) + .bind(confidence) + .execute(&self.pool) + .await?; + Ok(()) + } + + /// 解綁身份 + pub async fn unbind_identity(&self, identity_type: &str, identity_value: &str) -> Result<()> { + sqlx::query( + "DELETE FROM identity_bindings WHERE identity_type = $1 AND identity_value = $2", + ) + .bind(identity_type) + .bind(identity_value) + .execute(&self.pool) + .await?; + Ok(()) + } + + /// 查詢機器 ID 對應的 Identity + pub async fn get_identity_by_binding( + &self, + binding_type: &str, + binding_value: &str, + ) -> Result> { + let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>( + "SELECT i.id, i.name, i.embedding::text, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2", + ) + .bind(binding_type) + .bind(binding_value) + .fetch_optional(&self.pool) + .await?; + Ok(identity) + } + + /// 列出所有 Identities + pub async fn list_identities( + &self, + search: &str, + limit: i32, + offset: i32, + ) -> Result> { + let query = if !search.is_empty() { + sqlx::query_as::<_, crate::core::person_identity::Identity>( + "SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3", + ) + .bind(format!("%{}%", search)) + } else { + sqlx::query_as::<_, crate::core::person_identity::Identity>( + "SELECT id, name, embedding::text, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2", + ) + }; + let identities = query.bind(limit).bind(offset).fetch_all(&self.pool).await?; + Ok(identities) + } + + /// 根據 ID 獲取 Identity + pub async fn get_identity_by_id( + &self, + id: i64, + ) -> Result> { + let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>( + "SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE id = $1", + ) + .bind(id) + .fetch_optional(&self.pool) + .await?; + Ok(identity) + } + + // ========================================== + // 信號發現與管理 (Signal Discovery) + // ========================================== + + // ========================================== + // 5W1H Smart Search + // ========================================== + + /// 搜尋 Parent Chunks (Semantic Vector Search) + pub async fn search_parent_chunks_semantic( + &self, + uuid: &str, + query_vector: &[f32], + limit: usize, + ) -> Result> { + // Convert Vec to JSON string for vector cast + let vector_json = serde_json::to_string(query_vector) + .map_err(|e| anyhow::anyhow!("Vector serialize error: {}", e))?; + + let results = sqlx::query_as::<_, SemanticSearchResult>( + r#" + SELECT + id, scene_order, start_time, end_time, + summary_text as summary, + metadata, + (1 - (summary_vector <=> $1::vector)) as similarity + FROM parent_chunks + WHERE uuid = $2 + ORDER BY summary_vector <=> $1::vector + LIMIT $3 + "#, + ) + .bind(&vector_json) + .bind(uuid) + .bind(limit as i64) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } + + /// Get children for a list of parent IDs + pub async fn get_children_for_parents( + &self, + parent_ids: &[i32], + limit_per_parent: usize, + ) -> Result> { + if parent_ids.is_empty() { + return Ok(vec![]); + } + + // Use subquery to fetch top N children per parent using row_number() + let results = sqlx::query_as::<_, ChildChunkResult>( + r#" + SELECT id, parent_id, start_frame, end_frame, fps, start_time, end_time, raw_text, speaker_ids + FROM ( + SELECT *, + ROW_NUMBER() OVER (PARTITION BY parent_id ORDER BY start_time) as rn + FROM child_chunks + WHERE parent_id = ANY($1) + ) sub + WHERE rn <= $2 + ORDER BY parent_id, start_time + "#, + ) + .bind(parent_ids) + .bind(limit_per_parent as i64) + .fetch_all(&self.pool) + .await?; + + Ok(results) + } + + /// List all persons (Legacy / Global) + pub async fn list_all_persons(&self) -> Result> { + let rows = sqlx::query_as::<_, (i32, String, String, Option)>( + "SELECT id, uuid, name, metadata FROM video_identities ORDER BY created_at DESC", + ) + .fetch_all(&self.pool) + .await?; + + let mut results = Vec::new(); + for (id, uuid, name, meta) in rows { + if let Some(identity) = self.build_who_identity(&uuid, id, name, meta).await? { + results.push(identity); + } + } + Ok(results) + } + + /// Get Who info by Chunk ID (Parent or Child) + pub async fn get_who_info_by_chunk( + &self, + uuid: &str, + chunk_id: &str, + ) -> Result { + // 1. Find Chunk Time Range + // Check Parent + let mut res = sqlx::query_as::<_, (f64, f64)>( + "SELECT start_time, end_time FROM parent_chunks WHERE uuid = $1 AND id::text = $2", + ) + .bind(uuid) + .bind(chunk_id) + .fetch_optional(&self.pool) + .await?; + + // If not found, Check Child + if res.is_none() { + res = sqlx::query_as::<_, (f64, f64)>( + "SELECT start_time, end_time FROM child_chunks WHERE uuid = $1 AND id::text = $2", + ) + .bind(uuid) + .bind(chunk_id) + .fetch_optional(&self.pool) + .await?; + } + + let (start, end) = match res { + Some(t) => t, + None => return Ok(serde_json::json!({ "error": "Chunk not found" })), + }; + + // 2. Aggregate Face & Speaker IDs within Time Range + let faces: Vec = sqlx::query_scalar( + "SELECT DISTINCT unnest(face_ids) FROM child_chunks WHERE uuid = $1 AND start_time >= $2 AND end_time <= $3 AND face_ids IS NOT NULL AND face_ids <> '{}'" + ) + .bind(uuid) + .bind(start) + .bind(end) + .fetch_all(&self.pool) + .await?; + + let speakers: Vec = sqlx::query_scalar( + "SELECT DISTINCT unnest(speaker_ids) FROM child_chunks WHERE uuid = $1 AND start_time >= $2 AND end_time <= $3 AND speaker_ids IS NOT NULL AND speaker_ids <> '{}'" + ) + .bind(uuid) + .bind(start) + .bind(end) + .fetch_all(&self.pool) + .await?; + + // 3. Resolve to Person IDs + let mut persons: std::collections::HashMap = + std::collections::HashMap::new(); // Map ID -> Name + + // Resolve Faces + for fid in &faces { + if let Some(identity) = self.find_person_by_binding(uuid, "face", fid).await? { + persons.insert(format!("face_{}", fid), identity.name); + } + } + + // Resolve Speakers + for sid in &speakers { + if let Some(identity) = self.find_person_by_binding(uuid, "speaker", sid).await? { + persons.insert(format!("speaker_{}", sid), identity.name); + } + } + + Ok(serde_json::json!({ + "uuid": uuid, + "chunk_id": chunk_id, + "time_range": { "start": start, "end": end }, + "raw_ids": { + "face_ids": faces, + "speaker_ids": speakers + }, + "resolved_persons": persons + })) + } + + /// Create or Update Person (Define Identity - Video Scoped) + pub async fn create_or_update_person( + &self, + uuid: &str, + identity_id: Option, // If None, create new + name: String, + face_ids: Vec, + speaker_ids: Vec, + ) -> Result { + let final_id = if let Some(id) = identity_id { + // Update Name (Scoped to UUID check implicit by ID, but let's be safe) + sqlx::query("UPDATE video_identities SET name = $1 WHERE id = $2 AND uuid = $3") + .bind(&name) + .bind(id) + .bind(uuid) + .execute(&self.pool) + .await?; + id + } else { + // Create New + let id = sqlx::query( + "INSERT INTO video_identities (uuid, name) VALUES ($1, $2) RETURNING id", + ) + .bind(uuid) + .bind(&name) + .fetch_one(&self.pool) + .await?; + id.get(0) + }; + + // Clear old bindings for THIS identity + sqlx::query("DELETE FROM identity_bindings WHERE identity_id = $1") + .bind(final_id) + .execute(&self.pool) + .await?; + + // Bind Faces + for fid in face_ids { + if !fid.is_empty() { + sqlx::query("INSERT INTO identity_bindings (identity_id, uuid, binding_type, binding_value) VALUES ($1, $2, 'face', $3) ON CONFLICT (uuid, binding_type, binding_value) DO UPDATE SET identity_id = EXCLUDED.identity_id") + .bind(final_id) + .bind(uuid) + .bind(&fid) + .execute(&self.pool) + .await?; + } + } + + // Bind Speakers + for sid in speaker_ids { + if !sid.is_empty() { + sqlx::query("INSERT INTO identity_bindings (identity_id, uuid, binding_type, binding_value) VALUES ($1, $2, 'speaker', $3) ON CONFLICT (uuid, binding_type, binding_value) DO UPDATE SET identity_id = EXCLUDED.identity_id") + .bind(final_id) + .bind(uuid) + .bind(&sid) + .execute(&self.pool) + .await?; + } + } + + self.get_person_by_id(uuid, final_id) + .await? + .ok_or_else(|| anyhow::anyhow!("Failed to retrieve created person")) + } + + /// Get Person by ID (Video Scoped) + pub async fn get_person_by_id( + &self, + uuid: &str, + id: i32, + ) -> Result> { + let res = sqlx::query_as::<_, (i32, String, Option)>( + "SELECT id, name, metadata FROM video_identities WHERE id = $1 AND uuid = $2", + ) + .bind(id) + .bind(uuid) + .fetch_optional(&self.pool) + .await?; + + match res { + Some((id, name, meta)) => self.build_who_identity(uuid, id, name, meta).await, + None => Ok(None), + } + } + + pub async fn list_unbound_signals( + &self, + uuid: &str, + binding_type: &str, + ) -> Result> { + let column = if binding_type == "face" { + "face_ids" + } else { + "speaker_ids" + }; + + let query = format!( + r#" + SELECT DISTINCT unnest({})::varchar as signal_val + FROM chunks + WHERE uuid = $1 AND {} IS NOT NULL AND {} <> '{{}}' + EXCEPT + SELECT identity_value FROM identity_bindings WHERE identity_type = $2 + ORDER BY signal_val + "#, + column, column, column + ); + + let rows: Vec<(String,)> = sqlx::query_as(&query) + .bind(uuid) + .bind(binding_type) + .fetch_all(&self.pool) + .await?; + + Ok(rows.into_iter().map(|r| r.0).collect()) + } + + /// 獲取包含特定信號的所有 Chunk (用於標註驗證) + pub async fn get_chunks_by_signal( + &self, + uuid: &str, + binding_type: &str, + signal_value: &str, + ) -> Result> { + let column = if binding_type == "face" { + "face_ids" + } else { + "speaker_ids" + }; + let query = format!( + "SELECT id, start_frame, end_frame, content FROM chunks WHERE uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame", + column + ); + + let rows = sqlx::query(&query) + .bind(uuid) + .bind(signal_value) + .fetch_all(&self.pool) + .await?; + + let chunks: Vec = rows + .into_iter() + .map(|r| { + let content: Option = r.get(3); + let mut result = serde_json::json!({ + "id": r.get::(0), + "start_frame": r.get::(1), + "end_frame": r.get::(2), + }); + if let Some(c) = content { + result["content"] = c; + } + result + }) + .collect(); + + Ok(chunks) + } + + // ========================================== + // Who API Helpers + // ========================================== + + /// Helper to find person by binding + pub async fn find_person_by_binding( + &self, + uuid: &str, + bind_type: &str, + bind_value: &str, + ) -> Result> { + let res = sqlx::query_as::<_, (i32, String, Option)>( + "SELECT vi.id, vi.name, vi.metadata + FROM video_identities vi + JOIN identity_bindings ib ON vi.id = ib.identity_id + WHERE vi.uuid = $1 AND ib.binding_type = $2 AND ib.binding_value = $3", + ) + .bind(uuid) + .bind(bind_type) + .bind(bind_value) + .fetch_optional(&self.pool) + .await?; + + match res { + Some((id, name, meta)) => self.build_who_identity(uuid, id, name, meta).await, + None => Ok(None), + } + } + + /// Helper to build full WhoIdentity with all linked IDs + pub async fn build_who_identity( + &self, + uuid: &str, + id: i32, + name: String, + meta: Option, + ) -> Result> { + // Fetch all Face IDs + let face_rows = sqlx::query_as::<_, (String,)>( + "SELECT binding_value FROM identity_bindings WHERE identity_id = $1 AND binding_type = 'face'" + ) + .bind(id) + .fetch_all(&self.pool) + .await?; + let face_ids: Vec = face_rows.into_iter().map(|r| r.0).collect(); + + // Fetch all Speaker IDs + let speaker_rows = sqlx::query_as::<_, (String,)>( + "SELECT binding_value FROM identity_bindings WHERE identity_id = $1 AND binding_type = 'speaker'" + ) + .bind(id) + .fetch_all(&self.pool) + .await?; + let speaker_ids: Vec = speaker_rows.into_iter().map(|r| r.0).collect(); + + Ok(Some(crate::api::who::WhoIdentity { + identity_id: id, + uuid: uuid.to_string(), + name, + tags: meta + .and_then(|m| m.get("tags").cloned()) + .and_then(|v| serde_json::from_value(v).ok()), + face_ids, + speaker_ids, + })) + } } #[async_trait] @@ -3458,10 +3855,16 @@ mod tests { uuid: "test-uuid".to_string(), chunk_index: 1, chunk_type: "sentence".to_string(), + start_frame: 0, + end_frame: 150, + fps: 30.0, start_time: 0.0, end_time: 5.0, text: "Hello world".to_string(), bm25_score: 0.75, + parent_chunk_id: None, + visual_stats: None, + scene_summary: None, }; let json = serde_json::to_string(&result).unwrap(); @@ -3477,12 +3880,17 @@ mod tests { uuid: "test-uuid".to_string(), chunk_index: 1, chunk_type: "sentence".to_string(), + start_frame: 0, + end_frame: 150, + fps: 30.0, start_time: 0.0, end_time: 5.0, text: "Hello world".to_string(), vector_score: 0.85, bm25_score: 0.75, combined_score: 0.80, + parent_chunk_id: None, + visual_stats: None, }; let json = serde_json::to_string(&result).unwrap(); diff --git a/src/core/db/qdrant_db.rs b/src/core/db/qdrant_db.rs index da6681e..1f8ad97 100644 --- a/src/core/db/qdrant_db.rs +++ b/src/core/db/qdrant_db.rs @@ -128,7 +128,7 @@ impl QdrantDb { use std::hash::{Hash, Hasher}; let mut hasher = DefaultHasher::new(); point_id_str.hash(&mut hasher); - let point_id = hasher.finish() as u64; + let point_id = hasher.finish(); let body = serde_json::json!({ "points": [{ @@ -171,7 +171,7 @@ impl QdrantDb { )); } - tracing::debug!("Qdrant response: {}", response_text); + tracing::debug!("Qdrant upsert response status: {}", status); tracing::info!("Successfully upserted vector for chunk: {}", chunk_id); Ok(()) } @@ -257,6 +257,101 @@ impl QdrantDb { Ok(search_results) } + pub async fn search_collections( + &self, + query_vector: &[f32], + collections: &[&str], + limit: usize, + ) -> Result> { + let mut handles = Vec::new(); + for &collection in collections { + let url = format!("{}/collections/{}/points/search", self.base_url, collection); + let client = self.client.clone(); + let api_key = self.api_key.clone(); + let query_vec = query_vector.to_vec(); + let body = serde_json::json!({ + "vector": query_vec, + "limit": limit * 2, // Fetch more from each to account for overlaps + "with_payload": true + }); + handles.push(async move { + let response = client + .post(&url) + .header("api-key", &api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await; + + match response { + Ok(resp) if resp.status().is_success() => { + let resp_text = resp + .text() + .await + .unwrap_or_else(|_| "Failed to read response".to_string()); + + #[derive(Deserialize)] + struct QdrantSearchResult { + result: Vec, + } + #[derive(Deserialize)] + struct QdrantPoint { + #[allow(dead_code)] + id: serde_json::Value, + score: f64, + payload: HashMap, + } + if let Ok(result) = serde_json::from_str::(&resp_text) { + let results: Vec = result + .result + .into_iter() + .map(|r| { + let uuid = r + .payload + .get("uuid") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let chunk_id = r + .payload + .get("chunk_id") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + SearchResult { + uuid, + chunk_id, + score: r.score as f32, + } + }) + .collect(); + Ok::, anyhow::Error>(results) + } else { + Ok::, anyhow::Error>(Vec::new()) + } + } + _ => Ok::, anyhow::Error>(Vec::new()), + } + }); + } + + let results = futures_util::future::join_all(handles).await; + let mut merged: Vec = results + .into_iter() + .filter_map(Result::ok) + .flatten() + .collect(); + + // Sort by score descending + merged.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); + // Deduplicate by chunk_id + uuid + merged.dedup_by_key(|r| (r.chunk_id.clone(), r.uuid.clone())); + // Truncate to limit + merged.truncate(limit); + + Ok(merged) + } + pub async fn search_in_uuid( &self, query_vector: &[f32], diff --git a/src/core/mod.rs b/src/core/mod.rs index c950aad..86415f9 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -4,9 +4,15 @@ pub mod chunk; pub mod config; pub mod db; pub mod embedding; +pub mod ingestion; +pub mod llm; pub mod overlay; +pub mod person_identity; pub mod probe; pub mod processor; pub mod storage; +pub mod text; pub mod thumbnail; pub mod time; +pub mod tmdb; +pub mod worker; diff --git a/src/core/processor/asrx.rs b/src/core/processor/asrx.rs index e337392..9dc9b93 100644 --- a/src/core/processor/asrx.rs +++ b/src/core/processor/asrx.rs @@ -28,16 +28,23 @@ pub async fn process_asrx( uuid: Option<&str>, ) -> Result { let executor = PythonExecutor::new()?; - let script_path = executor.script_path("asrx_processor.py"); + let script_path = executor.script_path("asrx_processor_custom.py"); - tracing::info!("[ASRX] Starting speaker diarization: {}", video_path); + tracing::info!( + "[ASRX] Starting speaker diarization (custom): {}", + video_path + ); if !script_path.exists() { - tracing::warn!("[ASRX] Script not found, returning empty result"); - return Ok(AsrxResult { - language: None, - segments: vec![], - }); + tracing::warn!("[ASRX] Custom script not found, falling back to original"); + let fallback_path = executor.script_path("asrx_processor.py"); + if !fallback_path.exists() { + tracing::warn!("[ASRX] No script found, returning empty result"); + return Ok(AsrxResult { + language: None, + segments: vec![], + }); + } } let mut cmd = Command::new(executor.python_path()); diff --git a/src/core/processor/mod.rs b/src/core/processor/mod.rs index 5a6c095..c74c77d 100644 --- a/src/core/processor/mod.rs +++ b/src/core/processor/mod.rs @@ -9,6 +9,7 @@ pub mod ocr; pub mod pose; pub mod scene_classification; pub mod story; +pub mod visual_chunk; pub mod yolo; pub use asr::{process_asr, AsrResult, AsrSegment}; @@ -28,4 +29,5 @@ pub use scene_classification::{ process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment, }; pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats}; +pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult}; pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult}; diff --git a/src/lib.rs b/src/lib.rs index b12be8e..c00e5cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,8 @@ pub mod api; pub mod ui; +pub mod watcher; + pub mod worker; pub use core::cache::{keys, MongoCache, RedisCache}; @@ -13,6 +15,10 @@ pub use core::db::{ VideoStatus, }; pub use core::embedding::Embedder; +pub use core::person_identity::{ + ChunkPersonInfo, PersonAppearance, PersonIdentity, PersonIdentityResponse, PersonMatch, + PersonStatistics, PersonTimelineEntry, PersonTimelineResponse, +}; pub use core::probe::ProbeResult; pub use core::storage::file_manager::FileManager; pub use core::storage::output_dir::OutputDir; diff --git a/src/main.rs b/src/main.rs index d1fb061..acc3d01 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1805,6 +1805,64 @@ async fn main() -> Result<()> { } }; + // Read Pose JSON (optional) + let pose_path = format!("{}.pose.json", uuid); + let pose_result = match std::fs::read_to_string(&pose_path) { + Ok(pose_json) => match serde_json::from_str::< + momentry_core::core::processor::pose::PoseResult, + >(&pose_json) + { + Ok(result) => { + println!("Loaded Pose: {} frames", result.frames.len()); + result + } + Err(e) => { + println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e); + momentry_core::core::processor::pose::PoseResult { + frame_count: 0, + fps: 0.0, + frames: vec![], + } + } + }, + Err(_) => { + println!("Warning: Pose file not found. Skipping Pose."); + momentry_core::core::processor::pose::PoseResult { + frame_count: 0, + fps: 0.0, + frames: vec![], + } + } + }; + + // Read ASRX JSON (optional) + let asrx_path = format!("{}.asrx.json", uuid); + let asrx_result = match std::fs::read_to_string(&asrx_path) { + Ok(asrx_json) => match serde_json::from_str::< + momentry_core::core::processor::asrx::AsrxResult, + >(&asrx_json) + { + Ok(result) => { + println!("Loaded ASRX: {} segments", result.segments.len()); + result + } + Err(e) => { + println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e); + momentry_core::core::processor::asrx::AsrxResult { + language: None, + segments: vec![], + } + } + }, + Err(_) => { + println!("Warning: ASRX file not found. Skipping ASRX."); + momentry_core::core::processor::asrx::AsrxResult { + language: None, + segments: vec![], + } + } + }; + // ========== Store pre_chunks (from ASR, CUT) ========== println!("\nStoring pre_chunks..."); @@ -1922,12 +1980,21 @@ async fn main() -> Result<()> { face_by_frame.insert(frame.frame, frame.clone()); } - // Store frames (merge data from YOLO, OCR, Face) + let mut pose_by_frame: std::collections::HashMap< + u64, + momentry_core::core::processor::pose::PoseFrame, + > = std::collections::HashMap::new(); + for frame in &pose_result.frames { + pose_by_frame.insert(frame.frame, frame.clone()); + } + + // Store frames (merge data from YOLO, OCR, Face, Pose) let mut all_frames: Vec = frame_data .keys() .cloned() .chain(ocr_by_frame.keys().cloned()) .chain(face_by_frame.keys().cloned()) + .chain(pose_by_frame.keys().cloned()) .collect(); all_frames.sort(); all_frames.dedup(); @@ -1937,6 +2004,7 @@ async fn main() -> Result<()> { let yolo_frame = frame_data.get(frame_num); let ocr_frame = ocr_by_frame.get(frame_num); let face_frame = face_by_frame.get(frame_num); + let pose_frame = pose_by_frame.get(frame_num); let frame = momentry_core::core::db::postgres_db::Frame { id: 0, @@ -1947,6 +2015,7 @@ async fn main() -> Result<()> { yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)), ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)), face_results: face_frame.map(|f| serde_json::json!(&f.faces)), + pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)), frame_path: None, created_at: String::new(), }; @@ -1960,10 +2029,33 @@ async fn main() -> Result<()> { println!("\nCreating chunks..."); // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk) + // Merge ASRX speaker_id by time overlap let mut sentence_chunks = Vec::new(); for (i, seg) in asr_result.segments.iter().enumerate() { let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0); - let chunk = Chunk::from_seconds( + + // Find matching ASRX segment by time overlap + let speaker_id = asrx_result + .segments + .iter() + .find(|ax| { + // Overlap: ASRX segment overlaps with ASR segment + ax.start <= seg.end && ax.end >= seg.start + }) + .and_then(|ax| ax.speaker_id.clone()); + + let content = if let Some(ref sid) = speaker_id { + serde_json::json!({ + "text": seg.text, + "speaker_id": sid, + }) + } else { + serde_json::json!({ + "text": seg.text, + }) + }; + + let mut chunk = Chunk::from_seconds( file_id as i32, uuid.clone(), i as u32, @@ -1972,15 +2064,40 @@ async fn main() -> Result<()> { seg.start, seg.end, fps, - serde_json::json!({ - "text": seg.text, - }), + content, ) .with_text_content(seg.text.clone()) .with_pre_chunk_ids(vec![pre_chunk_id as i32]); + + // Add ASRX metadata if available + if speaker_id.is_some() { + chunk = chunk.with_metadata(serde_json::json!({ + "language": asr_result.language, + "language_probability": asr_result.language_probability, + "speaker_matched": true, + })); + } + sentence_chunks.push(chunk); } + if !asrx_result.segments.is_empty() { + let matched = sentence_chunks + .iter() + .filter(|c| { + c.content + .get("speaker_id") + .and_then(|v| v.as_str()) + .is_some() + }) + .count(); + println!( + " ASRX merge: {}/{} sentence chunks matched to speakers", + matched, + sentence_chunks.len() + ); + } + // Rule 1: CUT chunks let mut cut_chunks = Vec::new(); for (i, scene) in cut_result.scenes.iter().enumerate() { @@ -2235,7 +2352,7 @@ async fn main() -> Result<()> { // Get list of videos to process let videos_to_process = if uuid == "all" { // Get all videos - let videos = pg.list_videos().await?; + let videos = pg.list_videos(10000, 0).await?.0; videos.into_iter().map(|v| v.uuid).collect::>() } else { // Process single video @@ -2486,7 +2603,7 @@ async fn main() -> Result<()> { .await? .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?] } else { - db.list_videos().await? + db.list_videos(10000, 0).await?.0 }; let output_dir = std::path::PathBuf::from("thumbnails"); @@ -2520,7 +2637,7 @@ async fn main() -> Result<()> { .await? .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?] } else { - db.list_videos().await? + db.list_videos(10000, 0).await?.0 }; println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗"); diff --git a/src/player/api_client.rs b/src/player/api_client.rs index 36e587a..db352c5 100644 --- a/src/player/api_client.rs +++ b/src/player/api_client.rs @@ -5,6 +5,21 @@ use std::path::PathBuf; const DEFAULT_API_URL: &str = "http://localhost:3002"; +const DEV_API_URL: &str = "http://localhost:3003"; + +fn get_api_url() -> String { + std::env::var("MOMENTRY_API_URL").unwrap_or_else(|_| { + std::env::var("MOMENTRY_SERVER_PORT") + .ok() + .map(|port| format!("http://localhost:{}", port)) + .unwrap_or_else(|| DEFAULT_API_URL.to_string()) + }) +} + +fn get_api_key() -> Option { + std::env::var("MOMENTRY_API_KEY").ok() +} + #[derive(Debug, Clone)] pub struct ApiClient { client: Client, @@ -83,7 +98,7 @@ pub struct VideosResponse { impl ApiClient { pub fn new() -> Self { - let url = std::env::var("MOMENTRY_API_URL").unwrap_or_else(|_| DEFAULT_API_URL.to_string()); + let url = get_api_url(); Self { client: Client::new(), base_url: url, @@ -103,7 +118,11 @@ impl ApiClient { let request = RegisterRequest { path: path.to_string(), }; - let response = self.client.post(&url).json(&request).send().await?; + let mut request_builder = self.client.post(&url).json(&request); + if let Some(key) = get_api_key() { + request_builder = request_builder.header("X-API-Key", key); + } + let response = request_builder.send().await?; let status = response.status(); let result = response.json::().await?; if !status.is_success() { @@ -124,7 +143,11 @@ impl ApiClient { limit, uuid: uuid.map(|s| s.to_string()), }; - let response = self.client.post(&url).json(&request).send().await?; + let mut request_builder = self.client.post(&url).json(&request); + if let Some(key) = get_api_key() { + request_builder = request_builder.header("X-API-Key", key); + } + let response = request_builder.send().await?; let status = response.status(); let result = response.json::().await?; if !status.is_success() { @@ -135,18 +158,30 @@ impl ApiClient { pub async fn lookup_video(&self, uuid: &str) -> Result { let url = format!("{}/api/v1/lookup?uuid={}", self.base_url, uuid); - let response = self.client.get(&url).send().await?; + let mut request = self.client.get(&url); + if let Some(key) = get_api_key() { + request = request.header("X-API-Key", key); + } + let response = request.send().await?; let status = response.status(); - let result = response.json::().await?; - if !status.is_success() { + if status == 200 { + let result = response.json::().await?; + if result.uuid.is_empty() { + anyhow::bail!("影片不存在: {}", uuid); + } + Ok(result) + } else { anyhow::bail!("API request failed with status: {}", status); } - Ok(result) } pub async fn list_videos(&self) -> Result> { let url = format!("{}/api/v1/videos", self.base_url); - let response = self.client.get(&url).send().await?; + let mut request = self.client.get(&url); + if let Some(key) = get_api_key() { + request = request.header("X-API-Key", key); + } + let response = request.send().await?; let status = response.status(); let result = response.json::().await?; if !status.is_success() { diff --git a/src/player/main.rs b/src/player/main.rs index b18d124..527a7b7 100644 --- a/src/player/main.rs +++ b/src/player/main.rs @@ -397,6 +397,29 @@ fn format_time(seconds: f64) -> String { format!("{:02}:{:02}:{:02}.{:02}", hours, minutes, secs, millis) } +#[allow(dead_code)] +fn get_video_duration(video_path: &str) -> f64 { + let output = std::process::Command::new("ffprobe") + .args([ + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + video_path, + ]) + .output(); + + match output { + Ok(out) if out.status.success() => { + let duration_str = String::from_utf8_lossy(&out.stdout).trim().to_string(); + duration_str.parse::().unwrap_or(0.0) + } + _ => 0.0, + } +} + fn lookup_video_uuid(video_path: &str) -> Option { use std::process::Command as StdCommand; @@ -510,9 +533,714 @@ fn run_player(_video_path: &str, _video_uuid: Option) -> Result<()> { } #[cfg(feature = "player")] -fn run_player(_video_path: &str, _video_uuid: Option) -> Result<()> { - println!("Player not available - SDL2 not configured"); - println!("Playing: {} (UUID: {:?})", _video_path, _video_uuid); +fn run_player(video_path: &str, video_uuid: Option) -> Result<()> { + run_player_with_sdl2(video_path, video_uuid) +} + +#[cfg(feature = "player")] +fn run_player_with_sdl2(video_path: &str, video_uuid: Option) -> Result<()> { + use sdl2::event::Event; + use sdl2::keyboard::Keycode; + use sdl2::pixels::PixelFormatEnum; + use std::io::{BufReader, Read}; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::{Duration, Instant}; + + println!("\n=== 🎬 SDL2 Video Player ==="); + println!("File: {}", video_path); + println!("UUID: {:?}", video_uuid); + + let sdl_context = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?; + let video_subsystem = sdl_context + .video() + .map_err(|e| anyhow::anyhow!("Video init failed: {}", e))?; + + let width = 1280u32; + let height = 720u32; + + let window = video_subsystem + .window("Momentry Player", width, height) + .position_centered() + .resizable() + .build() + .map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?; + + let mut canvas = window + .into_canvas() + .build() + .map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?; + + let texture_creator = canvas.texture_creator(); + let mut texture = texture_creator + .create_texture_streaming(PixelFormatEnum::RGB24, width as u32, height as u32) + .map_err(|e| anyhow::anyhow!("Texture creation failed: {}", e))?; + + let ffmpeg_path = if cfg!(target_os = "macos") { + "/opt/homebrew/bin/ffmpeg" + } else { + "ffmpeg" + }; + + let mut ffmpeg = std::process::Command::new(ffmpeg_path) + .args([ + "-i", + video_path, + "-vf", + &format!( + "scale={}:{}:force_original_aspect_ratio=decrease,pad={}:{}:(ow-iw)/2:(oh-ih)/2", + width, height, width, height + ), + "-pix_fmt", + "rgb24", + "-r", + "30", + "-f", + "rawvideo", + "-", + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .spawn() + .map_err(|e| anyhow::anyhow!("Failed to start ffmpeg: {}", e))?; + + let stdout = ffmpeg + .stdout + .take() + .ok_or_else(|| anyhow::anyhow!("Failed to capture stdout"))?; + let mut reader = BufReader::new(stdout); + + let frame_size = (width * height * 3) as usize; + let mut frame_buffer = vec![0u8; frame_size]; + + let playing = Arc::new(AtomicBool::new(true)); + let playing_clone = playing.clone(); + + let mut event_pump = sdl_context + .event_pump() + .map_err(|e| anyhow::anyhow!("Event pump failed: {}", e))?; + + let mut asr_overlay = asr_overlay::AsrOverlay::new(); + let _ = asr_overlay.load_from_file(video_path); + println!("ASR Overlay initialized: {}", !asr_overlay.is_empty()); + + let video_duration = get_video_duration(video_path); + println!("Video duration: {:.1}s", video_duration); + + let mut frame_count = 0u64; + let frame_duration = Duration::from_millis(33); + let mut paused = false; + let mut current_time = 0.0; + let mut seek_request: Option = None; + let fps = 30.0; + + let mut asr_overlay_visible = false; + + println!("Playing... (Press SPACE to pause, Q/ESC to quit, ←/→ to seek, A to toggle ASR, F for fullscreen)"); + + loop { + let frame_start = Instant::now(); + + // Handle seek by restarting ffmpeg + if let Some(seek_pos) = seek_request { + seek_request = None; + println!("\n⏩ Seeking to {:.1}s...", seek_pos); + + // Kill old ffmpeg and restart with seek position + let _ = ffmpeg.kill(); + + ffmpeg = std::process::Command::new(ffmpeg_path) + .args([ + "-ss", &format!("{:.2}", seek_pos), + "-i", video_path, + "-vf", &format!( + "scale={}:{}:force_original_aspect_ratio=decrease,pad={}:{}:(ow-iw)/2:(oh-ih)/2", + width, height, width, height + ), + "-pix_fmt", "rgb24", + "-r", "30", + "-f", "rawvideo", + "-", + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .spawn() + .map_err(|e| anyhow::anyhow!("Failed to restart ffmpeg: {}", e))?; + + let stdout = ffmpeg + .stdout + .take() + .ok_or_else(|| anyhow::anyhow!("Failed to capture stdout"))?; + reader = BufReader::new(stdout); + current_time = seek_pos; + println!("▶ Resumed at {:.1}s", current_time); + } + + for event in event_pump.poll_iter() { + match event { + Event::Quit { .. } => { + println!("\n👋 Quitting player"); + playing_clone.store(false, Ordering::SeqCst); + break; + } + Event::KeyDown { keycode, .. } => match keycode { + Some(Keycode::Q) | Some(Keycode::Escape) => { + println!("\n👋 Quitting player"); + playing_clone.store(false, Ordering::SeqCst); + break; + } + Some(Keycode::Space) => { + paused = !paused; + println!("{}", if paused { "⏸ Paused" } else { "▶ Playing" }); + } + Some(Keycode::Left) => { + let new_time = (current_time - 10.0).max(0.0); + seek_request = Some(new_time); + println!("⏪ Seek to {:.1}s", new_time); + } + Some(Keycode::Right) => { + let new_time = current_time + 10.0; + seek_request = Some(new_time); + println!("⏩ Seek to {:.1}s", new_time); + } + Some(Keycode::Up) => { + let new_time = (current_time - 60.0).max(0.0); + seek_request = Some(new_time); + println!("⏪ Seek to {:.1}s (1min)", new_time); + } + Some(Keycode::Down) => { + let new_time = current_time + 60.0; + seek_request = Some(new_time); + println!("⏩ Seek to {:.1}s (+1min)", new_time); + } + Some(Keycode::A) => { + // Toggle ASR Visibility + asr_overlay_visible = !asr_overlay_visible; + println!( + "{}", + if asr_overlay_visible { + "🔊 ASR ON" + } else { + "🔇 ASR OFF" + } + ); + } + Some(Keycode::F) => { + println!("📺 Toggle fullscreen (not implemented in basic SDL2)"); + } + _ => {} + }, + _ => {} + } + } + + if !playing_clone.load(Ordering::SeqCst) { + break; + } + + if paused { + thread::sleep(Duration::from_millis(100)); + continue; + } + + // Update ASR text based on current time + if !asr_overlay.is_empty() { + asr_overlay.update(current_time); + } + + match reader.read_exact(&mut frame_buffer) { + Ok(_) => { + texture + .update(None, &frame_buffer, (width * 3) as usize) + .map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?; + + // Draw everything + canvas.clear(); + + canvas + .copy(&texture, None, None) + .map_err(|e| anyhow::anyhow!("Render failed: {}", e))?; + + // Draw ASR Text if visible and available + if asr_overlay_visible && !asr_overlay.get_text().is_empty() { + // Placeholder: Cannot use TTF functions directly here without font context. + // For now, just printing to console to verify timing. + // In a real implementation, load font and draw text here. + println!("[ASR] {:.1}s: {}", current_time, asr_overlay.get_text()); + } + + // Draw progress bar at bottom - gray background, green progress + use sdl2::rect::Rect; + let progress = if video_duration > 0.0 { + (current_time / video_duration).min(1.0) + } else { + 0.0 + }; + let bar_width = ((width as f64) * progress) as u32; + + canvas.set_draw_color(sdl2::pixels::Color::RGB(50, 50, 50)); // Background + let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, width, 5)); + if bar_width > 0 { + canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 200, 0)); // Progress + let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, bar_width, 5)); + } + // Reset draw color to black for next frame + canvas.set_draw_color(sdl2::pixels::Color::RGB(0, 0, 0)); + + canvas.present(); + + frame_count += 1; + current_time += 1.0 / fps; + + let elapsed = frame_start.elapsed(); + if elapsed < frame_duration { + thread::sleep(frame_duration - elapsed); + } + } + Err(_) => { + println!( + "\n📽️ End of video ({} frames, {:.1}s)", + frame_count, current_time + ); + break; + } + } + } + + let _ = ffmpeg.kill(); + println!("✅ Playback finished (total: {:.1}s)", current_time); + Ok(()) +} + +fn run_local_mode(external_player: &str) -> Result<()> { + let args: Vec = env::args().collect(); + + // Find video path - skip all flags and get the first non-flag argument after them + let video_path = args + .iter() + .skip(1) // Skip binary name + .skip_while(|a| a.starts_with('-')) // Skip flags + .next() + .cloned(); + + let video_path = match video_path { + Some(p) if !p.is_empty() => p, + _ => { + println!("Local Mode - Play local video files"); + println!("=====================================\n"); + print!("Enter video file path: "); + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let path = input.trim().to_string(); + if path.is_empty() { + anyhow::bail!("No video path provided"); + } + path + } + }; + + if !Path::new(&video_path).exists() { + anyhow::bail!("File not found: {}", video_path); + } + + println!("\nUsing external player: {}", external_player); + println!("Playing: {}", video_path); + + match external_player { + "vlc" => { + std::process::Command::new("open") + .arg("-a") + .arg("VLC") + .arg(&video_path) + .spawn()?; + println!("✅ Opened with VLC"); + } + "mpv" => { + std::process::Command::new("mpv").arg(&video_path).spawn()?; + println!("✅ Opened with mpv"); + } + "ffplay" => { + std::process::Command::new("ffplay") + .arg("-autoexit") + .arg(&video_path) + .spawn()?; + println!("✅ Opened with ffplay"); + } + "sdl2" => { + #[cfg(feature = "player")] + return run_player_with_sdl2(&video_path, None); + #[cfg(not(feature = "player"))] + { + println!("SDL2 player not enabled. Rebuild with --features player"); + } + } + _ => { + std::process::Command::new(external_player) + .arg(&video_path) + .spawn()?; + println!("✅ Opened with {}", external_player); + } + } + + Ok(()) +} + +fn run_online_mode() -> Result<()> { + println!("\n==========================================="); + println!(" 🎬 Online Mode - Momentry"); + println!("===========================================\n"); + + let client = ApiClient::new(); + println!("Connected to API: {}", client.base_url()); + + let rt = tokio::runtime::Runtime::new()?; + + loop { + println!("\n┌─────────────────────────────────────────┐"); + println!("│ Online Mode Menu │"); + println!("├─────────────────────────────────────────┤"); + println!("│ [1] List Videos - 列出所有影片 │"); + println!("│ [2] Search - RAG 搜尋影片內容 │"); + println!("│ [3] Play - 播放影片 │"); + println!("│ [4] Lookup - 查詢影片資訊 │"); + println!("│ [q] Quit - 離開 │"); + println!("└─────────────────────────────────────────┘"); + print!("\n請選擇: "); + + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let choice = input.trim(); + + match choice { + "1" => { + println!("\n=== 📋 影片列表 ==="); + match rt.block_on(client.list_videos()) { + Ok(videos) => { + if videos.is_empty() { + println!("沒有找到任何影片"); + } else { + println!("\n共 {} 部影片:\n", videos.len()); + for (i, v) in videos.iter().enumerate() { + let duration = format!( + "{}:{:02}", + (v.duration / 60.0) as u32, + (v.duration % 60.0) as u32 + ); + println!( + " [{}] {} | {} | {}x{} | {}", + i + 1, + v.file_name, + v.uuid.chars().take(8).collect::(), + v.width, + v.height, + duration + ); + } + } + } + Err(e) => println!("取得影片列表失敗: {}", e), + } + } + "2" => { + println!("\n=== 🔍 RAG 搜尋 ==="); + print!("輸入搜尋關鍵字: "); + input.clear(); + std::io::stdin().read_line(&mut input)?; + let query = input.trim().to_string(); + if query.is_empty() { + println!("搜尋關鍵字不能為空"); + continue; + } + + print!("限定特定影片?(y/N): "); + input.clear(); + std::io::stdin().read_line(&mut input)?; + let limit_uuid = if input.trim().to_lowercase() == "y" { + print!("輸入影片 UUID: "); + input.clear(); + std::io::stdin().read_line(&mut input)?; + Some(input.trim().to_string()) + } else { + None + }; + + println!("\n搜尋中..."); + match rt.block_on(client.search_chunks(&query, limit_uuid.as_deref(), Some(10))) { + Ok(response) => { + if response.results.is_empty() { + println!("沒有找到結果"); + continue; + } + println!("\n找到 {} 個結果:\n", response.results.len()); + for (i, r) in response.results.iter().enumerate() { + let time_range = format!( + "{:02}:{:02} - {:02}:{:02}", + (r.start_time / 60.0) as u32, + (r.start_time % 60.0) as u32, + (r.end_time / 60.0) as u32, + (r.end_time % 60.0) as u32 + ); + let text_preview = if r.text.len() > 50 { + format!("{}...", &r.text[..50]) + } else { + r.text.clone() + }; + println!( + " [{}] {} | {} | {:.2} | {}", + i + 1, + time_range, + r.uuid.chars().take(8).collect::(), + r.score, + text_preview + ); + } + + let mut current_player: Option = None; + + loop { + if let Some(ref mut child) = current_player { + match child.try_wait() { + Ok(Some(_)) => { + println!("播放器已結束"); + current_player = None; + } + Ok(None) => { + // 還在執行中 + } + Err(e) => { + println!("檢查播放器狀態失敗:{}", e); + current_player = None; + } + } + } + + print!( + "\n選擇播放 (1-{}) 或 q 離開 (kill player), L 重新顯示列表:", + response.results.len() + ); + input.clear(); + std::io::stdin().read_line(&mut input)?; + let selection = input.trim(); + let selection_lower = selection.to_lowercase(); + if selection_lower == "q" { + if let Some(ref mut child) = current_player { + let _ = child.kill(); + let _ = child.wait(); + println!("已終止播放器"); + current_player = None; + } + break; + } + if selection_lower == "l" { + println!("\n搜尋結果:"); + for (i, r) in response.results.iter().enumerate() { + let time_range = format!( + "{:02}:{:02} - {:02}:{:02}", + (r.start_time / 60.0) as u32, + (r.start_time % 60.0) as u32, + (r.end_time / 60.0) as u32, + (r.end_time % 60.0) as u32 + ); + let text_preview = if r.text.len() > 50 { + format!("{}...", &r.text[..50]) + } else { + r.text.clone() + }; + println!( + " [{}] {} | {} | {:.2} | {}", + i + 1, + time_range, + r.uuid.chars().take(8).collect::(), + r.score, + text_preview + ); + } + continue; + } + if let Ok(idx) = selection.parse::() { + if idx > 0 && idx <= response.results.len() { + let selected = &response.results[idx - 1]; + println!("\n播放:{} - {}", selected.uuid, selected.text); + + if let Some(ref mut child) = current_player { + let _ = child.kill(); + let _ = child.wait(); + println!("已終止前一個播放器"); + } + + match rt.block_on(client.lookup_video(&selected.uuid)) { + Ok(info) => { + if let Some(path) = &info.file_path { + if std::path::Path::new(path).exists() { + let start_sec = + (selected.start_time as f64) - 2.0; + let end_sec = (selected.end_time as f64) + 2.0; + println!( + "開啟:{} (從 {:.0} 到 {:.0} 秒,A-B 循環)", + path, start_sec, end_sec + ); + println!("提示:mpv 視窗中按 c/C 切換循環,q 離開,Space 暫停"); + current_player = Some( + std::process::Command::new("mpv") + .arg(format!( + "--start={:.2}", + start_sec.max(0.0) + )) + .arg(format!( + "--ab-loop-a={:.2}", + start_sec.max(0.0) + )) + .arg(format!("--ab-loop-b={:.2}", end_sec)) + .arg("--input-commands=bind c ab-loop; bind C ab-loop") + .arg(path) + .spawn()? + ); + } else { + println!("錯誤:檔案不存在:{}", path); + } + } + } + Err(e) => println!("查詢失敗:{}", e), + } + } + } + } + } + Err(e) => println!("搜尋失敗:{}", e), + } + } + "4" => { + println!("\n=== 🔎 查詢影片 ==="); + print!("輸入影片 UUID (直接 Enter 從列表選擇): "); + input.clear(); + std::io::stdin().read_line(&mut input)?; + let uuid = input.trim(); + + if uuid.is_empty() { + println!("載入影片列表..."); + match rt.block_on(client.list_videos()) { + Ok(videos) => { + if videos.is_empty() { + println!("沒有影片"); + continue; + } + println!("\n選擇影片:"); + for (i, v) in videos.iter().enumerate() { + println!(" [{}] {} ({})", i + 1, v.file_name, v.uuid); + } + print!("\n選擇編號:"); + input.clear(); + std::io::stdin().read_line(&mut input)?; + if let Ok(idx) = input.trim().parse::() { + if idx > 0 && idx <= videos.len() { + let selected = &videos[idx - 1]; + println!("\n查詢中..."); + match rt.block_on(client.lookup_video(&selected.uuid)) { + Ok(info) => { + println!("\n✓ 找到影片:"); + println!(" UUID: {}", info.uuid); + if let Some(path) = &info.file_path { + println!(" 路徑:{}", path); + } + if let Some(name) = &info.file_name { + println!(" 名稱:{}", name); + } + if let Some(dur) = info.duration { + println!(" 時長:{:.2}s", dur); + } + } + Err(e) => println!("查詢失敗:{}", e), + } + } + } + } + Err(e) => println!("取得影片列表失敗:{}", e), + } + } else { + println!("\n查詢中..."); + match rt.block_on(client.lookup_video(uuid)) { + Ok(info) => { + println!("\n✓ 找到影片:"); + println!(" UUID: {}", info.uuid); + if let Some(path) = &info.file_path { + println!(" 路徑:{}", path); + } + if let Some(name) = &info.file_name { + println!(" 名稱:{}", name); + } + if let Some(dur) = info.duration { + println!(" 時長:{:.2}s", dur); + } + } + Err(e) => println!("查詢失敗:{}", e), + } + } + } + "3" => { + println!("\n=== ▶ 播放影片 ==="); + print!("輸入影片 UUID (直接 Enter 從列表選擇): "); + input.clear(); + std::io::stdin().read_line(&mut input)?; + let uuid = input.trim(); + + if uuid.is_empty() { + println!("載入影片列表..."); + match rt.block_on(client.list_videos()) { + Ok(videos) => { + if videos.is_empty() { + println!("沒有影片"); + continue; + } + println!("\n選擇影片:"); + for (i, v) in videos.iter().enumerate() { + println!(" [{}] {} ({})", i + 1, v.file_name, v.uuid); + } + print!("\n選擇編號:"); + input.clear(); + std::io::stdin().read_line(&mut input)?; + if let Ok(idx) = input.trim().parse::() { + if idx > 0 && idx <= videos.len() { + let selected = &videos[idx - 1]; + println!("\n播放: {}", selected.file_path); + if std::path::Path::new(&selected.file_path).exists() { + std::process::Command::new("mpv") + .arg(&selected.file_path) + .spawn()?; + } else { + println!("錯誤:檔案不存在:{}", selected.file_path); + } + } + } + } + Err(e) => println!("取得影片列表失敗:{}", e), + } + } else { + match rt.block_on(client.lookup_video(uuid)) { + Ok(info) => { + if let Some(path) = &info.file_path { + println!("開啟: {}", path); + if std::path::Path::new(path).exists() { + std::process::Command::new("mpv").arg(path).spawn()?; + } else { + println!("錯誤:檔案不存在:{}", path); + } + } + } + Err(e) => println!("查詢失敗:{}", e), + } + } + } + "q" | "Q" => { + println!("\n👋 再見!"); + break; + } + _ => { + println!("無效選項"); + } + } + } + Ok(()) } @@ -523,17 +1251,37 @@ fn main() -> Result<()> { let should_download = args.iter().any(|a| a == "-d" || a == "--download"); let show_selector = args.iter().any(|a| a == "-s" || a == "--selector"); let test_api_mode = args.iter().any(|a| a == "-t" || a == "--test-api"); + let local_mode = args.iter().any(|a| a == "-l" || a == "--local"); + let online_mode = args.iter().any(|a| a == "-o" || a == "--online"); + + // Get external player choice + let external_player = args + .iter() + .position(|a| a == "-p" || a == "--player") + .and_then(|i| args.get(i + 1)) + .cloned() + .unwrap_or_else(|| "vlc".to_string()); // API Testing Mode if test_api_mode { return run_api_test_mode(); } - // If --selector flag is provided, show video selector + // If --selector flag is provided, show video selector (online mode) if show_selector { return run_selector(); } + // If --online or -o is provided, run online mode + if online_mode { + return run_online_mode(); + } + + // If --local or -l is provided, run local mode with external player + if local_mode { + return run_local_mode(&external_player); + } + let video_path = if args.len() < 2 || (should_download && args.len() < 3) { println!("Video Player\n============\nEnter video path or YouTube URL:"); let mut input = String::new(); diff --git a/src/playground.rs b/src/playground.rs index d0a810a..42bbafe 100644 --- a/src/playground.rs +++ b/src/playground.rs @@ -4,6 +4,7 @@ use futures_util::StreamExt; use std::path::Path; use std::str; use std::sync::{Arc, Mutex}; +use tracing::{info, warn}; use momentry_core::core::api_key::{ApiKeyService, ApiKeyType}; use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType}; @@ -1813,6 +1814,64 @@ async fn main() -> Result<()> { } }; + // Read Pose JSON (optional) + let pose_path = format!("{}.pose.json", uuid); + let pose_result = match std::fs::read_to_string(&pose_path) { + Ok(pose_json) => match serde_json::from_str::< + momentry_core::core::processor::pose::PoseResult, + >(&pose_json) + { + Ok(result) => { + println!("Loaded Pose: {} frames", result.frames.len()); + result + } + Err(e) => { + println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e); + momentry_core::core::processor::pose::PoseResult { + frame_count: 0, + fps: 0.0, + frames: vec![], + } + } + }, + Err(_) => { + println!("Warning: Pose file not found. Skipping Pose."); + momentry_core::core::processor::pose::PoseResult { + frame_count: 0, + fps: 0.0, + frames: vec![], + } + } + }; + + // Read ASRX JSON (optional) + let asrx_path = format!("{}.asrx.json", uuid); + let asrx_result = match std::fs::read_to_string(&asrx_path) { + Ok(asrx_json) => match serde_json::from_str::< + momentry_core::core::processor::asrx::AsrxResult, + >(&asrx_json) + { + Ok(result) => { + println!("Loaded ASRX: {} segments", result.segments.len()); + result + } + Err(e) => { + println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e); + momentry_core::core::processor::asrx::AsrxResult { + language: None, + segments: vec![], + } + } + }, + Err(_) => { + println!("Warning: ASRX file not found. Skipping ASRX."); + momentry_core::core::processor::asrx::AsrxResult { + language: None, + segments: vec![], + } + } + }; + // ========== Store pre_chunks (from ASR, CUT) ========== println!("\nStoring pre_chunks..."); @@ -1930,12 +1989,21 @@ async fn main() -> Result<()> { face_by_frame.insert(frame.frame, frame.clone()); } - // Store frames (merge data from YOLO, OCR, Face) + let mut pose_by_frame: std::collections::HashMap< + u64, + momentry_core::core::processor::pose::PoseFrame, + > = std::collections::HashMap::new(); + for frame in &pose_result.frames { + pose_by_frame.insert(frame.frame, frame.clone()); + } + + // Store frames (merge data from YOLO, OCR, Face, Pose) let mut all_frames: Vec = frame_data .keys() .cloned() .chain(ocr_by_frame.keys().cloned()) .chain(face_by_frame.keys().cloned()) + .chain(pose_by_frame.keys().cloned()) .collect(); all_frames.sort(); all_frames.dedup(); @@ -1945,6 +2013,7 @@ async fn main() -> Result<()> { let yolo_frame = frame_data.get(frame_num); let ocr_frame = ocr_by_frame.get(frame_num); let face_frame = face_by_frame.get(frame_num); + let pose_frame = pose_by_frame.get(frame_num); let frame = momentry_core::core::db::postgres_db::Frame { id: 0, @@ -1955,6 +2024,7 @@ async fn main() -> Result<()> { yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)), ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)), face_results: face_frame.map(|f| serde_json::json!(&f.faces)), + pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)), frame_path: None, created_at: String::new(), }; @@ -1968,10 +2038,30 @@ async fn main() -> Result<()> { println!("\nCreating chunks..."); // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk) + // Merge ASRX speaker_id by time overlap let mut sentence_chunks = Vec::new(); for (i, seg) in asr_result.segments.iter().enumerate() { let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0); - let chunk = Chunk::from_seconds( + + // Find matching ASRX segment by time overlap + let speaker_id = asrx_result + .segments + .iter() + .find(|ax| ax.start <= seg.end && ax.end >= seg.start) + .and_then(|ax| ax.speaker_id.clone()); + + let content = if let Some(ref sid) = speaker_id { + serde_json::json!({ + "text": seg.text, + "speaker_id": sid, + }) + } else { + serde_json::json!({ + "text": seg.text, + }) + }; + + let mut chunk = Chunk::from_seconds( file_id as i32, uuid.clone(), i as u32, @@ -1980,15 +2070,39 @@ async fn main() -> Result<()> { seg.start, seg.end, fps, - serde_json::json!({ - "text": seg.text, - }), + content, ) .with_text_content(seg.text.clone()) .with_pre_chunk_ids(vec![pre_chunk_id as i32]); + + if speaker_id.is_some() { + chunk = chunk.with_metadata(serde_json::json!({ + "language": asr_result.language, + "language_probability": asr_result.language_probability, + "speaker_matched": true, + })); + } + sentence_chunks.push(chunk); } + if !asrx_result.segments.is_empty() { + let matched = sentence_chunks + .iter() + .filter(|c| { + c.content + .get("speaker_id") + .and_then(|v| v.as_str()) + .is_some() + }) + .count(); + println!( + " ASRX merge: {}/{} sentence chunks matched to speakers", + matched, + sentence_chunks.len() + ); + } + // Rule 1: CUT chunks let mut cut_chunks = Vec::new(); for (i, scene) in cut_result.scenes.iter().enumerate() { @@ -2405,6 +2519,20 @@ async fn main() -> Result<()> { Ok(()) } Commands::Server { host, port } => { + // Start Auto-Ingest Watcher + info!("Starting Auto-Ingest Watcher..."); + let _watcher = match momentry_core::watcher::run_watcher().await { + Ok(w) => { + info!("Auto-Ingest Watcher started successfully."); + Some(w) + } + Err(e) => { + warn!("Failed to start Auto-Ingest Watcher: {}", e); + None + } + }; + // The watcher is kept alive by '_watcher' variable until the server stops. + let port = port.unwrap_or_else(|| *momentry_core::core::config::SERVER_PORT); momentry_core::api::start_server(&host, port).await?; Ok(()) @@ -2461,13 +2589,13 @@ async fn main() -> Result<()> { Commands::Thumbnails { uuid, count } => { let db = PostgresDb::init().await?; - let videos = if let Some(ref uuid) = uuid { + let videos = if let Some(ref u) = uuid { vec![db - .get_video_by_uuid(uuid) + .get_video_by_uuid(u) .await? - .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?] + .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?] } else { - db.list_videos().await? + db.list_videos(10000, 0).await?.0 }; let output_dir = std::path::PathBuf::from("thumbnails"); @@ -2484,12 +2612,10 @@ async fn main() -> Result<()> { println!(" Generated {} thumbnails", result.count); } Err(e) => { - println!(" Error: {}", e); + eprintln!(" Failed to generate thumbnails: {}", e); } } } - - println!("\nThumbnails generated successfully!"); Ok(()) } Commands::Status { uuid } => { @@ -2501,7 +2627,7 @@ async fn main() -> Result<()> { .await? .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?] } else { - db.list_videos().await? + db.list_videos(10000, 0).await?.0 }; println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗"); @@ -2513,6 +2639,22 @@ async fn main() -> Result<()> { "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║", "Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec" ); + println!( + "╠{:33}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╪{:9}╣", + str::repeat("─", 32), + str::repeat("─", 8), + str::repeat("─", 8), + str::repeat("─", 8), + str::repeat("─", 8), + str::repeat("─", 8), + str::repeat("─", 8), + str::repeat("─", 8) + ); + println!("╠══════════════════════════════════════════════════════════════════════════════════╣"); + println!( + "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║", + "Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec" + ); println!( "║ {:32} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} │ {:8} ║", "", "Video", "JSON", "Chunk", "Chunk", "Chunk", "Chunk", "Chunk" diff --git a/src/watcher/mod.rs b/src/watcher/mod.rs index 416a61c..cbceb4d 100644 --- a/src/watcher/mod.rs +++ b/src/watcher/mod.rs @@ -1,3 +1,3 @@ pub mod watcher; -pub use watcher::{watch_directories, WatcherConfig}; +pub use watcher::{run_watcher, WatcherConfig}; diff --git a/src/watcher/watcher.rs b/src/watcher/watcher.rs index 6160c3b..faa0a3e 100644 --- a/src/watcher/watcher.rs +++ b/src/watcher/watcher.rs @@ -1,8 +1,11 @@ use anyhow::Result; -use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use std::path::Path; use std::sync::Arc; -use tokio::sync::mpsc; +use tokio::time; +use tracing::{error, info, warn}; + +use crate::core::db::{Database, PostgresDb}; +use crate::core::ingestion::IngestionService; pub struct WatcherConfig { pub directories: Vec, @@ -11,31 +14,94 @@ pub struct WatcherConfig { impl Default for WatcherConfig { fn default() -> Self { + // Default to SFTP demo directory if not specified + let default_dir = std::env::var("MOMENTRY_SFTP_ROOT") + .unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo/".to_string()); + Self { - directories: vec![], - poll_interval_ms: 5000, + directories: vec![default_dir], + poll_interval_ms: 60000, // 60 seconds polling interval } } } -pub async fn watch_directories(config: WatcherConfig, tx: mpsc::Sender) -> Result<()> { - // TODO: Implement directory watcher - // - // Options: - // 1. Use notify crate for file system events - // 2. Use polling as fallback - // - // When new video file is detected: - // - Send job to Redis queue - // - Trigger registration process +/// Starts the file watcher in the background. +/// Scans directories for video files and registers them if not already present. +pub async fn run_watcher() -> Result<()> { + let config = WatcherConfig::default(); + let dirs = config.directories.clone(); - println!("Watching directories: {:?}", config.directories); - - for dir in &config.directories { - if Path::new(dir).exists() { - println!("Directory exists: {}", dir); - } + if dirs.is_empty() { + warn!("No directories configured for watching."); + return Err(anyhow::anyhow!("No watch directories")); } + info!("Initializing Database for Watcher..."); + // Use Database::init() which handles config and pool creation + let db = PostgresDb::init().await?; + let service = Arc::new(IngestionService::new(db)); + + info!("Starting Ingestion Poller for: {:?}", dirs); + + // Spawn background task + tokio::spawn(async move { + let mut interval = time::interval(time::Duration::from_millis(config.poll_interval_ms)); + + // Run once immediately on startup to catch existing files + scan_and_ingest(&dirs, &service).await; + + loop { + interval.tick().await; + scan_and_ingest(&dirs, &service).await; + } + }); + Ok(()) } + +async fn scan_and_ingest(directories: &[String], service: &Arc) { + // Allowed extensions list + let allowed_extensions = vec!["mp4", "mov", "mkv"]; + + info!("Scanning directories for new videos..."); + + for dir in directories { + let path = Path::new(dir); + if !path.exists() { + warn!("Directory does not exist, skipping: {}", dir); + continue; + } + + if let Ok(entries) = std::fs::read_dir(path) { + for entry in entries.flatten() { + let file_path = entry.path(); + if file_path.is_file() { + // Check extension + let is_video = if let Some(ext) = file_path.extension().and_then(|e| e.to_str()) + { + allowed_extensions.contains(&ext.to_lowercase().as_str()) + } else { + false + }; + + if is_video { + if let Some(p_str) = file_path.to_str() { + // Try to ingest. The service checks if it already exists. + match service.ingest(p_str).await { + Ok(Some(uuid)) => { + info!("Auto-registered: {} -> {}", file_path.display(), uuid); + } + Ok(None) => { + // Already registered + } + Err(e) => { + error!("Failed to ingest {}: {}", file_path.display(), e); + } + } + } + } + } + } + } + } +} diff --git a/src/worker/job_worker.rs b/src/worker/job_worker.rs index 90b7a8f..7c6fb58 100644 --- a/src/worker/job_worker.rs +++ b/src/worker/job_worker.rs @@ -5,6 +5,7 @@ use std::time::Duration; use tokio::time::sleep; use tracing::{error, info, warn}; +use crate::core::chunk::{rule1_ingest, rule3_ingest}; use crate::core::db::{ MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus, }; @@ -210,12 +211,58 @@ impl JobWorker { .map(|r| r.processor_type.as_str().to_string()) .collect(); + // Check prerequisites for Rule 1 Chunking BEFORE moving arrays + let has_asr = completed_processors.iter().any(|p| p == "asr"); + let has_asrx = completed_processors.iter().any(|p| p == "asrx"); + let has_cut = completed_processors.iter().any(|p| p == "cut"); + // Update processor arrays in job record self.db .update_job_processors_arrays(job_id, completed_processors, failed_processors) .await?; if all_completed && !any_failed { + // 🚀 P1 Trigger: Rule 1 Chunking + if has_asr && has_asrx { + info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion..."); + let db_clone = self.db.clone(); + let uuid_clone = uuid.to_string(); + tokio::spawn(async move { + match db_clone.get_video_by_uuid(&uuid_clone).await { + Ok(Some(video)) => { + let fps = video.fps; + match rule1_ingest::ingest_rule1(db_clone.pool(), &uuid_clone, fps) + .await + { + Ok(count) => info!( + "✅ Rule 1 Ingestion completed: {} chunks inserted.", + count + ), + Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e), + } + } + Ok(None) => error!("Video not found for chunking: {}", uuid_clone), + Err(e) => error!("Failed to get video info for chunking: {}", e), + } + }); + } + + // 🚀 P1 Trigger: Rule 3 Scene Chunking + if has_cut && has_asr { + info!("📝 Prerequisites met for Rule 3 Scene Chunking. Starting ingestion..."); + let db_clone = self.db.clone(); + let uuid_clone = uuid.to_string(); + tokio::spawn(async move { + match rule3_ingest::ingest_rule3(db_clone.pool(), &uuid_clone).await { + Ok(count) => info!( + "✅ Rule 3 Scene Ingestion completed: {} scenes processed.", + count + ), + Err(e) => error!("❌ Rule 3 Scene Ingestion failed: {}", e), + } + }); + } + self.db .update_job_status(job_id, MonitorJobStatus::Completed) .await?; diff --git a/src/worker/processor.rs b/src/worker/processor.rs index e8411ef..c00f39e 100644 --- a/src/worker/processor.rs +++ b/src/worker/processor.rs @@ -16,6 +16,7 @@ use crate::core::processor::cut::CutResult; use crate::core::processor::face::FaceResult; use crate::core::processor::ocr::OcrResult; use crate::core::processor::pose::PoseResult; +use crate::core::processor::visual_chunk::VisualChunkResult; use crate::core::processor::yolo::YoloResult; #[derive(Debug, Clone)] @@ -302,6 +303,24 @@ impl ProcessorPool { } Ok(serde_json::to_value(result)?) } + ProcessorType::VisualChunk => { + let result = processor::process_visual_chunk_advanced( + video_path, + output_path.to_str().unwrap(), + uuid, + ) + .await?; + // Store VisualChunk chunks in database + tracing::info!( + "VisualChunk completed, storing {} chunks for {}", + result.chunk_count, + job.uuid + ); + if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await { + tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e); + } + Ok(serde_json::to_value(result)?) + } } } @@ -605,6 +624,13 @@ impl ProcessorPool { // Override chunk_id to include processor prefix for uniqueness chunk.chunk_id = format!("trace_yolo_{:04}", i); + // Populate text_content for BM25 search + let object_names: Vec = + frame.objects.iter().map(|o| o.class_name.clone()).collect(); + if !object_names.is_empty() { + chunk = chunk.with_text_content(object_names.join(" ")); + } + match db.store_chunk(&chunk).await { Ok(_) => { tracing::info!( @@ -660,6 +686,12 @@ impl ProcessorPool { // Override chunk_id to include processor prefix for uniqueness chunk.chunk_id = format!("trace_ocr_{:04}", i); + // Populate text_content for BM25 search + let texts: Vec = frame.texts.iter().map(|t| t.text.clone()).collect(); + if !texts.is_empty() { + chunk = chunk.with_text_content(texts.join(" ")); + } + match db.store_chunk(&chunk).await { Ok(_) => { tracing::info!( @@ -715,6 +747,16 @@ impl ProcessorPool { // Override chunk_id to include processor prefix for uniqueness chunk.chunk_id = format!("trace_face_{:04}", i); + // Populate text_content for BM25 search (face IDs) + let face_ids: Vec = frame + .faces + .iter() + .filter_map(|f| f.face_id.clone()) + .collect(); + if !face_ids.is_empty() { + chunk = chunk.with_text_content(face_ids.join(" ")); + } + match db.store_chunk(&chunk).await { Ok(_) => { tracing::info!( @@ -770,6 +812,16 @@ impl ProcessorPool { // Override chunk_id to include processor prefix for uniqueness chunk.chunk_id = format!("trace_pose_{:04}", i); + // Populate text_content for BM25 search (person count indicator) + let person_count = frame.persons.len(); + if person_count > 0 { + let text = format!("person person person") + .repeat(person_count.min(10)) + .trim() + .to_string(); + chunk = chunk.with_text_content(text); + } + match db.store_chunk(&chunk).await { Ok(_) => { tracing::info!( @@ -825,6 +877,16 @@ impl ProcessorPool { // Override chunk_id to include processor prefix for uniqueness chunk.chunk_id = format!("trace_asrx_{:04}", i); + // Populate text_content for BM25 search (already has text) + chunk = chunk.with_text_content(segment.text.clone()); + + // Also store speaker_id in content + chunk.content = serde_json::json!({ + "text": segment.text, + "speaker_id": segment.speaker_id, + "timestamp": segment.start, + }); + match db.store_chunk(&chunk).await { Ok(_) => { tracing::info!("Stored ASRX chunk {} for video {}", i, uuid); @@ -837,6 +899,24 @@ impl ProcessorPool { Ok(()) } + pub async fn store_visual_chunk_chunks( + db: &PostgresDb, + uuid: &str, + visual_chunk_result: &VisualChunkResult, + ) -> Result<()> { + for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() { + match db.store_chunk(chunk).await { + Ok(_) => { + tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid); + } + Err(e) => { + tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e); + } + } + } + Ok(()) + } + pub async fn get_running_count(&self) -> usize { *self.running_count.read().await }