feat: Swift Face Pose integration + TKG 方案 B

Major Changes:
- swift_face_pose: output pose angles (yaw/pitch/roll) in face.json
- face_processor.py: call swift_face_pose (dual output: face.json + pose.json)
- Face struct: add pose_angle field
- TKG 方案 B: gaze/lip_track nodes from face.json (no face_detections dependency)
- Chunk cleanup: delete old data before rebuild (avoid duplicate key)
- Hand nodes: classify by hand_type + gesture (15 combinations)
- HAND_OBJECT edges: bbox spatial matching (174 matches)

Test Results:
- Blake Jones: 8 faces, pose_angle ✓, 66 nodes, 174 edges
- FilmRiot: 394 faces, pose_angle ✓, 35 nodes, 39 edges
- Left hands: 132, Right hands: 2

Architecture:
- All TKG nodes built from JSON files (face.json, hand.json, yolo.json)
- Swift processors: sample_interval=3 (Face/Pose/Hand sync)
- Cleanup functions: delete_tkg_nodes_by_uuid, delete_tkg_edges_by_uuid
This commit is contained in:
Accusys
2026-06-23 05:47:24 +08:00
parent e1e2da2140
commit 766a1d9a6d
17 changed files with 1108 additions and 47 deletions
+24 -12
View File
@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face_pose")
FACENET_PATH = os.path.join(SCRIPT_DIR, "..", "models", "facenet512.mlpackage")
# Pose angle classification from roll/yaw
@@ -106,23 +106,29 @@ class FaceProcessorVision:
return None
def process_with_swift(self) -> Dict:
"""Step 1: Run swift_face to get bbox + pose"""
print(f"[FACE_V2] Step 1: Vision detection...")
"""Step 1: Run swift_face_pose to get bbox + pose (generates face.json + pose.json)"""
print(f"[FACE_V2] Step 1: Vision detection (face + pose)...")
# Build swift_face if needed
# Build swift_face_pose if needed
if not os.path.exists(SWIFT_BIN):
build_dir = os.path.join(SCRIPT_DIR, "swift_processors")
print(f"[FACE_V2] Building swift_face in {build_dir}...")
print(f"[FACE_V2] Building swift_face_pose in {build_dir}...")
subprocess.run(
["swift", "build", "-c", "debug", "--product", "swift_face"],
["swift", "build", "-c", "debug", "--product", "swift_face_pose"],
cwd=build_dir, check=True
)
swift_out = self.output_path.replace(".json", "_detect.json")
swift_face_out = self.output_path.replace(".json", "_detect.json")
# Pose output: same directory, but replace "face" with "pose" in filename
output_dir = os.path.dirname(self.output_path)
output_basename = os.path.basename(self.output_path)
pose_basename = output_basename.replace("face", "pose")
swift_pose_out = os.path.join(output_dir, pose_basename)
cmd = [
SWIFT_BIN,
self.video_path,
swift_out,
swift_face_out,
swift_pose_out,
"--sample-interval", str(self.sample_interval),
]
if self.uuid:
@@ -130,7 +136,7 @@ class FaceProcessorVision:
print(f"[FACE_V2] Running: {' '.join(cmd)}")
t0 = time.time()
log_path = swift_out + ".log"
log_path = swift_face_out + ".log"
log_f = open(log_path, "w")
proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
last_pct = -1
@@ -155,13 +161,19 @@ class FaceProcessorVision:
stderr_out = proc.stderr.read()
if stderr_out:
print(stderr_out.strip(), file=sys.stderr)
raise RuntimeError(f"swift_face exited with code {proc.returncode}")
raise RuntimeError(f"swift_face_pose exited with code {proc.returncode}")
elapsed = time.time() - t0
print(f"[FACE_V2] Detection done in {elapsed:.1f}s")
with open(swift_out) as f:
return json.load(f)
with open(swift_face_out) as f:
face_data = json.load(f)
# Also check if pose.json was generated (for reference)
if os.path.exists(swift_pose_out):
print(f"[FACE_V2] Pose file generated: {swift_pose_out}")
return face_data
def embed_and_save(self, detection_data: Dict):
"""Step 2: Crop faces + CoreML embedding + save face.json"""
+101
View File
@@ -0,0 +1,101 @@
#!/opt/homebrew/bin/python3.11
"""
Hand Processor Wrapper
Calls Swift Vision Framework hand pose (swift_hand) for gesture detection.
Uses VNDetectHumanHandPoseRequest with ANE acceleration.
"""
import re
import sys
import json
import os
import subprocess
import argparse
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
# Directory containing this wrapper script; the Swift package is looked up
# relative to it.
_WRAPPER_DIR = os.path.dirname(os.path.abspath(__file__))

# Primary location of the swift_hand debug binary.
SWIFT_HAND_PATH = os.path.join(
    _WRAPPER_DIR, "swift_processors/.build/debug/swift_hand"
)

# Fallback location under the architecture-specific build directory.
SWIFT_HAND_ALT = os.path.join(
    _WRAPPER_DIR, "swift_processors/.build/arm64-apple-macosx/debug/swift_hand"
)

# Matches progress lines such as "[SwiftHand] Progress: 42%" and captures
# the integer percentage.
SWIFT_HAND_PROGRESS_RE = re.compile(r"\[SwiftHand\] Progress:\s*(\d+)%")
def process_hand(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 3,
    publisher: RedisPublisher = None,
) -> dict:
    """Run the swift_hand binary on a video and return its parsed JSON output.

    The binary is looked up at SWIFT_HAND_PATH, then SWIFT_HAND_ALT. Its
    output is streamed line by line: lines matching SWIFT_HAND_PROGRESS_RE
    are reported as progress (only when the percentage increases), every
    other non-empty line is echoed to this process's stderr.

    Args:
        video_path: Path of the input video, passed through to the binary.
        output_path: Path the binary is asked to write its JSON result to.
        uuid: Value forwarded to the binary via ``--uuid``.
        sample_interval: Value forwarded via ``--sample-interval``.
        publisher: Optional publisher; when truthy it receives ``progress``
            and ``error`` notifications for the "hand" stage.

    Returns:
        The JSON document loaded from ``output_path``. If the binary is
        missing, exits non-zero, or produces no output file, an empty
        result ``{"frame_count": 0, "fps": 0.0, "frames": []}`` is returned.
    """
    empty_result = {"frame_count": 0, "fps": 0.0, "frames": []}

    # Take the first candidate location that actually exists on disk.
    swift_bin = next(
        (path for path in (SWIFT_HAND_PATH, SWIFT_HAND_ALT) if os.path.exists(path)),
        None,
    )
    if swift_bin is None:
        print("[Hand] Swift binary not found", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift binary not found")
        return empty_result

    cmd = [
        swift_bin, video_path, output_path,
        "--sample-interval", str(sample_interval),
        "--uuid", uuid,
    ]

    print("[Hand] Running Swift Hand (Vision Framework)", file=sys.stderr)

    last_pct = -1
    # stderr is merged into stdout so a single reader drains everything the
    # child writes. Reading stdout to EOF and only then reading a separate
    # stderr pipe can deadlock once the child fills the stderr pipe buffer.
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
    ) as proc:
        for raw_line in proc.stdout:
            line = raw_line.strip()
            match = SWIFT_HAND_PROGRESS_RE.search(line)
            if match:
                pct = int(match.group(1))
                # Report each percentage at most once, and never go backwards.
                if pct > last_pct:
                    last_pct = pct
                    print(f"[Hand] Progress: {pct}%", file=sys.stderr)
                    if publisher:
                        publisher.progress("hand", pct, 100, f"{pct}%")
            elif line:
                print(f"  {line}", file=sys.stderr)
    # Leaving the with-block closes the pipe and waits for the child, so
    # returncode is populated from here on.

    if proc.returncode != 0 or not os.path.exists(output_path):
        print(f"[Hand] Swift Hand failed (exit={proc.returncode})", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift Hand failed")
        return empty_result

    with open(output_path, encoding="utf-8") as f:
        return json.load(f)
def main() -> None:
    """Command-line entry point: run hand processing and write the result.

    Parses the video/output paths plus the optional --uuid and
    --sample-interval flags, runs process_hand, rewrites the result to the
    output path as indented JSON, and prints a one-line summary. When a UUID
    is given, start and completion events are sent through a RedisPublisher.
    """
    cli = argparse.ArgumentParser(description="Hand Processor (Swift Vision)")
    cli.add_argument("video_path")
    cli.add_argument("output_path")
    cli.add_argument("--uuid", "-u", default="")
    cli.add_argument("--sample-interval", type=int, default=3)
    opts = cli.parse_args()

    # Publishing is only enabled when the caller supplied a UUID.
    publisher = RedisPublisher(opts.uuid) if opts.uuid else None
    if publisher:
        publisher.info("hand", "HAND_START")

    result = process_hand(
        opts.video_path,
        opts.output_path,
        opts.uuid,
        opts.sample_interval,
        publisher,
    )

    # Always (re)write the output file, including the empty result that
    # process_hand returns on failure.
    with open(opts.output_path, "w") as out_file:
        json.dump(result, out_file, indent=2)

    frame_total = len(result.get("frames", []))
    print(f"Hand: {frame_total} frames with hands")

    if publisher:
        publisher.complete("hand", f"{frame_total} frames")


if __name__ == "__main__":
    main()
+2 -2
View File
@@ -31,7 +31,7 @@ def process_pose(
video_path: str,
output_path: str,
uuid: str = "",
sample_interval: int = 30,
sample_interval: int = 3, # Changed from 30 to match Face
publisher: RedisPublisher = None,
) -> dict:
swift_bin = SWIFT_POSE_PATH
@@ -134,7 +134,7 @@ if __name__ == "__main__":
parser.add_argument("video_path")
parser.add_argument("output_path")
parser.add_argument("--uuid", "-u", default="")
parser.add_argument("--sample-interval", type=int, default=30)
parser.add_argument("--sample-interval", type=int, default=3) # Changed from 30 to match Face
args = parser.parse_args()
publisher = RedisPublisher(args.uuid) if args.uuid else None
+8
View File
@@ -118,5 +118,13 @@ let package = Package(
path: ".",
sources: ["swift_hand.swift"]
),
.executableTarget(
name: "swift_face_pose",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser"),
],
path: ".",
sources: ["swift_face_pose.swift"]
),
]
)
+2 -2
View File
@@ -13,8 +13,8 @@ struct SwiftFace: ParsableCommand {
@Argument(help: "Output JSON path")
var outputPath: String
@Option(name: .long, help: "Sample interval (frames, default=30)")
var sampleInterval: Int = 30
@Option(name: .long, help: "Sample interval (frames, default=3)")
var sampleInterval: Int = 3
@Option(name: .long, help: "UUID for logging")
var uuid: String = ""
+11 -1
View File
@@ -318,8 +318,18 @@ struct SwiftFacePose: ParsableCommand {
"fps": Double(fps),
"frames": faceFrames,
]
if let faceJson = try? JSONSerialization.data(withJSONObject: faceOutputDict, options: []) {
do {
let faceJson = try JSONSerialization.data(withJSONObject: faceOutputDict, options: [])
try faceJson.write(to: URL(fileURLWithPath: faceOutput))
print("[SwiftFacePose] Face output written: \(faceOutput)")
// Verify file exists
if FileManager.default.fileExists(atPath: faceOutput) {
print("[SwiftFacePose] Verified: file exists at \(faceOutput)")
} else {
print("[SwiftFacePose] ERROR: file not found after write!")
}
} catch {
print("[SwiftFacePose] ERROR writing face output: \(error)")
}
let poseOutputDict: [String: Any] = [
+1 -1
View File
@@ -18,7 +18,7 @@ struct SwiftHandProcessor: ParsableCommand {
var uuid: String = ""
@Option(name: [.short, .long], help: "Sample interval (frames)")
var sampleInterval: Int = 30
var sampleInterval: Int = 3
@Option(name: [.long], help: "Minimum confidence threshold")
var minConfidence: Double = 0.3
+2 -2
View File
@@ -26,8 +26,8 @@ struct SwiftPose: ParsableCommand {
@Argument(help: "Output JSON path")
var outputPath: String
@Option(name: .long, help: "Sample interval (frames, default=30)")
var sampleInterval: Int = 30
@Option(name: .long, help: "Sample interval (frames, default=3)")
var sampleInterval: Int = 3
@Option(name: .long, help: "UUID for logging")
var uuid: String = ""