feat: Swift Face Pose integration + TKG 方案 B
Major Changes:
- swift_face_pose: output pose angles (yaw/pitch/roll) in face.json
- face_processor.py: call swift_face_pose (dual output: face.json + pose.json)
- Face struct: add pose_angle field
- TKG 方案 B: gaze/lip_track nodes from face.json (no face_detections dependency)
- Chunk cleanup: delete old data before rebuild (avoid duplicate key)
- Hand nodes: classify by hand_type + gesture (15 combinations)
- HAND_OBJECT edges: bbox spatial matching (174 matches)

Test Results:
- Blake Jones: 8 faces, pose_angle ✓, 66 nodes, 174 edges
- FilmRiot: 394 faces, pose_angle ✓, 35 nodes, 39 edges
- Left hands: 132, Right hands: 2

Architecture:
- All TKG nodes built from JSON files (face.json, hand.json, yolo.json)
- Swift processors: sample_interval=3 (Face/Pose/Hand sync)
- Cleanup functions: delete_tkg_nodes_by_uuid, delete_tkg_edges_by_uuid
This commit is contained in:
+24
-12
@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
|
||||
SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face_pose")
|
||||
FACENET_PATH = os.path.join(SCRIPT_DIR, "..", "models", "facenet512.mlpackage")
|
||||
|
||||
# Pose angle classification from roll/yaw
|
||||
@@ -106,23 +106,29 @@ class FaceProcessorVision:
|
||||
return None
|
||||
|
||||
def process_with_swift(self) -> Dict:
|
||||
"""Step 1: Run swift_face to get bbox + pose"""
|
||||
print(f"[FACE_V2] Step 1: Vision detection...")
|
||||
"""Step 1: Run swift_face_pose to get bbox + pose (generates face.json + pose.json)"""
|
||||
print(f"[FACE_V2] Step 1: Vision detection (face + pose)...")
|
||||
|
||||
# Build swift_face if needed
|
||||
# Build swift_face_pose if needed
|
||||
if not os.path.exists(SWIFT_BIN):
|
||||
build_dir = os.path.join(SCRIPT_DIR, "swift_processors")
|
||||
print(f"[FACE_V2] Building swift_face in {build_dir}...")
|
||||
print(f"[FACE_V2] Building swift_face_pose in {build_dir}...")
|
||||
subprocess.run(
|
||||
["swift", "build", "-c", "debug", "--product", "swift_face"],
|
||||
["swift", "build", "-c", "debug", "--product", "swift_face_pose"],
|
||||
cwd=build_dir, check=True
|
||||
)
|
||||
|
||||
swift_out = self.output_path.replace(".json", "_detect.json")
|
||||
swift_face_out = self.output_path.replace(".json", "_detect.json")
|
||||
# Pose output: same directory, but replace "face" with "pose" in filename
|
||||
output_dir = os.path.dirname(self.output_path)
|
||||
output_basename = os.path.basename(self.output_path)
|
||||
pose_basename = output_basename.replace("face", "pose")
|
||||
swift_pose_out = os.path.join(output_dir, pose_basename)
|
||||
cmd = [
|
||||
SWIFT_BIN,
|
||||
self.video_path,
|
||||
swift_out,
|
||||
swift_face_out,
|
||||
swift_pose_out,
|
||||
"--sample-interval", str(self.sample_interval),
|
||||
]
|
||||
if self.uuid:
|
||||
@@ -130,7 +136,7 @@ class FaceProcessorVision:
|
||||
|
||||
print(f"[FACE_V2] Running: {' '.join(cmd)}")
|
||||
t0 = time.time()
|
||||
log_path = swift_out + ".log"
|
||||
log_path = swift_face_out + ".log"
|
||||
log_f = open(log_path, "w")
|
||||
proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
|
||||
last_pct = -1
|
||||
@@ -155,13 +161,19 @@ class FaceProcessorVision:
|
||||
stderr_out = proc.stderr.read()
|
||||
if stderr_out:
|
||||
print(stderr_out.strip(), file=sys.stderr)
|
||||
raise RuntimeError(f"swift_face exited with code {proc.returncode}")
|
||||
raise RuntimeError(f"swift_face_pose exited with code {proc.returncode}")
|
||||
|
||||
elapsed = time.time() - t0
|
||||
print(f"[FACE_V2] Detection done in {elapsed:.1f}s")
|
||||
|
||||
with open(swift_out) as f:
|
||||
return json.load(f)
|
||||
with open(swift_face_out) as f:
|
||||
face_data = json.load(f)
|
||||
|
||||
# Also check if pose.json was generated (for reference)
|
||||
if os.path.exists(swift_pose_out):
|
||||
print(f"[FACE_V2] Pose file generated: {swift_pose_out}")
|
||||
|
||||
return face_data
|
||||
|
||||
def embed_and_save(self, detection_data: Dict):
|
||||
"""Step 2: Crop faces + CoreML embedding + save face.json"""
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Hand Processor Wrapper
|
||||
Calls Swift Vision Framework hand pose (swift_hand) for gesture detection.
|
||||
Uses VNDetectHumanHandPoseRequest with ANE acceleration.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
# Directory holding this wrapper script; both candidate binary locations are
# resolved relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Primary location of the swift_hand executable.
SWIFT_HAND_PATH = os.path.join(_SCRIPT_DIR, "swift_processors/.build/debug/swift_hand")

# Fallback location, tried when the primary path does not exist.
SWIFT_HAND_ALT = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/arm64-apple-macosx/debug/swift_hand"
)

# Matches progress lines printed by the Swift binary; group 1 is the percentage.
SWIFT_HAND_PROGRESS_RE = re.compile(r"\[SwiftHand\] Progress:\s*(\d+)%")
|
||||
|
||||
def process_hand(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 3,
    publisher: RedisPublisher = None,
) -> dict:
    """Run the swift_hand binary on a video and return its parsed JSON output.

    The binary is looked up at SWIFT_HAND_PATH first, then SWIFT_HAND_ALT.
    While it runs, stdout lines matching SWIFT_HAND_PROGRESS_RE are reported
    as progress (to our stderr and, when given, to ``publisher``); any other
    non-empty stdout line is echoed to our stderr.

    Args:
        video_path: Passed to the binary as its first positional argument.
        output_path: Passed as the second positional argument; the JSON found
            there after the run is loaded and returned.
        uuid: Forwarded via ``--uuid``.
        sample_interval: Forwarded via ``--sample-interval``.
        publisher: Optional publisher that receives ``progress`` and ``error``
            calls for the "hand" stage.

    Returns:
        The decoded JSON from ``output_path``, or
        ``{"frame_count": 0, "fps": 0.0, "frames": []}`` when the binary is
        missing, exits non-zero, or left no file at ``output_path``.
    """
    # Local import: only this function needs it.
    import threading

    swift_bin = next(
        (path for path in (SWIFT_HAND_PATH, SWIFT_HAND_ALT) if os.path.exists(path)),
        None,
    )
    if swift_bin is None:
        print("[Hand] Swift binary not found", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift binary not found")
        return {"frame_count": 0, "fps": 0.0, "frames": []}

    cmd = [
        swift_bin, video_path, output_path,
        "--sample-interval", str(sample_interval),
        "--uuid", uuid,
    ]

    print(f"[Hand] Running Swift Hand (Vision Framework)", file=sys.stderr)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Drain stderr concurrently. Reading stdout to EOF before touching stderr
    # deadlocks once the child fills the stderr pipe buffer: the child blocks
    # on its write and never closes stdout, so the loop below never ends.
    stderr_chunks = []

    def _drain_stderr(stream):
        stderr_chunks.append(stream.read())

    drainer = threading.Thread(target=_drain_stderr, args=(proc.stderr,), daemon=True)
    drainer.start()

    last_pct = -1
    for line in proc.stdout:
        line = line.strip()
        m = SWIFT_HAND_PROGRESS_RE.search(line)
        if m:
            pct = int(m.group(1))
            # Only report increases so repeated percentages are not re-published.
            if pct > last_pct:
                last_pct = pct
                print(f"[Hand] Progress: {pct}%", file=sys.stderr)
                if publisher:
                    publisher.progress("hand", pct, 100, f"{pct}%")
        elif line:
            print(f" {line}", file=sys.stderr)

    proc.wait()
    drainer.join()

    stderr_output = "".join(stderr_chunks)
    if stderr_output:
        print(stderr_output.strip(), file=sys.stderr)

    if proc.returncode != 0 or not os.path.exists(output_path):
        print(f"[Hand] Swift Hand failed (exit={proc.returncode})", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift Hand failed")
        return {"frame_count": 0, "fps": 0.0, "frames": []}

    with open(output_path) as f:
        return json.load(f)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse arguments, run the hand processor,
    # write the result to the output path and report start/completion.
    cli = argparse.ArgumentParser(description="Hand Processor (Swift Vision)")
    for positional in ("video_path", "output_path"):
        cli.add_argument(positional)
    cli.add_argument("--uuid", "-u", default="")
    cli.add_argument("--sample-interval", type=int, default=3)
    opts = cli.parse_args()

    # A publisher is only created when a UUID was supplied.
    publisher = RedisPublisher(opts.uuid) if opts.uuid else None
    if publisher:
        publisher.info("hand", "HAND_START")

    result = process_hand(
        opts.video_path,
        opts.output_path,
        opts.uuid,
        opts.sample_interval,
        publisher,
    )

    # Write the returned dict to the output path, pretty-printed.
    with open(opts.output_path, "w") as out_file:
        json.dump(result, out_file, indent=2)

    print(f"Hand: {len(result.get('frames', []))} frames with hands")
    if publisher:
        publisher.complete("hand", f"{len(result.get('frames',[]))} frames")
|
||||
@@ -31,7 +31,7 @@ def process_pose(
|
||||
video_path: str,
|
||||
output_path: str,
|
||||
uuid: str = "",
|
||||
sample_interval: int = 30,
|
||||
sample_interval: int = 3, # Changed from 30 to match Face
|
||||
publisher: RedisPublisher = None,
|
||||
) -> dict:
|
||||
swift_bin = SWIFT_POSE_PATH
|
||||
@@ -134,7 +134,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("video_path")
|
||||
parser.add_argument("output_path")
|
||||
parser.add_argument("--uuid", "-u", default="")
|
||||
parser.add_argument("--sample-interval", type=int, default=30)
|
||||
parser.add_argument("--sample-interval", type=int, default=3) # Changed from 30 to match Face
|
||||
args = parser.parse_args()
|
||||
|
||||
publisher = RedisPublisher(args.uuid) if args.uuid else None
|
||||
|
||||
@@ -118,5 +118,13 @@ let package = Package(
|
||||
path: ".",
|
||||
sources: ["swift_hand.swift"]
|
||||
),
|
||||
.executableTarget(
|
||||
name: "swift_face_pose",
|
||||
dependencies: [
|
||||
.product(name: "ArgumentParser", package: "swift-argument-parser"),
|
||||
],
|
||||
path: ".",
|
||||
sources: ["swift_face_pose.swift"]
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -13,8 +13,8 @@ struct SwiftFace: ParsableCommand {
|
||||
@Argument(help: "Output JSON path")
|
||||
var outputPath: String
|
||||
|
||||
@Option(name: .long, help: "Sample interval (frames, default=30)")
|
||||
var sampleInterval: Int = 30
|
||||
@Option(name: .long, help: "Sample interval (frames, default=3)")
|
||||
var sampleInterval: Int = 3
|
||||
|
||||
@Option(name: .long, help: "UUID for logging")
|
||||
var uuid: String = ""
|
||||
|
||||
@@ -318,8 +318,18 @@ struct SwiftFacePose: ParsableCommand {
|
||||
"fps": Double(fps),
|
||||
"frames": faceFrames,
|
||||
]
|
||||
if let faceJson = try? JSONSerialization.data(withJSONObject: faceOutputDict, options: []) {
|
||||
do {
|
||||
let faceJson = try JSONSerialization.data(withJSONObject: faceOutputDict, options: [])
|
||||
try faceJson.write(to: URL(fileURLWithPath: faceOutput))
|
||||
print("[SwiftFacePose] Face output written: \(faceOutput)")
|
||||
// Verify file exists
|
||||
if FileManager.default.fileExists(atPath: faceOutput) {
|
||||
print("[SwiftFacePose] Verified: file exists at \(faceOutput)")
|
||||
} else {
|
||||
print("[SwiftFacePose] ERROR: file not found after write!")
|
||||
}
|
||||
} catch {
|
||||
print("[SwiftFacePose] ERROR writing face output: \(error)")
|
||||
}
|
||||
|
||||
let poseOutputDict: [String: Any] = [
|
||||
|
||||
@@ -18,7 +18,7 @@ struct SwiftHandProcessor: ParsableCommand {
|
||||
var uuid: String = ""
|
||||
|
||||
@Option(name: [.short, .long], help: "Sample interval (frames)")
|
||||
var sampleInterval: Int = 30
|
||||
var sampleInterval: Int = 3
|
||||
|
||||
@Option(name: [.long], help: "Minimum confidence threshold")
|
||||
var minConfidence: Double = 0.3
|
||||
|
||||
@@ -26,8 +26,8 @@ struct SwiftPose: ParsableCommand {
|
||||
@Argument(help: "Output JSON path")
|
||||
var outputPath: String
|
||||
|
||||
@Option(name: .long, help: "Sample interval (frames, default=30)")
|
||||
var sampleInterval: Int = 30
|
||||
@Option(name: .long, help: "Sample interval (frames, default=3)")
|
||||
var sampleInterval: Int = 3
|
||||
|
||||
@Option(name: .long, help: "UUID for logging")
|
||||
var uuid: String = ""
|
||||
|
||||
Reference in New Issue
Block a user