chore: backup before migration to new repo
This commit is contained in:
@@ -65,12 +65,20 @@ def run_asr(video_path, output_path, uuid: str = ""):
|
||||
if publisher:
|
||||
publisher.info("asr", "Loading Whisper model...")
|
||||
|
||||
model = WhisperModel("tiny", device="cpu", compute_type="int8")
|
||||
# Use small model with CPU (MPS not supported by faster_whisper)
|
||||
# small 模型在準確率和速度間取得最佳平衡
|
||||
model = WhisperModel("small", device="cpu", compute_type="int8")
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", f"Transcribing: {video_path}")
|
||||
|
||||
segments, info = model.transcribe(video_path, beam_size=5)
|
||||
# Transcribe with VAD filter for better accuracy
|
||||
segments, info = model.transcribe(
|
||||
video_path,
|
||||
beam_size=5,
|
||||
vad_filter=True,
|
||||
vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200),
|
||||
)
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", f"ASR_LANGUAGE:{info.language}")
|
||||
|
||||
@@ -22,6 +22,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
|
||||
|
||||
try:
|
||||
import whisperx
|
||||
import torch
|
||||
except ImportError:
|
||||
if publisher:
|
||||
publisher.error("asrx", "whisperx not installed")
|
||||
@@ -36,6 +37,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
|
||||
publisher.info("asrx", "ASRX_LOADING_MODEL")
|
||||
|
||||
try:
|
||||
# Fix for PyTorch 2.6+ compatibility
|
||||
# Allow omegaconf types in torch.load
|
||||
import omegaconf
|
||||
|
||||
torch.serialization.add_safe_globals(
|
||||
[omegaconf.listconfig.ListConfig, omegaconf.dictconfig.DictConfig]
|
||||
)
|
||||
|
||||
# Load model - using faster-whisper for better performance
|
||||
# You can also use: "large-v3", "medium", "small", "base", "tiny"
|
||||
model = whisperx.load_model("base", device="cpu", compute_type="int8")
|
||||
@@ -54,9 +63,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
|
||||
|
||||
# Diarization (speaker segmentation)
|
||||
try:
|
||||
import whisperx
|
||||
from whisperx.diarize import DiarizationPipeline
|
||||
|
||||
diarize_model = whisperx.DiarizationPipeline(use_auth_token=None)
|
||||
# DiarizationPipeline parameters: model_name, token, device, cache_dir
|
||||
diarize_model = DiarizationPipeline(
|
||||
model_name="pyannote/speaker-diarization",
|
||||
token=None, # HuggingFace token (None for public models)
|
||||
device="cpu",
|
||||
)
|
||||
diarize_segments = diarize_model(video_path)
|
||||
|
||||
# Assign speaker labels
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Caption Processor - Generate image captions
|
||||
Uses AI vision models to analyze video frames and generate descriptions
|
||||
Caption Processor - Generate image captions (LOCAL ONLY)
|
||||
Uses Moondream2 (local VLM) for image captioning
|
||||
No cloud API calls - fully offline processing
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -18,7 +19,6 @@ from redis_publisher import RedisPublisher
|
||||
def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
|
||||
"""Extract frames from video at regular intervals"""
|
||||
|
||||
# Get video duration
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v",
|
||||
@@ -34,14 +34,13 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
|
||||
data = json.loads(result.stdout)
|
||||
duration = float(data.get("format", {}).get("duration", 0))
|
||||
else:
|
||||
duration = 60 # Default fallback
|
||||
duration = 60
|
||||
except Exception:
|
||||
duration = 60
|
||||
|
||||
if duration <= 0:
|
||||
duration = 60
|
||||
|
||||
# Calculate frame interval
|
||||
interval = max(duration / max_frames, 1.0)
|
||||
|
||||
frames = []
|
||||
@@ -76,94 +75,73 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
|
||||
return frames
|
||||
|
||||
|
||||
def generate_caption_with_llava(
|
||||
def generate_caption_with_moondream(
|
||||
image_path: str, prompt: str = "Describe this image in detail."
|
||||
) -> Optional[str]:
|
||||
"""Generate caption using LLaVA model"""
|
||||
"""Generate caption using Moondream2 (local VLM)"""
|
||||
try:
|
||||
# Try to use transformers with LLaVA
|
||||
from transformers import AutoProcessor, AutoModelForVision2Seq # noqa: F401
|
||||
import torch # noqa: F401
|
||||
from PIL import Image # noqa: F401
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from PIL import Image
|
||||
import torch
|
||||
|
||||
# Note: This requires llava-hf/llava-1.5-7b-hf or similar
|
||||
# For now, return a placeholder
|
||||
return f"[LLaVA caption for {os.path.basename(image_path)}]"
|
||||
model_id = "vikhyatk/moondream2"
|
||||
revision = "2025-01-09"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_id, revision=revision, trust_remote_code=True
|
||||
)
|
||||
moondream = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
revision=revision,
|
||||
trust_remote_code=True,
|
||||
torch_dtype=torch.float16,
|
||||
).to("mps" if torch.backends.mps.is_available() else "cpu")
|
||||
|
||||
moondream.eval()
|
||||
|
||||
image = Image.open(image_path)
|
||||
enc_image = moondream.encode_image(image)
|
||||
caption = moondream.answer_question(enc_image, prompt, tokenizer)
|
||||
|
||||
return caption if caption else None
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
|
||||
def generate_caption_with_gpt4v(image_path: str, api_key: str = None) -> Optional[str]:
|
||||
"""Generate caption using GPT-4V via OpenAI API"""
|
||||
import base64
|
||||
|
||||
if not api_key:
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
if not api_key:
|
||||
return None
|
||||
|
||||
try:
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(api_key=api_key)
|
||||
|
||||
# Encode image
|
||||
with open(image_path, "rb") as f:
|
||||
img_data = base64.b64encode(f.read()).decode()
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-4o", # or gpt-4-turbo for vision
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{img_data}"},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Describe what you see in this image in one sentence.",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
max_tokens=100,
|
||||
)
|
||||
|
||||
return response.choices[0].message.content
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
print(f"[CAPTION] Moondream error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def generate_caption_fallback(image_path: str, existing_data: Dict = None) -> str:
|
||||
"""Generate a basic caption using available metadata"""
|
||||
def generate_caption_from_metadata(image_path: str, existing_data: Dict = None) -> str:
|
||||
"""Generate caption using YOLO/OCR metadata (fallback)"""
|
||||
|
||||
caption_parts = []
|
||||
|
||||
# Check YOLO data for objects
|
||||
if existing_data and existing_data.get("objects"):
|
||||
objects = list(set([o["class"] for o in existing_data["objects"]]))[:5]
|
||||
if objects:
|
||||
caption_parts.append(f"Contains: {', '.join(objects)}")
|
||||
caption_parts.append(f"Objects: {', '.join(objects)}")
|
||||
|
||||
# Check OCR data for text
|
||||
if existing_data and existing_data.get("texts"):
|
||||
texts = [t["text"] for t in existing_data["texts"] if t.get("text")]
|
||||
if texts:
|
||||
caption_parts.append(f"On-screen text: {' '.join(texts[:3])}")
|
||||
caption_parts.append(f"Text: {' '.join(texts[:3])}")
|
||||
|
||||
if existing_data and existing_data.get("scene_type"):
|
||||
caption_parts.append(f"Scene: {existing_data['scene_type']}")
|
||||
|
||||
if caption_parts:
|
||||
return " | ".join(caption_parts)
|
||||
|
||||
return "Video frame at timestamp"
|
||||
return "Video frame"
|
||||
|
||||
|
||||
def process_frame(
|
||||
frame_info: Dict, yolo_data: List = None, ocr_data: List = None
|
||||
frame_info: Dict,
|
||||
yolo_data: List = None,
|
||||
ocr_data: List = None,
|
||||
scene_data: Dict = None,
|
||||
) -> Dict:
|
||||
"""Process a single frame and generate caption"""
|
||||
"""Process a single frame and generate caption (LOCAL ONLY)"""
|
||||
|
||||
frame_path = frame_info["path"]
|
||||
timestamp = frame_info["timestamp"]
|
||||
@@ -171,28 +149,34 @@ def process_frame(
|
||||
caption = None
|
||||
source = "unknown"
|
||||
|
||||
# Try GPT-4V first
|
||||
caption = generate_caption_with_gpt4v(frame_path)
|
||||
# Try Moondream2 (local VLM)
|
||||
caption = generate_caption_with_moondream(frame_path)
|
||||
if caption:
|
||||
source = "gpt-4v"
|
||||
source = "moondream2"
|
||||
else:
|
||||
# Try LLaVA
|
||||
caption = generate_caption_with_llava(frame_path)
|
||||
if caption:
|
||||
source = "llava"
|
||||
else:
|
||||
# Use fallback with YOLO/OCR data
|
||||
combined_data = {"objects": [], "texts": []}
|
||||
if yolo_data:
|
||||
combined_data["objects"] = [
|
||||
o for o in yolo_data if o.get("timestamp") == timestamp
|
||||
]
|
||||
if ocr_data:
|
||||
combined_data["texts"] = [
|
||||
t for t in ocr_data if t.get("timestamp") == timestamp
|
||||
]
|
||||
caption = generate_caption_fallback(frame_path, combined_data)
|
||||
source = "metadata"
|
||||
# Fallback: Use metadata from YOLO/OCR/Scene
|
||||
combined_data = {"objects": [], "texts": [], "scene_type": ""}
|
||||
|
||||
if yolo_data:
|
||||
combined_data["objects"] = [
|
||||
o for o in yolo_data if o.get("timestamp") == timestamp
|
||||
]
|
||||
|
||||
if ocr_data:
|
||||
combined_data["texts"] = [
|
||||
t for t in ocr_data if t.get("timestamp") == timestamp
|
||||
]
|
||||
|
||||
if scene_data:
|
||||
for scene in scene_data.get("scenes", []):
|
||||
if scene.get("start_time", 0) <= timestamp <= scene.get("end_time", 0):
|
||||
combined_data["scene_type"] = scene.get(
|
||||
"scene_type_zh"
|
||||
) or scene.get("scene_type", "")
|
||||
break
|
||||
|
||||
caption = generate_caption_from_metadata(frame_path, combined_data)
|
||||
source = "metadata"
|
||||
|
||||
return {
|
||||
"index": frame_info["index"],
|
||||
@@ -212,24 +196,22 @@ def run_caption(
|
||||
if publisher:
|
||||
publisher.info("caption", "Extracting frames from video...")
|
||||
|
||||
# Extract frames
|
||||
frames = extract_frames(video_path, max_frames)
|
||||
|
||||
if publisher:
|
||||
publisher.info("caption", f"Extracted {len(frames)} frames")
|
||||
|
||||
# Load YOLO and OCR data for context
|
||||
base_path = os.path.dirname(output_path)
|
||||
uuid_name = os.path.basename(output_path).split(".")[0]
|
||||
|
||||
yolo_objects = []
|
||||
ocr_texts = []
|
||||
scene_info = {}
|
||||
|
||||
yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
|
||||
if os.path.exists(yolo_path):
|
||||
with open(yolo_path) as f:
|
||||
yolo_data = json.load(f)
|
||||
# Flatten objects from all frames
|
||||
for frame in yolo_data.get("frames", []):
|
||||
for obj in frame.get("objects", []):
|
||||
obj["timestamp"] = frame.get("timestamp", 0)
|
||||
@@ -244,7 +226,11 @@ def run_caption(
|
||||
text["timestamp"] = frame.get("timestamp", 0)
|
||||
ocr_texts.append(text)
|
||||
|
||||
# Process each frame
|
||||
scene_path = os.path.join(base_path, f"{uuid_name}.scene.json")
|
||||
if os.path.exists(scene_path):
|
||||
with open(scene_path) as f:
|
||||
scene_info = json.load(f)
|
||||
|
||||
captions = []
|
||||
for i, frame in enumerate(frames):
|
||||
if publisher and i % 5 == 0:
|
||||
@@ -252,16 +238,14 @@ def run_caption(
|
||||
"caption", i, len(frames), f"Frame {i + 1}/{len(frames)}"
|
||||
)
|
||||
|
||||
caption_data = process_frame(frame, yolo_objects, ocr_texts)
|
||||
caption_data = process_frame(frame, yolo_objects, ocr_texts, scene_info)
|
||||
captions.append(caption_data)
|
||||
|
||||
# Cleanup temp frame
|
||||
try:
|
||||
os.remove(frame["path"])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cleanup temp directory
|
||||
temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames")
|
||||
try:
|
||||
os.rmdir(temp_dir)
|
||||
@@ -275,9 +259,11 @@ def run_caption(
|
||||
"summary": {
|
||||
"avg_caption_length": sum(len(c.get("caption", "")) for c in captions)
|
||||
/ max(len(captions), 1),
|
||||
"gpt4v_count": sum(1 for c in captions if c.get("source") == "gpt-4v"),
|
||||
"llava_count": sum(1 for c in captions if c.get("source") == "llava"),
|
||||
"moondream_count": sum(
|
||||
1 for c in captions if c.get("source") == "moondream2"
|
||||
),
|
||||
"metadata_count": sum(1 for c in captions if c.get("source") == "metadata"),
|
||||
"cloud_api_count": 0,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -285,13 +271,13 @@ def run_caption(
|
||||
json.dump(result, f, indent=2, ensure_ascii=False)
|
||||
|
||||
if publisher:
|
||||
publisher.complete("caption", f"{len(captions)} frames captioned")
|
||||
publisher.complete("caption", f"{len(captions)} frames captioned (LOCAL)")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Video Caption Generator")
|
||||
parser = argparse.ArgumentParser(description="Video Caption Generator (LOCAL ONLY)")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", help="UUID for progress tracking", default="")
|
||||
@@ -302,4 +288,4 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
|
||||
result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames)
|
||||
print(f"Caption generated: {result['total_frames']} frames")
|
||||
print(f"Caption generated: {result['total_frames']} frames (LOCAL)")
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Face Processor - Face Detection
|
||||
Uses OpenCV Haar Cascade (local, no extra download needed)
|
||||
Alternative: MediaPipe (requires model download)
|
||||
Face Processor - Face Detection & Demographics
|
||||
Uses InsightFace for detection, age, and gender analysis.
|
||||
Falls back to OpenCV Haar Cascade if InsightFace fails.
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -15,7 +15,7 @@ from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
"""Process video for face detection"""
|
||||
"""Process video for face detection and demographics analysis"""
|
||||
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
@@ -23,56 +23,82 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
|
||||
try:
|
||||
import cv2
|
||||
except ImportError:
|
||||
import numpy as np
|
||||
import insightface
|
||||
except ImportError as e:
|
||||
error_msg = f"Missing dependency: {e.name}"
|
||||
if publisher:
|
||||
publisher.error("face", "opencv-python not installed")
|
||||
publisher.error("face", error_msg)
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
if publisher:
|
||||
publisher.complete("face", "0 frames")
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
|
||||
if publisher:
|
||||
publisher.info("face", "FACE_LOADING_CASCADE")
|
||||
|
||||
# Try to use OpenCV's built-in Haar Cascade
|
||||
# This is included with OpenCV
|
||||
face_cascade = cv2.CascadeClassifier(
|
||||
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
||||
)
|
||||
|
||||
if face_cascade.empty():
|
||||
# 1. Initialize InsightFace
|
||||
use_insightface = False
|
||||
app = None
|
||||
try:
|
||||
if publisher:
|
||||
publisher.error("face", "Could not load Haar Cascade")
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
publisher.info("face", "LOADING_INSIGHTFACE")
|
||||
# 'buffalo_l' is a robust model. det_size can be adjusted.
|
||||
app = insightface.app.FaceAnalysis(
|
||||
name="buffalo_l", providers=["CPUExecutionProvider"]
|
||||
)
|
||||
app.prepare(ctx_id=0, det_size=(320, 320))
|
||||
use_insightface = True
|
||||
if publisher:
|
||||
publisher.complete("face", "0 frames")
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
publisher.info("face", "INSIGHTFACE_LOADED")
|
||||
except Exception as e:
|
||||
print(f"[WARNING] InsightFace failed to load: {e}")
|
||||
use_insightface = False
|
||||
|
||||
# 2. Fallback to Haar Cascade
|
||||
face_cascade = None
|
||||
if not use_insightface:
|
||||
if publisher:
|
||||
publisher.info("face", "LOADING_HAAR_CASCADE")
|
||||
face_cascade = cv2.CascadeClassifier(
|
||||
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
||||
)
|
||||
if face_cascade.empty():
|
||||
if publisher:
|
||||
publisher.error("face", "Could not load Haar Cascade")
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
if publisher:
|
||||
publisher.info("face", "HAAR_CASCADE_LOADED")
|
||||
|
||||
if publisher:
|
||||
publisher.info("face", "FACE_CASCADE_LOADED")
|
||||
publisher.info("face", "PROCESSING_VIDEO")
|
||||
|
||||
# Get video info
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
if publisher:
|
||||
publisher.error("face", "Could not open video")
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
cap.release()
|
||||
|
||||
# Optimization: Process every N frames to speed up analysis
|
||||
# Since we just need attributes for the person identity, we don't need every single frame.
|
||||
sample_interval = 30
|
||||
if total_frames > 0:
|
||||
estimated_samples = total_frames // sample_interval
|
||||
else:
|
||||
estimated_samples = 0
|
||||
|
||||
frame_count = 0
|
||||
processed_count = 0
|
||||
frames_data = []
|
||||
|
||||
if publisher:
|
||||
publisher.info("face", f"fps={fps}, frames={total_frames}")
|
||||
publisher.progress("face", 0, total_frames, "Starting")
|
||||
|
||||
# Process every N frames to speed up
|
||||
sample_interval = 30 # Process every 30 frames
|
||||
|
||||
frames = []
|
||||
frame_count = 0
|
||||
processed = 0
|
||||
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
publisher.progress("face", 0, estimated_samples, "Starting")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
@@ -81,62 +107,92 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
|
||||
frame_count += 1
|
||||
|
||||
# Sample frames
|
||||
# Sampling
|
||||
if frame_count % sample_interval != 0:
|
||||
continue
|
||||
|
||||
processed += 1
|
||||
processed_count += 1
|
||||
timestamp = (frame_count - 1) / fps if fps > 0 else 0
|
||||
|
||||
# Convert to grayscale
|
||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Detect faces
|
||||
try:
|
||||
faces = face_cascade.detectMultiScale(
|
||||
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
|
||||
)
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("face", f"Frame {frame_count}: {e}")
|
||||
faces = []
|
||||
|
||||
face_list = []
|
||||
for x, y, w, h in faces:
|
||||
face_list.append(
|
||||
{
|
||||
"face_id": None,
|
||||
"x": int(x),
|
||||
"y": int(y),
|
||||
"width": int(w),
|
||||
"height": int(h),
|
||||
"confidence": 0.8, # Haar cascade doesn't provide confidence
|
||||
}
|
||||
)
|
||||
|
||||
# Only add frames with faces
|
||||
try:
|
||||
if use_insightface and app:
|
||||
# InsightFace Detection & Analysis
|
||||
faces = app.get(frame)
|
||||
for face in faces:
|
||||
bbox = face.bbox.astype(int)
|
||||
bx, by, bw, bh = (
|
||||
bbox[0],
|
||||
bbox[1],
|
||||
bbox[2] - bbox[0],
|
||||
bbox[3] - bbox[1],
|
||||
)
|
||||
|
||||
# Extract Attributes
|
||||
age = int(face.age) if hasattr(face, "age") else None
|
||||
gender_val = face.gender if hasattr(face, "gender") else None
|
||||
gender = (
|
||||
"female"
|
||||
if gender_val == 0
|
||||
else ("male" if gender_val == 1 else None)
|
||||
)
|
||||
|
||||
face_list.append(
|
||||
{
|
||||
"x": int(bx),
|
||||
"y": int(by),
|
||||
"width": int(bw),
|
||||
"height": int(bh),
|
||||
"confidence": float(face.det_score)
|
||||
if hasattr(face, "det_score")
|
||||
else 0.9,
|
||||
"attributes": {"age": age, "gender": gender},
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Haar Cascade Fallback (No Age/Gender)
|
||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||
faces = face_cascade.detectMultiScale(
|
||||
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
|
||||
)
|
||||
for x, y, w, h in faces:
|
||||
face_list.append(
|
||||
{
|
||||
"x": int(x),
|
||||
"y": int(y),
|
||||
"width": int(w),
|
||||
"height": int(h),
|
||||
"confidence": 0.8,
|
||||
"attributes": {"age": None, "gender": None},
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Frame processing error: {e}")
|
||||
|
||||
if face_list:
|
||||
frames.append(
|
||||
frames_data.append(
|
||||
{
|
||||
"frame": frame_count - 1,
|
||||
"timestamp": round(timestamp, 3),
|
||||
"faces": face_list,
|
||||
}
|
||||
)
|
||||
|
||||
if publisher:
|
||||
publisher.progress(
|
||||
"face",
|
||||
processed,
|
||||
total_frames // sample_interval,
|
||||
processed_count,
|
||||
estimated_samples,
|
||||
f"Frame {frame_count}",
|
||||
)
|
||||
|
||||
cap.release()
|
||||
|
||||
result = {"frame_count": total_frames, "fps": fps, "frames": frames}
|
||||
result = {"frame_count": total_frames, "fps": fps, "frames": frames_data}
|
||||
|
||||
if publisher:
|
||||
publisher.complete("face", f"{len(frames)} frames with faces")
|
||||
publisher.complete("face", f"{len(frames_data)} frames processed")
|
||||
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
@@ -145,7 +201,7 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Face Detection")
|
||||
parser = argparse.ArgumentParser(description="Face Detection & Demographics")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
|
||||
@@ -1,382 +1,367 @@
|
||||
{
|
||||
"0": "airplane_cabin",
|
||||
"1": "airport_terminal",
|
||||
"2": "alley",
|
||||
"3": "amphitheater",
|
||||
"4": "amusement_park",
|
||||
"5": "apartment_building_outdoor",
|
||||
"6": "aquarium",
|
||||
"7": "arcade",
|
||||
"8": "arena_hockey",
|
||||
"9": "arena_performance",
|
||||
"10": "army_base",
|
||||
"11": "art_gallery",
|
||||
"12": "art_studio",
|
||||
"13": "assembly_line",
|
||||
"14": "athletic_field_outdoor",
|
||||
"15": "atrium_public",
|
||||
"16": "attic",
|
||||
"17": "auditorium",
|
||||
"18": "auto_factory",
|
||||
"19": "backyard",
|
||||
"20": "badminton_court_indoor",
|
||||
"21": "baggage_claim",
|
||||
"22": "bakery_shop",
|
||||
"23": "balcony_exterior",
|
||||
"24": "balcony_interior",
|
||||
"25": "ball_pit",
|
||||
"26": "ballroom",
|
||||
"27": "bamboo_forest",
|
||||
"28": "banquet_hall",
|
||||
"29": "bar",
|
||||
"30": "barn",
|
||||
"31": "barndoor",
|
||||
"32": "baseball_field",
|
||||
"33": "basement",
|
||||
"34": "basilica",
|
||||
"35": "basketball_court_indoor",
|
||||
"36": "basketball_court_outdoor",
|
||||
"37": "bathroom",
|
||||
"38": "bazaar_indoor",
|
||||
"39": "bazaar_outdoor",
|
||||
"40": "beach",
|
||||
"41": "beauty_salon",
|
||||
"42": "bedroom",
|
||||
"43": "berth",
|
||||
"44": "biology_laboratory",
|
||||
"45": "boardwalk",
|
||||
"46": "boat_deck",
|
||||
"47": "boathouse",
|
||||
"48": "bookstore",
|
||||
"49": "booth_indoor",
|
||||
"50": "botanical_garden",
|
||||
"51": "bow_window_indoor",
|
||||
"52": "bow_window_outdoor",
|
||||
"53": "bowling_alley",
|
||||
"54": "boxing_ring",
|
||||
"55": "brewery_indoor",
|
||||
"56": "bridge",
|
||||
"57": "building_facade",
|
||||
"58": "bullring",
|
||||
"59": "burial_chamber",
|
||||
"60": "bus_interior",
|
||||
"61": "bus_station_indoor",
|
||||
"62": "butchers_shop",
|
||||
"63": "butte",
|
||||
"64": "cabin_outdoor",
|
||||
"65": "cafeteria",
|
||||
"66": "campsite",
|
||||
"67": "campus",
|
||||
"68": "canal_natural",
|
||||
"69": "canal_urban",
|
||||
"70": "candy_store",
|
||||
"71": "canyon",
|
||||
"72": "car_interior",
|
||||
"73": "carrousel",
|
||||
"74": "castle",
|
||||
"75": "catacomb",
|
||||
"76": "cathedral_indoor",
|
||||
"77": "cathedral_outdoor",
|
||||
"78": "cavern_indoor",
|
||||
"79": "cemetery",
|
||||
"80": "chalet",
|
||||
"81": "cheese_factory",
|
||||
"82": "chemistry_lab",
|
||||
"83": "chicken_coop_indoor",
|
||||
"84": "chicken_coop_outdoor",
|
||||
"85": "childs_room",
|
||||
"86": "church_indoor",
|
||||
"87": "church_outdoor",
|
||||
"88": "classroom",
|
||||
"89": "clean_room",
|
||||
"90": "cliff",
|
||||
"91": "cloister_indoor",
|
||||
"92": "closet",
|
||||
"93": "clothing_store",
|
||||
"94": "coast",
|
||||
"95": "cockpit",
|
||||
"96": "coffee_shop",
|
||||
"97": "computer_room",
|
||||
"98": "conference_center",
|
||||
"99": "conference_room",
|
||||
"100": "construction_site",
|
||||
"101": "control_room",
|
||||
"102": "control_tower_outdoor",
|
||||
"103": "corn_field",
|
||||
"104": "corral",
|
||||
"105": "corridor",
|
||||
"106": "cottage_garden",
|
||||
"107": "courthouse",
|
||||
"108": "courtroom",
|
||||
"0": "airfield",
|
||||
"1": "airplane_cabin",
|
||||
"2": "airport_terminal",
|
||||
"3": "alcove",
|
||||
"4": "alley",
|
||||
"5": "amphitheater",
|
||||
"6": "amusement_arcade",
|
||||
"7": "amusement_park",
|
||||
"8": "outdoor",
|
||||
"9": "aquarium",
|
||||
"10": "aqueduct",
|
||||
"11": "arcade",
|
||||
"12": "arch",
|
||||
"13": "archaelogical_excavation",
|
||||
"14": "archive",
|
||||
"15": "hockey",
|
||||
"16": "performance",
|
||||
"17": "rodeo",
|
||||
"18": "army_base",
|
||||
"19": "art_gallery",
|
||||
"20": "art_school",
|
||||
"21": "art_studio",
|
||||
"22": "artists_loft",
|
||||
"23": "assembly_line",
|
||||
"24": "outdoor",
|
||||
"25": "public",
|
||||
"26": "attic",
|
||||
"27": "auditorium",
|
||||
"28": "auto_factory",
|
||||
"29": "auto_showroom",
|
||||
"30": "badlands",
|
||||
"31": "shop",
|
||||
"32": "exterior",
|
||||
"33": "interior",
|
||||
"34": "ball_pit",
|
||||
"35": "ballroom",
|
||||
"36": "bamboo_forest",
|
||||
"37": "bank_vault",
|
||||
"38": "banquet_hall",
|
||||
"39": "bar",
|
||||
"40": "barn",
|
||||
"41": "barndoor",
|
||||
"42": "baseball_field",
|
||||
"43": "basement",
|
||||
"44": "indoor",
|
||||
"45": "bathroom",
|
||||
"46": "indoor",
|
||||
"47": "outdoor",
|
||||
"48": "beach",
|
||||
"49": "beach_house",
|
||||
"50": "beauty_salon",
|
||||
"51": "bedchamber",
|
||||
"52": "bedroom",
|
||||
"53": "beer_garden",
|
||||
"54": "beer_hall",
|
||||
"55": "berth",
|
||||
"56": "biology_laboratory",
|
||||
"57": "boardwalk",
|
||||
"58": "boat_deck",
|
||||
"59": "boathouse",
|
||||
"60": "bookstore",
|
||||
"61": "indoor",
|
||||
"62": "botanical_garden",
|
||||
"63": "indoor",
|
||||
"64": "bowling_alley",
|
||||
"65": "boxing_ring",
|
||||
"66": "bridge",
|
||||
"67": "building_facade",
|
||||
"68": "bullring",
|
||||
"69": "burial_chamber",
|
||||
"70": "bus_interior",
|
||||
"71": "indoor",
|
||||
"72": "butchers_shop",
|
||||
"73": "butte",
|
||||
"74": "outdoor",
|
||||
"75": "cafeteria",
|
||||
"76": "campsite",
|
||||
"77": "campus",
|
||||
"78": "natural",
|
||||
"79": "urban",
|
||||
"80": "candy_store",
|
||||
"81": "canyon",
|
||||
"82": "car_interior",
|
||||
"83": "carrousel",
|
||||
"84": "castle",
|
||||
"85": "catacomb",
|
||||
"86": "cemetery",
|
||||
"87": "chalet",
|
||||
"88": "chemistry_lab",
|
||||
"89": "childs_room",
|
||||
"90": "indoor",
|
||||
"91": "outdoor",
|
||||
"92": "classroom",
|
||||
"93": "clean_room",
|
||||
"94": "cliff",
|
||||
"95": "closet",
|
||||
"96": "clothing_store",
|
||||
"97": "coast",
|
||||
"98": "cockpit",
|
||||
"99": "coffee_shop",
|
||||
"100": "computer_room",
|
||||
"101": "conference_center",
|
||||
"102": "conference_room",
|
||||
"103": "construction_site",
|
||||
"104": "corn_field",
|
||||
"105": "corral",
|
||||
"106": "corridor",
|
||||
"107": "cottage",
|
||||
"108": "courthouse",
|
||||
"109": "courtyard",
|
||||
"110": "covered_bridge_exterior",
|
||||
"111": "creek",
|
||||
"112": "crevasse",
|
||||
"113": "crosswalk",
|
||||
"114": "cubicle_office",
|
||||
"115": "dam",
|
||||
"116": "daycare_center",
|
||||
"117": "delicatessen",
|
||||
"118": "dentists_office",
|
||||
"119": "desert_sand",
|
||||
"120": "desert_vegetation",
|
||||
"121": "diner_indoor",
|
||||
"122": "diner_outdoor",
|
||||
"123": "dinette_home",
|
||||
"124": "dinette_vehicle",
|
||||
"125": "dining_car",
|
||||
"126": "dining_room",
|
||||
"127": "discotheque",
|
||||
"128": "dock",
|
||||
"129": "doorway_indoor",
|
||||
"130": "doorway_outdoor",
|
||||
"131": "dorm_room",
|
||||
"132": "driveway",
|
||||
"133": "driving_range_outdoor",
|
||||
"134": "drugstore",
|
||||
"135": "electrical_substation",
|
||||
"136": "elevator_door",
|
||||
"137": "elevator_escalator",
|
||||
"138": "elevator_interior",
|
||||
"139": "engine_room",
|
||||
"140": "escalator_indoor",
|
||||
"141": "excavation",
|
||||
"142": "factory_indoor",
|
||||
"143": "fairway",
|
||||
"144": "fastfood_restaurant",
|
||||
"145": "field_cultivated",
|
||||
"146": "field_wild",
|
||||
"147": "fire_escape",
|
||||
"148": "fire_station",
|
||||
"149": "firing_range_indoor",
|
||||
"150": "fishpond",
|
||||
"151": "florist_shop_indoor",
|
||||
"152": "food_court",
|
||||
"153": "forest_broadleaf",
|
||||
"154": "forest_needleleaf",
|
||||
"155": "forest_path",
|
||||
"156": "forest_road",
|
||||
"157": "formal_garden",
|
||||
"158": "fountain",
|
||||
"159": "galley",
|
||||
"160": "game_room",
|
||||
"161": "garage_indoor",
|
||||
"162": "garage_outdoor",
|
||||
"163": "garbage_dump",
|
||||
"164": "gas_station",
|
||||
"165": "gazebo_exterior",
|
||||
"166": "general_store_indoor",
|
||||
"167": "general_store_outdoor",
|
||||
"168": "gift_shop",
|
||||
"169": "golf_course",
|
||||
"170": "greenhouse_indoor",
|
||||
"171": "greenhouse_outdoor",
|
||||
"172": "gymnasium_indoor",
|
||||
"173": "hangar_indoor",
|
||||
"174": "hangar_outdoor",
|
||||
"175": "harbor",
|
||||
"176": "hardware_store",
|
||||
"177": "hayfield",
|
||||
"178": "heliport",
|
||||
"179": "herb_garden",
|
||||
"180": "highway",
|
||||
"181": "hill",
|
||||
"182": "home_office",
|
||||
"183": "hospital",
|
||||
"184": "hospital_room",
|
||||
"185": "hot_spring",
|
||||
"186": "hot_tub_outdoor",
|
||||
"187": "hotel",
|
||||
"188": "hotel_outdoor",
|
||||
"189": "hotel_room",
|
||||
"190": "house",
|
||||
"191": "hunting_lodge_outdoor",
|
||||
"192": "ice_cream_parlor",
|
||||
"193": "ice_floe",
|
||||
"194": "ice_shelf",
|
||||
"195": "ice_skating_rink_indoor",
|
||||
"196": "ice_skating_rink_outdoor",
|
||||
"197": "iceberg",
|
||||
"198": "igloo",
|
||||
"199": "industrial_area",
|
||||
"200": "inn_outdoor",
|
||||
"201": "islet",
|
||||
"202": "jacuzzi_indoor",
|
||||
"203": "jail_cell",
|
||||
"204": "jail_indoor",
|
||||
"205": "jewelry_shop",
|
||||
"206": "kasbah",
|
||||
"207": "kennel_indoor",
|
||||
"208": "kennel_outdoor",
|
||||
"209": "kindergarden_classroom",
|
||||
"210": "kitchen",
|
||||
"211": "kitchenette",
|
||||
"212": "labyrinth_outdoor",
|
||||
"213": "lake_natural",
|
||||
"214": "landfill",
|
||||
"215": "landing_deck",
|
||||
"216": "laundromat",
|
||||
"217": "lecture_room",
|
||||
"218": "library_indoor",
|
||||
"219": "library_outdoor",
|
||||
"220": "lido_deck_outdoor",
|
||||
"221": "lift_bridge",
|
||||
"222": "lighthouse",
|
||||
"223": "limousine_interior",
|
||||
"224": "living_room",
|
||||
"225": "loading_dock",
|
||||
"226": "lobby",
|
||||
"227": "lock_chamber",
|
||||
"228": "locker_room",
|
||||
"229": "mansion",
|
||||
"230": "manufactured_home",
|
||||
"231": "market_indoor",
|
||||
"232": "market_outdoor",
|
||||
"233": "marsh",
|
||||
"234": "martial_arts_gym",
|
||||
"235": "mausoleum",
|
||||
"236": "medina",
|
||||
"237": "moat_water",
|
||||
"238": "monastery_outdoor",
|
||||
"239": "mosque_indoor",
|
||||
"240": "mosque_outdoor",
|
||||
"241": "motel",
|
||||
"242": "mountain",
|
||||
"243": "mountain_path",
|
||||
"244": "mountain_snowy",
|
||||
"245": "movie_theater_indoor",
|
||||
"246": "museum_indoor",
|
||||
"247": "museum_outdoor",
|
||||
"248": "music_store",
|
||||
"249": "music_studio",
|
||||
"250": "nuclear_power_plant_outdoor",
|
||||
"251": "nursery",
|
||||
"252": "oast_house",
|
||||
"253": "observatory_indoor",
|
||||
"254": "observatory_outdoor",
|
||||
"255": "ocean",
|
||||
"256": "office",
|
||||
"257": "office_building",
|
||||
"258": "office_cubicles",
|
||||
"259": "oil_refinery_outdoor",
|
||||
"260": "oilrig",
|
||||
"261": "operating_room",
|
||||
"262": "orchard",
|
||||
"263": "outhouse_outdoor",
|
||||
"264": "pagoda",
|
||||
"265": "palace",
|
||||
"266": "pantry",
|
||||
"267": "park",
|
||||
"268": "parking_garage_indoor",
|
||||
"269": "parking_garage_outdoor",
|
||||
"270": "parking_lot",
|
||||
"271": "parlor",
|
||||
"272": "pasture",
|
||||
"273": "patio",
|
||||
"274": "pavilion",
|
||||
"275": "pharmacy",
|
||||
"276": "phone_booth",
|
||||
"277": "physics_laboratory",
|
||||
"278": "picnic_area",
|
||||
"279": "pilothouse_indoor",
|
||||
"280": "planetarium_indoor",
|
||||
"281": "playground",
|
||||
"282": "playroom",
|
||||
"283": "plaza",
|
||||
"284": "podium_indoor",
|
||||
"285": "podium_outdoor",
|
||||
"286": "pond",
|
||||
"287": "poolroom_home",
|
||||
"288": "poolroom_establishment",
|
||||
"289": "power_plant_outdoor",
|
||||
"290": "promenade_deck",
|
||||
"291": "pub_indoor",
|
||||
"292": "pulpit",
|
||||
"293": "putting_green",
|
||||
"294": "racecourse",
|
||||
"295": "raceway",
|
||||
"296": "raft",
|
||||
"297": "railroad_track",
|
||||
"298": "rainforest",
|
||||
"299": "reception",
|
||||
"300": "recreation_room",
|
||||
"301": "residential_neighborhood",
|
||||
"302": "restaurant",
|
||||
"303": "restaurant_kitchen",
|
||||
"304": "restaurant_patio",
|
||||
"305": "rice_paddy",
|
||||
"306": "riding_arena",
|
||||
"307": "river",
|
||||
"308": "rock_arch",
|
||||
"309": "rope_bridge",
|
||||
"310": "ruin",
|
||||
"311": "runway",
|
||||
"312": "sandbar",
|
||||
"313": "sandbox",
|
||||
"314": "sauna",
|
||||
"315": "schoolhouse",
|
||||
"316": "sea_cliff",
|
||||
"317": "server_room",
|
||||
"318": "shed",
|
||||
"319": "shoe_shop",
|
||||
"320": "shop_front",
|
||||
"321": "shopping_mall_indoor",
|
||||
"322": "shower",
|
||||
"323": "skatepark",
|
||||
"324": "ski_resort",
|
||||
"325": "ski_slope",
|
||||
"326": "sky",
|
||||
"327": "skyscraper",
|
||||
"328": "slum",
|
||||
"329": "snowfield",
|
||||
"330": "squash_court",
|
||||
"331": "stable",
|
||||
"332": "stadium_baseball",
|
||||
"333": "stadium_football",
|
||||
"334": "staircase",
|
||||
"335": "street",
|
||||
"336": "subway_interior",
|
||||
"337": "subway_station_platform",
|
||||
"338": "supermarket",
|
||||
"339": "sushi_bar",
|
||||
"340": "swamp",
|
||||
"341": "swimming_hole",
|
||||
"342": "swimming_pool_indoor",
|
||||
"343": "swimming_pool_outdoor",
|
||||
"344": "synagogue_indoor",
|
||||
"345": "synagogue_outdoor",
|
||||
"346": "television_room",
|
||||
"347": "television_studio",
|
||||
"348": "temple_asia",
|
||||
"349": "temple_europe",
|
||||
"350": "trench",
|
||||
"351": "underwater_coral_reef",
|
||||
"352": "utility_room",
|
||||
"353": "valley",
|
||||
"354": "van_interior",
|
||||
"355": "vegetable_garden",
|
||||
"356": "veranda",
|
||||
"357": "veterinarians_office",
|
||||
"358": "viaduct",
|
||||
"359": "videostore",
|
||||
"360": "village",
|
||||
"361": "vineyard",
|
||||
"362": "volcano",
|
||||
"363": "volleyball_court_indoor",
|
||||
"364": "volleyball_court_outdoor",
|
||||
"365": "waiting_room",
|
||||
"366": "warehouse_indoor",
|
||||
"367": "water_tower",
|
||||
"368": "waterfall_block",
|
||||
"369": "waterfall_fan",
|
||||
"370": "waterfall_plunge",
|
||||
"371": "wetland",
|
||||
"372": "wheat_field",
|
||||
"373": "wind_farm",
|
||||
"374": "windmill",
|
||||
"375": "wine_cellar_barrel_storage",
|
||||
"376": "wine_cellar_bottle_storage",
|
||||
"377": "wrestling_ring_indoor",
|
||||
"378": "yard",
|
||||
"379": "youth_hostel"
|
||||
"110": "creek",
|
||||
"111": "crevasse",
|
||||
"112": "crosswalk",
|
||||
"113": "dam",
|
||||
"114": "delicatessen",
|
||||
"115": "department_store",
|
||||
"116": "sand",
|
||||
"117": "vegetation",
|
||||
"118": "desert_road",
|
||||
"119": "outdoor",
|
||||
"120": "dining_hall",
|
||||
"121": "dining_room",
|
||||
"122": "discotheque",
|
||||
"123": "outdoor",
|
||||
"124": "dorm_room",
|
||||
"125": "downtown",
|
||||
"126": "dressing_room",
|
||||
"127": "driveway",
|
||||
"128": "drugstore",
|
||||
"129": "door",
|
||||
"130": "elevator_lobby",
|
||||
"131": "elevator_shaft",
|
||||
"132": "embassy",
|
||||
"133": "engine_room",
|
||||
"134": "entrance_hall",
|
||||
"135": "indoor",
|
||||
"136": "excavation",
|
||||
"137": "fabric_store",
|
||||
"138": "farm",
|
||||
"139": "fastfood_restaurant",
|
||||
"140": "cultivated",
|
||||
"141": "wild",
|
||||
"142": "field_road",
|
||||
"143": "fire_escape",
|
||||
"144": "fire_station",
|
||||
"145": "fishpond",
|
||||
"146": "indoor",
|
||||
"147": "indoor",
|
||||
"148": "food_court",
|
||||
"149": "football_field",
|
||||
"150": "broadleaf",
|
||||
"151": "forest_path",
|
||||
"152": "forest_road",
|
||||
"153": "formal_garden",
|
||||
"154": "fountain",
|
||||
"155": "galley",
|
||||
"156": "indoor",
|
||||
"157": "outdoor",
|
||||
"158": "gas_station",
|
||||
"159": "exterior",
|
||||
"160": "indoor",
|
||||
"161": "outdoor",
|
||||
"162": "gift_shop",
|
||||
"163": "glacier",
|
||||
"164": "golf_course",
|
||||
"165": "indoor",
|
||||
"166": "outdoor",
|
||||
"167": "grotto",
|
||||
"168": "indoor",
|
||||
"169": "indoor",
|
||||
"170": "outdoor",
|
||||
"171": "harbor",
|
||||
"172": "hardware_store",
|
||||
"173": "hayfield",
|
||||
"174": "heliport",
|
||||
"175": "highway",
|
||||
"176": "home_office",
|
||||
"177": "home_theater",
|
||||
"178": "hospital",
|
||||
"179": "hospital_room",
|
||||
"180": "hot_spring",
|
||||
"181": "outdoor",
|
||||
"182": "hotel_room",
|
||||
"183": "house",
|
||||
"184": "outdoor",
|
||||
"185": "ice_cream_parlor",
|
||||
"186": "ice_floe",
|
||||
"187": "ice_shelf",
|
||||
"188": "indoor",
|
||||
"189": "outdoor",
|
||||
"190": "iceberg",
|
||||
"191": "igloo",
|
||||
"192": "industrial_area",
|
||||
"193": "outdoor",
|
||||
"194": "islet",
|
||||
"195": "indoor",
|
||||
"196": "jail_cell",
|
||||
"197": "japanese_garden",
|
||||
"198": "jewelry_shop",
|
||||
"199": "junkyard",
|
||||
"200": "kasbah",
|
||||
"201": "outdoor",
|
||||
"202": "kindergarden_classroom",
|
||||
"203": "kitchen",
|
||||
"204": "lagoon",
|
||||
"205": "natural",
|
||||
"206": "landfill",
|
||||
"207": "landing_deck",
|
||||
"208": "laundromat",
|
||||
"209": "lawn",
|
||||
"210": "lecture_room",
|
||||
"211": "legislative_chamber",
|
||||
"212": "indoor",
|
||||
"213": "outdoor",
|
||||
"214": "lighthouse",
|
||||
"215": "living_room",
|
||||
"216": "loading_dock",
|
||||
"217": "lobby",
|
||||
"218": "lock_chamber",
|
||||
"219": "locker_room",
|
||||
"220": "mansion",
|
||||
"221": "manufactured_home",
|
||||
"222": "indoor",
|
||||
"223": "outdoor",
|
||||
"224": "marsh",
|
||||
"225": "martial_arts_gym",
|
||||
"226": "mausoleum",
|
||||
"227": "medina",
|
||||
"228": "mezzanine",
|
||||
"229": "water",
|
||||
"230": "outdoor",
|
||||
"231": "motel",
|
||||
"232": "mountain",
|
||||
"233": "mountain_path",
|
||||
"234": "mountain_snowy",
|
||||
"235": "indoor",
|
||||
"236": "indoor",
|
||||
"237": "outdoor",
|
||||
"238": "music_studio",
|
||||
"239": "natural_history_museum",
|
||||
"240": "nursery",
|
||||
"241": "nursing_home",
|
||||
"242": "oast_house",
|
||||
"243": "ocean",
|
||||
"244": "office",
|
||||
"245": "office_building",
|
||||
"246": "office_cubicles",
|
||||
"247": "oilrig",
|
||||
"248": "operating_room",
|
||||
"249": "orchard",
|
||||
"250": "orchestra_pit",
|
||||
"251": "pagoda",
|
||||
"252": "palace",
|
||||
"253": "pantry",
|
||||
"254": "park",
|
||||
"255": "indoor",
|
||||
"256": "outdoor",
|
||||
"257": "parking_lot",
|
||||
"258": "pasture",
|
||||
"259": "patio",
|
||||
"260": "pavilion",
|
||||
"261": "pet_shop",
|
||||
"262": "pharmacy",
|
||||
"263": "phone_booth",
|
||||
"264": "physics_laboratory",
|
||||
"265": "picnic_area",
|
||||
"266": "pier",
|
||||
"267": "pizzeria",
|
||||
"268": "playground",
|
||||
"269": "playroom",
|
||||
"270": "plaza",
|
||||
"271": "pond",
|
||||
"272": "porch",
|
||||
"273": "promenade",
|
||||
"274": "indoor",
|
||||
"275": "racecourse",
|
||||
"276": "raceway",
|
||||
"277": "raft",
|
||||
"278": "railroad_track",
|
||||
"279": "rainforest",
|
||||
"280": "reception",
|
||||
"281": "recreation_room",
|
||||
"282": "repair_shop",
|
||||
"283": "residential_neighborhood",
|
||||
"284": "restaurant",
|
||||
"285": "restaurant_kitchen",
|
||||
"286": "restaurant_patio",
|
||||
"287": "rice_paddy",
|
||||
"288": "river",
|
||||
"289": "rock_arch",
|
||||
"290": "roof_garden",
|
||||
"291": "rope_bridge",
|
||||
"292": "ruin",
|
||||
"293": "runway",
|
||||
"294": "sandbox",
|
||||
"295": "sauna",
|
||||
"296": "schoolhouse",
|
||||
"297": "science_museum",
|
||||
"298": "server_room",
|
||||
"299": "shed",
|
||||
"300": "shoe_shop",
|
||||
"301": "shopfront",
|
||||
"302": "indoor",
|
||||
"303": "shower",
|
||||
"304": "ski_resort",
|
||||
"305": "ski_slope",
|
||||
"306": "sky",
|
||||
"307": "skyscraper",
|
||||
"308": "slum",
|
||||
"309": "snowfield",
|
||||
"310": "soccer_field",
|
||||
"311": "stable",
|
||||
"312": "baseball",
|
||||
"313": "football",
|
||||
"314": "soccer",
|
||||
"315": "indoor",
|
||||
"316": "outdoor",
|
||||
"317": "staircase",
|
||||
"318": "storage_room",
|
||||
"319": "street",
|
||||
"320": "platform",
|
||||
"321": "supermarket",
|
||||
"322": "sushi_bar",
|
||||
"323": "swamp",
|
||||
"324": "swimming_hole",
|
||||
"325": "indoor",
|
||||
"326": "outdoor",
|
||||
"327": "outdoor",
|
||||
"328": "television_room",
|
||||
"329": "television_studio",
|
||||
"330": "asia",
|
||||
"331": "throne_room",
|
||||
"332": "ticket_booth",
|
||||
"333": "topiary_garden",
|
||||
"334": "tower",
|
||||
"335": "toyshop",
|
||||
"336": "train_interior",
|
||||
"337": "platform",
|
||||
"338": "tree_farm",
|
||||
"339": "tree_house",
|
||||
"340": "trench",
|
||||
"341": "tundra",
|
||||
"342": "ocean_deep",
|
||||
"343": "utility_room",
|
||||
"344": "valley",
|
||||
"345": "vegetable_garden",
|
||||
"346": "veterinarians_office",
|
||||
"347": "viaduct",
|
||||
"348": "village",
|
||||
"349": "vineyard",
|
||||
"350": "volcano",
|
||||
"351": "outdoor",
|
||||
"352": "waiting_room",
|
||||
"353": "water_park",
|
||||
"354": "water_tower",
|
||||
"355": "waterfall",
|
||||
"356": "watering_hole",
|
||||
"357": "wave",
|
||||
"358": "wet_bar",
|
||||
"359": "wheat_field",
|
||||
"360": "wind_farm",
|
||||
"361": "windmill",
|
||||
"362": "yard",
|
||||
"363": "youth_hostel",
|
||||
"364": "zen_garden"
|
||||
}
|
||||
@@ -162,9 +162,13 @@ class SceneClassifier:
|
||||
model_path: Core ML 模型路徑 (可選)
|
||||
"""
|
||||
self.model_path = model_path
|
||||
self.places365_model_path = (
|
||||
"/Users/accusys/momentry/models/resnet18_places365.pth.tar"
|
||||
)
|
||||
self.model = None
|
||||
self.coreml_model = None
|
||||
self.transform = None
|
||||
self.model_type = "unknown"
|
||||
|
||||
# 圖像預處理
|
||||
self.transform = transforms.Compose(
|
||||
@@ -189,23 +193,57 @@ class SceneClassifier:
|
||||
try:
|
||||
print(f"[SCENE] Loading Core ML model: {self.model_path}")
|
||||
self.coreml_model = ct.models.MLModel(self.model_path)
|
||||
self.model_type = "coreml"
|
||||
print("[SCENE] Core ML model loaded successfully")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"[SCENE] Warning: Failed to load Core ML model: {e}")
|
||||
|
||||
# 備案:使用 PyTorch + ResNet
|
||||
# 備案:使用 PyTorch + Places365
|
||||
if HAS_TORCH:
|
||||
try:
|
||||
print(f"[SCENE] Loading PyTorch model on {DEVICE}")
|
||||
# 使用預訓練的 ResNet18
|
||||
self.model = models.resnet18(pretrained=True)
|
||||
|
||||
# 檢查 Places365 模型是否存在
|
||||
if Path(self.places365_model_path).exists():
|
||||
print(
|
||||
f"[SCENE] Loading Places365 model: {self.places365_model_path}"
|
||||
)
|
||||
checkpoint = torch.load(
|
||||
self.places365_model_path, map_location=DEVICE
|
||||
)
|
||||
|
||||
# 建立 ResNet18 模型 (Places365 有 365 個類別)
|
||||
self.model = models.resnet18(num_classes=365)
|
||||
|
||||
# 移除 'module.' prefix (DataParallel training)
|
||||
state_dict = checkpoint["state_dict"]
|
||||
new_state_dict = {}
|
||||
for k, v in state_dict.items():
|
||||
if k.startswith("module."):
|
||||
new_state_dict[k[7:]] = v
|
||||
else:
|
||||
new_state_dict[k] = v
|
||||
|
||||
self.model.load_state_dict(new_state_dict)
|
||||
self.model_type = "places365"
|
||||
print("[SCENE] Places365 model loaded successfully (365 classes)")
|
||||
else:
|
||||
print(
|
||||
f"[SCENE] Places365 model not found, using ImageNet pretrained"
|
||||
)
|
||||
self.model = models.resnet18(pretrained=True)
|
||||
self.model_type = "imagenet"
|
||||
|
||||
self.model.to(DEVICE)
|
||||
self.model.eval()
|
||||
print("[SCENE] PyTorch model loaded successfully")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"[SCENE] Warning: Failed to load PyTorch model: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
print("[SCENE] Error: No model available")
|
||||
return False
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Story Processor - Generate parent-child chunk hierarchy for RAG
|
||||
Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks.
|
||||
|
||||
Parent-Child Chunk Strategy:
|
||||
- Parent chunks: Summarize multiple scenes/segments with narrative description
|
||||
- Child chunks: Individual ASR segments, OCR texts, detected objects
|
||||
- When embedding: Parent description + Child content for better retrieval
|
||||
Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks.
|
||||
NO cloud API calls - fully offline processing
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -47,57 +43,59 @@ def generate_parent_child_chunks(
|
||||
cut_data: Dict,
|
||||
yolo_data: Dict,
|
||||
ocr_data: Dict,
|
||||
scene_data: Dict,
|
||||
parent_chunk_size: int = 5,
|
||||
) -> Dict[str, Any]:
|
||||
) -> Dict:
|
||||
"""
|
||||
Generate parent-child chunk hierarchy.
|
||||
|
||||
Parent chunks summarize multiple child chunks for better RAG retrieval.
|
||||
Child chunks are individual segments from ASR, scenes from CUT, etc.
|
||||
Generate parent-child chunk hierarchy using LOCAL data only.
|
||||
No LLM/API calls - uses template-based narrative generation.
|
||||
"""
|
||||
|
||||
child_chunks = []
|
||||
parent_chunks = []
|
||||
|
||||
# Get source data
|
||||
asr_segments = asr_data.get("segments", [])
|
||||
cut_scenes = cut_data.get("scenes", [])
|
||||
yolo_frames = yolo_data.get("frames", [])
|
||||
_ocr_frames = ocr_data.get("frames", [])
|
||||
|
||||
# Create child chunks from ASR segments
|
||||
asr_child_ids = []
|
||||
for i, seg in enumerate(asr_segments):
|
||||
child_chunk = {
|
||||
"chunk_id": f"asr_{i:04d}",
|
||||
"chunk_type": "sentence",
|
||||
"source": "asr",
|
||||
"start_time": seg.get("start", 0),
|
||||
"end_time": seg.get("end", 0),
|
||||
"text_content": seg.get("text", ""),
|
||||
"content": seg,
|
||||
"child_chunk_ids": [],
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
child_chunks.append(child_chunk)
|
||||
asr_child_ids.append(child_chunk["chunk_id"])
|
||||
# Create child chunks from ASR
|
||||
for seg in asr_data.get("segments", []):
|
||||
child_chunks.append(
|
||||
{
|
||||
"chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}",
|
||||
"chunk_type": "asr",
|
||||
"source": "asr",
|
||||
"start_time": seg.get("start", 0),
|
||||
"end_time": seg.get("end", 0),
|
||||
"text_content": seg.get("text", ""),
|
||||
"content": {
|
||||
"text": seg.get("text", ""),
|
||||
"confidence": seg.get("confidence", 0),
|
||||
},
|
||||
"child_chunk_ids": [],
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
)
|
||||
|
||||
# Create child chunks from CUT scenes
|
||||
cut_child_ids = []
|
||||
for i, scene in enumerate(cut_scenes):
|
||||
child_chunk = {
|
||||
"chunk_id": f"cut_{i:04d}",
|
||||
"chunk_type": "cut",
|
||||
"source": "cut",
|
||||
"start_time": scene.get("start_time", scene.get("start", 0)),
|
||||
"end_time": scene.get("end_time", scene.get("end", 0)),
|
||||
"text_content": None,
|
||||
"content": scene,
|
||||
"child_chunk_ids": [],
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
child_chunks.append(child_chunk)
|
||||
cut_child_ids.append(child_chunk["chunk_id"])
|
||||
for scene in cut_data.get("scenes", []):
|
||||
child_chunks.append(
|
||||
{
|
||||
"chunk_id": f"cut_{scene.get('scene_number', 0)}",
|
||||
"chunk_type": "cut",
|
||||
"source": "cut",
|
||||
"start_time": scene.get("start_time", 0),
|
||||
"end_time": scene.get("end_time", 0),
|
||||
"text_content": f"Scene {scene.get('scene_number', 0)}",
|
||||
"content": {
|
||||
"scene_number": scene.get("scene_number", 0),
|
||||
"duration": scene.get("duration", 0),
|
||||
},
|
||||
"child_chunk_ids": [],
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
)
|
||||
|
||||
asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"]
|
||||
cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"]
|
||||
|
||||
yolo_frames = yolo_data.get("frames", [])
|
||||
ocr_frames = ocr_data.get("frames", [])
|
||||
|
||||
# Group ASR segments into parent chunks
|
||||
for i in range(0, len(asr_child_ids), parent_chunk_size):
|
||||
@@ -105,7 +103,6 @@ def generate_parent_child_chunks(
|
||||
if not batch:
|
||||
continue
|
||||
|
||||
# Collect text from child chunks
|
||||
batch_texts = []
|
||||
batch_objects = []
|
||||
batch_times = []
|
||||
@@ -118,11 +115,16 @@ def generate_parent_child_chunks(
|
||||
batch_times.append((child["start_time"], child["end_time"]))
|
||||
break
|
||||
|
||||
# Create parent chunk with narrative description
|
||||
start_time = batch_times[0][0] if batch_times else 0
|
||||
end_time = batch_times[-1][1] if batch_times else 0
|
||||
|
||||
# Generate narrative description
|
||||
# Find objects in this time range
|
||||
for frame in yolo_frames[:50]:
|
||||
ts = frame.get("timestamp", 0)
|
||||
if start_time <= ts <= end_time:
|
||||
for obj in frame.get("objects", []):
|
||||
batch_objects.append(obj.get("class_name", "unknown"))
|
||||
|
||||
narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time)
|
||||
|
||||
parent_chunk = {
|
||||
@@ -136,13 +138,13 @@ def generate_parent_child_chunks(
|
||||
"description": narrative,
|
||||
"child_count": len(batch),
|
||||
"speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
|
||||
"detected_objects": list(set(batch_objects))[:5],
|
||||
},
|
||||
"child_chunk_ids": batch,
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
parent_chunks.append(parent_chunk)
|
||||
|
||||
# Update child chunks with parent reference
|
||||
for child_id in batch:
|
||||
for child in child_chunks:
|
||||
if child["chunk_id"] == child_id:
|
||||
@@ -167,14 +169,12 @@ def generate_parent_child_chunks(
|
||||
start_time = batch_times[0][0] if batch_times else 0
|
||||
end_time = batch_times[-1][1] if batch_times else 0
|
||||
|
||||
# Find objects in this time range from YOLO
|
||||
for frame in yolo_frames[:100]: # Sample frames
|
||||
for frame in yolo_frames[:50]:
|
||||
ts = frame.get("timestamp", 0)
|
||||
if start_time <= ts <= end_time:
|
||||
for obj in frame.get("objects", []):
|
||||
batch_objects.append(obj.get("class_name", "unknown"))
|
||||
|
||||
# Generate scene narrative
|
||||
narrative = generate_scene_narrative(
|
||||
batch_objects, start_time, end_time, len(batch)
|
||||
)
|
||||
@@ -190,14 +190,13 @@ def generate_parent_child_chunks(
|
||||
"description": narrative,
|
||||
"child_count": len(batch),
|
||||
"scenes": batch,
|
||||
"detected_objects": list(set(batch_objects))[:10],
|
||||
"detected_objects": list(set(batch_objects))[:5],
|
||||
},
|
||||
"child_chunk_ids": batch,
|
||||
"parent_chunk_id": None,
|
||||
}
|
||||
parent_chunks.append(parent_chunk)
|
||||
|
||||
# Update child chunks with parent reference
|
||||
for child_id in batch:
|
||||
for child in child_chunks:
|
||||
if child["chunk_id"] == child_id:
|
||||
@@ -219,27 +218,33 @@ def generate_parent_child_chunks(
|
||||
def generate_narrative(
|
||||
texts: List[str], objects: List[str], start: float, end: float
|
||||
) -> str:
|
||||
"""Generate narrative description from text snippets"""
|
||||
if not texts:
|
||||
"""Generate narrative description from LOCAL text snippets and objects"""
|
||||
if not texts and not objects:
|
||||
return f"Video segment from {start:.1f}s to {end:.1f}s"
|
||||
|
||||
# Combine and summarize
|
||||
combined = " ".join(texts)
|
||||
if len(combined) > 200:
|
||||
combined = combined[:200] + "..."
|
||||
parts = []
|
||||
if texts:
|
||||
combined = " ".join(texts[:5])
|
||||
if len(combined) > 150:
|
||||
combined = combined[:150] + "..."
|
||||
parts.append(f"Speech: {combined}")
|
||||
|
||||
return f"[{start:.0f}s-{end:.0f}s] {combined}"
|
||||
if objects:
|
||||
unique_objs = list(set(objects))[:5]
|
||||
parts.append(f"Visuals: {', '.join(unique_objs)}")
|
||||
|
||||
return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}"
|
||||
|
||||
|
||||
def generate_scene_narrative(
|
||||
objects: List[str], start: float, end: float, scene_count: int
|
||||
) -> str:
|
||||
"""Generate scene narrative from detected objects"""
|
||||
"""Generate scene narrative from LOCAL detected objects"""
|
||||
unique_objects = list(set(objects))[:5]
|
||||
|
||||
if unique_objects:
|
||||
obj_str = ", ".join(unique_objects)
|
||||
return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}."
|
||||
return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}."
|
||||
else:
|
||||
return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."
|
||||
|
||||
@@ -251,70 +256,45 @@ def run_story(
|
||||
if publisher:
|
||||
publisher.info("story", "STORY_START")
|
||||
|
||||
# Load existing JSON files
|
||||
base_path = os.path.dirname(output_path)
|
||||
uuid_name = os.path.basename(output_path).split(".")[0]
|
||||
|
||||
# Load analysis data
|
||||
asr_data = {"segments": []}
|
||||
cut_data = {"scenes": []}
|
||||
yolo_data = {"frames": []}
|
||||
ocr_data = {"frames": []}
|
||||
scene_data = {"scenes": []}
|
||||
|
||||
# Load ASR
|
||||
asr_path = os.path.join(base_path, f"{uuid_name}.asr.json")
|
||||
if os.path.exists(asr_path):
|
||||
with open(asr_path) as f:
|
||||
asr_data = json.load(f)
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments"
|
||||
)
|
||||
for name, data_var in [
|
||||
("asr", asr_data),
|
||||
("cut", cut_data),
|
||||
("yolo", yolo_data),
|
||||
("ocr", ocr_data),
|
||||
("scene", scene_data),
|
||||
]:
|
||||
path = os.path.join(base_path, f"{uuid_name}.{name}.json")
|
||||
if os.path.exists(path):
|
||||
with open(path) as f:
|
||||
data_var.update(json.load(f))
|
||||
|
||||
# Load CUT
|
||||
cut_path = os.path.join(base_path, f"{uuid_name}.cut.json")
|
||||
if os.path.exists(cut_path):
|
||||
with open(cut_path) as f:
|
||||
cut_data = json.load(f)
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes"
|
||||
)
|
||||
|
||||
# Load YOLO
|
||||
yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
|
||||
if os.path.exists(yolo_path):
|
||||
with open(yolo_path) as f:
|
||||
yolo_data = json.load(f)
|
||||
|
||||
# Load OCR
|
||||
ocr_path = os.path.join(base_path, f"{uuid_name}.ocr.json")
|
||||
if os.path.exists(ocr_path):
|
||||
with open(ocr_path) as f:
|
||||
ocr_data = json.load(f)
|
||||
|
||||
# Load metadata
|
||||
metadata = extract_video_metadata(video_path)
|
||||
|
||||
if publisher:
|
||||
publisher.info("story", "Generating parent-child chunks...")
|
||||
|
||||
# Generate parent-child hierarchy
|
||||
result = generate_parent_child_chunks(
|
||||
asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size
|
||||
asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size
|
||||
)
|
||||
|
||||
result["metadata"] = metadata
|
||||
result["parent_chunk_size"] = parent_chunk_size
|
||||
result["video_metadata"] = extract_video_metadata(video_path)
|
||||
result["processing"] = {
|
||||
"method": "local_aggregation",
|
||||
"cloud_api_used": False,
|
||||
"parent_chunk_size": parent_chunk_size,
|
||||
}
|
||||
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2, ensure_ascii=False)
|
||||
|
||||
if publisher:
|
||||
stats = result["stats"]
|
||||
publisher.complete(
|
||||
"story",
|
||||
f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children",
|
||||
f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)",
|
||||
)
|
||||
|
||||
return result
|
||||
@@ -322,7 +302,7 @@ def run_story(
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Video Story Generator - Parent-Child Chunks"
|
||||
description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)"
|
||||
)
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
@@ -331,7 +311,7 @@ if __name__ == "__main__":
|
||||
"--parent-chunk-size",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Number of child chunks per parent chunk",
|
||||
help="Number of child chunks per parent",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -340,6 +320,6 @@ if __name__ == "__main__":
|
||||
args.video_path, args.output_path, args.uuid, args.parent_chunk_size
|
||||
)
|
||||
print(
|
||||
f"Story generated: {result['stats']['total_parent_chunks']} parent chunks, "
|
||||
f"{result['stats']['total_child_chunks']} child chunks"
|
||||
f"Story generated: {result['stats']['total_parent_chunks']} parent, "
|
||||
f"{result['stats']['total_child_chunks']} child chunks (LOCAL)"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user