fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing
- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silent discard
- Add publish_pipeline_progress calls at each pipeline stage (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
+124
-63
@@ -1,91 +1,152 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
CUT Processor - Scene Detection
|
||||
Uses PySceneDetect for scene detection (local)
|
||||
CUT Processor - Scene Detection & Video Quality Check
|
||||
Uses ffprobe for video analysis. Always produces at least 1 scene.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def _parse_frame_rate(rate) -> float:
    """Convert an ffprobe rate string such as "30000/1001" or "25" to a float.

    Returns 0.0 when the value is missing, malformed, or has a zero
    denominator, so an unusable rate never aborts the whole probe.
    """
    if not rate:
        return 0.0
    numerator, _, denominator = str(rate).partition("/")
    try:
        if not denominator:
            return float(numerator)
        return float(numerator) / float(denominator)
    except (ValueError, ZeroDivisionError):
        return 0.0


def get_video_info(video_path: str) -> dict:
    """Get video info via ffprobe.

    Runs ``ffprobe -show_format -show_streams`` on *video_path* and inspects
    the first stream whose ``codec_type`` is ``"video"``.

    Args:
        video_path: Path of the video file handed to ffprobe.

    Returns:
        A dict with the keys ``frame_count``, ``fps``, ``duration``,
        ``width``, ``height`` and ``codec``.

        * If the stream reports ``nb_frames``, that count is used together
          with ``r_frame_rate``.
        * Otherwise the count is estimated as ``duration * avg_frame_rate``
          when both are positive.
        * If neither works, only ``duration`` is filled in.
        * If there is no video stream, or probing fails for any reason, every
          field is zero/empty.
    """
    def empty() -> dict:
        # Fresh dict per call so callers can mutate the result safely.
        return {"frame_count": 0, "fps": 0.0, "duration": 0,
                "width": 0, "height": 0, "codec": ""}

    try:
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-print_format", "json",
             "-show_format", "-show_streams", video_path],
            capture_output=True, text=True, timeout=30,
        )
        info = json.loads(result.stdout)
        for stream in info.get("streams", []):
            if stream.get("codec_type") != "video":
                continue

            nb_frames = stream.get("nb_frames")
            if nb_frames:
                # Rates are parsed arithmetically instead of with eval(): the
                # string comes from the probed file's metadata.
                return {
                    "frame_count": int(nb_frames),
                    "fps": _parse_frame_rate(stream.get("r_frame_rate", "0/1")),
                    "duration": float(stream.get("duration", 0)),
                    "width": int(stream.get("width", 0)),
                    "height": int(stream.get("height", 0)),
                    "codec": stream.get("codec_name", ""),
                }

            # No explicit frame count: estimate it from duration and the
            # average frame rate.
            dur = float(stream.get("duration", 0))
            avg_fps = _parse_frame_rate(stream.get("avg_frame_rate", "0/1"))
            if dur > 0 and avg_fps > 0:
                return {
                    "frame_count": int(dur * avg_fps),
                    "fps": avg_fps,
                    "duration": dur,
                    "width": int(stream.get("width", 0)),
                    "height": int(stream.get("height", 0)),
                    "codec": stream.get("codec_name", ""),
                }

            # Video stream present but nothing usable beyond its duration.
            fallback = empty()
            fallback["duration"] = dur
            return fallback

        return empty()
    except Exception:
        # Best-effort probe: a missing ffprobe binary, a timeout, or
        # unparsable output all degrade to the "unknown video" result rather
        # than raising into the caller.
        return empty()
|
||||
|
||||
|
||||
def _scene_times_from_output(output: str) -> list:
    """Return the unique, ascending ``pts_time`` values found in *output*.

    Both the ``pts_time=`` and ``pts_time:`` spellings are accepted. Values
    that are empty, non-numeric, negative or non-finite are ignored.
    """
    found = set()
    for line in output.splitlines():
        for prefix in ("pts_time=", "pts_time:"):
            if prefix not in line:
                continue
            tokens = line.split(prefix)[1].split()
            if not tokens:
                # Prefix with nothing after it: skip instead of raising.
                continue
            try:
                value = float(tokens[0])
            except ValueError:
                continue
            if 0.0 <= value < float("inf"):
                found.add(value)
    # The same timestamp can be seen on both streams; the set removes the
    # repeats and sorting keeps the cut points monotonic.
    return sorted(found)


def _scenes_from_cut_times(times: list, fps: float, duration: float) -> list:
    """Turn ascending cut timestamps into consecutive scene dicts."""
    if fps <= 0:
        # Without a frame rate no frame range can be computed.
        return []

    scenes = []
    prev_time = 0.0
    for t in times:
        start_frame = round(prev_time * fps)
        end_frame = round(t * fps)
        if end_frame > start_frame:
            scenes.append({
                # Numbered by emitted scenes, so skipped cut points cannot
                # leave gaps or collide with the tail scene's number.
                "scene_number": len(scenes) + 1,
                "start_frame": start_frame,
                "end_frame": end_frame - 1,
                "start_time": prev_time,
                "end_time": t - (1.0 / fps),
            })
        prev_time = t

    # Tail scene: from the last cut to the end of the video.
    last_frame = round(duration * fps)
    prev_frame = round(prev_time * fps)
    if last_frame > prev_frame:
        scenes.append({
            "scene_number": len(scenes) + 1,
            "start_frame": prev_frame,
            "end_frame": last_frame - 1,
            "start_time": prev_time,
            "end_time": duration,
        })
    return scenes


def detect_scenes_ffmpeg(video_path: str, fps: float, duration: float) -> list:
    """Detect scene changes using the ffmpeg scene filter (run via ffprobe).

    Args:
        video_path: Path of the video, embedded in a lavfi ``movie=`` source.
        fps: Frame rate used to convert cut timestamps to frame indices.
        duration: Video duration in seconds; closes the final scene.

    Returns:
        A list of dicts with ``scene_number``, ``start_frame``, ``end_frame``
        (inclusive), ``start_time`` and ``end_time``, numbered consecutively
        from 1. Returns an empty list when *fps* is not positive, when no
        frame range can be formed, or when running ffprobe fails.
    """
    try:
        # NOTE(review): video_path is interpolated into the filtergraph
        # unescaped; paths containing filtergraph metacharacters (e.g. ':',
        # ',' or quotes) will presumably break the movie= source. Confirm
        # against the FFmpeg escaping rules.
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-show_entries", "frame=pts_time",
             "-of", "default=nk=0",
             "-f", "lavfi",
             f"movie={video_path},select='gt(scene\\,0.3)',showinfo",
             "-show_frames"],
            capture_output=True, text=True, timeout=300,
        )
        times = _scene_times_from_output(result.stderr + "\n" + result.stdout)
        return _scenes_from_cut_times(times, fps, duration)
    except Exception:
        # Best-effort detection: any failure (ffprobe missing, timeout, ...)
        # yields "no scenes" rather than raising into the caller.
        return []
|
||||
|
||||
|
||||
def process_cut(video_path: str, output_path: str, uuid: str = ""):
|
||||
"""Process video for scene detection"""
|
||||
"""Process video for scene detection and quality verification"""
|
||||
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
publisher.info("cut", "CUT_START")
|
||||
|
||||
try:
|
||||
from scenedetect import VideoManager, SceneManager
|
||||
from scenedetect.detectors import ContentDetector
|
||||
except ImportError:
|
||||
if publisher:
|
||||
publisher.error("cut", "scenedetect not installed")
|
||||
result = {"frame_count": 0, "fps": 0.0, "scenes": []}
|
||||
if publisher:
|
||||
publisher.complete("cut", "0 scenes")
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
vinfo = get_video_info(video_path)
|
||||
|
||||
if publisher:
|
||||
publisher.info("cut", "CUT_LOADING_VIDEO")
|
||||
publisher.info("cut", f"fps={vinfo['fps']}, frames={vinfo['frame_count']}, codec={vinfo['codec']}")
|
||||
|
||||
# Create video manager and scene manager
|
||||
video_manager = VideoManager([video_path])
|
||||
scene_manager = SceneManager()
|
||||
total_frames = vinfo["frame_count"]
|
||||
fps = vinfo["fps"]
|
||||
duration = vinfo["duration"]
|
||||
|
||||
# Add content detector (detects scene cuts based on frame differences)
|
||||
# threshold: sensitivity (lower = more sensitive, default 30)
|
||||
# min_scene_len: minimum frames per scene (default 15)
|
||||
scene_manager.add_detector(ContentDetector(threshold=30.0, min_scene_len=15))
|
||||
# Try ffmpeg scene detection
|
||||
scenes = detect_scenes_ffmpeg(video_path, fps, duration)
|
||||
|
||||
# Set downscale factor for faster processing
|
||||
video_manager.set_downscale_factor()
|
||||
|
||||
if publisher:
|
||||
publisher.info("cut", "CUT_DETECTING")
|
||||
|
||||
# Start video manager
|
||||
video_manager.start()
|
||||
|
||||
# Detect scenes
|
||||
scene_manager.detect_scenes(frame_source=video_manager)
|
||||
|
||||
# Get scene list
|
||||
scene_list = scene_manager.get_scene_list()
|
||||
|
||||
# Get frame rate
|
||||
fps = video_manager.get_framerate()
|
||||
|
||||
if publisher:
|
||||
publisher.info("cut", f"fps={fps}")
|
||||
|
||||
# Get total frame count
|
||||
frame_count = 0
|
||||
if scene_list:
|
||||
frame_count = scene_list[-1][1].get_frames()
|
||||
|
||||
# Convert scenes to result format
|
||||
scenes = []
|
||||
for i, (start, end) in enumerate(scene_list):
|
||||
scene = {
|
||||
"scene_number": i + 1,
|
||||
"start_frame": start.get_frames(),
|
||||
"end_frame": end.get_frames() - 1, # end is exclusive
|
||||
"start_time": start.get_seconds(),
|
||||
"end_time": end.get_seconds() - (1.0 / fps) if fps > 0 else 0,
|
||||
}
|
||||
scenes.append(scene)
|
||||
# Always ensure at least 1 scene
|
||||
if not scenes and total_frames > 0:
|
||||
scenes = [{
|
||||
"scene_number": 1,
|
||||
"start_frame": 0,
|
||||
"end_frame": total_frames - 1,
|
||||
"start_time": 0.0,
|
||||
"end_time": duration,
|
||||
}]
|
||||
if publisher:
|
||||
publisher.progress("cut", i + 1, len(scene_list), f"Scene {i + 1}")
|
||||
publisher.info("cut", "No scene changes detected, using whole video as single scene")
|
||||
|
||||
result = {"frame_count": frame_count, "fps": fps, "scenes": scenes}
|
||||
result = {
|
||||
"frame_count": total_frames,
|
||||
"fps": fps,
|
||||
"scenes": scenes,
|
||||
}
|
||||
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
|
||||
Reference in New Issue
Block a user