fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing
- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (dynamic from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log instead of silent discard
- Add publish_pipeline_progress calls at each pipeline stage (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
@@ -126,9 +126,17 @@ def _convert_result(result, output_path):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
segment_count = len(result.get("segments", []))
|
||||
if segment_count > 0:
|
||||
status = "has_transcript"
|
||||
else:
|
||||
status = "silent_audio"
|
||||
|
||||
output_result = {
|
||||
"status": status,
|
||||
"language": result.get("language"),
|
||||
"segments": [],
|
||||
"segment_count": segment_count,
|
||||
"n_speakers": result.get("n_speakers", 0),
|
||||
"speaker_stats": result.get("speaker_stats", {}),
|
||||
}
|
||||
@@ -172,6 +180,37 @@ def process_asrx(video_path: str, output_path: str, uuid: str = "",
|
||||
if publisher:
|
||||
publisher.info("asrx", "ASRX_START")
|
||||
|
||||
# Check for audio stream first
|
||||
tracks = probe_audio_tracks(video_path)
|
||||
if not tracks:
|
||||
if publisher:
|
||||
publisher.info("asrx", "No audio stream detected")
|
||||
output_result = {"status": "no_audio_track", "language": None, "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", "0 segments (no audio)")
|
||||
print("[ASRX] No audio stream, skipping", file=sys.stderr)
|
||||
return output_result
|
||||
|
||||
# Check if ASR already determined no audio/silent - skip processing
|
||||
asr_path = output_path.replace(".asrx.json", ".asr.json")
|
||||
if os.path.exists(asr_path):
|
||||
try:
|
||||
with open(asr_path) as f:
|
||||
asr_data = json.load(f)
|
||||
asr_status = asr_data.get("status", "")
|
||||
if asr_status in ("no_audio_track", "silent_audio"):
|
||||
if publisher:
|
||||
publisher.info("asrx", f"ASR status={asr_status}, skipping ASRX processing")
|
||||
output_result = {"status": asr_status, "language": asr_data.get("language"), "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", f"0 segments (ASR: {asr_status})")
|
||||
print(f"[ASRX] ASR status={asr_status}, skipping", file=sys.stderr)
|
||||
return output_result
|
||||
except Exception as e:
|
||||
print(f"[ASRX] Failed to read ASR output: {e}", file=sys.stderr)
|
||||
|
||||
checkpoint_path = output_path + ".stage1.json"
|
||||
|
||||
# ── Phase 2: Resume from checkpoint (Steps 4-7 only) ──
|
||||
@@ -189,7 +228,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = "",
|
||||
if "error" in result:
|
||||
if publisher:
|
||||
publisher.error("asrx", result["error"])
|
||||
output_result = {"language": None, "segments": []}
|
||||
output_result = {"status": "silent_audio", "language": None, "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", "0 segments")
|
||||
@@ -225,7 +264,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = "",
|
||||
publisher.error("asrx", str(e))
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
output_result = {"language": None, "segments": []}
|
||||
output_result = {"status": "silent_audio", "language": None, "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", "0 segments")
|
||||
@@ -289,7 +328,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = "",
|
||||
if "error" in result:
|
||||
if publisher:
|
||||
publisher.error("asrx", result["error"])
|
||||
output_result = {"language": None, "segments": []}
|
||||
output_result = {"status": "silent_audio", "language": None, "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", "0 segments")
|
||||
@@ -320,7 +359,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = "",
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
output_result = {"language": None, "segments": []}
|
||||
output_result = {"status": "silent_audio", "language": None, "segments": [], "segment_count": 0}
|
||||
_atomic_write(output_path, output_result)
|
||||
if publisher:
|
||||
publisher.complete("asrx", "0 segments")
|
||||
|
||||
Reference in New Issue
Block a user