Files
momentry_core/scripts/test_florence2_pipeline.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

58 lines
1.6 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Test Florence-2 for "Stamps" Detection using Pipeline
"""
import os
import cv2
from transformers import pipeline
# Identifier of the video under test; it names both the output folder and the
# video file inside it.
UUID = "384b0ff44aaaa1f1"
VIDEO_PATH = f"output/{UUID}/{UUID}.mp4"
OUTPUT_DIR = f"output/{UUID}/florence2_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Frame where "stamp" is heavily discussed (position in seconds).
TIMESTAMP = 6846.0

print(f"📽️ Extracting frame at {TIMESTAMP}s...")
cap = cv2.VideoCapture(VIDEO_PATH)
try:
    # CAP_PROP_POS_MSEC is expressed in milliseconds, hence the conversion.
    cap.set(cv2.CAP_PROP_POS_MSEC, TIMESTAMP * 1000)
    ret, frame = cap.read()
finally:
    # Release the capture handle even if seeking or decoding raises.
    cap.release()

if not ret:
    print("❌ Failed to read frame.")
    # Exit with a non-zero status so callers can detect the failure; the
    # site-provided exit() helper would have reported success (status 0).
    raise SystemExit(1)

# Save raw frame
raw_path = os.path.join(OUTPUT_DIR, f"raw_{int(TIMESTAMP)}.jpg")
# cv2.imwrite signals failure through its return value rather than raising,
# so check it before claiming the frame was saved.
if not cv2.imwrite(raw_path, frame):
    print(f"❌ Failed to write frame to {raw_path}.")
    raise SystemExit(1)
print("💾 Raw frame saved.")
# Run Florence-2 on the saved frame: first a caption, then an open-vocabulary
# detection query for "stamp". Any failure is reported and the script exits
# with a non-zero status.
print("🧠 Loading Florence-2 model via pipeline...")
try:
    # Using pipeline handles model configuration automatically
    pipe = pipeline(
        "image-to-text", model="microsoft/Florence-2-base", trust_remote_code=True
    )

    print("🔍 Running detection on 'stamp'...")
    # Florence-2 tasks: '<OPEN_VOCABULARY_DETECTION>', '<CAPTION>', etc.
    # We want to see if there is a stamp, so let's use caption first to see what it sees.
    result = pipe(raw_path, prompt="<CAPTION>")
    print(f"📝 Caption Result: {result}")

    # Let's try open vocabulary detection for 'stamp'
    print("🔍 Running Open Vocabulary Detection for 'stamp'...")
    # The query text is appended to the task token inside `prompt`; the
    # image-to-text pipeline call does not take a separate `text_input` keyword.
    # NOTE(review): prompt format follows the Florence-2 model card — confirm
    # against the installed transformers version.
    result_ovd = pipe(raw_path, prompt="<OPEN_VOCABULARY_DETECTION>stamp")
    print(f"📦 OOD Result: {result_ovd}")
except Exception as e:
    # Top-level boundary of the script: report the error, then exit non-zero
    # instead of falling through to "Done." with a success status.
    print(f"❌ Error: {e}")
    raise SystemExit(1) from e
print("🏁 Done.")