#!/opt/homebrew/bin/python3.11
"""
Test Florence-2 for "Stamps" Detection using Pipeline

Grabs a single frame from a recorded video at a fixed timestamp, saves it
as a JPEG, and runs the Florence-2 model on it twice through the
``transformers`` pipeline: once for a plain caption and once for
open-vocabulary detection of the word "stamp". Results are printed.
"""
import os
import sys

import cv2
from transformers import pipeline

UUID = "384b0ff44aaaa1f1"
VIDEO_PATH = f"output/{UUID}/{UUID}.mp4"
OUTPUT_DIR = f"output/{UUID}/florence2_results"

# Frame where "stamp" is heavily discussed
TIMESTAMP = 6846.0

# Florence-2 selects its task through a special token placed in the prompt
# (task names as listed in the Florence-2 model card).
CAPTION_TASK = "<CAPTION>"
OPEN_VOCAB_DETECTION_TASK = "<OPEN_VOCABULARY_DETECTION>"


def main() -> None:
    """Extract the frame at TIMESTAMP, save it, and run Florence-2 on it.

    Exits with status 1 if the frame cannot be read or written. Errors
    raised while loading or running the model are printed, not re-raised,
    so the script still reaches its final "Done." line.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print(f"📽️ Extracting frame at {TIMESTAMP}s...")
    cap = cv2.VideoCapture(VIDEO_PATH)
    try:
        # CAP_PROP_POS_MSEC expects milliseconds; TIMESTAMP is in seconds.
        cap.set(cv2.CAP_PROP_POS_MSEC, TIMESTAMP * 1000)
        ret, frame = cap.read()
    finally:
        # Release the capture handle even if seeking/reading raises.
        cap.release()

    if not ret:
        print("❌ Failed to read frame.")
        sys.exit(1)

    # Save raw frame
    raw_path = os.path.join(OUTPUT_DIR, f"raw_{int(TIMESTAMP)}.jpg")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(raw_path, frame):
        print("❌ Failed to write raw frame.")
        sys.exit(1)
    print("💾 Raw frame saved.")

    print("🧠 Loading Florence-2 model via pipeline...")
    try:
        # Using pipeline handles model configuration automatically
        pipe = pipeline(
            "image-to-text",
            model="microsoft/Florence-2-base",
            trust_remote_code=True,
        )

        print("🔍 Running detection on 'stamp'...")
        # Florence-2 tasks: '<CAPTION>', '<OPEN_VOCABULARY_DETECTION>', etc.
        # We want to see if there is a stamp, so let's use caption first to
        # see what it sees.
        result = pipe(raw_path, prompt=CAPTION_TASK)
        print(f"📝 Caption Result: {result}")

        # Let's try open vocabulary detection for 'stamp'
        print("🔍 Running Open Vocabulary Detection for 'stamp'...")
        # NOTE(review): `text_input` is passed through as in the original
        # script; confirm the image-to-text pipeline accepts it. The
        # Florence-2 model card instead appends the query text directly to
        # the task token in the prompt.
        result_ood = pipe(
            raw_path,
            prompt=OPEN_VOCAB_DETECTION_TASK,
            text_input="stamp",
        )
        print(f"📦 OOD Result: {result_ood}")

    except Exception as e:
        # Top-level boundary of an experiment script: report and carry on.
        print(f"❌ Error: {e}")

    print("🏁 Done.")


if __name__ == "__main__":
    main()