#!/opt/homebrew/bin/python3.11
"""
Debug OWL-ViT with Multiple Prompts

Seeks to a fixed list of timestamps in one video, runs OWL-ViT object
detection on each frame once per prompt set, prints every detection, and
writes an annotated JPEG for each frame that produced at least one detection.
"""
import os

import cv2
import torch
from PIL import Image
from transformers import OwlViTProcessor, OwlViTForObjectDetection

UUID = "384b0ff44aaaa1f1"
VIDEO_PATH = f"output/{UUID}/{UUID}.mp4"
OUTPUT_DIR = f"output/{UUID}/owl_vit_results_debug"

# Checkpoint used for both the processor and the detection model.
MODEL_NAME = "google/owlvit-base-patch32"

# Minimum score a detection needs in order to be reported and drawn.
# Passed to the post-processing step, which does the filtering.
SCORE_THRESHOLD = 0.05

# Drawing colour (BGR, as OpenCV expects) for boxes and labels.
BOX_COLOR = (0, 255, 0)

os.makedirs(OUTPUT_DIR, exist_ok=True)

print("🧠 Loading OWL-ViT model...")
processor = OwlViTProcessor.from_pretrained(MODEL_NAME)
model = OwlViTForObjectDetection.from_pretrained(MODEL_NAME)

cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Without this check every read below fails and the script would exit
    # silently, looking as if it had succeeded.
    raise SystemExit(f"Could not open video: {VIDEO_PATH}")

# Frames we want to check (seconds from the start of the video)
timestamps = [5851.6, 5860.4, 6756.6, 6846.0]

# Prompts to try; each inner list is sent to the model as one query set
prompts = [
    ["a postage stamp", "a stamp"],
    ["a letter", "an envelope", "a piece of paper"],
    ["a small square paper"],
]

try:
    for t in timestamps:
        # CAP_PROP_POS_MSEC takes milliseconds; timestamps are in seconds.
        cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
        ret, frame = cap.read()
        if not ret:
            print(f" ⚠️ Could not read frame at {t:.2f}s, skipping")
            continue

        # OpenCV delivers BGR; the processor is fed an RGB PIL image.
        image_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # PIL reports (width, height); post-processing wants (height, width).
        target_sizes = torch.Tensor([image_pil.size[::-1]])

        # Try different prompt sets; all hits are drawn onto the same frame
        found_any = False
        for text_queries in prompts:
            inputs = processor(
                text=text_queries, images=image_pil, return_tensors="pt"
            )
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                outputs = model(**inputs)
            results = processor.post_process_object_detection(
                outputs=outputs,
                target_sizes=target_sizes,
                threshold=SCORE_THRESHOLD,
            )
            detections = results[0]

            for box, score, label in zip(
                detections["boxes"], detections["scores"], detections["labels"]
            ):
                found_any = True
                x_min, y_min, x_max, y_max = box.int().tolist()
                # Labels index into the query list that was just submitted.
                label_text = text_queries[label.item()]
                confidence = score.item()
                print(f" 🟢 Found '{label_text}' ({confidence:.3f}) at {t:.2f}s")

                # Draw
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), BOX_COLOR, 2)
                cv2.putText(
                    frame,
                    f"{label_text} {confidence:.3f}",
                    # Keep the caption on-screen for boxes touching the top edge.
                    (x_min, max(y_min - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    BOX_COLOR,
                    1,
                )

        if not found_any:
            # Frames without detections are not written to disk, so there is
            # nothing to annotate here.
            print(f" 🔴 Nothing found at {t:.2f}s")
            continue

        # Save result
        save_path = os.path.join(OUTPUT_DIR, f"detected_{int(t)}.jpg")
        cv2.imwrite(save_path, frame)
        print(f" 💾 Saved to {save_path}")
finally:
    # Release the capture even if inference or drawing raised.
    cap.release()