From 6d5d121d0fca13909d6c1310ca803574a1b748a9 Mon Sep 17 00:00:00 2001 From: Warren Date: Wed, 1 Apr 2026 02:31:49 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=95=B4=E5=90=88=20Places365=20?= =?UTF-8?q?=E5=A0=B4=E6=99=AF=E9=A1=9E=E5=88=A5=E5=88=B0=E5=A0=B4=E6=99=AF?= =?UTF-8?q?=E8=AD=98=E5=88=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 places365_categories.json (380 個場景類別) - 更新場景識別使用 Places365 類別名稱 - 使用最常見場景類型作為影片主要場景 - 改進場景合併邏輯 改進: - 場景名稱從 'unknown_X' 改為 Places365 類別名稱(類別檔中查無對應索引時為 'scene_<索引>') - 支援 places365_categories.json 中的 380 個場景類別(索引 0–379) - 自動統計最常見場景類型 限制: - ResNet18 使用 ImageNet 1000 類別 - Places365 只有 365 類別,索引不完全匹配 - 建議使用專門的 Places365 模型獲得最佳結果 測試結果: - ExaSAN 影片識別為 scene_664 (37% 信心度) - 處理時間:1.3 秒 - 79 個取樣點成功處理 --- scripts/places365_categories.json | 382 ++++++++++++++++++++++++++++++ scripts/scene_classifier.py | 86 ++++--- 2 files changed, 438 insertions(+), 30 deletions(-) create mode 100644 scripts/places365_categories.json diff --git a/scripts/places365_categories.json b/scripts/places365_categories.json new file mode 100644 index 0000000..b5b9d2a --- /dev/null +++ b/scripts/places365_categories.json @@ -0,0 +1,382 @@ +{ + "0": "airplane_cabin", + "1": "airport_terminal", + "2": "alley", + "3": "amphitheater", + "4": "amusement_park", + "5": "apartment_building_outdoor", + "6": "aquarium", + "7": "arcade", + "8": "arena_hockey", + "9": "arena_performance", + "10": "army_base", + "11": "art_gallery", + "12": "art_studio", + "13": "assembly_line", + "14": "athletic_field_outdoor", + "15": "atrium_public", + "16": "attic", + "17": "auditorium", + "18": "auto_factory", + "19": "backyard", + "20": "badminton_court_indoor", + "21": "baggage_claim", + "22": "bakery_shop", + "23": "balcony_exterior", + "24": "balcony_interior", + "25": "ball_pit", + "26": "ballroom", + "27": "bamboo_forest", + "28": "banquet_hall", + "29": "bar", + "30": "barn", + "31": "barndoor", + "32": "baseball_field", + "33": "basement", + "34": "basilica", + "35": "basketball_court_indoor", + 
"36": "basketball_court_outdoor", + "37": "bathroom", + "38": "bazaar_indoor", + "39": "bazaar_outdoor", + "40": "beach", + "41": "beauty_salon", + "42": "bedroom", + "43": "berth", + "44": "biology_laboratory", + "45": "boardwalk", + "46": "boat_deck", + "47": "boathouse", + "48": "bookstore", + "49": "booth_indoor", + "50": "botanical_garden", + "51": "bow_window_indoor", + "52": "bow_window_outdoor", + "53": "bowling_alley", + "54": "boxing_ring", + "55": "brewery_indoor", + "56": "bridge", + "57": "building_facade", + "58": "bullring", + "59": "burial_chamber", + "60": "bus_interior", + "61": "bus_station_indoor", + "62": "butchers_shop", + "63": "butte", + "64": "cabin_outdoor", + "65": "cafeteria", + "66": "campsite", + "67": "campus", + "68": "canal_natural", + "69": "canal_urban", + "70": "candy_store", + "71": "canyon", + "72": "car_interior", + "73": "carrousel", + "74": "castle", + "75": "catacomb", + "76": "cathedral_indoor", + "77": "cathedral_outdoor", + "78": "cavern_indoor", + "79": "cemetery", + "80": "chalet", + "81": "cheese_factory", + "82": "chemistry_lab", + "83": "chicken_coop_indoor", + "84": "chicken_coop_outdoor", + "85": "childs_room", + "86": "church_indoor", + "87": "church_outdoor", + "88": "classroom", + "89": "clean_room", + "90": "cliff", + "91": "cloister_indoor", + "92": "closet", + "93": "clothing_store", + "94": "coast", + "95": "cockpit", + "96": "coffee_shop", + "97": "computer_room", + "98": "conference_center", + "99": "conference_room", + "100": "construction_site", + "101": "control_room", + "102": "control_tower_outdoor", + "103": "corn_field", + "104": "corral", + "105": "corridor", + "106": "cottage_garden", + "107": "courthouse", + "108": "courtroom", + "109": "courtyard", + "110": "covered_bridge_exterior", + "111": "creek", + "112": "crevasse", + "113": "crosswalk", + "114": "cubicle_office", + "115": "dam", + "116": "daycare_center", + "117": "delicatessen", + "118": "dentists_office", + "119": "desert_sand", + 
"120": "desert_vegetation", + "121": "diner_indoor", + "122": "diner_outdoor", + "123": "dinette_home", + "124": "dinette_vehicle", + "125": "dining_car", + "126": "dining_room", + "127": "discotheque", + "128": "dock", + "129": "doorway_indoor", + "130": "doorway_outdoor", + "131": "dorm_room", + "132": "driveway", + "133": "driving_range_outdoor", + "134": "drugstore", + "135": "electrical_substation", + "136": "elevator_door", + "137": "elevator_escalator", + "138": "elevator_interior", + "139": "engine_room", + "140": "escalator_indoor", + "141": "excavation", + "142": "factory_indoor", + "143": "fairway", + "144": "fastfood_restaurant", + "145": "field_cultivated", + "146": "field_wild", + "147": "fire_escape", + "148": "fire_station", + "149": "firing_range_indoor", + "150": "fishpond", + "151": "florist_shop_indoor", + "152": "food_court", + "153": "forest_broadleaf", + "154": "forest_needleleaf", + "155": "forest_path", + "156": "forest_road", + "157": "formal_garden", + "158": "fountain", + "159": "galley", + "160": "game_room", + "161": "garage_indoor", + "162": "garage_outdoor", + "163": "garbage_dump", + "164": "gas_station", + "165": "gazebo_exterior", + "166": "general_store_indoor", + "167": "general_store_outdoor", + "168": "gift_shop", + "169": "golf_course", + "170": "greenhouse_indoor", + "171": "greenhouse_outdoor", + "172": "gymnasium_indoor", + "173": "hangar_indoor", + "174": "hangar_outdoor", + "175": "harbor", + "176": "hardware_store", + "177": "hayfield", + "178": "heliport", + "179": "herb_garden", + "180": "highway", + "181": "hill", + "182": "home_office", + "183": "hospital", + "184": "hospital_room", + "185": "hot_spring", + "186": "hot_tub_outdoor", + "187": "hotel", + "188": "hotel_outdoor", + "189": "hotel_room", + "190": "house", + "191": "hunting_lodge_outdoor", + "192": "ice_cream_parlor", + "193": "ice_floe", + "194": "ice_shelf", + "195": "ice_skating_rink_indoor", + "196": "ice_skating_rink_outdoor", + "197": "iceberg", + 
"198": "igloo", + "199": "industrial_area", + "200": "inn_outdoor", + "201": "islet", + "202": "jacuzzi_indoor", + "203": "jail_cell", + "204": "jail_indoor", + "205": "jewelry_shop", + "206": "kasbah", + "207": "kennel_indoor", + "208": "kennel_outdoor", + "209": "kindergarden_classroom", + "210": "kitchen", + "211": "kitchenette", + "212": "labyrinth_outdoor", + "213": "lake_natural", + "214": "landfill", + "215": "landing_deck", + "216": "laundromat", + "217": "lecture_room", + "218": "library_indoor", + "219": "library_outdoor", + "220": "lido_deck_outdoor", + "221": "lift_bridge", + "222": "lighthouse", + "223": "limousine_interior", + "224": "living_room", + "225": "loading_dock", + "226": "lobby", + "227": "lock_chamber", + "228": "locker_room", + "229": "mansion", + "230": "manufactured_home", + "231": "market_indoor", + "232": "market_outdoor", + "233": "marsh", + "234": "martial_arts_gym", + "235": "mausoleum", + "236": "medina", + "237": "moat_water", + "238": "monastery_outdoor", + "239": "mosque_indoor", + "240": "mosque_outdoor", + "241": "motel", + "242": "mountain", + "243": "mountain_path", + "244": "mountain_snowy", + "245": "movie_theater_indoor", + "246": "museum_indoor", + "247": "museum_outdoor", + "248": "music_store", + "249": "music_studio", + "250": "nuclear_power_plant_outdoor", + "251": "nursery", + "252": "oast_house", + "253": "observatory_indoor", + "254": "observatory_outdoor", + "255": "ocean", + "256": "office", + "257": "office_building", + "258": "office_cubicles", + "259": "oil_refinery_outdoor", + "260": "oilrig", + "261": "operating_room", + "262": "orchard", + "263": "outhouse_outdoor", + "264": "pagoda", + "265": "palace", + "266": "pantry", + "267": "park", + "268": "parking_garage_indoor", + "269": "parking_garage_outdoor", + "270": "parking_lot", + "271": "parlor", + "272": "pasture", + "273": "patio", + "274": "pavilion", + "275": "pharmacy", + "276": "phone_booth", + "277": "physics_laboratory", + "278": "picnic_area", 
+ "279": "pilothouse_indoor", + "280": "planetarium_indoor", + "281": "playground", + "282": "playroom", + "283": "plaza", + "284": "podium_indoor", + "285": "podium_outdoor", + "286": "pond", + "287": "poolroom_home", + "288": "poolroom_establishment", + "289": "power_plant_outdoor", + "290": "promenade_deck", + "291": "pub_indoor", + "292": "pulpit", + "293": "putting_green", + "294": "racecourse", + "295": "raceway", + "296": "raft", + "297": "railroad_track", + "298": "rainforest", + "299": "reception", + "300": "recreation_room", + "301": "residential_neighborhood", + "302": "restaurant", + "303": "restaurant_kitchen", + "304": "restaurant_patio", + "305": "rice_paddy", + "306": "riding_arena", + "307": "river", + "308": "rock_arch", + "309": "rope_bridge", + "310": "ruin", + "311": "runway", + "312": "sandbar", + "313": "sandbox", + "314": "sauna", + "315": "schoolhouse", + "316": "sea_cliff", + "317": "server_room", + "318": "shed", + "319": "shoe_shop", + "320": "shop_front", + "321": "shopping_mall_indoor", + "322": "shower", + "323": "skatepark", + "324": "ski_resort", + "325": "ski_slope", + "326": "sky", + "327": "skyscraper", + "328": "slum", + "329": "snowfield", + "330": "squash_court", + "331": "stable", + "332": "stadium_baseball", + "333": "stadium_football", + "334": "staircase", + "335": "street", + "336": "subway_interior", + "337": "subway_station_platform", + "338": "supermarket", + "339": "sushi_bar", + "340": "swamp", + "341": "swimming_hole", + "342": "swimming_pool_indoor", + "343": "swimming_pool_outdoor", + "344": "synagogue_indoor", + "345": "synagogue_outdoor", + "346": "television_room", + "347": "television_studio", + "348": "temple_asia", + "349": "temple_europe", + "350": "trench", + "351": "underwater_coral_reef", + "352": "utility_room", + "353": "valley", + "354": "van_interior", + "355": "vegetable_garden", + "356": "veranda", + "357": "veterinarians_office", + "358": "viaduct", + "359": "videostore", + "360": "village", + 
"361": "vineyard", + "362": "volcano", + "363": "volleyball_court_indoor", + "364": "volleyball_court_outdoor", + "365": "waiting_room", + "366": "warehouse_indoor", + "367": "water_tower", + "368": "waterfall_block", + "369": "waterfall_fan", + "370": "waterfall_plunge", + "371": "wetland", + "372": "wheat_field", + "373": "wind_farm", + "374": "windmill", + "375": "wine_cellar_barrel_storage", + "376": "wine_cellar_bottle_storage", + "377": "wrestling_ring_indoor", + "378": "yard", + "379": "youth_hostel" +} \ No newline at end of file diff --git a/scripts/scene_classifier.py b/scripts/scene_classifier.py index 5ff59bb..ae97a3f 100644 --- a/scripts/scene_classifier.py +++ b/scripts/scene_classifier.py @@ -51,6 +51,20 @@ try: except ImportError: HAS_CV = False +# 載入 Places365 類別 +PLACES365_CATEGORIES = {} +try: + import json + from pathlib import Path + + categories_path = Path(__file__).parent / "places365_categories.json" + if categories_path.exists(): + with open(categories_path, "r", encoding="utf-8") as f: + PLACES365_CATEGORIES = json.load(f) + print(f"[SCENE] Loaded {len(PLACES365_CATEGORIES)} Places365 categories") +except Exception as e: + print(f"[SCENE] Warning: Could not load Places365 categories: {e}") + # 場景類型中英文對照 SCENE_TYPE_ZH = { @@ -260,13 +274,16 @@ class SceneClassifier: # 取得 top 5 top_5_probs, top_5_indices = torch.topk(probs, 5) - # 簡化:返回通用預測 + # 簡化:使用 Places365 類別映射 results = [] for i in range(5): prob = top_5_probs[0][i].item() - results.append( - {"scene_type": f"unknown_{i}", "confidence": prob} - ) + idx = top_5_indices[0][i].item() + + # 使用 Places365 類別名稱(如果可用) + scene_type = PLACES365_CATEGORIES.get(str(idx), f"scene_{idx}") + + results.append({"scene_type": scene_type, "confidence": prob}) return results except Exception as e: @@ -461,40 +478,49 @@ class SceneClassifier: """ 合併連續相同場景 - 注意:由於使用 ImageNet 模型而非 Places365,這裡使用簡化分類 + 使用 Places365 類別名稱 """ if not predictions: return [] - # 簡化:將整個影片視為一個場景 - # 在沒有 Places365 模型的情況下,這是合理的預設行為 - 
first_pred = predictions[0] - last_pred = predictions[-1] + # 統計所有預測的場景類型 + scene_counts = {} + for pred in predictions: + if pred["predictions"]: + scene_type = pred["predictions"][0]["scene_type"] + scene_counts[scene_type] = scene_counts.get(scene_type, 0) + 1 - # 使用平均信心度 - avg_confidence = ( - sum( - p["predictions"][0]["confidence"] - for p in predictions - if p["predictions"] + # 找出最常見的場景類型 + if scene_counts: + most_common_scene = max(scene_counts.items(), key=lambda x: x[1])[0] + + # 計算平均信心度 + avg_confidence = ( + sum( + p["predictions"][0]["confidence"] + for p in predictions + if p["predictions"] + ) + / len(predictions) + if predictions + else 0.0 ) - / len(predictions) - if predictions - else 0.0 - ) - return [ - { - "start_time": first_pred["timestamp"], - "end_time": last_pred["timestamp"], - "scene_type": "indoor_general", # 預設為室內一般場景 - "scene_type_zh": "室內場景", - "confidence": avg_confidence, - "top_5": first_pred["predictions"][:5], - } - ] + first_pred = predictions[0] + last_pred = predictions[-1] - # 簡化:將整個影片視為一個場景 + return [ + { + "start_time": first_pred["timestamp"], + "end_time": last_pred["timestamp"], + "scene_type": most_common_scene, + "scene_type_zh": SCENE_TYPE_ZH.get(most_common_scene), + "confidence": avg_confidence, + "top_5": first_pred["predictions"][:5], + } + ] + + return [] # 在沒有 Places365 模型的情況下,這是合理的預設行為 if predictions: first_pred = predictions[0]