feat: 整合 Places365 場景類別到場景識別

- 新增 places365_categories.json (380 個場景類別)
- 更新場景識別使用 Places365 類別名稱
- 使用最常見場景類型作為影片主要場景
- 改進場景合併邏輯

改進:
- 場景名稱從 'unknown_X' 改為 Places365 類別名稱;類別表中查不到該索引時退回 'scene_<索引>'
- 支援 places365_categories.json 內的 380 個場景類別(索引 0–379)
- 自動統計最常見場景類型

限制:
- ResNet18 使用 ImageNet 1000 類別
- Places365 官方只有 365 類別(本次加入的類別表為 380 筆),且 ImageNet 的輸出索引 (0–999) 與場景類別索引屬於不同的標籤空間,兩者並不對應
- 建議使用專門的 Places365 模型獲得最佳結果

測試結果:
- ExaSAN 影片識別為 scene_664 (37% 信心度;索引 664 不在 0–379 的類別表內,因此顯示的是退回名稱而非實際場景類別)
- 處理時間:1.3 秒
- 79 個取樣點成功處理
This commit is contained in:
Warren
2026-04-01 02:31:49 +08:00
parent 4109ec3d95
commit 6d5d121d0f
2 changed files with 438 additions and 30 deletions

View File

@@ -0,0 +1,382 @@
{
"0": "airplane_cabin",
"1": "airport_terminal",
"2": "alley",
"3": "amphitheater",
"4": "amusement_park",
"5": "apartment_building_outdoor",
"6": "aquarium",
"7": "arcade",
"8": "arena_hockey",
"9": "arena_performance",
"10": "army_base",
"11": "art_gallery",
"12": "art_studio",
"13": "assembly_line",
"14": "athletic_field_outdoor",
"15": "atrium_public",
"16": "attic",
"17": "auditorium",
"18": "auto_factory",
"19": "backyard",
"20": "badminton_court_indoor",
"21": "baggage_claim",
"22": "bakery_shop",
"23": "balcony_exterior",
"24": "balcony_interior",
"25": "ball_pit",
"26": "ballroom",
"27": "bamboo_forest",
"28": "banquet_hall",
"29": "bar",
"30": "barn",
"31": "barndoor",
"32": "baseball_field",
"33": "basement",
"34": "basilica",
"35": "basketball_court_indoor",
"36": "basketball_court_outdoor",
"37": "bathroom",
"38": "bazaar_indoor",
"39": "bazaar_outdoor",
"40": "beach",
"41": "beauty_salon",
"42": "bedroom",
"43": "berth",
"44": "biology_laboratory",
"45": "boardwalk",
"46": "boat_deck",
"47": "boathouse",
"48": "bookstore",
"49": "booth_indoor",
"50": "botanical_garden",
"51": "bow_window_indoor",
"52": "bow_window_outdoor",
"53": "bowling_alley",
"54": "boxing_ring",
"55": "brewery_indoor",
"56": "bridge",
"57": "building_facade",
"58": "bullring",
"59": "burial_chamber",
"60": "bus_interior",
"61": "bus_station_indoor",
"62": "butchers_shop",
"63": "butte",
"64": "cabin_outdoor",
"65": "cafeteria",
"66": "campsite",
"67": "campus",
"68": "canal_natural",
"69": "canal_urban",
"70": "candy_store",
"71": "canyon",
"72": "car_interior",
"73": "carrousel",
"74": "castle",
"75": "catacomb",
"76": "cathedral_indoor",
"77": "cathedral_outdoor",
"78": "cavern_indoor",
"79": "cemetery",
"80": "chalet",
"81": "cheese_factory",
"82": "chemistry_lab",
"83": "chicken_coop_indoor",
"84": "chicken_coop_outdoor",
"85": "childs_room",
"86": "church_indoor",
"87": "church_outdoor",
"88": "classroom",
"89": "clean_room",
"90": "cliff",
"91": "cloister_indoor",
"92": "closet",
"93": "clothing_store",
"94": "coast",
"95": "cockpit",
"96": "coffee_shop",
"97": "computer_room",
"98": "conference_center",
"99": "conference_room",
"100": "construction_site",
"101": "control_room",
"102": "control_tower_outdoor",
"103": "corn_field",
"104": "corral",
"105": "corridor",
"106": "cottage_garden",
"107": "courthouse",
"108": "courtroom",
"109": "courtyard",
"110": "covered_bridge_exterior",
"111": "creek",
"112": "crevasse",
"113": "crosswalk",
"114": "cubicle_office",
"115": "dam",
"116": "daycare_center",
"117": "delicatessen",
"118": "dentists_office",
"119": "desert_sand",
"120": "desert_vegetation",
"121": "diner_indoor",
"122": "diner_outdoor",
"123": "dinette_home",
"124": "dinette_vehicle",
"125": "dining_car",
"126": "dining_room",
"127": "discotheque",
"128": "dock",
"129": "doorway_indoor",
"130": "doorway_outdoor",
"131": "dorm_room",
"132": "driveway",
"133": "driving_range_outdoor",
"134": "drugstore",
"135": "electrical_substation",
"136": "elevator_door",
"137": "elevator_escalator",
"138": "elevator_interior",
"139": "engine_room",
"140": "escalator_indoor",
"141": "excavation",
"142": "factory_indoor",
"143": "fairway",
"144": "fastfood_restaurant",
"145": "field_cultivated",
"146": "field_wild",
"147": "fire_escape",
"148": "fire_station",
"149": "firing_range_indoor",
"150": "fishpond",
"151": "florist_shop_indoor",
"152": "food_court",
"153": "forest_broadleaf",
"154": "forest_needleleaf",
"155": "forest_path",
"156": "forest_road",
"157": "formal_garden",
"158": "fountain",
"159": "galley",
"160": "game_room",
"161": "garage_indoor",
"162": "garage_outdoor",
"163": "garbage_dump",
"164": "gas_station",
"165": "gazebo_exterior",
"166": "general_store_indoor",
"167": "general_store_outdoor",
"168": "gift_shop",
"169": "golf_course",
"170": "greenhouse_indoor",
"171": "greenhouse_outdoor",
"172": "gymnasium_indoor",
"173": "hangar_indoor",
"174": "hangar_outdoor",
"175": "harbor",
"176": "hardware_store",
"177": "hayfield",
"178": "heliport",
"179": "herb_garden",
"180": "highway",
"181": "hill",
"182": "home_office",
"183": "hospital",
"184": "hospital_room",
"185": "hot_spring",
"186": "hot_tub_outdoor",
"187": "hotel",
"188": "hotel_outdoor",
"189": "hotel_room",
"190": "house",
"191": "hunting_lodge_outdoor",
"192": "ice_cream_parlor",
"193": "ice_floe",
"194": "ice_shelf",
"195": "ice_skating_rink_indoor",
"196": "ice_skating_rink_outdoor",
"197": "iceberg",
"198": "igloo",
"199": "industrial_area",
"200": "inn_outdoor",
"201": "islet",
"202": "jacuzzi_indoor",
"203": "jail_cell",
"204": "jail_indoor",
"205": "jewelry_shop",
"206": "kasbah",
"207": "kennel_indoor",
"208": "kennel_outdoor",
"209": "kindergarden_classroom",
"210": "kitchen",
"211": "kitchenette",
"212": "labyrinth_outdoor",
"213": "lake_natural",
"214": "landfill",
"215": "landing_deck",
"216": "laundromat",
"217": "lecture_room",
"218": "library_indoor",
"219": "library_outdoor",
"220": "lido_deck_outdoor",
"221": "lift_bridge",
"222": "lighthouse",
"223": "limousine_interior",
"224": "living_room",
"225": "loading_dock",
"226": "lobby",
"227": "lock_chamber",
"228": "locker_room",
"229": "mansion",
"230": "manufactured_home",
"231": "market_indoor",
"232": "market_outdoor",
"233": "marsh",
"234": "martial_arts_gym",
"235": "mausoleum",
"236": "medina",
"237": "moat_water",
"238": "monastery_outdoor",
"239": "mosque_indoor",
"240": "mosque_outdoor",
"241": "motel",
"242": "mountain",
"243": "mountain_path",
"244": "mountain_snowy",
"245": "movie_theater_indoor",
"246": "museum_indoor",
"247": "museum_outdoor",
"248": "music_store",
"249": "music_studio",
"250": "nuclear_power_plant_outdoor",
"251": "nursery",
"252": "oast_house",
"253": "observatory_indoor",
"254": "observatory_outdoor",
"255": "ocean",
"256": "office",
"257": "office_building",
"258": "office_cubicles",
"259": "oil_refinery_outdoor",
"260": "oilrig",
"261": "operating_room",
"262": "orchard",
"263": "outhouse_outdoor",
"264": "pagoda",
"265": "palace",
"266": "pantry",
"267": "park",
"268": "parking_garage_indoor",
"269": "parking_garage_outdoor",
"270": "parking_lot",
"271": "parlor",
"272": "pasture",
"273": "patio",
"274": "pavilion",
"275": "pharmacy",
"276": "phone_booth",
"277": "physics_laboratory",
"278": "picnic_area",
"279": "pilothouse_indoor",
"280": "planetarium_indoor",
"281": "playground",
"282": "playroom",
"283": "plaza",
"284": "podium_indoor",
"285": "podium_outdoor",
"286": "pond",
"287": "poolroom_home",
"288": "poolroom_establishment",
"289": "power_plant_outdoor",
"290": "promenade_deck",
"291": "pub_indoor",
"292": "pulpit",
"293": "putting_green",
"294": "racecourse",
"295": "raceway",
"296": "raft",
"297": "railroad_track",
"298": "rainforest",
"299": "reception",
"300": "recreation_room",
"301": "residential_neighborhood",
"302": "restaurant",
"303": "restaurant_kitchen",
"304": "restaurant_patio",
"305": "rice_paddy",
"306": "riding_arena",
"307": "river",
"308": "rock_arch",
"309": "rope_bridge",
"310": "ruin",
"311": "runway",
"312": "sandbar",
"313": "sandbox",
"314": "sauna",
"315": "schoolhouse",
"316": "sea_cliff",
"317": "server_room",
"318": "shed",
"319": "shoe_shop",
"320": "shop_front",
"321": "shopping_mall_indoor",
"322": "shower",
"323": "skatepark",
"324": "ski_resort",
"325": "ski_slope",
"326": "sky",
"327": "skyscraper",
"328": "slum",
"329": "snowfield",
"330": "squash_court",
"331": "stable",
"332": "stadium_baseball",
"333": "stadium_football",
"334": "staircase",
"335": "street",
"336": "subway_interior",
"337": "subway_station_platform",
"338": "supermarket",
"339": "sushi_bar",
"340": "swamp",
"341": "swimming_hole",
"342": "swimming_pool_indoor",
"343": "swimming_pool_outdoor",
"344": "synagogue_indoor",
"345": "synagogue_outdoor",
"346": "television_room",
"347": "television_studio",
"348": "temple_asia",
"349": "temple_europe",
"350": "trench",
"351": "underwater_coral_reef",
"352": "utility_room",
"353": "valley",
"354": "van_interior",
"355": "vegetable_garden",
"356": "veranda",
"357": "veterinarians_office",
"358": "viaduct",
"359": "videostore",
"360": "village",
"361": "vineyard",
"362": "volcano",
"363": "volleyball_court_indoor",
"364": "volleyball_court_outdoor",
"365": "waiting_room",
"366": "warehouse_indoor",
"367": "water_tower",
"368": "waterfall_block",
"369": "waterfall_fan",
"370": "waterfall_plunge",
"371": "wetland",
"372": "wheat_field",
"373": "wind_farm",
"374": "windmill",
"375": "wine_cellar_barrel_storage",
"376": "wine_cellar_bottle_storage",
"377": "wrestling_ring_indoor",
"378": "yard",
"379": "youth_hostel"
}

View File

@@ -51,6 +51,20 @@ try:
except ImportError:
HAS_CV = False
# Load the index -> category-name table that ships next to this module.
# Loading is best-effort: when the file is missing, unreadable or malformed,
# PLACES365_CATEGORIES stays an empty dict so `.get()` lookups simply miss.
import json
from pathlib import Path

PLACES365_CATEGORIES = {}
try:
    with open(
        Path(__file__).parent / "places365_categories.json",
        "r",
        encoding="utf-8",
    ) as f:
        _loaded_categories = json.load(f)
except FileNotFoundError:
    # A missing table is not an error; stay silent, as the exists() check did.
    pass
except (OSError, ValueError) as e:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    print(f"[SCENE] Warning: Could not load Places365 categories: {e}")
else:
    if isinstance(_loaded_categories, dict):
        PLACES365_CATEGORIES = _loaded_categories
        print(f"[SCENE] Loaded {len(PLACES365_CATEGORIES)} Places365 categories")
    else:
        # Keep the empty dict: a list or scalar here would break `.get()` users.
        print(
            "[SCENE] Warning: Could not load Places365 categories: "
            f"expected a JSON object, got {type(_loaded_categories).__name__}"
        )
# 場景類型中英文對照
SCENE_TYPE_ZH = {
@@ -260,13 +274,16 @@ class SceneClassifier:
# 取得 top 5
top_5_probs, top_5_indices = torch.topk(probs, 5)
# 簡化:返回通用預測
# 簡化:使用 Places365 類別映射
results = []
for i in range(5):
prob = top_5_probs[0][i].item()
results.append(
{"scene_type": f"unknown_{i}", "confidence": prob}
)
idx = top_5_indices[0][i].item()
# 使用 Places365 類別名稱(如果可用)
scene_type = PLACES365_CATEGORIES.get(str(idx), f"scene_{idx}")
results.append({"scene_type": scene_type, "confidence": prob})
return results
except Exception as e:
@@ -461,40 +478,49 @@ class SceneClassifier:
"""
合併連續相同場景
注意:由於使用 ImageNet 模型而非 Places365這裡使用簡化分類
使用 Places365 類別名稱
"""
if not predictions:
return []
# 簡化:將整個影片視為一個場景
# 在沒有 Places365 模型的情況下,這是合理的預設行為
first_pred = predictions[0]
last_pred = predictions[-1]
# 統計所有預測的場景類型
scene_counts = {}
for pred in predictions:
if pred["predictions"]:
scene_type = pred["predictions"][0]["scene_type"]
scene_counts[scene_type] = scene_counts.get(scene_type, 0) + 1
# 使用平均信心度
avg_confidence = (
sum(
p["predictions"][0]["confidence"]
for p in predictions
if p["predictions"]
# 找出最常見的場景類型
if scene_counts:
most_common_scene = max(scene_counts.items(), key=lambda x: x[1])[0]
# 計算平均信心度
avg_confidence = (
sum(
p["predictions"][0]["confidence"]
for p in predictions
if p["predictions"]
)
/ len(predictions)
if predictions
else 0.0
)
/ len(predictions)
if predictions
else 0.0
)
return [
{
"start_time": first_pred["timestamp"],
"end_time": last_pred["timestamp"],
"scene_type": "indoor_general", # 預設為室內一般場景
"scene_type_zh": "室內場景",
"confidence": avg_confidence,
"top_5": first_pred["predictions"][:5],
}
]
first_pred = predictions[0]
last_pred = predictions[-1]
# 簡化:將整個影片視為一個場景
return [
{
"start_time": first_pred["timestamp"],
"end_time": last_pred["timestamp"],
"scene_type": most_common_scene,
"scene_type_zh": SCENE_TYPE_ZH.get(most_common_scene),
"confidence": avg_confidence,
"top_5": first_pred["predictions"][:5],
}
]
return []
# 在沒有 Places365 模型的情況下,這是合理的預設行為
if predictions:
first_pred = predictions[0]