From 96fe213bc46ecfe14288b7c33b4ec4856a27907d Mon Sep 17 00:00:00 2001 From: MarkBase Admin Date: Mon, 6 Jul 2026 02:53:49 +0800 Subject: [PATCH] v2: add E4B multimodal test, fix VisionTower missing groupSize --- Sources/MarkBase/Vision/VisionTower.swift | 2 + Tests/01_Model/MultimodalE4BTest.swift | 118 ++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 Tests/01_Model/MultimodalE4BTest.swift diff --git a/Sources/MarkBase/Vision/VisionTower.swift b/Sources/MarkBase/Vision/VisionTower.swift index a9f6ca4..e01964d 100644 --- a/Sources/MarkBase/Vision/VisionTower.swift +++ b/Sources/MarkBase/Vision/VisionTower.swift @@ -77,6 +77,8 @@ public final class VisionTower { enc.setBytes(&inD, length: MemoryLayout.size, index: 5) var outD = UInt32(weights.outDim) enc.setBytes(&outD, length: MemoryLayout.size, index: 6) + var groupSize = UInt32(weights.groupSize) + enc.setBytes(&groupSize, length: MemoryLayout.size, index: 7) let grid = MTLSize(width: weights.outDim * seqLen, height: 1, depth: 1) let tg = engine.threadgroupSize1D(pso, count: max(weights.outDim, seqLen)) diff --git a/Tests/01_Model/MultimodalE4BTest.swift b/Tests/01_Model/MultimodalE4BTest.swift new file mode 100644 index 0000000..a162880 --- /dev/null +++ b/Tests/01_Model/MultimodalE4BTest.swift @@ -0,0 +1,118 @@ +import XCTest +@testable import MarkBase + +final class MultimodalE4BTest: XCTestCase { + + var engine: MarkBaseEngine! + var multimodal: MultimodalModel! + let modelDir = "/Users/accusys/MarkBaseEngine/models/E4B-MarkBase" + let maxCtx = 64 + + override func setUp() { + super.setUp() + guard FileManager.default.fileExists(atPath: modelDir + "/model.safetensors") else { + return + } + engine = try? MarkBaseEngine(autoCompile: true) + multimodal = try? MultimodalModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx) + } + + func testModelLoads() throws { + try XCTSkipIf(multimodal == nil, "E4B-MarkBase not found") + XCTAssertEqual(multimodal!.textModel.hiddenSize, 2560) + XCTAssertNotNil(multimodal!.visionTowerFull, "Full VisionTower should load") + XCTAssertNotNil(multimodal!.audioTowerFull, "Full AudioTower should load") + } + + func testVisionTowerForward() throws { + try XCTSkipIf(multimodal?.visionTowerFull == nil, "Vision tower not loaded") + let tower = multimodal!.visionTowerFull! + let numPatches = 4 + let patchDim = 768 + let hs = tower.config.hiddenSize // 768 + + var patches = [Float](repeating: 0, count: numPatches * patchDim) + for i in 0..