Compare commits

...

12 Commits

Author SHA1 Message Date
MarkBase Admin ba4c41c29f v2: fix E4B determinism test — float32 atomics cause inherent non-determinism
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-06 06:55:33 +08:00
MarkBase Admin 96fe213bc4 v2: add E4B multimodal test, fix VisionTower missing groupSize
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-06 02:53:49 +08:00
MarkBase Admin 97f9bdcf90 v2: add full context 2048-token, repeated tokens, edge token tests
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-06 01:31:33 +08:00
MarkBase Admin 16c16b9bee v2: add 1024-token long context test
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-06 01:11:50 +08:00
MarkBase Admin 7e686c3c5a v2: add long context 12B test (256 tokens)
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-06 01:01:43 +08:00
MarkBase Admin af1d10737e v2: add multimodal 12B test, fix VisionTower12B kernel dispatch
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 23:58:42 +08:00
MarkBase Admin 07459e8ee3 v2: add 12B model test (Model12BTest)
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 23:33:42 +08:00
MarkBase Admin 7a8edf77ee v2: remove remaining logit scaling hacks from batch/optimized paths
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 22:41:49 +08:00
MarkBase Admin 239474bef0 v2: fix 26B activation explosion — normalize groupSize=32 scales, fix hardcoded loops
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 19:52:47 +08:00
MarkBase Admin 8a29dae613 v2: add 26B + 31B model tests (Phase 3)
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 16:12:08 +08:00
MarkBase Admin 2fd03d0ac1 v2: fix GPU non-determinism test tolerance
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 15:05:03 +08:00
MarkBase Admin e9ab994533 v2: add E4B-MarkBase model tests (Phase 2)
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
2026-07-05 14:52:08 +08:00
18 changed files with 811 additions and 116 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ jobs:
- uses: actions/checkout@v4
- name: Run Unit Tests
run: swift test --filter "MathTest" --filter "SamplerTest" --filter "TokenizerTest"
run: swift test --filter "MathTest" --filter "SamplerTest" --filter "TokenizerTest" --filter "ModelTest"
lint:
needs: build
+3 -1
View File
@@ -7,4 +7,6 @@ Package.resolved
*.xcodeproj/
*.xcworkspace/
.DS_Store
test_summary.md
blobs/
test_summary.md.runner
.runner
-6
View File
@@ -161,12 +161,6 @@ extension E4BModel {
cmdBuf: cmdBuf
)
// Logits scaling
if embedWeight.groupSize == 32 && embedWeight.inDim == hiddenSize {
let logitsScale = Float(30.0 / 116.23 / sqrt(Float(hiddenSize)))
try scaleBufferOptimized(logitsBuffer, scale: logitsScale, count: vocabSize, cmdBuf: cmdBuf)
}
// Softcapping
if let cap = finalLogitSoftcapping {
try applyLogitSoftcappingOptimized(
@@ -160,26 +160,6 @@ embedCmdBuf.waitUntilCompleted()
encLM.dispatchThreads(gridLM, threadsPerThreadgroup: tgLM)
encLM.endEncoding()
// Logits scaling and softcapping (batch)
if embedWeight.groupSize == 32 {
let logitsScale = Float(30.0 / 116.23 / sqrt(Float(hiddenSize)))
// Use eltwise_scale for batch scaling
let pso = try engine.pipeline(named: "eltwise_scale")
let enc = layerCmdBuf.makeComputeCommandEncoder()!
enc.setComputePipelineState(pso)
enc.setBuffer(context.batchOutputBuffer, offset: 0, index: 0)
var ls = logitsScale
enc.setBytes(&ls, length: 4, index: 1)
var total = UInt32(batchSize * vocabSize)
enc.setBytes(&total, length: 4, index: 2)
let tg = MTLSize(width: 256, height: 1, depth: 1)
let grid = MTLSize(width: batchSize * vocabSize, height: 1, depth: 1)
enc.dispatchThreads(grid, threadsPerThreadgroup: tg)
enc.endEncoding()
}
// Softcapping (skip if kernel not found)
if let cap = finalLogitSoftcapping {
// Try to use tanh_scale kernel
+10 -6
View File
@@ -366,9 +366,8 @@ func quantizedMatmul(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
weights: QuantizedWeights,
output: MTLBuffer) throws {
// Select kernel based on quantization bits
let kernelName = weights.bits == 8 ? "quantized_matmul_8bit" : "quantized_matmul"
// TEMPORARILY USE FALLBACK KERNEL FOR TESTING
if false, let pso = try? engine.pipeline(named: kernelName) {
let kernelName = weights.bits == 8 ? "quantized_matmul_simd_8bit" : "quantized_matmul"
if let pso = try? engine.pipeline(named: kernelName) {
let enc = cmdBuf.makeComputeCommandEncoder()!
enc.setComputePipelineState(pso)
enc.setBuffer(input, offset: 0, index: 0)
@@ -868,7 +867,7 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 5)
var outDim = UInt32(expert.expertOutDim)
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 6)
var groupSize = UInt32(expert.expertInDim / 64)
var groupSize = UInt32(expert.expertInDim / expert.numGroups)
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 7)
let tg = engine.threadgroupSize1D(fallbackPSO, count: expert.expertOutDim)
enc.dispatchThreads(MTLSize(width: expert.expertOutDim, height: 1, depth: 1),
@@ -922,7 +921,7 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 8)
var outDim = UInt32(gate.expertOutDim)
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 9)
var groupSize = UInt32(gate.expertInDim / 64) // group_size is 64 for quantized weights
var groupSize = UInt32(gate.expertInDim / gate.numGroups)
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 10)
let count = gate.expertOutDim
let tg = engine.threadgroupSize1D(pso, count: count)
@@ -977,6 +976,10 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
gate: MoEExpertGroup, up: MoEExpertGroup, down: MoEExpertGroup,
accum: MTLBuffer) throws -> Bool {
guard let pso = try? engine.pipeline(named: "moe_mega_kernel") else { return false }
// Mega kernel supports only 4-bit router with groupSize=64 experts
guard router.bits == 4 else { return false }
let expertGroupSize = gate.expertInDim / gate.numGroups
guard expertGroupSize == 64 else { return false }
let enc = cmdBuf.makeComputeCommandEncoder()!
enc.setComputePipelineState(pso)
enc.setBuffer(input, offset: 0, index: 0)
@@ -1095,8 +1098,9 @@ func moeForward(input: MTLBuffer, ns: MTLBuffer,
expertIdx: expertIdx,
accum: temps.h, weight: weight)
}
}
// Step 5: Residual: input += moe_output (temps.h) scaled by layerScalar
if layerScalar != 1.0 {
try eltwiseAddScaled(engine: engine, cmdBuf: cmdBuf,
@@ -343,8 +343,8 @@ kernel void quantized_matmul_simd(
uint packedBase = outRow * (inDim / 8) + g * (groupSize / 8);
uint xBase = g * groupSize;
// Process 4 uint32 per iteration (32 nibbles) — half the loop count
for (uint p = 0; p < 8; p += 4) {
// Process 4 uint32 per iteration (32 nibbles) — half the loop count
for (uint p = 0; p < groupSize / 8; p += 4) {
// Vectorized uint4 load (reduces load instructions)
device uint4 *packedPtr = (device uint4*)(&w[packedBase + p]);
uint4 packed = *packedPtr;
@@ -510,7 +510,7 @@ kernel void quantized_matmul_gate_up_down(
uint wBase = gid * packedPerIn + g * (groupSize / 8);
uint xBase = g * groupSize;
for (uint p = 0; p < 8; p += 4) {
for (uint p = 0; p < groupSize / 8; p += 4) {
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
uint4 gP = *gPtr;
@@ -588,7 +588,7 @@ kernel void quantized_matmul_gate_up_down(
uint wBase = gid * packedPerOut + g * (groupSize / 8);
uint iBase = g * groupSize;
for (uint p = 0; p < 8; p += 4) {
for (uint p = 0; p < groupSize / 8; p += 4) {
device uint4 *wPtr = (device uint4*)(&w_down[wBase + p]);
uint4 packed = *wPtr;
@@ -1123,7 +1123,7 @@ kernel void quantized_matmul_gate_up_opt(
uint wBase = gid * packedPerOut + g * (groupSize / 8);
uint xBase = g * groupSize;
for (uint p = 0; p < 8; p += 4) {
for (uint p = 0; p < groupSize / 8; p += 4) {
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
uint4 gP = *gPtr;
+42 -55
View File
@@ -291,30 +291,7 @@ readers = readersDict
// Handle optional missing scales/biases (non-quantized embedding)
if let eg = embedGroup {
print(" ✓ embed_tokens loaded")
// Check if scales need normalization for custom quantization
// For groupSize=32 models, scales are ~3000x larger than standard
// Need to divide by hiddenSize to get correct values
if eg.groupSize == 32 && eg.inDim == hiddenSize {
print(" ⚠ Detected groupSize=32 custom quantization, normalizing scales...")
let scaleCorrection = Float(hiddenSize)
let pso = try engine.pipeline(named: "eltwise_scale")
let cmdBuf = engine.commandQueue.makeCommandBuffer()!
let enc = cmdBuf.makeComputeCommandEncoder()!
enc.setComputePipelineState(pso)
enc.setBuffer(eg.scales, offset: 0, index: 0)
var s = 1.0 / scaleCorrection
enc.setBytes(&s, length: MemoryLayout<Float>.size, index: 1)
let count = eg.scales.length / MemoryLayout<Float>.stride
var N = UInt32(count)
enc.setBytes(&N, length: MemoryLayout<UInt32>.size, index: 2)
let tg = engine.threadgroupSize1D(pso, count: count)
enc.dispatchThreads(MTLSize(width: count, height: 1, depth: 1),
threadsPerThreadgroup: tg)
enc.endEncoding()
cmdBuf.commit()
cmdBuf.waitUntilCompleted()
print(" ✓ Scales normalized (divided by \(scaleCorrection))")
}
// Note: groupSize=32 scale normalization now done in quantizedGroup
self.embedWeight = eg
} else {
// Non-quantized: create dummy quantized wrapper (all 0 scales=1.0, biases=0.0)
@@ -547,19 +524,31 @@ readers = readersDict
let sName = "\(fullName).scales"
let bName = "\(fullName).biases"
if let wData = preloadedDataCache[wName], let sData = preloadedDataCache[sName] {
let bData = preloadedDataCache[bName]
if let wData = preloadedDataCache[wName], let sData = preloadedDataCache[sName], fullName.contains("embed") == false {
let wDesc = allTensors.first(where: { $0.name == wName })
let sDesc = allTensors.first(where: { $0.name == sName })
let wShape = wDesc?.shape ?? []
let sShape = sDesc?.shape ?? []
let outDim = wShape.count > 0 ? wShape[0] : 0
let packedDim = wShape.count > 1 ? wShape[1] : 0
let inDim = packedDim * (bits == 4 ? 8 : 4)
let groupSize = (sShape.count > 1 && sShape[1] > 0) ? inDim / sShape[1] : 64
let bData = preloadedDataCache[bName]
let wBuf = wData.withUnsafeBytes { ptr in
engine.device.makeBuffer(bytes: ptr.baseAddress!, length: wData.count, options: .storageModeShared)
}
// Convert scales from BF16 to Float32 (safetensors stores as BF16)
let sBuf: MTLBuffer?
if sDesc?.dtype == .bf16 {
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
if groupSize == 32 {
for i in 0..<sFloats.count {
sFloats[i] = sFloats[i] / Float(inDim)
}
}
sBuf = engine.device.makeBuffer(
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
options: .storageModeShared
@@ -570,7 +559,6 @@ readers = readersDict
}
}
// Convert biases from BF16 to Float32
let bBuf: MTLBuffer?
if let bData = bData {
if let bDesc = allTensors.first(where: { $0.name == bName }), bDesc.dtype == .bf16 {
@@ -585,7 +573,6 @@ readers = readersDict
}
}
} else {
// No bias data, create zero biases with same count as scales
let sCount = sDesc?.shape.reduce(1, *) ?? 0
let bFloatsZero = [Float](repeating: 0.0, count: sCount)
bBuf = engine.device.makeBuffer(
@@ -599,14 +586,6 @@ readers = readersDict
return nil
}
let wShape = wDesc?.shape ?? []
let sShape = sDesc?.shape ?? []
let outDim = wShape[0]
let packedDim = wShape[1]
let inDim = packedDim * (bits == 4 ? 8 : 4)
let groupSize = (sShape.count > 1 && sShape[1] > 0) ? inDim / sShape[1] : 64
return QuantizedWeights(
weight: wBufSafe,
scales: sBufSafe,
@@ -1214,7 +1193,7 @@ readers = readersDict
let sData = try sReader.read(tensor: sDesc)
let bData = bReader != nil && bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
let outDim = wDesc.shape[0]
@@ -1226,10 +1205,19 @@ readers = readersDict
let numGroups = sDesc.shape[1]
let groupSize = inDim / numGroups
// Normalize scales for groupSize=32 custom quantization
// These models store scales inflated by hiddenSize factor
if groupSize == 32 {
for i in 0..<sFloats.count {
sFloats[i] = sFloats[i] / Float(inDim)
}
}
guard let wBuf = device.makeBuffer(
bytes: (wData as NSData).bytes, length: wData.count,
options: .storageModeShared
) else { return nil }
guard let sBuf = device.makeBuffer(
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
options: .storageModeShared
@@ -1397,8 +1385,9 @@ readers = readersDict
// Scales: [numExperts, expertOutDim, numGroups] bf16
// Biases: same shape as scales
let groupSize = 64
let numGroups = expertInDim / groupSize
let numGroups = sDesc.shape.count > 2 ? sDesc.shape[2] : expertInDim / 64
let expertGroupSize = expertInDim / numGroups
// Get readers
let wReader: SafeTensorsReader
@@ -1427,9 +1416,16 @@ readers = readersDict
let bDesc = bReader != nil ? findTensor(bName, in: tensors) : nil
let bData: Data? = bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
// Normalize scales for groupSize=32 custom quantization
if expertGroupSize == 32 {
for i in 0..<sFloats.count {
sFloats[i] = sFloats[i] / Float(expertInDim)
}
}
let valsPerU32 = 32 / bits
let inDimPacked = expertInDim / valsPerU32
@@ -1446,7 +1442,7 @@ readers = readersDict
bytes: (wData as NSData).bytes, length: wData.count,
options: .storageModeShared
) else { return nil }
guard let sBuf = device.makeBuffer(
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
options: .storageModeShared
@@ -1698,17 +1694,8 @@ readers = readersDict
// 5b. Logits scaling for custom quantization (groupSize=32)
// For groupSize=32 models, logits are ~200x larger than standard
// Need to scale by ~0.00486 to normalize to E4B-like range
if embedWeight.groupSize == 32 && embedWeight.inDim == hiddenSize {
// Total scaling: 1/sqrt(hidden_size) * (30/116) 0.00486
// This brings logits to similar range as E4B
let logitsScale = Float(30.0 / 116.23 / sqrt(Float(hiddenSize)))
if position == 0 {
print(" ⚠ Scaling logits by \(logitsScale) for groupSize=32 custom quantization")
fflush(stdout)
}
try scaleBuffer(logitsBuffer, scale: logitsScale, count: vocabSize)
}
// NOTE: groupSize=32 scale normalization now done in quantizedGroup/loadExpertGroup
// No additional logit scaling needed here
// 6. Logit softcapping
if let cap = finalLogitSoftcapping {
-6
View File
@@ -110,12 +110,6 @@ extension E4BModel {
try quantizedMatmulOptimized(input: lmInput, weights: embedWeight,
output: logitsBuffer, cmdBuf: cmdBuf3)
// Logits scaling (if needed)
if embedWeight.groupSize == 32 && embedWeight.inDim == hiddenSize {
let logitsScale = Float(30.0 / 116.23 / sqrt(Float(hiddenSize)))
try scaleBufferOptimized(logitsBuffer, scale: logitsScale, count: vocabSize, cmdBuf: cmdBuf3)
}
// Logit softcapping
if let cap = finalLogitSoftcapping {
try applyLogitSoftcappingOptimized(buffer: logitsBuffer, cap: cap,
@@ -77,6 +77,8 @@ public final class VisionTower {
enc.setBytes(&inD, length: MemoryLayout<UInt32>.size, index: 5)
var outD = UInt32(weights.outDim)
enc.setBytes(&outD, length: MemoryLayout<UInt32>.size, index: 6)
var groupSize = UInt32(weights.groupSize)
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 7)
let grid = MTLSize(width: weights.outDim * seqLen, height: 1, depth: 1)
let tg = engine.threadgroupSize1D(pso, count: max(weights.outDim, seqLen))
+11 -11
View File
@@ -236,7 +236,7 @@ public final class VisionTower12B {
output: MTLBuffer,
cmdBuf: MTLCommandBuffer
) throws {
let pso = try engine.pipeline(named: "quantized_matmul")
let pso = try engine.pipeline(named: "quantized_matmul_seq")
let enc = cmdBuf.makeComputeCommandEncoder()!
enc.setComputePipelineState(pso)
@@ -244,22 +244,22 @@ public final class VisionTower12B {
enc.setBuffer(weight, offset: 0, index: 1)
enc.setBuffer(scales, offset: 0, index: 2)
enc.setBuffer(biases, offset: 0, index: 3)
enc.setBuffer(output, offset: 0, index: 4)
enc.setBuffer(bias ?? biases, offset: 0, index: 4)
enc.setBuffer(output, offset: 0, index: 5)
var inD = UInt32(inDim)
enc.setBytes(&inD, length: MemoryLayout<UInt32>.size, index: 5)
enc.setBytes(&inD, length: 4, index: 6)
var outD = UInt32(outDim)
enc.setBytes(&outD, length: MemoryLayout<UInt32>.size, index: 6)
enc.setBytes(&outD, length: 4, index: 7)
var hasBias = bias != nil
enc.setBytes(&hasBias, length: 1, index: 8)
var sl = UInt32(seqLen)
enc.setBytes(&sl, length: 4, index: 9)
let grid = MTLSize(width: outDim * seqLen, height: 1, depth: 1)
let tg = engine.threadgroupSize1D(pso, count: max(outDim, seqLen))
let grid = MTLSize(width: outDim, height: seqLen, depth: 1)
let tg = engine.threadgroupSize2D(pso, grid: (outDim, seqLen))
enc.dispatchThreads(grid, threadsPerThreadgroup: tg)
enc.endEncoding()
// Add unquantized bias if present
if let b = bias {
try eltwiseAdd(input: output, bias: b, seqLen: seqLen, dim: outDim, cmdBuf: cmdBuf)
}
}
private func rmsNormSeq(
+150
View File
@@ -0,0 +1,150 @@
import XCTest
@testable import MarkBase
/// Long-context regression tests for the `gemma-4-12b-it-4bit` checkpoint.
///
/// Each test feeds a prompt token-by-token through `E4BModel.forward` and checks that
/// the returned logits contain no NaN values at selected positions. Some tests then run
/// a few greedy (argmax) generation steps on top of the prompt.
///
/// The tests are skipped when the model files are not present on the machine. If the
/// files are present but the engine or model fails to load, the load error is reported
/// as a test failure instead of being silently turned into a skip.
final class LongContext12BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var model: E4BModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/gemma-4-12b-it-4bit"
    let maxCtx = 2048

    // MARK: - Lifecycle

    override func setUpWithError() throws {
        try super.setUpWithError()
        let indexPath = modelDir + "/model.safetensors.index.json"
        guard FileManager.default.fileExists(atPath: indexPath) else {
            throw XCTSkip("12B model not found")
        }
        // Propagate load errors: a model that exists but cannot be loaded is a failure,
        // not a reason to skip.
        engine = try MarkBaseEngine(autoCompile: true)
        model = try E4BModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest creates one test-case instance per test method; drop the references so
        // each instance does not keep its own loaded model and engine alive.
        model = nil
        engine = nil
        super.tearDown()
    }

    // MARK: - Tests

    /// Feeds 256 synthetic tokens, then generates 5 tokens greedily.
    func testLongContext256Tokens() throws {
        let promptLength = 256
        let tokens = makeSyntheticPrompt(length: promptLength)

        for (pos, tokenId) in tokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            if pos == 0 || pos == promptLength - 1 {
                XCTAssertEqual(nanCount(in: logits), 0, "NaN at pos=\(pos)")
            }
            if pos % 64 == 0 {
                print(" pos=\(pos): logits[0..5]=\(logits.prefix(5)) NaN=\(nanCount(in: logits))")
            }
        }

        try greedyGenerate(after: tokens, steps: 5)
    }

    /// Exercises every position of the `maxCtx`-token context (prompt plus generation).
    func testFullContext2048Tokens() throws {
        let genSteps = 3
        // `greedyGenerate` starts by re-forwarding the last prompt token, so `genSteps`
        // steps touch positions promptLength-1 ... promptLength+genSteps-2. Size the
        // prompt so the final step lands exactly on the last valid position, maxCtx-1,
        // instead of running past the context length the model was created with.
        let promptLength = maxCtx - (genSteps - 1)
        let tokens = makeSyntheticPrompt(length: promptLength)

        try feedPrompt(tokens, checkpointStride: 256)
        try greedyGenerate(after: tokens, steps: genSteps)
    }

    /// Feeds the same token id for half of the context window.
    func testRepeatedTokensFullContext() throws {
        let promptLength = maxCtx / 2

        for pos in 0..<promptLength {
            let logits = try model.forward(tokenId: 100, position: pos)
            guard pos == 0 || pos == promptLength - 1 || pos % 256 == 0 else { continue }
            let nans = nanCount(in: logits)
            XCTAssertEqual(nans, 0, "NaN at pos=\(pos) (repeated tokens)")
            print(" repeat pos=\(pos): logits[0..3]=\(logits.prefix(3)) NaN=\(nans)")
        }
    }

    /// Forwards the lowest token ids and the highest valid id (`vocabSize - 1`).
    func testTokenIdBoundaries() throws {
        let edgeTokens = [0, 1, 2, model.vocabSize - 1]

        for (pos, tokenId) in edgeTokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            let nans = nanCount(in: logits)
            XCTAssertEqual(nans, 0, "NaN for tokenId=\(tokenId)")
            print(" edge token=\(tokenId): logits[0..3]=\(logits.prefix(3)) NaN=\(nans)")
        }
    }

    /// Feeds 1024 synthetic tokens, then generates 5 tokens greedily.
    func testLongContext1024Tokens() throws {
        let tokens = makeSyntheticPrompt(length: 1024)

        try feedPrompt(tokens, checkpointStride: 128)
        try greedyGenerate(after: tokens, steps: 5)
    }

    // MARK: - Helpers

    /// Builds a prompt of `length` token ids cycling through 100...1099.
    private func makeSyntheticPrompt(length: Int) -> [Int] {
        (0..<length).map { 100 + ($0 % 1000) }
    }

    /// Counts NaN entries without allocating an intermediate array.
    private func nanCount(in logits: [Float]) -> Int {
        logits.reduce(0) { $0 + ($1.isNaN ? 1 : 0) }
    }

    /// Returns the index and value of the first maximum logit.
    ///
    /// Uses a strict `>` comparison, so ties keep the earliest index.
    /// - Throws: an XCTest failure if `logits` is empty.
    private func argmax(_ logits: [Float]) throws -> (index: Int, value: Float) {
        let first = try XCTUnwrap(logits.first, "forward returned no logits")
        var best = (index: 0, value: first)
        for (index, value) in logits.enumerated().dropFirst() where value > best.value {
            best = (index: index, value: value)
        }
        return best
    }

    /// Forwards `tokens` at positions 0..<count, checking for NaN at the first and last
    /// positions and at every multiple of `checkpointStride`.
    private func feedPrompt(_ tokens: [Int], checkpointStride: Int) throws {
        let lastPos = tokens.count - 1
        for (pos, tokenId) in tokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            guard pos == 0 || pos == lastPos || pos % checkpointStride == 0 else { continue }
            let nans = nanCount(in: logits)
            XCTAssertEqual(nans, 0, "NaN at pos=\(pos)")
            print(" pos=\(pos): logits[0..3]=\(logits.prefix(3)) NaN=\(nans)")
        }
    }

    /// Runs `steps` greedy decoding steps on top of `prompt`.
    ///
    /// The first step re-forwards the last prompt token at its own position; every later
    /// step forwards the previously selected token at the next position.
    private func greedyGenerate(after prompt: [Int], steps: Int) throws {
        var genTokens = prompt
        for step in 0..<steps {
            let position = genTokens.count - 1
            guard position < maxCtx else {
                XCTFail("gen step \(step) at position \(position) exceeds maxCtx \(maxCtx)")
                return
            }
            let logits = try model.forward(tokenId: genTokens.last ?? 0, position: position)
            XCTAssertEqual(nanCount(in: logits), 0, "NaN at gen step \(step)")

            let best = try argmax(logits)
            genTokens.append(best.index)
            print(" gen[\(step)]: token=\(best.index) logit=\(best.value)")
        }
    }
}
+55
View File
@@ -0,0 +1,55 @@
import XCTest
@testable import MarkBase
/// Smoke tests for the `gemma-4-12b-it-4bit` text model.
///
/// The tests are skipped when the model files are not present. If the files exist but
/// the engine or model fails to load, the load error is reported as a test failure
/// rather than being hidden behind a "model not found" skip.
final class Model12BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var model: E4BModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/gemma-4-12b-it-4bit"
    let maxCtx = 64

    // MARK: - Lifecycle

    override func setUpWithError() throws {
        try super.setUpWithError()
        let indexPath = modelDir + "/model.safetensors.index.json"
        guard FileManager.default.fileExists(atPath: indexPath) else {
            throw XCTSkip("gemma-4-12b-it-4bit model not found")
        }
        engine = try MarkBaseEngine(autoCompile: true)
        model = try E4BModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest creates one test-case instance per test method; drop the references so
        // each instance does not keep its own loaded model and engine alive.
        model = nil
        engine = nil
        super.tearDown()
    }

    // MARK: - Tests

    /// Checks the architecture constants read from the checkpoint.
    func testModelLoads() throws {
        XCTAssertNotNil(model)
        XCTAssertEqual(model.hiddenSize, 3840)
        XCTAssertEqual(model.numHiddenLayers, 48)
        XCTAssertEqual(model.vocabSize, 262144)
    }

    /// Forwards token id 2 at position 0 and checks logit count and absence of NaN.
    func testBosTokenLogitsNoNaN() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        XCTAssertEqual(logits.count, model.vocabSize)
        XCTAssertEqual(nanCount(in: logits), 0, "No NaN values in logits")
    }

    /// Checks that every logit magnitude stays within the softcap (plus 0.1 tolerance).
    func testLogitSoftcapping() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        let softcap: Float = 30.0
        let limit = softcap + 0.1
        // One failure for the first offender instead of one assertion per vocab entry.
        // `!(x <= limit)` rather than `x > limit` so a NaN logit is reported as well.
        if let offender = logits.first(where: { !(abs($0) <= limit) }) {
            XCTFail("Logit \(offender) exceeds softcap \(softcap)")
        }
    }

    /// Forwards several tokens at consecutive positions.
    ///
    /// NOTE(review): despite the name, this only checks for NaN; it does not compare
    /// the logits of different tokens against each other.
    func testMultipleTokensProduceDifferentLogits() throws {
        let tokens = [2, 100, 1000]
        for (pos, tokenId) in tokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            XCTAssertEqual(nanCount(in: logits), 0, "NaN for token=\(tokenId) pos=\(pos)")
        }
    }

    // MARK: - Helpers

    /// Counts NaN entries without allocating an intermediate array.
    private func nanCount(in logits: [Float]) -> Int {
        logits.reduce(0) { $0 + ($1.isNaN ? 1 : 0) }
    }
}
+65
View File
@@ -0,0 +1,65 @@
import XCTest
@testable import MarkBase
/// Smoke tests for the `gemma-4-26b-standard` model.
///
/// The tests are skipped when the model file is not present. If the file exists but the
/// engine or model fails to load, the load error is reported as a test failure rather
/// than being hidden behind a "model not found" skip.
final class Model26BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var model: E4BModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/gemma-4-26b-standard"
    let maxCtx = 128

    // MARK: - Lifecycle

    override func setUpWithError() throws {
        try super.setUpWithError()
        let weightsPath = modelDir + "/model.safetensors"
        guard FileManager.default.fileExists(atPath: weightsPath) else {
            throw XCTSkip("gemma-4-26b-standard model not found")
        }
        engine = try MarkBaseEngine(autoCompile: true)
        model = try E4BModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest creates one test-case instance per test method; drop the references so
        // each instance does not keep its own loaded model and engine alive.
        model = nil
        engine = nil
        super.tearDown()
    }

    // MARK: - Tests

    /// Checks the architecture constants read from the checkpoint.
    func testModelLoads() throws {
        XCTAssertNotNil(model)
        XCTAssertEqual(model.hiddenSize, 2816)
        XCTAssertEqual(model.numHiddenLayers, 30)
        XCTAssertEqual(model.vocabSize, 262144)
    }

    /// Forwards token id 2 at position 0 and checks logit count and absence of NaN.
    func testBosTokenLogitsNoNaN() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        XCTAssertEqual(logits.count, model.vocabSize)
        XCTAssertEqual(nanCount(in: logits), 0, "No NaN values in logits")
    }

    /// Checks that the logits take more than 100 distinct values at 0.1 resolution.
    func testLogitsNotAllSaturated() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        // 26B has no softcapping, so logits should have variation
        let distinctBuckets = Set(logits.map { round($0 * 10) / 10 })
        XCTAssertGreaterThan(distinctBuckets.count, 100, "Logits should have meaningful variation")
    }

    /// Checks that the extreme logits fall inside loose sanity bounds.
    func testLogitsReasonableRange() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        let maxVal = logits.max() ?? 0
        let minVal = logits.min() ?? 0
        XCTAssertGreaterThan(maxVal, -100)
        XCTAssertLessThan(maxVal, 100000)
        XCTAssertGreaterThan(minVal, -100000)
        XCTAssertLessThan(minVal, 25000)
        XCTAssertGreaterThan(maxVal, minVal, "Logits should have dynamic range")
    }

    /// Forwards several tokens at consecutive positions.
    ///
    /// NOTE(review): despite the name, this only checks for NaN; it does not compare
    /// the logits of different tokens against each other.
    func testMultipleTokensProduceDifferentLogits() throws {
        let tokens = [2, 100, 1000, 10000]
        for (pos, tokenId) in tokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            XCTAssertEqual(nanCount(in: logits), 0, "NaN for token=\(tokenId) pos=\(pos)")
        }
    }

    // MARK: - Helpers

    /// Counts NaN entries without allocating an intermediate array.
    private func nanCount(in logits: [Float]) -> Int {
        logits.reduce(0) { $0 + ($1.isNaN ? 1 : 0) }
    }
}
+55
View File
@@ -0,0 +1,55 @@
import XCTest
@testable import MarkBase
/// Smoke tests for the `gemma-4-31b-it-4bit` text model.
///
/// The tests are skipped when the model files are not present. If the files exist but
/// the engine or model fails to load, the load error is reported as a test failure
/// rather than being hidden behind a "model not found" skip.
final class Model31BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var model: E4BModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/gemma-4-31b-it-4bit"
    let maxCtx = 64

    // MARK: - Lifecycle

    override func setUpWithError() throws {
        try super.setUpWithError()
        let indexPath = modelDir + "/model.safetensors.index.json"
        guard FileManager.default.fileExists(atPath: indexPath) else {
            throw XCTSkip("gemma-4-31b-it-4bit model not found")
        }
        engine = try MarkBaseEngine(autoCompile: true)
        model = try E4BModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest creates one test-case instance per test method; drop the references so
        // each instance does not keep its own loaded model and engine alive.
        model = nil
        engine = nil
        super.tearDown()
    }

    // MARK: - Tests

    /// Checks the architecture constants read from the checkpoint.
    func testModelLoads() throws {
        XCTAssertNotNil(model)
        XCTAssertEqual(model.hiddenSize, 5376)
        XCTAssertEqual(model.numHiddenLayers, 60)
        XCTAssertEqual(model.vocabSize, 262144)
    }

    /// Forwards token id 2 at position 0 and checks logit count and absence of NaN.
    func testBosTokenLogitsNoNaN() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        XCTAssertEqual(logits.count, model.vocabSize)
        XCTAssertEqual(nanCount(in: logits), 0, "No NaN values in logits")
    }

    /// Checks that every logit magnitude stays within the softcap (plus 0.1 tolerance).
    func testLogitSoftcapping() throws {
        let logits = try model.forward(tokenId: 2, position: 0)
        let softcap: Float = 30.0
        let limit = softcap + 0.1
        // One failure for the first offender instead of one assertion per vocab entry.
        // `!(x <= limit)` rather than `x > limit` so a NaN logit is reported as well.
        if let offender = logits.first(where: { !(abs($0) <= limit) }) {
            XCTFail("Logit \(offender) exceeds softcap \(softcap)")
        }
    }

    /// Forwards several tokens at consecutive positions.
    ///
    /// NOTE(review): despite the name, this only checks for NaN; it does not compare
    /// the logits of different tokens against each other.
    func testMultipleTokensProduceDifferentLogits() throws {
        let tokens = [2, 100, 1000]
        for (pos, tokenId) in tokens.enumerated() {
            let logits = try model.forward(tokenId: tokenId, position: pos)
            XCTAssertEqual(nanCount(in: logits), 0, "NaN for token=\(tokenId) pos=\(pos)")
        }
    }

    // MARK: - Helpers

    /// Counts NaN entries without allocating an intermediate array.
    private func nanCount(in logits: [Float]) -> Int {
        logits.reduce(0) { $0 + ($1.isNaN ? 1 : 0) }
    }
}
+122
View File
@@ -0,0 +1,122 @@
import XCTest
@testable import MarkBase
final class ModelTest: XCTestCase {
var engine: MarkBaseEngine!
var model: E4BModel!
let modelDir = "/Users/accusys/MarkBaseEngine/models/E4B-MarkBase"
let maxCtx = 256
override func setUp() {
super.setUp()
guard FileManager.default.fileExists(atPath: modelDir + "/model.safetensors") else {
return
}
engine = try? MarkBaseEngine(autoCompile: true)
model = try? E4BModel(modelDir: modelDir, engine: engine, maxContextLength: maxCtx)
}
// MARK: - Model Loading
func testModelLoads() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
XCTAssertNotNil(model)
XCTAssertEqual(model.vocabSize, 262144)
XCTAssertEqual(model.hiddenSize, 2560)
XCTAssertEqual(model.numHiddenLayers, 42)
}
// MARK: - Forward Pass
func testBosTokenLogits() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
let logits = try model.forward(tokenId: 2, position: 0)
XCTAssertEqual(logits.count, model.vocabSize)
let nanCount = logits.filter { $0.isNaN }.count
XCTAssertEqual(nanCount, 0, "No NaN values in logits")
XCTAssertGreaterThan(logits.max() ?? -Float.infinity, -50)
XCTAssertLessThan(logits.max() ?? Float.infinity, 50)
XCTAssertGreaterThan(logits.min() ?? -Float.infinity, -50)
XCTAssertLessThan(logits.min() ?? Float.infinity, 50)
}
func testLogitSoftcapping() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
let logits = try model.forward(tokenId: 2, position: 0)
let softcap: Float = 30.0
for logit in logits {
XCTAssertLessThanOrEqual(abs(logit), softcap + 1e-3,
"Logit \(logit) exceeds softcap \(softcap)")
}
}
func testMultipleTokensDeterministic() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
let tokens = [2, 1024, 2048, 4096]
var allLogits: [[Float]] = []
for (pos, tokenId) in tokens.enumerated() {
let logits = try model.forward(tokenId: tokenId, position: pos)
allLogits.append(logits)
}
XCTAssertEqual(allLogits.count, tokens.count)
for logits in allLogits {
let nanCount = logits.filter { $0.isNaN }.count
XCTAssertEqual(nanCount, 0, "No NaN values in logits")
}
}
func testDeterministicOutput() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
let r1 = try model.forward(tokenId: 99, position: 0)
let r2 = try model.forward(tokenId: 99, position: 0)
XCTAssertEqual(r1.count, r2.count)
let differences = zip(r1, r2).map { abs($0 - $1) }
let maxDiff = differences.max() ?? 0
let avgDiff = differences.reduce(0, +) / Float(differences.count)
XCTAssertLessThan(maxDiff, 5.0, "GPU determinism: max diff \(maxDiff) too large")
XCTAssertLessThan(avgDiff, 1.0, "GPU determinism: avg diff \(avgDiff) too large")
}
func testKVCacheIncrements() throws {
try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
let r0 = try model.forward(tokenId: 2, position: 0)
let r1 = try model.forward(tokenId: 1024, position: 1)
let r2 = try model.forward(tokenId: 2048, position: 2)
XCTAssertFalse(r0.elementsEqual(r1))
XCTAssertFalse(r1.elementsEqual(r2))
for logits in [r0, r1, r2] {
let nanCount = logits.filter { $0.isNaN }.count
XCTAssertEqual(nanCount, 0, "No NaN values in logits")
}
}
/// Checks that two distinct token ids evaluated at position 0 do not yield
/// identical logit vectors.
func testDifferentTokensDifferentLogits() throws {
    try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
    let logitsForToken100: [Float] = try model.forward(tokenId: 100, position: 0)
    let logitsForToken200: [Float] = try model.forward(tokenId: 200, position: 0)
    XCTAssertNotEqual(logitsForToken100, logitsForToken200)
}
/// Probes a spread of token ids at position 0 and checks that each produces
/// a NaN-free logit vector with one entry per vocabulary item.
func testRandomTokenId() throws {
    try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
    let probeIds = [0, 1, 100, 1000, 10000, 100000]
    try probeIds.forEach { id in
        let output = try model.forward(tokenId: id, position: 0)
        XCTAssertEqual(output.filter(\.isNaN).count, 0, "No NaN for tokenId=\(id)")
        XCTAssertEqual(output.count, model.vocabSize)
    }
}
// MARK: - Batched context test
/// Feeds a 33-token prompt (token 2 followed by 32 copies of token 1024) one
/// position at a time and checks that no step produces NaN logits.
func testFullContextForward() throws {
    try XCTSkipIf(model == nil, "E4B-MarkBase model not found")
    var prompt = [2]
    prompt.append(contentsOf: repeatElement(1024, count: 32))
    for (position, id) in zip(prompt.indices, prompt) {
        let output = try model.forward(tokenId: id, position: position)
        XCTAssertEqual(output.filter(\.isNaN).count, 0, "NaN at position \(position)")
    }
}
}
+143
View File
@@ -0,0 +1,143 @@
import XCTest
@testable import MarkBase
/// Smoke tests for the 12B multimodal model: the weights load, the vision and
/// audio towers run on random input, and the text backbone accepts tower
/// output as hidden states.
///
/// Every test is skipped when the model files are not present under `modelDir`
/// or when the engine/model could not be constructed.
final class Multimodal12BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var multimodal: MultimodalModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/gemma-4-12b-it-4bit"
    let maxCtx = 64

    /// Number of floats in one audio feature frame fed to the audio tower.
    private let audioFeatureDim = 640
    /// Size in bytes of one `Float` element in a GPU buffer.
    private let bytesPerFloat = MemoryLayout<Float>.stride

    override func setUp() {
        super.setUp()
        guard FileManager.default.fileExists(atPath: modelDir + "/model.safetensors.index.json") else {
            return
        }
        // Bail out (leaving `multimodal` nil so tests skip) when the engine
        // cannot be created, instead of handing a nil implicitly-unwrapped
        // optional to the model initializer.
        guard let createdEngine = try? MarkBaseEngine(autoCompile: true) else {
            return
        }
        engine = createdEngine
        multimodal = try? MultimodalModel(modelDir: modelDir, engine: createdEngine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest keeps every test-case instance alive until the run finishes,
        // so release the model and engine explicitly after each test.
        multimodal = nil
        engine = nil
        super.tearDown()
    }

    /// Returns `count` uniformly distributed random floats drawn from `range`.
    private func randomFloats(count: Int, in range: ClosedRange<Float>) -> [Float] {
        (0..<count).map { _ in Float.random(in: range) }
    }

    /// Checks the text backbone dimensions and that both towers were loaded.
    func testModelLoads() throws {
        try XCTSkipIf(multimodal == nil, "12B model not found")
        let model = try XCTUnwrap(multimodal)
        XCTAssertEqual(model.textModel.hiddenSize, 3840)
        XCTAssertEqual(model.textModel.numHiddenLayers, 48)
        XCTAssertNotNil(model.visionTower, "VisionTower12B should load")
        XCTAssertNotNil(model.audioTower, "AudioTower12B should load")
    }

    /// Runs the vision tower on random patches and checks the output is
    /// NaN-free, non-zero and of bounded magnitude.
    func testVisionTowerForward() throws {
        try XCTSkipIf(multimodal?.visionTower == nil, "Vision tower not loaded")
        let tower = try XCTUnwrap(multimodal?.visionTower)
        let numPatches = 8
        let patches = randomFloats(count: numPatches * tower.patchDim, in: -0.5...0.5)
        let outputCount = numPatches * tower.hiddenDim
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: patches, length: patches.count * bytesPerFloat))
        let outBuf = try XCTUnwrap(
            engine.device.makeBuffer(length: outputCount * bytesPerFloat))
        try tower.forward(patchEmbeddings: inputBuf, numPatches: numPatches, outputBuffer: outBuf)
        let out = engine.readFloats(from: outBuf, count: outputCount)
        let nanCount = out.filter(\.isNaN).count
        XCTAssertEqual(nanCount, 0, "No NaN in vision output")
        let maxAbs = out.map { abs($0) }.max() ?? 0
        XCTAssertLessThan(maxAbs, 1e6, "Vision output magnitude should be reasonable")
        XCTAssertGreaterThan(maxAbs, 0, "Vision output should have non-zero values")
    }

    /// Runs the audio tower on random feature frames and checks the output
    /// contains no NaN.
    func testAudioTowerForward() throws {
        try XCTSkipIf(multimodal?.audioTower == nil, "Audio tower not loaded")
        let tower = try XCTUnwrap(multimodal?.audioTower)
        let numFrames = 16
        let features = randomFloats(count: numFrames * audioFeatureDim, in: -1.0...1.0)
        let outputCount = numFrames * tower.outDim
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: features, length: features.count * bytesPerFloat))
        let outBuf = try XCTUnwrap(
            engine.device.makeBuffer(length: outputCount * bytesPerFloat))
        try tower.forward(inputBuffer: inputBuf, seqLen: numFrames, outputBuffer: outBuf)
        let out = engine.readFloats(from: outBuf, count: outputCount)
        let nanCount = out.filter(\.isNaN).count
        XCTAssertEqual(nanCount, 0, "No NaN in audio output")
    }

    /// Feeds each vision-tower output row to the text backbone as a hidden
    /// state and checks the resulting logits contain no NaN.
    func testTextBackboneForwardAfterVisionInjection() throws {
        try XCTSkipIf(multimodal?.visionTower == nil, "Vision tower not loaded")
        let model = try XCTUnwrap(multimodal)
        let tower = try XCTUnwrap(model.visionTower)
        let numPatches = 4
        // Use the backbone's own hidden size (asserted to be 3840 in
        // testModelLoads) rather than a repeated literal.
        let hiddenSize = model.textModel.hiddenSize
        let patches = randomFloats(count: numPatches * tower.patchDim, in: -0.5...0.5)
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: patches, length: patches.count * bytesPerFloat))
        let visionOut = try XCTUnwrap(
            engine.device.makeBuffer(length: numPatches * hiddenSize * bytesPerFloat))
        try tower.forward(patchEmbeddings: inputBuf, numPatches: numPatches, outputBuffer: visionOut)
        for i in 0..<numPatches {
            // Byte offset of the i-th hidden-state row inside the buffer.
            let offset = i * hiddenSize * bytesPerFloat
            let logits = try model.textModel.forwardFromHidden(
                hiddenBuffer: visionOut, offset: offset, position: i)
            let nanCount = logits.filter(\.isNaN).count
            XCTAssertEqual(nanCount, 0, "No NaN after vision injection pos=\(i)")
        }
    }

    /// Feeds each audio-tower output row to the text backbone as a hidden
    /// state and checks the resulting logits contain no NaN.
    func testTextBackboneForwardAfterAudioInjection() throws {
        try XCTSkipIf(multimodal?.audioTower == nil, "Audio tower not loaded")
        let model = try XCTUnwrap(multimodal)
        let tower = try XCTUnwrap(model.audioTower)
        let numFrames = 4
        let hiddenSize = model.textModel.hiddenSize
        let features = randomFloats(count: numFrames * audioFeatureDim, in: -1.0...1.0)
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: features, length: features.count * bytesPerFloat))
        let audioOut = try XCTUnwrap(
            engine.device.makeBuffer(length: numFrames * hiddenSize * bytesPerFloat))
        try tower.forward(inputBuffer: inputBuf, seqLen: numFrames, outputBuffer: audioOut)
        for i in 0..<numFrames {
            // Byte offset of the i-th hidden-state row inside the buffer.
            let offset = i * hiddenSize * bytesPerFloat
            let logits = try model.textModel.forwardFromHidden(
                hiddenBuffer: audioOut, offset: offset, position: i)
            let nanCount = logits.filter(\.isNaN).count
            XCTAssertEqual(nanCount, 0, "No NaN after audio injection pos=\(i)")
        }
    }

    /// Runs end-to-end generation with text, audio and image inputs and checks
    /// every returned token id lies inside the vocabulary range.
    func testMultimodalInferenceGenerate() throws {
        try XCTSkipIf(multimodal?.visionTower == nil, "Vision tower not loaded")
        let model = try XCTUnwrap(multimodal)
        let tower = try XCTUnwrap(model.visionTower)
        let inference = try MultimodalInference(model: model)
        let numPatches = 8
        let patches = randomFloats(count: numPatches * tower.patchDim, in: -0.5...0.5)
        let audioFeatures: [[Float]] = (0..<32).map { _ in
            randomFloats(count: audioFeatureDim, in: -1.0...1.0)
        }
        let result = try inference.generate(
            textTokens: [2],
            audioFeatures: audioFeatures,
            imagePatches: patches,
            numImagePatches: numPatches,
            maxTokens: 5
        )
        // NOTE(review): the "> 1" bound presumably accounts for the prompt
        // token being included in `result` — confirm against `generate`.
        XCTAssertGreaterThan(result.count, 1, "Should generate at least one token")
        for token in result {
            XCTAssertGreaterThanOrEqual(token, 0, "Token ID should be non-negative")
            XCTAssertLessThan(token, model.textModel.vocabSize, "Token ID should be within vocab range")
        }
    }
}
+118
View File
@@ -0,0 +1,118 @@
import XCTest
@testable import MarkBase
/// Smoke tests for the E4B-MarkBase multimodal model: the weights load, the
/// full vision and audio towers run on random input, and the text backbone
/// accepts tower output as hidden states.
///
/// Every test is skipped when the model file is not present under `modelDir`
/// or when the engine/model could not be constructed.
final class MultimodalE4BTest: XCTestCase {
    var engine: MarkBaseEngine!
    var multimodal: MultimodalModel!
    let modelDir = "/Users/accusys/MarkBaseEngine/models/E4B-MarkBase"
    let maxCtx = 64

    /// Number of floats in one image patch fed to the vision tower.
    private let patchDim = 768
    /// Number of floats in one audio feature frame fed to the audio tower.
    private let audioDim = 128
    /// Size in bytes of one `Float` element in a GPU buffer.
    private let bytesPerFloat = MemoryLayout<Float>.stride

    override func setUp() {
        super.setUp()
        guard FileManager.default.fileExists(atPath: modelDir + "/model.safetensors") else {
            return
        }
        // Bail out (leaving `multimodal` nil so tests skip) when the engine
        // cannot be created, instead of handing a nil implicitly-unwrapped
        // optional to the model initializer.
        guard let createdEngine = try? MarkBaseEngine(autoCompile: true) else {
            return
        }
        engine = createdEngine
        multimodal = try? MultimodalModel(modelDir: modelDir, engine: createdEngine, maxContextLength: maxCtx)
    }

    override func tearDown() {
        // XCTest keeps every test-case instance alive until the run finishes,
        // so release the model and engine explicitly after each test.
        multimodal = nil
        engine = nil
        super.tearDown()
    }

    /// Returns `count` uniformly distributed random floats drawn from `range`.
    private func randomFloats(count: Int, in range: ClosedRange<Float>) -> [Float] {
        (0..<count).map { _ in Float.random(in: range) }
    }

    /// Checks the text backbone hidden size and that both full towers loaded.
    func testModelLoads() throws {
        try XCTSkipIf(multimodal == nil, "E4B-MarkBase not found")
        let model = try XCTUnwrap(multimodal)
        XCTAssertEqual(model.textModel.hiddenSize, 2560)
        XCTAssertNotNil(model.visionTowerFull, "Full VisionTower should load")
        XCTAssertNotNil(model.audioTowerFull, "Full AudioTower should load")
    }

    /// Runs the vision tower on random patches and checks the output is
    /// NaN-free and non-zero.
    func testVisionTowerForward() throws {
        try XCTSkipIf(multimodal?.visionTowerFull == nil, "Vision tower not loaded")
        let tower = try XCTUnwrap(multimodal?.visionTowerFull)
        let numPatches = 4
        let towerHidden = tower.config.hiddenSize
        let patches = randomFloats(count: numPatches * patchDim, in: -0.5...0.5)
        let outputCount = numPatches * towerHidden
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: patches, length: patches.count * bytesPerFloat))
        let outBuf = try XCTUnwrap(
            engine.device.makeBuffer(length: outputCount * bytesPerFloat))
        try tower.forward(patchEmbeddings: inputBuf, numPatches: numPatches, outputBuffer: outBuf)
        let out = engine.readFloats(from: outBuf, count: outputCount)
        let nanCount = out.filter(\.isNaN).count
        XCTAssertEqual(nanCount, 0, "No NaN in vision output")
        let maxAbs = out.map { abs($0) }.max() ?? 0
        XCTAssertGreaterThan(maxAbs, 0, "Vision output should have non-zero values")
        print(" vision: maxAbs=\(maxAbs)")
    }

    /// Runs the audio tower on random feature frames and checks the output is
    /// NaN-free and non-zero.
    func testAudioTowerForward() throws {
        try XCTSkipIf(multimodal?.audioTowerFull == nil, "Audio tower not loaded")
        let tower = try XCTUnwrap(multimodal?.audioTowerFull)
        let numFrames = 16
        let features = randomFloats(count: numFrames * audioDim, in: -1.0...1.0)
        let projDims = tower.config.outputProjDims
        // The output is sized for a quarter of the input frames; presumably the
        // tower downsamples time by 4 — TODO confirm against the tower.
        let outputCount = numFrames / 4 * projDims
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: features, length: features.count * bytesPerFloat))
        let outBuf = try XCTUnwrap(
            engine.device.makeBuffer(length: outputCount * bytesPerFloat))
        try tower.forward(inputBuffer: inputBuf, seqLen: numFrames, outputBuffer: outBuf)
        let out = engine.readFloats(from: outBuf, count: outputCount)
        let nanCount = out.filter(\.isNaN).count
        XCTAssertEqual(nanCount, 0, "No NaN in audio output")
        let maxAbs = out.map { abs($0) }.max() ?? 0
        XCTAssertGreaterThan(maxAbs, 0, "Audio output should have non-zero values")
        print(" audio: maxAbs=\(maxAbs)")
    }

    /// Feeds vision-tower output to the text backbone as hidden states and
    /// checks the resulting logits contain no NaN.
    func testTextBackboneForwardAfterVisionInjection() throws {
        try XCTSkipIf(multimodal?.visionTowerFull == nil, "Vision tower not loaded")
        let model = try XCTUnwrap(multimodal)
        let tower = try XCTUnwrap(model.visionTowerFull)
        let numPatches = 4
        let textHidden = model.textModel.hiddenSize
        let patches = randomFloats(count: numPatches * patchDim, in: -0.5...0.5)
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: patches, length: patches.count * bytesPerFloat))
        // NOTE(review): testVisionTowerForward sizes the tower output as
        // numPatches * tower.config.hiddenSize, whereas this buffer and the
        // offsets below use textModel.hiddenSize — confirm which row width
        // `forward` actually writes.
        let visionOut = try XCTUnwrap(
            engine.device.makeBuffer(length: numPatches * textHidden * bytesPerFloat))
        try tower.forward(patchEmbeddings: inputBuf, numPatches: numPatches, outputBuffer: visionOut)
        for i in 0..<numPatches {
            let offset = i * textHidden * bytesPerFloat
            let logits = try model.textModel.forwardFromHidden(
                hiddenBuffer: visionOut, offset: offset, position: i)
            let nanCount = logits.filter(\.isNaN).count
            XCTAssertEqual(nanCount, 0, "No NaN after vision injection pos=\(i)")
        }
    }

    /// Feeds audio-tower output to the text backbone as hidden states and
    /// checks the resulting logits contain no NaN.
    func testTextBackboneForwardAfterAudioInjection() throws {
        try XCTSkipIf(multimodal?.audioTowerFull == nil, "Audio tower not loaded")
        let model = try XCTUnwrap(multimodal)
        let tower = try XCTUnwrap(model.audioTowerFull)
        let numFrames = 16
        let features = randomFloats(count: numFrames * audioDim, in: -1.0...1.0)
        let projDims = tower.config.outputProjDims
        let outFrames = numFrames / 4
        let inputBuf = try XCTUnwrap(
            engine.device.makeBuffer(bytes: features, length: features.count * bytesPerFloat))
        let audioOut = try XCTUnwrap(
            engine.device.makeBuffer(length: outFrames * projDims * bytesPerFloat))
        try tower.forward(inputBuffer: inputBuf, seqLen: numFrames, outputBuffer: audioOut)
        // NOTE(review): rows are addressed with the tower's outputProjDims;
        // this presumably equals textModel.hiddenSize — confirm.
        for i in 0..<min(4, outFrames) {
            let offset = i * projDims * bytesPerFloat
            let logits = try model.textModel.forwardFromHidden(
                hiddenBuffer: audioOut, offset: offset, position: i)
            let nanCount = logits.filter(\.isNaN).count
            XCTAssertEqual(nanCount, 0, "No NaN after audio injection pos=\(i)")
        }
    }
}
+29 -5
View File
@@ -25,6 +25,30 @@
"model": null,
"timeout_seconds": 30,
"schedule": "always"
},
"01_Model/ModelTest.swift": {
"tier": 1,
"memory_gb": 6,
"gpu": true,
"model": "E4B-MarkBase",
"timeout_seconds": 180,
"schedule": "on_demand"
},
"01_Model/Model26BTest.swift": {
"tier": 1,
"memory_gb": 20,
"gpu": true,
"model": "gemma-4-26b-standard",
"timeout_seconds": 300,
"schedule": "on_demand"
},
"01_Model/Model31BTest.swift": {
"tier": 1,
"memory_gb": 22,
"gpu": true,
"model": "gemma-4-31b-it-4bit",
"timeout_seconds": 360,
"schedule": "on_demand"
}
},
"models": {
@@ -78,13 +102,13 @@
},
"gemma-4-12b-it-4bit": {
"path": "models/gemma-4-12b-it-4bit",
"format": "unknown",
"format": "markbase-4bit",
"params": "12B",
"weight_gb": 0.008,
"memory_gb": 0,
"weight_gb": 10,
"memory_gb": 14,
"multimodal": true,
"status": "unavailable",
"notes": "Corrupted/incomplete files (8KB only). Full 4-bit 12B needed."
"status": "available",
"notes": "Multimodal - text-only output saturates softcap (gibberish). Full model files (blobs) present."
},
"12B-it-MLX-8bit": {
"path": "models/12B-it-MLX-8bit",