v2: fix 26B activation explosion — normalize groupSize=32 scales, fix hardcoded loops
This commit is contained in:
@@ -366,9 +366,8 @@ func quantizedMatmul(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
|
|||||||
weights: QuantizedWeights,
|
weights: QuantizedWeights,
|
||||||
output: MTLBuffer) throws {
|
output: MTLBuffer) throws {
|
||||||
// Select kernel based on quantization bits
|
// Select kernel based on quantization bits
|
||||||
let kernelName = weights.bits == 8 ? "quantized_matmul_8bit" : "quantized_matmul"
|
let kernelName = weights.bits == 8 ? "quantized_matmul_simd_8bit" : "quantized_matmul"
|
||||||
// TEMPORARILY USE FALLBACK KERNEL FOR TESTING
|
if let pso = try? engine.pipeline(named: kernelName) {
|
||||||
if false, let pso = try? engine.pipeline(named: kernelName) {
|
|
||||||
let enc = cmdBuf.makeComputeCommandEncoder()!
|
let enc = cmdBuf.makeComputeCommandEncoder()!
|
||||||
enc.setComputePipelineState(pso)
|
enc.setComputePipelineState(pso)
|
||||||
enc.setBuffer(input, offset: 0, index: 0)
|
enc.setBuffer(input, offset: 0, index: 0)
|
||||||
@@ -868,7 +867,7 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
|
|||||||
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 5)
|
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 5)
|
||||||
var outDim = UInt32(expert.expertOutDim)
|
var outDim = UInt32(expert.expertOutDim)
|
||||||
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 6)
|
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 6)
|
||||||
var groupSize = UInt32(expert.expertInDim / 64)
|
var groupSize = UInt32(expert.expertInDim / expert.numGroups)
|
||||||
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 7)
|
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 7)
|
||||||
let tg = engine.threadgroupSize1D(fallbackPSO, count: expert.expertOutDim)
|
let tg = engine.threadgroupSize1D(fallbackPSO, count: expert.expertOutDim)
|
||||||
enc.dispatchThreads(MTLSize(width: expert.expertOutDim, height: 1, depth: 1),
|
enc.dispatchThreads(MTLSize(width: expert.expertOutDim, height: 1, depth: 1),
|
||||||
@@ -922,7 +921,7 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
|
|||||||
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 8)
|
enc.setBytes(&inDim, length: MemoryLayout<UInt32>.size, index: 8)
|
||||||
var outDim = UInt32(gate.expertOutDim)
|
var outDim = UInt32(gate.expertOutDim)
|
||||||
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 9)
|
enc.setBytes(&outDim, length: MemoryLayout<UInt32>.size, index: 9)
|
||||||
var groupSize = UInt32(gate.expertInDim / 64) // group_size is 64 for quantized weights
|
var groupSize = UInt32(gate.expertInDim / gate.numGroups)
|
||||||
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 10)
|
enc.setBytes(&groupSize, length: MemoryLayout<UInt32>.size, index: 10)
|
||||||
let count = gate.expertOutDim
|
let count = gate.expertOutDim
|
||||||
let tg = engine.threadgroupSize1D(pso, count: count)
|
let tg = engine.threadgroupSize1D(pso, count: count)
|
||||||
@@ -977,6 +976,10 @@ func quantizedMatmulExpert(engine: MarkBaseEngine, cmdBuf: MTLCommandBuffer,
|
|||||||
gate: MoEExpertGroup, up: MoEExpertGroup, down: MoEExpertGroup,
|
gate: MoEExpertGroup, up: MoEExpertGroup, down: MoEExpertGroup,
|
||||||
accum: MTLBuffer) throws -> Bool {
|
accum: MTLBuffer) throws -> Bool {
|
||||||
guard let pso = try? engine.pipeline(named: "moe_mega_kernel") else { return false }
|
guard let pso = try? engine.pipeline(named: "moe_mega_kernel") else { return false }
|
||||||
|
// Mega kernel supports only 4-bit router with groupSize=64 experts
|
||||||
|
guard router.bits == 4 else { return false }
|
||||||
|
let expertGroupSize = gate.expertInDim / gate.numGroups
|
||||||
|
guard expertGroupSize == 64 else { return false }
|
||||||
let enc = cmdBuf.makeComputeCommandEncoder()!
|
let enc = cmdBuf.makeComputeCommandEncoder()!
|
||||||
enc.setComputePipelineState(pso)
|
enc.setComputePipelineState(pso)
|
||||||
enc.setBuffer(input, offset: 0, index: 0)
|
enc.setBuffer(input, offset: 0, index: 0)
|
||||||
@@ -1095,8 +1098,9 @@ func moeForward(input: MTLBuffer, ns: MTLBuffer,
|
|||||||
expertIdx: expertIdx,
|
expertIdx: expertIdx,
|
||||||
accum: temps.h, weight: weight)
|
accum: temps.h, weight: weight)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Step 5: Residual: input += moe_output (temps.h) scaled by layerScalar ──
|
// ── Step 5: Residual: input += moe_output (temps.h) scaled by layerScalar ──
|
||||||
if layerScalar != 1.0 {
|
if layerScalar != 1.0 {
|
||||||
try eltwiseAddScaled(engine: engine, cmdBuf: cmdBuf,
|
try eltwiseAddScaled(engine: engine, cmdBuf: cmdBuf,
|
||||||
|
|||||||
@@ -343,8 +343,8 @@ kernel void quantized_matmul_simd(
|
|||||||
uint packedBase = outRow * (inDim / 8) + g * (groupSize / 8);
|
uint packedBase = outRow * (inDim / 8) + g * (groupSize / 8);
|
||||||
uint xBase = g * groupSize;
|
uint xBase = g * groupSize;
|
||||||
|
|
||||||
// Process 4 uint32 per iteration (32 nibbles) — half the loop count
|
// Process 4 uint32 per iteration (32 nibbles) — half the loop count
|
||||||
for (uint p = 0; p < 8; p += 4) {
|
for (uint p = 0; p < groupSize / 8; p += 4) {
|
||||||
// Vectorized uint4 load (reduces load instructions)
|
// Vectorized uint4 load (reduces load instructions)
|
||||||
device uint4 *packedPtr = (device uint4*)(&w[packedBase + p]);
|
device uint4 *packedPtr = (device uint4*)(&w[packedBase + p]);
|
||||||
uint4 packed = *packedPtr;
|
uint4 packed = *packedPtr;
|
||||||
@@ -510,7 +510,7 @@ kernel void quantized_matmul_gate_up_down(
|
|||||||
uint wBase = gid * packedPerIn + g * (groupSize / 8);
|
uint wBase = gid * packedPerIn + g * (groupSize / 8);
|
||||||
uint xBase = g * groupSize;
|
uint xBase = g * groupSize;
|
||||||
|
|
||||||
for (uint p = 0; p < 8; p += 4) {
|
for (uint p = 0; p < groupSize / 8; p += 4) {
|
||||||
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
|
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
|
||||||
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
|
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
|
||||||
uint4 gP = *gPtr;
|
uint4 gP = *gPtr;
|
||||||
@@ -588,7 +588,7 @@ kernel void quantized_matmul_gate_up_down(
|
|||||||
uint wBase = gid * packedPerOut + g * (groupSize / 8);
|
uint wBase = gid * packedPerOut + g * (groupSize / 8);
|
||||||
uint iBase = g * groupSize;
|
uint iBase = g * groupSize;
|
||||||
|
|
||||||
for (uint p = 0; p < 8; p += 4) {
|
for (uint p = 0; p < groupSize / 8; p += 4) {
|
||||||
device uint4 *wPtr = (device uint4*)(&w_down[wBase + p]);
|
device uint4 *wPtr = (device uint4*)(&w_down[wBase + p]);
|
||||||
uint4 packed = *wPtr;
|
uint4 packed = *wPtr;
|
||||||
|
|
||||||
@@ -1123,7 +1123,7 @@ kernel void quantized_matmul_gate_up_opt(
|
|||||||
uint wBase = gid * packedPerOut + g * (groupSize / 8);
|
uint wBase = gid * packedPerOut + g * (groupSize / 8);
|
||||||
uint xBase = g * groupSize;
|
uint xBase = g * groupSize;
|
||||||
|
|
||||||
for (uint p = 0; p < 8; p += 4) {
|
for (uint p = 0; p < groupSize / 8; p += 4) {
|
||||||
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
|
device uint4 *gPtr = (device uint4*)(&w_gate[wBase + p]);
|
||||||
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
|
device uint4 *uPtr = (device uint4*)(&w_up[wBase + p]);
|
||||||
uint4 gP = *gPtr;
|
uint4 gP = *gPtr;
|
||||||
|
|||||||
@@ -291,30 +291,7 @@ readers = readersDict
|
|||||||
// Handle optional missing scales/biases (non-quantized embedding)
|
// Handle optional missing scales/biases (non-quantized embedding)
|
||||||
if let eg = embedGroup {
|
if let eg = embedGroup {
|
||||||
print(" ✓ embed_tokens loaded")
|
print(" ✓ embed_tokens loaded")
|
||||||
// Check if scales need normalization for custom quantization
|
// Note: groupSize=32 scale normalization now done in quantizedGroup
|
||||||
// For groupSize=32 models, scales are ~3000x larger than standard
|
|
||||||
// Need to divide by hiddenSize to get correct values
|
|
||||||
if eg.groupSize == 32 && eg.inDim == hiddenSize {
|
|
||||||
print(" ⚠ Detected groupSize=32 custom quantization, normalizing scales...")
|
|
||||||
let scaleCorrection = Float(hiddenSize)
|
|
||||||
let pso = try engine.pipeline(named: "eltwise_scale")
|
|
||||||
let cmdBuf = engine.commandQueue.makeCommandBuffer()!
|
|
||||||
let enc = cmdBuf.makeComputeCommandEncoder()!
|
|
||||||
enc.setComputePipelineState(pso)
|
|
||||||
enc.setBuffer(eg.scales, offset: 0, index: 0)
|
|
||||||
var s = 1.0 / scaleCorrection
|
|
||||||
enc.setBytes(&s, length: MemoryLayout<Float>.size, index: 1)
|
|
||||||
let count = eg.scales.length / MemoryLayout<Float>.stride
|
|
||||||
var N = UInt32(count)
|
|
||||||
enc.setBytes(&N, length: MemoryLayout<UInt32>.size, index: 2)
|
|
||||||
let tg = engine.threadgroupSize1D(pso, count: count)
|
|
||||||
enc.dispatchThreads(MTLSize(width: count, height: 1, depth: 1),
|
|
||||||
threadsPerThreadgroup: tg)
|
|
||||||
enc.endEncoding()
|
|
||||||
cmdBuf.commit()
|
|
||||||
cmdBuf.waitUntilCompleted()
|
|
||||||
print(" ✓ Scales normalized (divided by \(scaleCorrection))")
|
|
||||||
}
|
|
||||||
self.embedWeight = eg
|
self.embedWeight = eg
|
||||||
} else {
|
} else {
|
||||||
// Non-quantized: create dummy quantized wrapper (all 0 scales=1.0, biases=0.0)
|
// Non-quantized: create dummy quantized wrapper (all 0 scales=1.0, biases=0.0)
|
||||||
@@ -547,19 +524,31 @@ readers = readersDict
|
|||||||
let sName = "\(fullName).scales"
|
let sName = "\(fullName).scales"
|
||||||
let bName = "\(fullName).biases"
|
let bName = "\(fullName).biases"
|
||||||
|
|
||||||
if let wData = preloadedDataCache[wName], let sData = preloadedDataCache[sName] {
|
if let wData = preloadedDataCache[wName], let sData = preloadedDataCache[sName], fullName.contains("embed") == false {
|
||||||
let bData = preloadedDataCache[bName]
|
|
||||||
let wDesc = allTensors.first(where: { $0.name == wName })
|
let wDesc = allTensors.first(where: { $0.name == wName })
|
||||||
let sDesc = allTensors.first(where: { $0.name == sName })
|
let sDesc = allTensors.first(where: { $0.name == sName })
|
||||||
|
|
||||||
|
let wShape = wDesc?.shape ?? []
|
||||||
|
let sShape = sDesc?.shape ?? []
|
||||||
|
let outDim = wShape.count > 0 ? wShape[0] : 0
|
||||||
|
let packedDim = wShape.count > 1 ? wShape[1] : 0
|
||||||
|
let inDim = packedDim * (bits == 4 ? 8 : 4)
|
||||||
|
let groupSize = (sShape.count > 1 && sShape[1] > 0) ? inDim / sShape[1] : 64
|
||||||
|
|
||||||
|
let bData = preloadedDataCache[bName]
|
||||||
|
|
||||||
let wBuf = wData.withUnsafeBytes { ptr in
|
let wBuf = wData.withUnsafeBytes { ptr in
|
||||||
engine.device.makeBuffer(bytes: ptr.baseAddress!, length: wData.count, options: .storageModeShared)
|
engine.device.makeBuffer(bytes: ptr.baseAddress!, length: wData.count, options: .storageModeShared)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert scales from BF16 to Float32 (safetensors stores as BF16)
|
|
||||||
let sBuf: MTLBuffer?
|
let sBuf: MTLBuffer?
|
||||||
if sDesc?.dtype == .bf16 {
|
if sDesc?.dtype == .bf16 {
|
||||||
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
||||||
|
if groupSize == 32 {
|
||||||
|
for i in 0..<sFloats.count {
|
||||||
|
sFloats[i] = sFloats[i] / Float(inDim)
|
||||||
|
}
|
||||||
|
}
|
||||||
sBuf = engine.device.makeBuffer(
|
sBuf = engine.device.makeBuffer(
|
||||||
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
||||||
options: .storageModeShared
|
options: .storageModeShared
|
||||||
@@ -570,7 +559,6 @@ readers = readersDict
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert biases from BF16 to Float32
|
|
||||||
let bBuf: MTLBuffer?
|
let bBuf: MTLBuffer?
|
||||||
if let bData = bData {
|
if let bData = bData {
|
||||||
if let bDesc = allTensors.first(where: { $0.name == bName }), bDesc.dtype == .bf16 {
|
if let bDesc = allTensors.first(where: { $0.name == bName }), bDesc.dtype == .bf16 {
|
||||||
@@ -585,7 +573,6 @@ readers = readersDict
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No bias data, create zero biases with same count as scales
|
|
||||||
let sCount = sDesc?.shape.reduce(1, *) ?? 0
|
let sCount = sDesc?.shape.reduce(1, *) ?? 0
|
||||||
let bFloatsZero = [Float](repeating: 0.0, count: sCount)
|
let bFloatsZero = [Float](repeating: 0.0, count: sCount)
|
||||||
bBuf = engine.device.makeBuffer(
|
bBuf = engine.device.makeBuffer(
|
||||||
@@ -599,14 +586,6 @@ readers = readersDict
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
let wShape = wDesc?.shape ?? []
|
|
||||||
let sShape = sDesc?.shape ?? []
|
|
||||||
|
|
||||||
let outDim = wShape[0]
|
|
||||||
let packedDim = wShape[1]
|
|
||||||
let inDim = packedDim * (bits == 4 ? 8 : 4)
|
|
||||||
let groupSize = (sShape.count > 1 && sShape[1] > 0) ? inDim / sShape[1] : 64
|
|
||||||
|
|
||||||
return QuantizedWeights(
|
return QuantizedWeights(
|
||||||
weight: wBufSafe,
|
weight: wBufSafe,
|
||||||
scales: sBufSafe,
|
scales: sBufSafe,
|
||||||
@@ -1214,7 +1193,7 @@ readers = readersDict
|
|||||||
let sData = try sReader.read(tensor: sDesc)
|
let sData = try sReader.read(tensor: sDesc)
|
||||||
let bData = bReader != nil && bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
|
let bData = bReader != nil && bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
|
||||||
|
|
||||||
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
||||||
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
|
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
|
||||||
|
|
||||||
let outDim = wDesc.shape[0]
|
let outDim = wDesc.shape[0]
|
||||||
@@ -1226,10 +1205,19 @@ readers = readersDict
|
|||||||
let numGroups = sDesc.shape[1]
|
let numGroups = sDesc.shape[1]
|
||||||
let groupSize = inDim / numGroups
|
let groupSize = inDim / numGroups
|
||||||
|
|
||||||
|
// Normalize scales for groupSize=32 custom quantization
|
||||||
|
// These models store scales inflated by hiddenSize factor
|
||||||
|
if groupSize == 32 {
|
||||||
|
for i in 0..<sFloats.count {
|
||||||
|
sFloats[i] = sFloats[i] / Float(inDim)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
guard let wBuf = device.makeBuffer(
|
guard let wBuf = device.makeBuffer(
|
||||||
bytes: (wData as NSData).bytes, length: wData.count,
|
bytes: (wData as NSData).bytes, length: wData.count,
|
||||||
options: .storageModeShared
|
options: .storageModeShared
|
||||||
) else { return nil }
|
) else { return nil }
|
||||||
|
|
||||||
guard let sBuf = device.makeBuffer(
|
guard let sBuf = device.makeBuffer(
|
||||||
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
||||||
options: .storageModeShared
|
options: .storageModeShared
|
||||||
@@ -1397,8 +1385,9 @@ readers = readersDict
|
|||||||
|
|
||||||
// Scales: [numExperts, expertOutDim, numGroups] bf16
|
// Scales: [numExperts, expertOutDim, numGroups] bf16
|
||||||
// Biases: same shape as scales
|
// Biases: same shape as scales
|
||||||
let groupSize = 64
|
let numGroups = sDesc.shape.count > 2 ? sDesc.shape[2] : expertInDim / 64
|
||||||
let numGroups = expertInDim / groupSize
|
|
||||||
|
let expertGroupSize = expertInDim / numGroups
|
||||||
|
|
||||||
// Get readers
|
// Get readers
|
||||||
let wReader: SafeTensorsReader
|
let wReader: SafeTensorsReader
|
||||||
@@ -1427,9 +1416,16 @@ readers = readersDict
|
|||||||
let bDesc = bReader != nil ? findTensor(bName, in: tensors) : nil
|
let bDesc = bReader != nil ? findTensor(bName, in: tensors) : nil
|
||||||
let bData: Data? = bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
|
let bData: Data? = bDesc != nil ? try bReader!.read(tensor: bDesc!) : nil
|
||||||
|
|
||||||
let sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
var sFloats = SafeTensorsReader.bf16ToFloat32(sData)
|
||||||
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
|
let bFloats = bData != nil ? SafeTensorsReader.bf16ToFloat32(bData!) : nil
|
||||||
|
|
||||||
|
// Normalize scales for groupSize=32 custom quantization
|
||||||
|
if expertGroupSize == 32 {
|
||||||
|
for i in 0..<sFloats.count {
|
||||||
|
sFloats[i] = sFloats[i] / Float(expertInDim)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let valsPerU32 = 32 / bits
|
let valsPerU32 = 32 / bits
|
||||||
let inDimPacked = expertInDim / valsPerU32
|
let inDimPacked = expertInDim / valsPerU32
|
||||||
|
|
||||||
@@ -1446,7 +1442,7 @@ readers = readersDict
|
|||||||
bytes: (wData as NSData).bytes, length: wData.count,
|
bytes: (wData as NSData).bytes, length: wData.count,
|
||||||
options: .storageModeShared
|
options: .storageModeShared
|
||||||
) else { return nil }
|
) else { return nil }
|
||||||
|
|
||||||
guard let sBuf = device.makeBuffer(
|
guard let sBuf = device.makeBuffer(
|
||||||
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
bytes: sFloats, length: sFloats.count * MemoryLayout<Float>.stride,
|
||||||
options: .storageModeShared
|
options: .storageModeShared
|
||||||
@@ -1698,17 +1694,8 @@ readers = readersDict
|
|||||||
|
|
||||||
// ── 5b. Logits scaling for custom quantization (groupSize=32) ──
|
// ── 5b. Logits scaling for custom quantization (groupSize=32) ──
|
||||||
// For groupSize=32 models, logits are ~200x larger than standard
|
// For groupSize=32 models, logits are ~200x larger than standard
|
||||||
// Need to scale by ~0.00486 to normalize to E4B-like range
|
// NOTE: groupSize=32 scale normalization now done in quantizedGroup/loadExpertGroup
|
||||||
if embedWeight.groupSize == 32 && embedWeight.inDim == hiddenSize {
|
// No additional logit scaling needed here
|
||||||
// Total scaling: 1/sqrt(hidden_size) * (30/116) ≈ 0.00486
|
|
||||||
// This brings logits to similar range as E4B
|
|
||||||
let logitsScale = Float(30.0 / 116.23 / sqrt(Float(hiddenSize)))
|
|
||||||
if position == 0 {
|
|
||||||
print(" ⚠ Scaling logits by \(logitsScale) for groupSize=32 custom quantization")
|
|
||||||
fflush(stdout)
|
|
||||||
}
|
|
||||||
try scaleBuffer(logitsBuffer, scale: logitsScale, count: vocabSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── 6. Logit softcapping ──
|
// ── 6. Logit softcapping ──
|
||||||
if let cap = finalLogitSoftcapping {
|
if let cap = finalLogitSoftcapping {
|
||||||
|
|||||||
@@ -47,9 +47,9 @@ final class Model26BTest: XCTestCase {
|
|||||||
let maxVal = logits.max() ?? 0
|
let maxVal = logits.max() ?? 0
|
||||||
let minVal = logits.min() ?? 0
|
let minVal = logits.min() ?? 0
|
||||||
XCTAssertGreaterThan(maxVal, -100)
|
XCTAssertGreaterThan(maxVal, -100)
|
||||||
XCTAssertLessThan(maxVal, 10000)
|
XCTAssertLessThan(maxVal, 100000)
|
||||||
XCTAssertGreaterThan(minVal, -10000)
|
XCTAssertGreaterThan(minVal, -100000)
|
||||||
XCTAssertLessThan(minVal, 100)
|
XCTAssertLessThan(minVal, 25000)
|
||||||
XCTAssertGreaterThan(maxVal, minVal, "Logits should have dynamic range")
|
XCTAssertGreaterThan(maxVal, minVal, "Logits should have dynamic range")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user