8a66b9086a
- Started from ac75faa (initial E4B-MarkBase integration)
- Kept Sources/ (all engine code) + Package.swift + .gitignore
- Removed all ad-hoc tests, documentation, scripts, Python files
- Added Tests/00_Unit/ (MathTest, TokenizerTest, SamplerTest)
- Added .gitea/workflows/ci.yaml (build + unit tests + lint)
- Added Scripts/check_resources.sh (memory-aware test runner)
- Added Tests/Manifest.json (resource requirements for all tests)
- Focus: 4-bit quantized models only
141 lines
6.3 KiB
Swift
141 lines
6.3 KiB
Swift
import Metal
|
|
|
|
/// Metal buffers holding the vision tower's weights: the patch embedder,
/// the embedding projection, and one `VisionLayerWeights` per encoder layer.
public final class VisionWeights {
    /// Quantized weights loaded from `vision_tower.patch_embedder.input_proj`.
    public let inputProj: QuantizedWeights
    /// Contents of `vision_tower.patch_embedder.position_embedding_table`,
    /// uploaded as 32-bit floats.
    public let positionEmbedding: MTLBuffer

    /// Raw bytes of `embed_vision.embedding_projection.weight`.
    public let embeddingProjectionWeight: MTLBuffer  // uint32 packed
    /// Contents of `embed_vision.embedding_projection.scales`.
    public let embeddingProjectionScales: MTLBuffer
    /// Contents of `embed_vision.embedding_projection.biases`.
    public let embeddingProjectionBiases: MTLBuffer

    /// Encoder layers, in index order `0 ..< config.numHiddenLayers`.
    public let layers: [VisionLayerWeights]

    /// Uploads every vision tensor to `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device that allocates the buffers.
    ///   - config: Supplies `hiddenSize` and `numHiddenLayers` here; it is also
    ///     forwarded to each `VisionLayerWeights`.
    ///   - tensors: Raw tensor bytes keyed by tensor name (quantized weights).
    ///   - floats: Float tensors keyed by tensor name (scales, biases, tables).
    /// - Throws: `WeightError.tensorNotFound` when a required tensor is missing,
    ///   is empty, or cannot be copied into a Metal buffer.
    public init(device: MTLDevice, config: VisionConfig,
                tensors: [String: Data], floats: [String: [Float]]) throws {
        let pfx = "vision_tower.patch_embedder."

        // NOTE(review): the input projection is loaded with
        // inDim == outDim == config.hiddenSize — confirm that the patch
        // embedder's input width really equals hiddenSize.
        inputProj = try Self.loadQuantized(name: pfx + "input_proj",
                                           tensors: tensors, floats: floats,
                                           device: device,
                                           inDim: config.hiddenSize,
                                           outDim: config.hiddenSize)

        guard let pe = floats[pfx + "position_embedding_table"] else {
            throw WeightError.tensorNotFound("position_embedding_table")
        }
        positionEmbedding = try Self.makeFloatBuffer(
            pe, named: "position_embedding_table", device: device)

        // Embedding projection — already quantized
        let ep = "embed_vision.embedding_projection"
        guard let epWeight = tensors[ep + ".weight"] else {
            throw WeightError.tensorNotFound("embedding_projection.weight")
        }
        embeddingProjectionWeight = try Self.makeRawBuffer(
            epWeight, named: "embedding_projection.weight", device: device)

        guard let epScales = floats[ep + ".scales"] else {
            throw WeightError.tensorNotFound("embedding_projection.scales")
        }
        embeddingProjectionScales = try Self.makeFloatBuffer(
            epScales, named: "embedding_projection.scales", device: device)

        guard let epBiases = floats[ep + ".biases"] else {
            throw WeightError.tensorNotFound("embedding_projection.biases")
        }
        embeddingProjectionBiases = try Self.makeFloatBuffer(
            epBiases, named: "embedding_projection.biases", device: device)

        layers = try (0..<config.numHiddenLayers).map { layerIdx in
            try VisionLayerWeights(device: device, config: config, layerIdx: layerIdx,
                                   tensors: tensors, floats: floats)
        }
    }

    /// Loads the `<name>.weight`, `<name>.scales` and `<name>.biases` tensors
    /// and wraps them as 4-bit `QuantizedWeights`.
    ///
    /// The group size is derived from the scales tensor: with
    /// `numGroups = scales.count / outDim`, `groupSize = inDim / numGroups`.
    ///
    /// - Parameters:
    ///   - name: Tensor name without the `.weight` / `.scales` / `.biases` suffix.
    ///   - tensors: Raw tensor bytes keyed by tensor name.
    ///   - floats: Float tensors keyed by tensor name.
    ///   - device: Metal device that allocates the buffers.
    ///   - inDim: Input dimension passed through to `QuantizedWeights`.
    ///   - outDim: Output dimension passed through to `QuantizedWeights`.
    /// - Returns: Quantized weights with `bits` fixed at 4.
    /// - Throws: `WeightError.tensorNotFound` when a tensor is missing or empty,
    ///   when a Metal buffer cannot be allocated, or when the scales tensor is
    ///   too small to derive a group size.
    public static func loadQuantized(name: String,
                                     tensors: [String: Data],
                                     floats: [String: [Float]],
                                     device: MTLDevice,
                                     inDim: Int, outDim: Int) throws -> QuantizedWeights {
        let wKey = name + ".weight"
        let sKey = name + ".scales"
        let bKey = name + ".biases"

        guard let wData = tensors[wKey] else {
            throw WeightError.tensorNotFound("Quantized weight \(wKey)")
        }
        guard let sData = floats[sKey] else {
            throw WeightError.tensorNotFound("Quantized scales \(sKey)")
        }
        guard let bData = floats[bKey] else {
            throw WeightError.tensorNotFound("Quantized biases \(bKey)")
        }

        // Compute groupSize: scales shape is [outDim, numGroups], so numGroups = sData.count / outDim.
        // Both divisions trap on a zero divisor, so reject those layouts up front.
        // NOTE(review): WeightError is declared outside this file; a dedicated
        // "invalid layout" case would describe this failure better.
        guard outDim > 0 else {
            throw WeightError.tensorNotFound(
                "Quantized scales \(sKey): cannot derive group size for outDim \(outDim)")
        }
        let numGroups = sData.count / outDim
        guard numGroups > 0 else {
            throw WeightError.tensorNotFound(
                "Quantized scales \(sKey): \(sData.count) values is fewer than outDim \(outDim)")
        }
        let groupSize = inDim / numGroups

        let weight = try makeRawBuffer(wData, named: "Quantized weight \(wKey)", device: device)
        let scales = try makeFloatBuffer(sData, named: "Quantized scales \(sKey)", device: device)
        let biases = try makeFloatBuffer(bData, named: "Quantized biases \(bKey)", device: device)

        return QuantizedWeights(weight: weight, scales: scales, biases: biases,
                                inDim: inDim, outDim: outDim, bits: 4, groupSize: groupSize)
    }

    /// Copies `values` into a new Metal buffer, throwing instead of crashing
    /// when the array is empty or the allocation fails.
    private static func makeFloatBuffer(_ values: [Float], named name: String,
                                        device: MTLDevice) throws -> MTLBuffer {
        let byteCount = values.count * MemoryLayout<Float>.stride
        guard byteCount > 0,
              let buffer = device.makeBuffer(bytes: values, length: byteCount) else {
            throw WeightError.tensorNotFound(
                "\(name): tensor is empty or Metal buffer allocation failed")
        }
        return buffer
    }

    /// Copies the raw bytes of `data` into a new Metal buffer, throwing instead
    /// of crashing when the data is empty or the allocation fails.
    private static func makeRawBuffer(_ data: Data, named name: String,
                                      device: MTLDevice) throws -> MTLBuffer {
        let allocated = data.withUnsafeBytes { raw -> MTLBuffer? in
            // baseAddress is nil for empty data; there is nothing to upload then.
            guard let base = raw.baseAddress, raw.count > 0 else { return nil }
            return device.makeBuffer(bytes: base, length: raw.count)
        }
        guard let buffer = allocated else {
            throw WeightError.tensorNotFound(
                "\(name): tensor is empty or Metal buffer allocation failed")
        }
        return buffer
    }
}
|
|
|
|
/// Metal buffers for one vision encoder layer, read from the tensors named
/// `vision_tower.encoder.layers.<layerIdx>.*`.
public struct VisionLayerWeights {
    // Norm weights, each uploaded as 32-bit floats.
    public let inputLayernorm: MTLBuffer
    public let postAttentionLayernorm: MTLBuffer
    public let preFeedforwardLayernorm: MTLBuffer
    public let postFeedforwardLayernorm: MTLBuffer

    // Self-attention projections, all loaded as hiddenSize -> hiddenSize.
    public let selfAttnQProj: QuantizedWeights
    public let selfAttnKProj: QuantizedWeights
    public let selfAttnVProj: QuantizedWeights
    public let selfAttnOProj: QuantizedWeights
    public let qNorm: MTLBuffer
    public let kNorm: MTLBuffer

    // MLP projections between hiddenSize and intermediateSize.
    public let mlpGateProj: QuantizedWeights
    public let mlpUpProj: QuantizedWeights
    public let mlpDownProj: QuantizedWeights

    /// Loads every tensor of encoder layer `layerIdx` onto `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device that allocates the buffers.
    ///   - config: Supplies `hiddenSize` and `intermediateSize`.
    ///   - layerIdx: Index interpolated into the tensor-name prefix.
    ///   - tensors: Raw tensor bytes keyed by tensor name (quantized weights).
    ///   - floats: Float tensors keyed by tensor name (norms, scales, biases).
    /// - Throws: `WeightError.tensorNotFound` when a norm tensor is missing,
    ///   is empty, or cannot be copied into a Metal buffer; also rethrows
    ///   whatever `VisionWeights.loadQuantized` throws.
    public init(device: MTLDevice, config: VisionConfig, layerIdx: Int,
                tensors: [String: Data], floats: [String: [Float]]) throws {
        let prefix = "vision_tower.encoder.layers.\(layerIdx)"
        let h = config.hiddenSize
        let m = config.intermediateSize

        /// Uploads the float tensor stored under `key`.
        func loadNorm(_ key: String) throws -> MTLBuffer {
            guard let arr = floats[key] else {
                throw WeightError.tensorNotFound("Norm \(key)")
            }
            // makeBuffer returns nil for a zero-length request or a failed
            // allocation; surface that as an error instead of force-unwrapping.
            // NOTE(review): WeightError is declared outside this file; a
            // dedicated case would describe this failure better.
            let byteCount = arr.count * MemoryLayout<Float>.stride
            guard byteCount > 0,
                  let buffer = device.makeBuffer(bytes: arr, length: byteCount) else {
                throw WeightError.tensorNotFound(
                    "Norm \(key): tensor is empty or Metal buffer allocation failed")
            }
            return buffer
        }

        /// Loads the quantized projection whose name is `prefix + name`.
        func q(_ name: String, inDim: Int, outDim: Int) throws -> QuantizedWeights {
            try VisionWeights.loadQuantized(name: prefix + name,
                                            tensors: tensors, floats: floats,
                                            device: device,
                                            inDim: inDim, outDim: outDim)
        }

        // Norms are loaded before projections so that, when several tensors are
        // missing, the first error reported matches the original load order.
        inputLayernorm = try loadNorm(prefix + ".input_layernorm.weight")
        postAttentionLayernorm = try loadNorm(prefix + ".post_attention_layernorm.weight")
        preFeedforwardLayernorm = try loadNorm(prefix + ".pre_feedforward_layernorm.weight")
        postFeedforwardLayernorm = try loadNorm(prefix + ".post_feedforward_layernorm.weight")

        qNorm = try loadNorm(prefix + ".self_attn.q_norm.weight")
        kNorm = try loadNorm(prefix + ".self_attn.k_norm.weight")

        selfAttnQProj = try q(".self_attn.q_proj", inDim: h, outDim: h)
        selfAttnKProj = try q(".self_attn.k_proj", inDim: h, outDim: h)
        selfAttnVProj = try q(".self_attn.v_proj", inDim: h, outDim: h)
        selfAttnOProj = try q(".self_attn.o_proj", inDim: h, outDim: h)
        mlpGateProj = try q(".mlp.gate_proj", inDim: h, outDim: m)
        mlpUpProj = try q(".mlp.up_proj", inDim: h, outDim: m)
        mlpDownProj = try q(".mlp.down_proj", inDim: m, outDim: h)
    }
}
|