Files
markbaseengine/Sources/MarkBase/Vision/VisionWeights.swift
T
MarkBase Admin 8a66b9086a
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
v2: Initial clean branch with unit tests + CI/CD pipeline
- Started from ac75faa (initial E4B-MarkBase integration)
- Kept Sources/ (all engine code) + Package.swift + .gitignore
- Removed all ad-hoc tests, documentation, scripts, Python files
- Added Tests/00_Unit/ (MathTest, TokenizerTest, SamplerTest)
- Added .gitea/workflows/ci.yaml (build + unit tests + lint)
- Added Scripts/check_resources.sh (memory-aware test runner)
- Added Tests/Manifest.json (resource requirements for all tests)
- Focus: 4-bit quantized models only
2026-07-05 13:29:25 +08:00

141 lines
6.3 KiB
Swift

import Metal
/// Metal buffers for the vision tower: the patch-embedder input projection and
/// position table, the `embed_vision.embedding_projection` tensors, and one
/// `VisionLayerWeights` per encoder layer.
///
/// Every tensor is copied into an `MTLBuffer` during initialization. Malformed
/// input (missing, empty, or inconsistently shaped tensors) is reported by
/// throwing rather than by trapping.
public final class VisionWeights {
    /// Failures that can occur while copying host tensors into Metal buffers.
    public enum LoadError: Error {
        /// The named tensor exists in the checkpoint but holds no data.
        case emptyTensor(String)
        /// `MTLDevice.makeBuffer` returned `nil` for the named tensor.
        case bufferAllocationFailed(String)
        /// The scales tensor and the requested dimensions do not produce a
        /// positive group count and group size.
        case invalidQuantizationShape(String)
    }

    public let inputProj: QuantizedWeights
    public let positionEmbedding: MTLBuffer
    public let embeddingProjectionWeight: MTLBuffer // uint32 packed
    public let embeddingProjectionScales: MTLBuffer
    public let embeddingProjectionBiases: MTLBuffer
    public let layers: [VisionLayerWeights]

    /// Loads all vision weights from the already-parsed checkpoint dictionaries.
    ///
    /// - Parameters:
    ///   - device: Metal device that allocates the buffers.
    ///   - config: Supplies `hiddenSize` and `numHiddenLayers`.
    ///   - tensors: Raw byte payloads (packed quantized weights), keyed by tensor name.
    ///   - floats: Float payloads (scales, biases, tables), keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` when a required key is missing;
    ///   `LoadError` when a tensor is empty, a buffer cannot be allocated, or
    ///   the quantization shape is inconsistent.
    public init(device: MTLDevice, config: VisionConfig,
                tensors: [String: Data], floats: [String: [Float]]) throws {
        let patchPrefix = "vision_tower.patch_embedder."

        // NOTE(review): both inDim and outDim come from `hiddenSize` here;
        // confirm that matches the checkpoint's patch-embedder input width.
        inputProj = try Self.loadQuantized(name: patchPrefix + "input_proj",
                                           tensors: tensors, floats: floats,
                                           device: device,
                                           inDim: config.hiddenSize,
                                           outDim: config.hiddenSize)

        let positionKey = patchPrefix + "position_embedding_table"
        guard let positionTable = floats[positionKey] else {
            throw WeightError.tensorNotFound("position_embedding_table")
        }
        positionEmbedding = try Self.makeBuffer(device: device,
                                                floats: positionTable,
                                                name: positionKey)

        // Embedding projection — already quantized, so the three parts are
        // uploaded as-is without computing a group size.
        let projectionPrefix = "embed_vision.embedding_projection"

        guard let projectionWeight = tensors[projectionPrefix + ".weight"] else {
            throw WeightError.tensorNotFound("embedding_projection.weight")
        }
        embeddingProjectionWeight = try Self.makeBuffer(device: device,
                                                        data: projectionWeight,
                                                        name: projectionPrefix + ".weight")

        guard let projectionScales = floats[projectionPrefix + ".scales"] else {
            throw WeightError.tensorNotFound("embedding_projection.scales")
        }
        embeddingProjectionScales = try Self.makeBuffer(device: device,
                                                        floats: projectionScales,
                                                        name: projectionPrefix + ".scales")

        guard let projectionBiases = floats[projectionPrefix + ".biases"] else {
            throw WeightError.tensorNotFound("embedding_projection.biases")
        }
        embeddingProjectionBiases = try Self.makeBuffer(device: device,
                                                        floats: projectionBiases,
                                                        name: projectionPrefix + ".biases")

        layers = try (0..<config.numHiddenLayers).map { index in
            try VisionLayerWeights(device: device, config: config, layerIdx: index,
                                   tensors: tensors, floats: floats)
        }
    }

    /// Loads one 4-bit quantized linear layer (`<name>.weight`, `.scales`, `.biases`).
    ///
    /// The group size is derived from the scales tensor, which is assumed to be
    /// laid out as `[outDim, numGroups]`: `numGroups = scales.count / outDim`
    /// and `groupSize = inDim / numGroups`.
    ///
    /// - Parameters:
    ///   - name: Tensor name without the `.weight` / `.scales` / `.biases` suffix.
    ///   - tensors: Raw byte payloads keyed by tensor name.
    ///   - floats: Float payloads keyed by tensor name.
    ///   - device: Metal device that allocates the buffers.
    ///   - inDim: Input feature count of the layer.
    ///   - outDim: Output feature count of the layer.
    /// - Returns: The uploaded buffers together with the layer dimensions,
    ///   `bits: 4`, and the derived group size.
    /// - Throws: `WeightError.tensorNotFound` for a missing key;
    ///   `LoadError.invalidQuantizationShape` when the derived group count or
    ///   group size would be zero (previously a division-by-zero trap);
    ///   `LoadError.emptyTensor` / `.bufferAllocationFailed` when a buffer
    ///   cannot be created.
    public static func loadQuantized(name: String,
                                     tensors: [String: Data],
                                     floats: [String: [Float]],
                                     device: MTLDevice,
                                     inDim: Int, outDim: Int) throws -> QuantizedWeights {
        let wKey = name + ".weight"
        let sKey = name + ".scales"
        let bKey = name + ".biases"

        guard let wData = tensors[wKey] else {
            throw WeightError.tensorNotFound("Quantized weight \(wKey)")
        }
        guard let sData = floats[sKey] else {
            throw WeightError.tensorNotFound("Quantized scales \(sKey)")
        }
        guard let bData = floats[bKey] else {
            throw WeightError.tensorNotFound("Quantized biases \(bKey)")
        }

        // Validate before dividing: integer division by zero traps in Swift.
        guard outDim > 0 else {
            throw LoadError.invalidQuantizationShape(
                "\(name): outDim must be positive, got \(outDim)")
        }
        // Compute groupSize: scales shape is [outDim, numGroups], so numGroups = sData.count / outDim
        let numGroups = sData.count / outDim
        guard numGroups > 0 else {
            throw LoadError.invalidQuantizationShape(
                "\(sKey): \(sData.count) scales cannot cover outDim \(outDim)")
        }
        let groupSize = inDim / numGroups
        guard groupSize > 0 else {
            throw LoadError.invalidQuantizationShape(
                "\(name): inDim \(inDim) is smaller than group count \(numGroups)")
        }

        let weight = try makeBuffer(device: device, data: wData, name: wKey)
        let scales = try makeBuffer(device: device, floats: sData, name: sKey)
        let biases = try makeBuffer(device: device, floats: bData, name: bKey)

        return QuantizedWeights(weight: weight, scales: scales, biases: biases,
                                inDim: inDim, outDim: outDim, bits: 4, groupSize: groupSize)
    }

    /// Copies a float array into a new Metal buffer, throwing instead of
    /// trapping when the array is empty or the allocation fails.
    private static func makeBuffer(device: MTLDevice,
                                   floats values: [Float],
                                   name: String) throws -> MTLBuffer {
        guard !values.isEmpty else {
            throw LoadError.emptyTensor(name)
        }
        let byteCount = values.count * MemoryLayout<Float>.stride
        guard let buffer = device.makeBuffer(bytes: values, length: byteCount) else {
            throw LoadError.bufferAllocationFailed(name)
        }
        return buffer
    }

    /// Copies raw tensor bytes into a new Metal buffer, throwing instead of
    /// trapping when the data is empty or the allocation fails.
    private static func makeBuffer(device: MTLDevice,
                                   data: Data,
                                   name: String) throws -> MTLBuffer {
        guard !data.isEmpty else {
            throw LoadError.emptyTensor(name)
        }
        let allocated = data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> MTLBuffer? in
            guard let base = raw.baseAddress else { return nil }
            return device.makeBuffer(bytes: base, length: raw.count)
        }
        guard let buffer = allocated else {
            throw LoadError.bufferAllocationFailed(name)
        }
        return buffer
    }
}
/// Weights for a single vision encoder layer: four layer norms, the q/k norms,
/// the four attention projections and the three MLP projections.
///
/// Norm weights are uploaded as float buffers; projections are loaded through
/// `VisionWeights.loadQuantized`.
public struct VisionLayerWeights {
    /// Failures that can occur while uploading a norm tensor.
    public enum LoadError: Error {
        /// The named norm tensor exists but has no elements.
        case emptyTensor(String)
        /// `MTLDevice.makeBuffer` returned `nil` for the named tensor.
        case bufferAllocationFailed(String)
    }

    public let inputLayernorm: MTLBuffer
    public let postAttentionLayernorm: MTLBuffer
    public let preFeedforwardLayernorm: MTLBuffer
    public let postFeedforwardLayernorm: MTLBuffer
    public let selfAttnQProj: QuantizedWeights
    public let selfAttnKProj: QuantizedWeights
    public let selfAttnVProj: QuantizedWeights
    public let selfAttnOProj: QuantizedWeights
    public let qNorm: MTLBuffer
    public let kNorm: MTLBuffer
    public let mlpGateProj: QuantizedWeights
    public let mlpUpProj: QuantizedWeights
    public let mlpDownProj: QuantizedWeights

    /// Loads the tensors stored under `vision_tower.encoder.layers.<layerIdx>`.
    ///
    /// - Parameters:
    ///   - device: Metal device that allocates the buffers.
    ///   - config: Supplies `hiddenSize` and `intermediateSize`.
    ///   - layerIdx: Index of the encoder layer to load.
    ///   - tensors: Raw byte payloads keyed by tensor name.
    ///   - floats: Float payloads keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` when a key is missing;
    ///   `LoadError` when a norm tensor is empty or its buffer cannot be
    ///   allocated; any error thrown by `VisionWeights.loadQuantized`.
    public init(device: MTLDevice, config: VisionConfig, layerIdx: Int,
                tensors: [String: Data], floats: [String: [Float]]) throws {
        let prefix = "vision_tower.encoder.layers.\(layerIdx)"
        let hidden = config.hiddenSize
        let intermediate = config.intermediateSize

        // Uploads one norm weight vector. The previous force-unwrap crashed on
        // an empty tensor or failed allocation; both now surface as errors.
        func loadNorm(_ key: String) throws -> MTLBuffer {
            guard let values = floats[key] else {
                throw WeightError.tensorNotFound("Norm \(key)")
            }
            guard !values.isEmpty else {
                throw LoadError.emptyTensor(key)
            }
            let byteCount = values.count * MemoryLayout<Float>.stride
            guard let buffer = device.makeBuffer(bytes: values, length: byteCount) else {
                throw LoadError.bufferAllocationFailed(key)
            }
            return buffer
        }

        // Loads one quantized projection whose key is `prefix + suffix`.
        func loadProjection(_ suffix: String, inDim: Int, outDim: Int) throws -> QuantizedWeights {
            try VisionWeights.loadQuantized(name: prefix + suffix,
                                            tensors: tensors, floats: floats,
                                            device: device,
                                            inDim: inDim, outDim: outDim)
        }

        inputLayernorm = try loadNorm(prefix + ".input_layernorm.weight")
        postAttentionLayernorm = try loadNorm(prefix + ".post_attention_layernorm.weight")
        preFeedforwardLayernorm = try loadNorm(prefix + ".pre_feedforward_layernorm.weight")
        postFeedforwardLayernorm = try loadNorm(prefix + ".post_feedforward_layernorm.weight")
        qNorm = try loadNorm(prefix + ".self_attn.q_norm.weight")
        kNorm = try loadNorm(prefix + ".self_attn.k_norm.weight")

        // Attention projections are all hidden -> hidden.
        selfAttnQProj = try loadProjection(".self_attn.q_proj", inDim: hidden, outDim: hidden)
        selfAttnKProj = try loadProjection(".self_attn.k_proj", inDim: hidden, outDim: hidden)
        selfAttnVProj = try loadProjection(".self_attn.v_proj", inDim: hidden, outDim: hidden)
        selfAttnOProj = try loadProjection(".self_attn.o_proj", inDim: hidden, outDim: hidden)

        // MLP: gate/up expand hidden -> intermediate, down contracts back.
        mlpGateProj = try loadProjection(".mlp.gate_proj", inDim: hidden, outDim: intermediate)
        mlpUpProj = try loadProjection(".mlp.up_proj", inDim: hidden, outDim: intermediate)
        mlpDownProj = try loadProjection(".mlp.down_proj", inDim: intermediate, outDim: hidden)
    }
}