Files
markbaseengine/Sources/MarkBase/Audio/AudioWeights.swift
T
MarkBase Admin 8a66b9086a
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
v2: Initial clean branch with unit tests + CI/CD pipeline
- Started from ac75faa (initial E4B-MarkBase integration)
- Kept Sources/ (all engine code) + Package.swift + .gitignore
- Removed all ad-hoc tests, documentation, scripts, Python files
- Added Tests/00_Unit/ (MathTest, TokenizerTest, SamplerTest)
- Added .gitea/workflows/ci.yaml (build + unit tests + lint)
- Added Scripts/check_resources.sh (memory-aware test runner)
- Added Tests/Manifest.json (resource requirements for all tests)
- Focus: 4-bit quantized models only
2026-07-05 13:29:25 +08:00

210 lines
10 KiB
Swift

import Metal
import Foundation
/// Metal buffers for every tensor under the `audio_tower.` prefix: the two
/// subsample convolution layers, the input projection, the quantized output
/// projection with its bias, and one `AudioLayerWeights` per hidden layer.
public final class AudioWeights {
    public let subsampleConvLayer0: SubsampleConvLayer
    public let subsampleConvLayer1: SubsampleConvLayer
    public let inputProjLinearWeight: MTLBuffer // Float32, not quantized
    public let outputProj: QuantizedWeights
    public let outputProjBias: MTLBuffer
    public let layers: [AudioLayerWeights]

    /// Loads all audio-tower weights into buffers created on `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate every buffer.
    ///   - config: Supplies `numHiddenLayers`, the number of layers to load.
    ///   - tensors: Raw tensor bytes keyed by tensor name (used for packed quantized weights).
    ///   - floats: Float tensors keyed by tensor name (norms, biases, scales, ...).
    ///   - descriptors: Tensor descriptors keyed by tensor name; their shapes give the
    ///     quantized matrix dimensions.
    /// - Throws: `WeightError.tensorNotFound` when a required key is absent,
    ///   `WeightError.bufferCreationFailed` when Metal returns no buffer, or any error
    ///   thrown while loading an individual layer.
    public init(device: MTLDevice, config: AudioConfig,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        let root = "audio_tower."
        let subsample = root + "subsample_conv_projection."

        subsampleConvLayer0 = SubsampleConvLayer(
            convWeight: try Self.buffer(device, floats, subsample + "layer0.conv.weight"),
            normWeight: try Self.buffer(device, floats, subsample + "layer0.norm.weight")
        )
        subsampleConvLayer1 = SubsampleConvLayer(
            convWeight: try Self.buffer(device, floats, subsample + "layer1.conv.weight"),
            normWeight: try Self.buffer(device, floats, subsample + "layer1.norm.weight")
        )
        inputProjLinearWeight = try Self.buffer(device, floats, subsample + "input_proj_linear.weight")

        outputProj = try Self.loadQuantized(device: device, tensors: tensors, floats: floats,
                                            descriptors: descriptors,
                                            name: root + "output_proj")
        outputProjBias = try Self.buffer(device, floats, root + "output_proj.bias")

        // Layers are loaded in index order; the first failing layer aborts the whole load.
        layers = try (0..<config.numHiddenLayers).map { layerIdx in
            try AudioLayerWeights(device: device, layerIdx: layerIdx,
                                  tensors: tensors, floats: floats,
                                  descriptors: descriptors)
        }
    }

    // ── Helpers ──

    /// Copies the float tensor stored under `key` into a new Metal buffer.
    ///
    /// - Throws: `WeightError.tensorNotFound(key)` when `floats` has no such entry;
    ///   `WeightError.bufferCreationFailed(key)` when the device returns no buffer.
    private static func buffer(_ device: MTLDevice, _ floats: [String: [Float]],
                               _ key: String) throws -> MTLBuffer {
        guard let values = floats[key] else {
            throw WeightError.tensorNotFound(key)
        }
        let byteCount = values.count * MemoryLayout<Float>.stride
        guard let buf = device.makeBuffer(bytes: values, length: byteCount) else {
            // An allocation failure is not a missing tensor; report it with the same
            // case that `loadQuantized` uses.
            throw WeightError.bufferCreationFailed(key)
        }
        return buf
    }

    /// Loads the quantized linear layer stored as `<name>.weight`, `<name>.scales`
    /// and `<name>.biases`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate the three buffers.
    ///   - tensors: Raw tensor bytes; must contain `<name>.weight`.
    ///   - floats: Float tensors; must contain `<name>.scales` and `<name>.biases`.
    ///   - descriptors: Must contain descriptors for `<name>.weight` and `<name>.scales`.
    ///   - name: Tensor name prefix without the trailing component.
    /// - Returns: A `QuantizedWeights` with `bits` 4 and `groupSize` 64.
    /// - Throws: `WeightError.tensorNotFound` carrying the specific missing tensor name,
    ///   or `WeightError.bufferCreationFailed` carrying the tensor whose buffer could
    ///   not be created.
    static func loadQuantized(device: MTLDevice, tensors: [String: Data],
                              floats: [String: [Float]],
                              descriptors: [String: TensorDescriptor],
                              name: String) throws -> QuantizedWeights {
        let wName = name + ".weight"
        let sName = name + ".scales"
        let bName = name + ".biases"

        // Look the pieces up one tensor at a time so the thrown error names the
        // entry that is actually missing instead of only the shared prefix.
        guard let wData = tensors[wName], let wDesc = descriptors[wName] else {
            throw WeightError.tensorNotFound(wName)
        }
        guard let sFloats = floats[sName], let sDesc = descriptors[sName] else {
            throw WeightError.tensorNotFound(sName)
        }
        guard let bFloats = floats[bName] else {
            throw WeightError.tensorNotFound(bName)
        }

        // Dimensions from descriptors:
        //   weight: [outDim, inDim/8]   (U32 packed, 8 values per U32)
        //   scales: [outDim, numGroups] where numGroups = inDim / groupSize
        // NOTE(review): the shapes are indexed without a bounds check, so a descriptor
        // with fewer dimensions than expected traps here — confirm upstream validation.
        let outDim = wDesc.shape[0]
        let numGroups = sDesc.shape[1]
        let groupSize = 64 // Audio uses fixed group_size=64
        let inDim = numGroups * groupSize

        // Borrow the packed bytes only for the duration of the copy; an empty blob
        // has no base address and is reported as a failed buffer creation.
        let packed = wData.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> MTLBuffer? in
            guard let base = raw.baseAddress else { return nil }
            return device.makeBuffer(bytes: base, length: raw.count, options: .storageModeShared)
        }
        guard let wBuf = packed else {
            throw WeightError.bufferCreationFailed(wName)
        }
        guard let sBuf = device.makeBuffer(bytes: sFloats,
                                           length: sFloats.count * MemoryLayout<Float>.stride,
                                           options: .storageModeShared) else {
            throw WeightError.bufferCreationFailed(sName)
        }
        guard let bBuf = device.makeBuffer(bytes: bFloats,
                                           length: bFloats.count * MemoryLayout<Float>.stride,
                                           options: .storageModeShared) else {
            throw WeightError.bufferCreationFailed(bName)
        }

        return QuantizedWeights(weight: wBuf, scales: sBuf, biases: bBuf,
                                inDim: inDim, outDim: outDim, bits: 4, groupSize: groupSize)
    }
}
/// Pair of weight buffers belonging to one subsample convolution layer.
public struct SubsampleConvLayer {
    /// Buffer holding the layer's convolution weight.
    public let convWeight: MTLBuffer
    /// Buffer holding the layer's norm weight.
    public let normWeight: MTLBuffer

    /// Spelled-out form of the memberwise initializer (same labels, same
    /// module-internal visibility).
    init(convWeight: MTLBuffer, normWeight: MTLBuffer) {
        self.convWeight = convWeight
        self.normWeight = normWeight
    }
}
/// Metal buffers for one audio-tower layer, read from the
/// `audio_tower.layers.<layerIdx>.` tensor namespace.
public struct AudioLayerWeights {
    public let normPreAttn: MTLBuffer
    public let normPostAttn: MTLBuffer
    public let normOut: MTLBuffer
    public let selfAttnQProj: QuantizedWeights
    public let selfAttnKProj: QuantizedWeights
    public let selfAttnVProj: QuantizedWeights
    public let selfAttnPost: QuantizedWeights
    public let selfAttnRelativeKProj: MTLBuffer
    public let selfAttnPerDimScale: MTLBuffer
    public let lconv1dPreLayerNorm: MTLBuffer
    public let lconv1dConvNorm: MTLBuffer
    public let lconv1dDepthwiseConv: MTLBuffer
    public let lconv1dLinearStart: QuantizedWeights
    public let lconv1dLinearEnd: QuantizedWeights
    public let feedForward1: FeedForwardWeights
    public let feedForward2: FeedForwardWeights

    /// Copies the float tensor stored under `key` into a new Metal buffer.
    ///
    /// - Throws: `WeightError.tensorNotFound(key)` when `floats` has no such entry;
    ///   `WeightError.bufferCreationFailed(key)` when the device returns no buffer.
    private static func buffer(_ device: MTLDevice, _ floats: [String: [Float]],
                               _ key: String) throws -> MTLBuffer {
        guard let values = floats[key] else {
            throw WeightError.tensorNotFound(key)
        }
        let byteCount = values.count * MemoryLayout<Float>.stride
        guard let buf = device.makeBuffer(bytes: values, length: byteCount) else {
            // An allocation failure is not a missing tensor; use the dedicated case,
            // matching what `AudioWeights.loadQuantized` throws.
            throw WeightError.bufferCreationFailed(key)
        }
        return buf
    }

    /// Loads every tensor of layer `layerIdx`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate every buffer.
    ///   - layerIdx: Index interpolated into the `audio_tower.layers.<layerIdx>.` prefix.
    ///   - tensors: Raw tensor bytes keyed by tensor name.
    ///   - floats: Float tensors keyed by tensor name.
    ///   - descriptors: Tensor descriptors keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` or `WeightError.bufferCreationFailed`
    ///   from the float-buffer helper, plus anything thrown by
    ///   `AudioWeights.loadQuantized` or `FeedForwardWeights.init`.
    public init(device: MTLDevice, layerIdx: Int,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        let P = "audio_tower.layers.\(layerIdx)."

        // Shorthand for the quantized projections, which all share the same inputs
        // and differ only in the tensor name suffix.
        let quantized = { (suffix: String) throws -> QuantizedWeights in
            try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                           descriptors: descriptors,
                                           name: P + suffix)
        }

        normPreAttn = try Self.buffer(device, floats, P + "norm_pre_attn.weight")
        normPostAttn = try Self.buffer(device, floats, P + "norm_post_attn.weight")
        normOut = try Self.buffer(device, floats, P + "norm_out.weight")

        selfAttnQProj = try quantized("self_attn.q_proj")
        selfAttnKProj = try quantized("self_attn.k_proj")
        selfAttnVProj = try quantized("self_attn.v_proj")
        selfAttnPost = try quantized("self_attn.post")
        selfAttnRelativeKProj = try Self.buffer(device, floats, P + "self_attn.relative_k_proj.weight")
        selfAttnPerDimScale = try Self.buffer(device, floats, P + "self_attn.per_dim_scale")

        lconv1dPreLayerNorm = try Self.buffer(device, floats, P + "lconv1d.pre_layer_norm.weight")
        lconv1dConvNorm = try Self.buffer(device, floats, P + "lconv1d.conv_norm.weight")
        lconv1dDepthwiseConv = try Self.buffer(device, floats, P + "lconv1d.depthwise_conv1d.weight")
        lconv1dLinearStart = try quantized("lconv1d.linear_start")
        lconv1dLinearEnd = try quantized("lconv1d.linear_end")

        feedForward1 = try FeedForwardWeights(device: device, prefix: P + "feed_forward1",
                                              tensors: tensors, floats: floats,
                                              descriptors: descriptors)
        feedForward2 = try FeedForwardWeights(device: device, prefix: P + "feed_forward2",
                                              tensors: tensors, floats: floats,
                                              descriptors: descriptors)
    }
}
/// Metal buffers for one feed-forward block: its pre/post layer norms and the
/// two quantized linear layers `ffw_layer_1` and `ffw_layer_2`.
public struct FeedForwardWeights {
    public let preLayerNorm: MTLBuffer
    public let postLayerNorm: MTLBuffer
    public let ffwLayer1: QuantizedWeights
    public let ffwLayer2: QuantizedWeights

    /// Loads the feed-forward tensors whose names start with `prefix`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate every buffer.
    ///   - prefix: Tensor name prefix without a trailing dot; suffixes such as
    ///     `.pre_layer_norm.weight` are appended to it.
    ///   - tensors: Raw tensor bytes keyed by tensor name.
    ///   - floats: Float tensors keyed by tensor name.
    ///   - descriptors: Tensor descriptors keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` when a norm weight is missing,
    ///   `WeightError.bufferCreationFailed` when the device returns no buffer for it,
    ///   plus anything thrown by `AudioWeights.loadQuantized`.
    public init(device: MTLDevice, prefix: String,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        // Copies one float tensor into a freshly allocated Metal buffer.
        let floatBuffer = { (key: String) throws -> MTLBuffer in
            guard let values = floats[key] else {
                throw WeightError.tensorNotFound(key)
            }
            let byteCount = values.count * MemoryLayout<Float>.stride
            guard let buf = device.makeBuffer(bytes: values, length: byteCount) else {
                // An allocation failure is not a missing tensor; use the dedicated
                // case, matching what `AudioWeights.loadQuantized` throws.
                throw WeightError.bufferCreationFailed(key)
            }
            return buf
        }

        preLayerNorm = try floatBuffer(prefix + ".pre_layer_norm.weight")
        postLayerNorm = try floatBuffer(prefix + ".post_layer_norm.weight")
        ffwLayer1 = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                   descriptors: descriptors,
                                                   name: prefix + ".ffw_layer_1")
        ffwLayer2 = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                   descriptors: descriptors,
                                                   name: prefix + ".ffw_layer_2")
    }
}