8a66b9086a
- Started from ac75faa (initial E4B-MarkBase integration)
- Kept Sources/ (all engine code) + Package.swift + .gitignore
- Removed all ad-hoc tests, documentation, scripts, Python files
- Added Tests/00_Unit/ (MathTest, TokenizerTest, SamplerTest)
- Added .gitea/workflows/ci.yaml (build + unit tests + lint)
- Added Scripts/check_resources.sh (memory-aware test runner)
- Added Tests/Manifest.json (resource requirements for all tests)
- Focus: 4-bit quantized models only
210 lines
10 KiB
Swift
210 lines
10 KiB
Swift
import Metal
|
|
import Foundation
|
|
|
|
/// Weights for the audio tower, uploaded into Metal buffers at construction time.
///
/// Every tensor is looked up under the `audio_tower.` key prefix. Plain tensors
/// are read from the `floats` dictionary and stored as Float32 buffers;
/// quantized projections are assembled by `loadQuantized`.
public final class AudioWeights {
    /// Conv/norm weights read from `subsample_conv_projection.layer0`.
    public let subsampleConvLayer0: SubsampleConvLayer
    /// Conv/norm weights read from `subsample_conv_projection.layer1`.
    public let subsampleConvLayer1: SubsampleConvLayer
    /// Read from `subsample_conv_projection.input_proj_linear.weight`.
    public let inputProjLinearWeight: MTLBuffer  // Float32, not quantized

    /// Quantized projection read from `output_proj.{weight,scales,biases}`.
    public let outputProj: QuantizedWeights
    /// Read from `output_proj.bias`.
    public let outputProjBias: MTLBuffer

    /// One entry per layer index in `0..<config.numHiddenLayers`, in order.
    public let layers: [AudioLayerWeights]

    /// Loads every audio-tower tensor and uploads it to `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate the buffers.
    ///   - config: Supplies `numHiddenLayers`, the number of layers to load.
    ///   - tensors: Raw tensor payloads keyed by tensor name (used for packed weights).
    ///   - floats: Float tensors keyed by tensor name.
    ///   - descriptors: Tensor descriptors keyed by tensor name (used for shapes).
    /// - Throws: `WeightError.tensorNotFound` when a required key is missing,
    ///   `WeightError.bufferCreationFailed` when a buffer cannot be allocated,
    ///   plus anything thrown by `AudioLayerWeights.init`.
    public init(device: MTLDevice, config: AudioConfig,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        let P = "audio_tower."

        subsampleConvLayer0 = SubsampleConvLayer(
            convWeight: try Self.buffer(device, floats, P + "subsample_conv_projection.layer0.conv.weight"),
            normWeight: try Self.buffer(device, floats, P + "subsample_conv_projection.layer0.norm.weight")
        )

        subsampleConvLayer1 = SubsampleConvLayer(
            convWeight: try Self.buffer(device, floats, P + "subsample_conv_projection.layer1.conv.weight"),
            normWeight: try Self.buffer(device, floats, P + "subsample_conv_projection.layer1.norm.weight")
        )

        inputProjLinearWeight = try Self.buffer(device, floats, P + "subsample_conv_projection.input_proj_linear.weight")

        outputProj = try Self.loadQuantized(device: device, tensors: tensors, floats: floats,
                                            descriptors: descriptors,
                                            name: P + "output_proj")
        outputProjBias = try Self.buffer(device, floats, P + "output_proj.bias")

        // The closure does not touch `self`, so it is safe to run before
        // every stored property has been initialised.
        layers = try (0..<config.numHiddenLayers).map { layerIdx in
            try AudioLayerWeights(device: device, layerIdx: layerIdx,
                                  tensors: tensors, floats: floats,
                                  descriptors: descriptors)
        }
    }

    // ── Helpers ──

    /// Copies the float tensor stored under `key` into a new Metal buffer.
    ///
    /// - Throws: `WeightError.tensorNotFound` if `key` is absent from `floats`;
    ///   `WeightError.bufferCreationFailed` if the tensor is empty or Metal
    ///   cannot allocate the buffer.
    private static func buffer(_ device: MTLDevice, _ floats: [String: [Float]],
                               _ key: String) throws -> MTLBuffer {
        guard let values = floats[key] else {
            throw WeightError.tensorNotFound(key)
        }
        // A zero-length buffer is not a valid Metal allocation, so an empty
        // tensor is reported as an allocation failure rather than passed through.
        guard !values.isEmpty,
              let buf = device.makeBuffer(bytes: values,
                                          length: values.count * MemoryLayout<Float>.stride) else {
            throw WeightError.bufferCreationFailed(key)
        }
        return buf
    }

    /// Builds a 4-bit `QuantizedWeights` from `<name>.weight`, `<name>.scales`
    /// and `<name>.biases`.
    ///
    /// The packed weight bytes come from `tensors`, the scales and biases from
    /// `floats`, and the dimensions from `descriptors`.
    ///
    /// - Throws: `WeightError.tensorNotFound(name)` if any of the three tensors
    ///   or the weight/scales descriptors is missing;
    ///   `WeightError.bufferCreationFailed` (with the offending tensor name) if a
    ///   tensor is empty or its buffer cannot be allocated.
    static func loadQuantized(device: MTLDevice, tensors: [String: Data],
                              floats: [String: [Float]],
                              descriptors: [String: TensorDescriptor],
                              name: String) throws -> QuantizedWeights {
        let wName = name + ".weight"
        let sName = name + ".scales"
        let bName = name + ".biases"

        guard let wData = tensors[wName],
              let sFloats = floats[sName],
              let bFloats = floats[bName],
              let wDesc = descriptors[wName],
              let sDesc = descriptors[sName] else {
            throw WeightError.tensorNotFound(name)
        }

        // Dimensions from descriptors:
        //   weight: [outDim, inDim/8]    (U32 packed, 8 values per U32)
        //   scales: [outDim, numGroups]  where numGroups = inDim / groupSize
        // NOTE(review): the subscripts below trap if a descriptor has fewer
        // dimensions than expected; confirm descriptors are validated upstream.
        let outDim = wDesc.shape[0]
        let numGroups = sDesc.shape[1]
        let groupSize = 64  // Audio uses fixed group_size=64
        let inDim = numGroups * groupSize

        // Copy straight out of the Data storage while it is pinned by
        // `withUnsafeBytes`; a pointer taken from a temporary bridged object
        // is not guaranteed to stay valid for the duration of the call.
        let packedWeights = wData.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> MTLBuffer? in
            guard let base = raw.baseAddress, raw.count > 0 else { return nil }
            return device.makeBuffer(bytes: base, length: raw.count,
                                     options: .storageModeShared)
        }
        guard let wBuf = packedWeights else {
            throw WeightError.bufferCreationFailed(wName)
        }
        guard !sFloats.isEmpty,
              let sBuf = device.makeBuffer(bytes: sFloats,
                                           length: sFloats.count * MemoryLayout<Float>.stride,
                                           options: .storageModeShared) else {
            throw WeightError.bufferCreationFailed(sName)
        }
        guard !bFloats.isEmpty,
              let bBuf = device.makeBuffer(bytes: bFloats,
                                           length: bFloats.count * MemoryLayout<Float>.stride,
                                           options: .storageModeShared) else {
            throw WeightError.bufferCreationFailed(bName)
        }

        return QuantizedWeights(weight: wBuf, scales: sBuf, biases: bBuf,
                                inDim: inDim, outDim: outDim, bits: 4, groupSize: groupSize)
    }
}
|
|
|
|
/// Pair of Metal buffers holding one subsampling-convolution layer's weights.
public struct SubsampleConvLayer {
    /// Buffer for the layer's `conv.weight` tensor.
    public let convWeight: MTLBuffer

    /// Buffer for the layer's `norm.weight` tensor.
    public let normWeight: MTLBuffer
}
|
|
|
|
/// Weights for a single audio-tower layer, keyed under
/// `audio_tower.layers.<layerIdx>.`.
public struct AudioLayerWeights {
    // Norm weights (`norm_pre_attn`, `norm_post_attn`, `norm_out`).
    public let normPreAttn: MTLBuffer
    public let normPostAttn: MTLBuffer
    public let normOut: MTLBuffer

    // Self-attention: quantized projections plus two plain float tensors.
    public let selfAttnQProj: QuantizedWeights
    public let selfAttnKProj: QuantizedWeights
    public let selfAttnVProj: QuantizedWeights
    public let selfAttnPost: QuantizedWeights
    public let selfAttnRelativeKProj: MTLBuffer
    public let selfAttnPerDimScale: MTLBuffer

    // `lconv1d` tensors: norms, depthwise conv and the two quantized linears.
    public let lconv1dPreLayerNorm: MTLBuffer
    public let lconv1dConvNorm: MTLBuffer
    public let lconv1dDepthwiseConv: MTLBuffer
    public let lconv1dLinearStart: QuantizedWeights
    public let lconv1dLinearEnd: QuantizedWeights

    // The two feed-forward sub-blocks (`feed_forward1`, `feed_forward2`).
    public let feedForward1: FeedForwardWeights
    public let feedForward2: FeedForwardWeights

    /// Copies the float tensor stored under `key` into a new Metal buffer.
    ///
    /// - Throws: `WeightError.tensorNotFound` if `key` is absent from `floats`;
    ///   `WeightError.bufferCreationFailed` if the tensor is empty or Metal
    ///   cannot allocate the buffer.
    private static func buffer(_ device: MTLDevice, _ floats: [String: [Float]],
                               _ key: String) throws -> MTLBuffer {
        guard let values = floats[key] else {
            throw WeightError.tensorNotFound(key)
        }
        // A zero-length buffer is not a valid Metal allocation, so an empty
        // tensor is reported as an allocation failure rather than passed through.
        guard !values.isEmpty,
              let buf = device.makeBuffer(bytes: values,
                                          length: values.count * MemoryLayout<Float>.stride) else {
            throw WeightError.bufferCreationFailed(key)
        }
        return buf
    }

    /// Loads every tensor of layer `layerIdx` and uploads it to `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate the buffers.
    ///   - layerIdx: Layer index interpolated into the tensor key prefix.
    ///   - tensors: Raw tensor payloads keyed by tensor name.
    ///   - floats: Float tensors keyed by tensor name.
    ///   - descriptors: Tensor descriptors keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` when a required key is missing,
    ///   `WeightError.bufferCreationFailed` when a buffer cannot be allocated,
    ///   plus anything thrown by `AudioWeights.loadQuantized` or
    ///   `FeedForwardWeights.init`.
    public init(device: MTLDevice, layerIdx: Int,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        let P = "audio_tower.layers.\(layerIdx)."

        normPreAttn = try Self.buffer(device, floats, P + "norm_pre_attn.weight")
        normPostAttn = try Self.buffer(device, floats, P + "norm_post_attn.weight")
        normOut = try Self.buffer(device, floats, P + "norm_out.weight")

        selfAttnQProj = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                       descriptors: descriptors,
                                                       name: P + "self_attn.q_proj")
        selfAttnKProj = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                       descriptors: descriptors,
                                                       name: P + "self_attn.k_proj")
        selfAttnVProj = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                       descriptors: descriptors,
                                                       name: P + "self_attn.v_proj")
        selfAttnPost = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                      descriptors: descriptors,
                                                      name: P + "self_attn.post")

        selfAttnRelativeKProj = try Self.buffer(device, floats, P + "self_attn.relative_k_proj.weight")
        selfAttnPerDimScale = try Self.buffer(device, floats, P + "self_attn.per_dim_scale")

        lconv1dPreLayerNorm = try Self.buffer(device, floats, P + "lconv1d.pre_layer_norm.weight")
        lconv1dConvNorm = try Self.buffer(device, floats, P + "lconv1d.conv_norm.weight")
        lconv1dDepthwiseConv = try Self.buffer(device, floats, P + "lconv1d.depthwise_conv1d.weight")

        lconv1dLinearStart = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                            descriptors: descriptors,
                                                            name: P + "lconv1d.linear_start")
        lconv1dLinearEnd = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                          descriptors: descriptors,
                                                          name: P + "lconv1d.linear_end")

        feedForward1 = try FeedForwardWeights(device: device, prefix: P + "feed_forward1",
                                              tensors: tensors, floats: floats,
                                              descriptors: descriptors)
        feedForward2 = try FeedForwardWeights(device: device, prefix: P + "feed_forward2",
                                              tensors: tensors, floats: floats,
                                              descriptors: descriptors)
    }
}
|
|
|
|
/// Weights for one feed-forward sub-block: two layer-norm tensors and two
/// quantized linear layers, all keyed under a caller-supplied prefix.
public struct FeedForwardWeights {
    /// Read from `<prefix>.pre_layer_norm.weight`.
    public let preLayerNorm: MTLBuffer
    /// Read from `<prefix>.post_layer_norm.weight`.
    public let postLayerNorm: MTLBuffer
    /// Quantized layer read from `<prefix>.ffw_layer_1`.
    public let ffwLayer1: QuantizedWeights
    /// Quantized layer read from `<prefix>.ffw_layer_2`.
    public let ffwLayer2: QuantizedWeights

    /// Loads the feed-forward tensors found under `prefix` and uploads them to `device`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to allocate the buffers.
    ///   - prefix: Key prefix without a trailing dot (a `.` is appended per tensor).
    ///   - tensors: Raw tensor payloads keyed by tensor name.
    ///   - floats: Float tensors keyed by tensor name.
    ///   - descriptors: Tensor descriptors keyed by tensor name.
    /// - Throws: `WeightError.tensorNotFound` when a required key is missing,
    ///   `WeightError.bufferCreationFailed` when a buffer cannot be allocated,
    ///   plus anything thrown by `AudioWeights.loadQuantized`.
    public init(device: MTLDevice, prefix: String,
                tensors: [String: Data], floats: [String: [Float]],
                descriptors: [String: TensorDescriptor]) throws {
        // Copies one float tensor into a Metal buffer. The closure captures only
        // `device` and `floats`, so it can be used before `self` is initialised.
        let floatBuffer = { (key: String) throws -> MTLBuffer in
            guard let values = floats[key] else { throw WeightError.tensorNotFound(key) }
            // A zero-length buffer is not a valid Metal allocation, so an empty
            // tensor is reported as an allocation failure rather than passed through.
            guard !values.isEmpty,
                  let buf = device.makeBuffer(bytes: values,
                                              length: values.count * MemoryLayout<Float>.stride) else {
                throw WeightError.bufferCreationFailed(key)
            }
            return buf
        }

        preLayerNorm = try floatBuffer(prefix + ".pre_layer_norm.weight")
        postLayerNorm = try floatBuffer(prefix + ".post_layer_norm.weight")

        ffwLayer1 = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                   descriptors: descriptors,
                                                   name: prefix + ".ffw_layer_1")
        ffwLayer2 = try AudioWeights.loadQuantized(device: device, tensors: tensors, floats: floats,
                                                   descriptors: descriptors,
                                                   name: prefix + ".ffw_layer_2")
    }
}
|