v2: Apply tokenizer UTF-8 fix + Engine writeFloats helper
CI / build (push) Waiting to run
CI / unit-tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions

- Tokenizer fix: collect <0xXX> bytes and decode as UTF-8
  (fixes Chinese/non-ASCII character decoding)
- BPETokenizer + HuggingFaceTokenizer: both updated
- Engine.swift: added writeFloats() utility method
- FloatWeights struct added to Layer.swift (bf16 support)
- attnQBits/KBits/VBits/OBits detection added to Model.swift
- bf16 layer weight support from commit 48c0347 cherry-picked
This commit is contained in:
MarkBase Admin
2026-07-05 13:41:48 +08:00
parent 5a94501f95
commit 31427770b1
5 changed files with 40 additions and 5 deletions
+7
View File
@@ -286,4 +286,11 @@ public final class MarkBaseEngine: @unchecked Sendable {
let ptr = buffer.contents().assumingMemoryBound(to: Float.self) let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
return Array(UnsafeBufferPointer(start: ptr + offset, count: count)) return Array(UnsafeBufferPointer(start: ptr + offset, count: count))
} }
/// Copies `values` into `buffer`, treating the buffer's contents as a
/// contiguous array of `Float`.
///
/// - Parameters:
///   - buffer: Destination Metal buffer; written through `contents()`.
///   - values: Floats to copy. An empty array is a no-op.
///   - offset: Index of the first destination element, counted in `Float`
///     elements (not bytes). Defaults to 0.
/// - Precondition: `offset >= 0` and `offset + values.count` floats fit
///   within `buffer.length` bytes. Violations trap instead of writing
///   outside the allocation.
///
/// NOTE(review): this method does not call `didModifyRange(_:)`; if any
/// caller uses a managed-storage buffer it presumably has to do that
/// itself — confirm against the buffer creation sites.
public func writeFloats(to buffer: MTLBuffer, values: [Float], offset: Int = 0) {
    guard !values.isEmpty else { return }

    let stride = MemoryLayout<Float>.stride
    // Number of whole floats the buffer can hold.
    let capacity = buffer.length / stride

    // Checked in this order so `capacity - offset` can never underflow.
    precondition(offset >= 0, "writeFloats: offset must be non-negative")
    precondition(offset <= capacity, "writeFloats: offset is past the end of the buffer")
    precondition(values.count <= capacity - offset, "writeFloats: values do not fit in the buffer")

    // One bulk copy instead of a per-element store loop.
    values.withUnsafeBytes { source in
        guard let base = source.baseAddress else { return }
        buffer.contents()
            .advanced(by: offset * stride)
            .copyMemory(from: base, byteCount: source.count)
    }
}
} }
+8
View File
@@ -13,6 +13,14 @@ public struct QuantizedWeights {
public let groupSize: Int // Quantization group size (32, 64, etc.) public let groupSize: Int // Quantization group size (32, 64, etc.)
} }
// Float Weights (non-quantized bf16/f32)
/// A single non-quantized weight matrix stored in a Metal buffer, together
/// with the two dimensions that describe its layout.
///
/// NOTE(review): the section comment mentions bf16 while the field comment
/// below says Float32 — presumably bf16 tensors are widened to Float32
/// before being stored here; confirm against the loader.
public struct FloatWeights {
public let weight: MTLBuffer // Float32 [outDim, inDim]
// Presumably the input (column) dimension of `weight`, per the [outDim, inDim] note above — confirm.
public let inDim: Int
// Presumably the output (row) dimension of `weight`, per the [outDim, inDim] note above — confirm.
public let outDim: Int
}
// Layer Configuration // Layer Configuration
public struct E4BLayerConfig { public struct E4BLayerConfig {
+5 -1
View File
@@ -716,9 +716,13 @@ readers = readersDict
print(" layer_scalar: NOT FOUND (using 1.0)") print(" layer_scalar: NOT FOUND (using 1.0)")
} }
// Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router) // Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4) let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4)
let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4) let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4)
let attnQBits = detectBits(for: "self_attn.q_proj", expectedInDim: hiddenSize, defaultBits: 4)
let attnKBits = detectBits(for: "self_attn.k_proj", expectedInDim: hiddenSize, defaultBits: 4)
let attnVBits = detectBits(for: "self_attn.v_proj", expectedInDim: hiddenSize, defaultBits: 4)
let attnOBits = detectBits(for: "self_attn.o_proj", expectedInDim: hiddenSize, defaultBits: 4)
// Try bf16 weights first (for bf16 models) // Try bf16 weights first (for bf16 models)
let qpFloat = try fw("self_attn.q_proj") let qpFloat = try fw("self_attn.q_proj")
+10 -1
View File
@@ -201,6 +201,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
} }
private func decodeByteTokens(_ text: String) -> String { private func decodeByteTokens(_ text: String) -> String {
var bytes: [UInt8] = []
var result = "" var result = ""
var i = text.startIndex var i = text.startIndex
@@ -215,7 +216,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
let hexStr = String(text[hexStart..<hexEnd]) let hexStr = String(text[hexStart..<hexEnd])
if let byte = UInt8(hexStr, radix: 16) { if let byte = UInt8(hexStr, radix: 16) {
result.append(Character(UnicodeScalar(byte))) bytes.append(byte)
let afterHex = text.index(after: hexEnd) let afterHex = text.index(after: hexEnd)
if afterHex < text.endIndex && text[afterHex] == ">" { if afterHex < text.endIndex && text[afterHex] == ">" {
i = text.index(after: afterHex) i = text.index(after: afterHex)
@@ -228,10 +229,18 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
} }
} }
if !bytes.isEmpty {
result += String(bytes: bytes, encoding: .utf8) ?? ""
bytes.removeAll()
}
result.append(text[i]) result.append(text[i])
i = text.index(after: i) i = text.index(after: i)
} }
if !bytes.isEmpty {
result += String(bytes: bytes, encoding: .utf8) ?? ""
}
return result return result
} }
} }
@@ -268,11 +268,11 @@ public final class HuggingFaceTokenizer: Tokenizer {
/// Decode <0xXX> byte tokens back to characters /// Decode <0xXX> byte tokens back to characters
private func decodeByteTokens(_ text: String) -> String { private func decodeByteTokens(_ text: String) -> String {
var bytes: [UInt8] = []
var result = "" var result = ""
var i = text.startIndex var i = text.startIndex
while i < text.endIndex { while i < text.endIndex {
// Check for <0xXX> pattern
if text[i] == "<" { if text[i] == "<" {
let nextIndex = text.index(after: i) let nextIndex = text.index(after: i)
if nextIndex < text.endIndex && text[nextIndex] == "0" { if nextIndex < text.endIndex && text[nextIndex] == "0" {
@@ -283,8 +283,7 @@ public final class HuggingFaceTokenizer: Tokenizer {
let hexStr = String(text[hexStart..<hexEnd]) let hexStr = String(text[hexStart..<hexEnd])
if let byte = UInt8(hexStr, radix: 16) { if let byte = UInt8(hexStr, radix: 16) {
result.append(Character(UnicodeScalar(byte))) bytes.append(byte)
// Skip past the closing >
let afterHex = text.index(after: hexEnd) let afterHex = text.index(after: hexEnd)
if afterHex < text.endIndex && text[afterHex] == ">" { if afterHex < text.endIndex && text[afterHex] == ">" {
i = text.index(after: afterHex) i = text.index(after: afterHex)
@@ -297,10 +296,18 @@ public final class HuggingFaceTokenizer: Tokenizer {
} }
} }
if !bytes.isEmpty {
result += String(bytes: bytes, encoding: .utf8) ?? ""
bytes.removeAll()
}
result.append(text[i]) result.append(text[i])
i = text.index(after: i) i = text.index(after: i)
} }
if !bytes.isEmpty {
result += String(bytes: bytes, encoding: .utf8) ?? ""
}
return result return result
} }
} }