v2: Apply tokenizer UTF-8 fix + Engine writeFloats helper
- Tokenizer fix: collect <0xXX> byte tokens and decode them together as UTF-8 (fixes decoding of Chinese and other non-ASCII characters)
- BPETokenizer and HuggingFaceTokenizer: both updated with this fix
- Engine.swift: added writeFloats() utility method
- Layer.swift: added FloatWeights struct (bf16 support)
- Model.swift: added attnQBits/attnKBits/attnVBits/attnOBits detection
- Cherry-picked bf16 layer weight support from commit 48c0347
This commit is contained in:
@@ -286,4 +286,11 @@ public final class MarkBaseEngine: @unchecked Sendable {
|
|||||||
let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
|
let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
|
||||||
return Array(UnsafeBufferPointer(start: ptr + offset, count: count))
|
return Array(UnsafeBufferPointer(start: ptr + offset, count: count))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies `values` into `buffer`, treating the buffer's contents as contiguous `Float` storage.
///
/// An empty `values` array is a no-op.
///
/// - Parameters:
///   - buffer: Destination buffer; its `contents()` pointer is written directly by the CPU.
///   - values: The floats to copy.
///   - offset: Index, in `Float` elements (not bytes), of the first element to overwrite.
///     Defaults to `0`.
/// - Precondition: `offset >= 0`, and `offset + values.count` floats fit within `buffer.length`.
///   Violations trap instead of writing outside the buffer's allocation.
///
/// NOTE(review): if `buffer` can use managed storage, the caller presumably still needs to
/// notify Metal of the modified range after this call — confirm against the call sites.
public func writeFloats(to buffer: MTLBuffer, values: [Float], offset: Int = 0) {
    // Nothing to write: keep the original behavior of touching no memory at all.
    guard !values.isEmpty else { return }

    precondition(offset >= 0, "writeFloats: offset must be non-negative")

    // Capacity is measured in whole Float elements; comparing against `capacity - offset`
    // avoids any overflow from adding `offset` and `values.count`.
    let capacity = buffer.length / MemoryLayout<Float>.stride
    precondition(
        values.count <= capacity - offset,
        "writeFloats: write of \(values.count) floats at offset \(offset) exceeds buffer capacity \(capacity)"
    )

    let destination = buffer.contents().assumingMemoryBound(to: Float.self) + offset
    for (index, value) in values.enumerated() {
        destination[index] = value
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,14 @@ public struct QuantizedWeights {
|
|||||||
public let groupSize: Int // Quantization group size (32, 64, etc.)
|
public let groupSize: Int // Quantization group size (32, 64, etc.)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Float Weights (non-quantized bf16/f32) ────────────────────────────
|
||||||
|
|
||||||
|
/// Non-quantized weight matrix held in a single Metal buffer, together with
/// the matrix's input and output dimensions.
public struct FloatWeights {
|
||||||
|
public let weight: MTLBuffer // Float32 [outDim, inDim]; NOTE(review): the section header also mentions bf16 — confirm whether bf16 data is converted to Float32 before being stored here
|
||||||
|
public let inDim: Int // Input dimension (second axis of `weight` per the shape note above)
|
||||||
|
public let outDim: Int // Output dimension (first axis of `weight` per the shape note above)
|
||||||
|
}
|
||||||
|
|
||||||
// ── Layer Configuration ──────────────────────────
|
// ── Layer Configuration ──────────────────────────
|
||||||
|
|
||||||
public struct E4BLayerConfig {
|
public struct E4BLayerConfig {
|
||||||
|
|||||||
@@ -716,9 +716,13 @@ readers = readersDict
|
|||||||
print(" layer_scalar: NOT FOUND (using 1.0)")
|
print(" layer_scalar: NOT FOUND (using 1.0)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
|
// Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
|
||||||
let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
||||||
let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4)
|
let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4)
|
||||||
|
let attnQBits = detectBits(for: "self_attn.q_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
||||||
|
let attnKBits = detectBits(for: "self_attn.k_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
||||||
|
let attnVBits = detectBits(for: "self_attn.v_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
||||||
|
let attnOBits = detectBits(for: "self_attn.o_proj", expectedInDim: hiddenSize, defaultBits: 4)
|
||||||
|
|
||||||
// Try bf16 weights first (for bf16 models)
|
// Try bf16 weights first (for bf16 models)
|
||||||
let qpFloat = try fw("self_attn.q_proj")
|
let qpFloat = try fw("self_attn.q_proj")
|
||||||
|
|||||||
@@ -201,6 +201,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private func decodeByteTokens(_ text: String) -> String {
|
private func decodeByteTokens(_ text: String) -> String {
|
||||||
|
var bytes: [UInt8] = []
|
||||||
var result = ""
|
var result = ""
|
||||||
var i = text.startIndex
|
var i = text.startIndex
|
||||||
|
|
||||||
@@ -215,7 +216,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
|
|||||||
let hexStr = String(text[hexStart..<hexEnd])
|
let hexStr = String(text[hexStart..<hexEnd])
|
||||||
|
|
||||||
if let byte = UInt8(hexStr, radix: 16) {
|
if let byte = UInt8(hexStr, radix: 16) {
|
||||||
result.append(Character(UnicodeScalar(byte)))
|
bytes.append(byte)
|
||||||
let afterHex = text.index(after: hexEnd)
|
let afterHex = text.index(after: hexEnd)
|
||||||
if afterHex < text.endIndex && text[afterHex] == ">" {
|
if afterHex < text.endIndex && text[afterHex] == ">" {
|
||||||
i = text.index(after: afterHex)
|
i = text.index(after: afterHex)
|
||||||
@@ -228,10 +229,18 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !bytes.isEmpty {
|
||||||
|
result += String(bytes: bytes, encoding: .utf8) ?? ""
|
||||||
|
bytes.removeAll()
|
||||||
|
}
|
||||||
result.append(text[i])
|
result.append(text[i])
|
||||||
i = text.index(after: i)
|
i = text.index(after: i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !bytes.isEmpty {
|
||||||
|
result += String(bytes: bytes, encoding: .utf8) ?? ""
|
||||||
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -268,11 +268,11 @@ public final class HuggingFaceTokenizer: Tokenizer {
|
|||||||
|
|
||||||
/// Decode <0xXX> byte tokens back to characters
|
/// Decode <0xXX> byte tokens back to characters
|
||||||
private func decodeByteTokens(_ text: String) -> String {
|
private func decodeByteTokens(_ text: String) -> String {
|
||||||
|
var bytes: [UInt8] = []
|
||||||
var result = ""
|
var result = ""
|
||||||
var i = text.startIndex
|
var i = text.startIndex
|
||||||
|
|
||||||
while i < text.endIndex {
|
while i < text.endIndex {
|
||||||
// Check for <0xXX> pattern
|
|
||||||
if text[i] == "<" {
|
if text[i] == "<" {
|
||||||
let nextIndex = text.index(after: i)
|
let nextIndex = text.index(after: i)
|
||||||
if nextIndex < text.endIndex && text[nextIndex] == "0" {
|
if nextIndex < text.endIndex && text[nextIndex] == "0" {
|
||||||
@@ -283,8 +283,7 @@ public final class HuggingFaceTokenizer: Tokenizer {
|
|||||||
let hexStr = String(text[hexStart..<hexEnd])
|
let hexStr = String(text[hexStart..<hexEnd])
|
||||||
|
|
||||||
if let byte = UInt8(hexStr, radix: 16) {
|
if let byte = UInt8(hexStr, radix: 16) {
|
||||||
result.append(Character(UnicodeScalar(byte)))
|
bytes.append(byte)
|
||||||
// Skip past the closing >
|
|
||||||
let afterHex = text.index(after: hexEnd)
|
let afterHex = text.index(after: hexEnd)
|
||||||
if afterHex < text.endIndex && text[afterHex] == ">" {
|
if afterHex < text.endIndex && text[afterHex] == ">" {
|
||||||
i = text.index(after: afterHex)
|
i = text.index(after: afterHex)
|
||||||
@@ -297,10 +296,18 @@ public final class HuggingFaceTokenizer: Tokenizer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !bytes.isEmpty {
|
||||||
|
result += String(bytes: bytes, encoding: .utf8) ?? ""
|
||||||
|
bytes.removeAll()
|
||||||
|
}
|
||||||
result.append(text[i])
|
result.append(text[i])
|
||||||
i = text.index(after: i)
|
i = text.index(after: i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !bytes.isEmpty {
|
||||||
|
result += String(bytes: bytes, encoding: .utf8) ?? ""
|
||||||
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user