v2: Apply tokenizer UTF-8 fix + Engine writeFloats helper

- Tokenizer fix: collect <0xXX> bytes and decode them as UTF-8 (fixes decoding of Chinese and other non-ASCII characters)
- BPETokenizer + HuggingFaceTokenizer: both updated
- Engine.swift: added writeFloats() utility method
- FloatWeights struct added to Layer.swift (bf16 support)
- attnQBits/KBits/VBits/OBits detection added to Model.swift
- bf16 layer weight support cherry-picked from commit 48c0347
2026-07-05 13:41:48 +08:00
parent 5a94501f95
commit 31427770b1
5 changed files with 40 additions and 5 deletions
@@ -286,4 +286,11 @@ public final class MarkBaseEngine: @unchecked Sendable {
        let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
        return Array(UnsafeBufferPointer(start: ptr + offset, count: count))
    }
    /// Copies `values` into `buffer`, treating the buffer's contents as a contiguous array of `Float`.
    ///
    /// - Parameters:
    ///   - buffer: Destination buffer; the memory returned by `contents()` is written directly.
    ///   - values: Floats to write. An empty array is a no-op.
    ///   - offset: Index of the first destination element, counted in `Float` elements (not bytes).
    /// - Precondition: `offset >= 0` and the range `offset ..< offset + values.count` lies within
    ///   `buffer.length` bytes. Violations trap instead of silently writing out of bounds.
    ///
    /// NOTE(review): assumes the buffer's storage is CPU-accessible; if managed-storage buffers are
    /// used, the caller may also need to notify Metal of the modified range — confirm against callers.
    public func writeFloats(to buffer: MTLBuffer, values: [Float], offset: Int = 0) {
        guard !values.isEmpty else { return }

        // `buffer.length` is in bytes; convert to a Float element capacity before comparing.
        let capacity = buffer.length / MemoryLayout<Float>.stride
        precondition(offset >= 0, "writeFloats: offset must be non-negative")
        // Written as `capacity - offset` so a huge `offset` cannot overflow the comparison.
        precondition(values.count <= capacity - offset, "writeFloats: write range exceeds buffer length")

        let destination = buffer.contents().assumingMemoryBound(to: Float.self) + offset
        values.withUnsafeBufferPointer { source in
            guard let base = source.baseAddress else { return }
            // Single bulk copy instead of an element-by-element loop.
            destination.update(from: base, count: source.count)
        }
    }
 }
@@ -13,6 +13,14 @@ public struct QuantizedWeights {
    public let groupSize: Int      // Quantization group size (32, 64, etc.)
 }
 // ── Float Weights (non-quantized bf16/f32) ────────────────────────────
 /// A non-quantized weight matrix stored in a Metal buffer, together with its two dimensions.
 ///
 /// NOTE(review): the field comment below states the buffer holds Float32 values; whether bf16
 /// source weights are widened to Float32 before being stored here is not visible in this
 /// section — confirm against the loader.
 public struct FloatWeights {
    /// Buffer holding the weight values, described by the original author as `[outDim, inDim]`.
    public let weight: MTLBuffer   // Float32 [outDim, inDim]
    /// Input dimension of the weight matrix.
    public let inDim: Int
    /// Output dimension of the weight matrix.
    public let outDim: Int
 }
 // ── Layer Configuration ──────────────────────────
 public struct E4BLayerConfig {
@@ -716,9 +716,13 @@ readers = readersDict
                print("    layer_scalar: NOT FOUND (using 1.0)")
            }
-// Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
+            // Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
            let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4)
            let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4)
            let attnQBits = detectBits(for: "self_attn.q_proj", expectedInDim: hiddenSize, defaultBits: 4)
            let attnKBits = detectBits(for: "self_attn.k_proj", expectedInDim: hiddenSize, defaultBits: 4)
            let attnVBits = detectBits(for: "self_attn.v_proj", expectedInDim: hiddenSize, defaultBits: 4)
            let attnOBits = detectBits(for: "self_attn.o_proj", expectedInDim: hiddenSize, defaultBits: 4)
            // Try bf16 weights first (for bf16 models)
            let qpFloat = try fw("self_attn.q_proj")
@@ -201,6 +201,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
    }
    private func decodeByteTokens(_ text: String) -> String {
        var bytes: [UInt8] = []
        var result = ""
        var i = text.startIndex
@@ -215,7 +216,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
                        let hexStr = String(text[hexStart..<hexEnd])
                        if let byte = UInt8(hexStr, radix: 16) {
-                            result.append(Character(UnicodeScalar(byte)))
+                            bytes.append(byte)
                            let afterHex = text.index(after: hexEnd)
                            if afterHex < text.endIndex && text[afterHex] == ">" {
                                i = text.index(after: afterHex)
@@ -228,10 +229,18 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
                }
            }
            if !bytes.isEmpty {
                result += String(bytes: bytes, encoding: .utf8) ?? ""
                bytes.removeAll()
            }
            result.append(text[i])
            i = text.index(after: i)
        }
        if !bytes.isEmpty {
            result += String(bytes: bytes, encoding: .utf8) ?? ""
        }
        return result
    }
 }
@@ -268,11 +268,11 @@ public final class HuggingFaceTokenizer: Tokenizer {
    /// Decode <0xXX> byte tokens back to characters
    private func decodeByteTokens(_ text: String) -> String {
        var bytes: [UInt8] = []
        var result = ""
        var i = text.startIndex
        while i < text.endIndex {
            // Check for <0xXX> pattern
            if text[i] == "<" {
                let nextIndex = text.index(after: i)
                if nextIndex < text.endIndex && text[nextIndex] == "0" {
@@ -283,8 +283,7 @@ public final class HuggingFaceTokenizer: Tokenizer {
                        let hexStr = String(text[hexStart..<hexEnd])
                        if let byte = UInt8(hexStr, radix: 16) {
-                            result.append(Character(UnicodeScalar(byte)))
+                            bytes.append(byte)
                            // Skip past the closing >
                            let afterHex = text.index(after: hexEnd)
                            if afterHex < text.endIndex && text[afterHex] == ">" {
                                i = text.index(after: afterHex)
@@ -297,10 +296,18 @@ public final class HuggingFaceTokenizer: Tokenizer {
                }
            }
            if !bytes.isEmpty {
                result += String(bytes: bytes, encoding: .utf8) ?? ""
                bytes.removeAll()
            }
            result.append(text[i])
            i = text.index(after: i)
        }
        if !bytes.isEmpty {
            result += String(bytes: bytes, encoding: .utf8) ?? ""
        }
        return result
    }
 }