From 31427770b16b382ce78183895cacb4b399a51833 Mon Sep 17 00:00:00 2001
From: MarkBase Admin <admin@markbase.local>
Date: Sun, 5 Jul 2026 13:41:48 +0800
Subject: [PATCH] v2: Apply tokenizer UTF-8 fix + Engine writeFloats helper

- Tokenizer fix: collect <0xXX> bytes and decode as UTF-8
  (fixes Chinese/non-ASCII character decoding)
- BPETokenizer + HuggingFaceTokenizer: both updated
- Engine.swift: added writeFloats() utility method
- FloatWeights struct added to Layer.swift (bf16 support)
- attnQBits/KBits/VBits/OBits detection added to Model.swift
- bf16 layer weight support from commit 48c0347 cherry-picked
---
 Sources/MarkBase/Engine.swift                       |  7 +++++++
 Sources/MarkBase/Layers/Layer.swift                 |  8 ++++++++
 Sources/MarkBase/Model.swift                        |  6 +++++-
 Sources/MarkBase/Tokenizer/BPETokenizer.swift       | 11 ++++++++++-
 .../MarkBase/Tokenizer/HuggingFaceTokenizer.swift   | 13 ++++++++++---
 5 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/Sources/MarkBase/Engine.swift b/Sources/MarkBase/Engine.swift
index bf6a76c..fb60715 100644
--- a/Sources/MarkBase/Engine.swift
+++ b/Sources/MarkBase/Engine.swift
@@ -286,4 +286,11 @@ public final class MarkBaseEngine: @unchecked Sendable {
         let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
         return Array(UnsafeBufferPointer(start: ptr + offset, count: count))
     }
+
+    /// Copies `values` into `buffer` as `Float`s starting at element index `offset`; traps if the write would fall outside the buffer.
+    public func writeFloats(to buffer: MTLBuffer, values: [Float], offset: Int = 0) {
+        precondition(offset >= 0 && (offset + values.count) * MemoryLayout<Float>.stride <= buffer.length, "writeFloats: range exceeds buffer length")
+        let ptr = buffer.contents().assumingMemoryBound(to: Float.self)
+        for (i, value) in values.enumerated() { ptr[offset + i] = value }
+    }
 }
diff --git a/Sources/MarkBase/Layers/Layer.swift b/Sources/MarkBase/Layers/Layer.swift
index 289b90f..3e78c02 100644
--- a/Sources/MarkBase/Layers/Layer.swift
+++ b/Sources/MarkBase/Layers/Layer.swift
@@ -13,6 +13,14 @@ public struct QuantizedWeights {
     public let groupSize: Int      // Quantization group size (32, 64, etc.)
 }
 
+// ── Float Weights (non-quantized bf16/f32) ────────────────────────────
+
+public struct FloatWeights {
+    public let weight: MTLBuffer   // Float32 [outDim, inDim]
+    public let inDim: Int          // Input dimension (second axis of `weight`)
+    public let outDim: Int         // Output dimension (first axis of `weight`)
+}
+
 // ── Layer Configuration ──────────────────────────
 
 public struct E4BLayerConfig {
diff --git a/Sources/MarkBase/Model.swift b/Sources/MarkBase/Model.swift
index 086f4bb..01e5778 100644
--- a/Sources/MarkBase/Model.swift
+++ b/Sources/MarkBase/Model.swift
@@ -716,9 +716,13 @@ readers = readersDict
                 print("    layer_scalar: NOT FOUND (using 1.0)")
             }
 
-// Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
+            // Detect quantization bits from weight shape (supports both uniform 4-bit and 8-bit MLP/router)
             let mlpGateBits = detectBits(for: "mlp.gate_proj", expectedInDim: hiddenSize, defaultBits: 4)
             let mlpDownBits = detectBits(for: "mlp.down_proj", expectedInDim: intermediate, defaultBits: 4)
+            let attnQBits = detectBits(for: "self_attn.q_proj", expectedInDim: hiddenSize, defaultBits: 4)
+            let attnKBits = detectBits(for: "self_attn.k_proj", expectedInDim: hiddenSize, defaultBits: 4)
+            let attnVBits = detectBits(for: "self_attn.v_proj", expectedInDim: hiddenSize, defaultBits: 4)
+            let attnOBits = detectBits(for: "self_attn.o_proj", expectedInDim: hiddenSize, defaultBits: 4)
             
             // Try bf16 weights first (for bf16 models)
             let qpFloat = try fw("self_attn.q_proj")
diff --git a/Sources/MarkBase/Tokenizer/BPETokenizer.swift b/Sources/MarkBase/Tokenizer/BPETokenizer.swift
index 5e275c8..c6d398a 100644
--- a/Sources/MarkBase/Tokenizer/BPETokenizer.swift
+++ b/Sources/MarkBase/Tokenizer/BPETokenizer.swift
@@ -201,6 +201,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
     }
     
     private func decodeByteTokens(_ text: String) -> String {
+        var bytes: [UInt8] = []
         var result = ""
         var i = text.startIndex
         
@@ -215,7 +216,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
                         let hexStr = String(text[hexStart..<hexEnd])
                         
                         if let byte = UInt8(hexStr, radix: 16) {
-                            result.append(Character(UnicodeScalar(byte)))
+                            bytes.append(byte)
                             let afterHex = text.index(after: hexEnd)
                             if afterHex < text.endIndex && text[afterHex] == ">" {
                                 i = text.index(after: afterHex)
@@ -228,10 +229,18 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable {
                 }
             }
             
+            if !bytes.isEmpty {
+                result += String(decoding: bytes, as: UTF8.self)  // lossy decode: malformed bytes become U+FFFD instead of dropping the whole run
+                bytes.removeAll()
+            }
             result.append(text[i])
             i = text.index(after: i)
         }
         
+        if !bytes.isEmpty {
+            result += String(decoding: bytes, as: UTF8.self)  // lossy decode: a malformed trailing run shows as U+FFFD instead of vanishing
+        }
+        
         return result
     }
 }
diff --git a/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift b/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift
index 92df4ba..ecab69d 100644
--- a/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift
+++ b/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift
@@ -268,11 +268,11 @@ public final class HuggingFaceTokenizer: Tokenizer {
     
     /// Decode <0xXX> byte tokens back to characters
     private func decodeByteTokens(_ text: String) -> String {
+        var bytes: [UInt8] = []
         var result = ""
         var i = text.startIndex
         
         while i < text.endIndex {
-            // Check for <0xXX> pattern
             if text[i] == "<" {
                 let nextIndex = text.index(after: i)
                 if nextIndex < text.endIndex && text[nextIndex] == "0" {
@@ -283,8 +283,7 @@ public final class HuggingFaceTokenizer: Tokenizer {
                         let hexStr = String(text[hexStart..<hexEnd])
                         
                         if let byte = UInt8(hexStr, radix: 16) {
-                            result.append(Character(UnicodeScalar(byte)))
-                            // Skip past the closing >
+                            bytes.append(byte)
                             let afterHex = text.index(after: hexEnd)
                             if afterHex < text.endIndex && text[afterHex] == ">" {
                                 i = text.index(after: afterHex)
@@ -297,10 +296,18 @@ public final class HuggingFaceTokenizer: Tokenizer {
                 }
             }
             
+            if !bytes.isEmpty {
+                result += String(decoding: bytes, as: UTF8.self)  // lossy decode: malformed bytes become U+FFFD instead of dropping the whole run
+                bytes.removeAll()
+            }
             result.append(text[i])
             i = text.index(after: i)
         }
         
+        if !bytes.isEmpty {
+            result += String(decoding: bytes, as: UTF8.self)  // lossy decode: a malformed trailing run shows as U+FFFD instead of vanishing
+        }
+        
         return result
     }
 }