diff --git a/Sources/MarkBase/Engine.swift b/Sources/MarkBase/Engine.swift index bf6a76c..fb60715 100644 --- a/Sources/MarkBase/Engine.swift +++ b/Sources/MarkBase/Engine.swift @@ -286,4 +286,11 @@ public final class MarkBaseEngine: @unchecked Sendable { let ptr = buffer.contents().assumingMemoryBound(to: Float.self) return Array(UnsafeBufferPointer(start: ptr + offset, count: count)) } + + public func writeFloats(to buffer: MTLBuffer, values: [Float], offset: Int = 0) { + let ptr = buffer.contents().assumingMemoryBound(to: Float.self) + for i in 0.. String { + var bytes: [UInt8] = [] var result = "" var i = text.startIndex @@ -215,7 +216,7 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable { let hexStr = String(text[hexStart.." { i = text.index(after: afterHex) @@ -228,10 +229,18 @@ public final class BPETokenizer: Tokenizer, @unchecked Sendable { } } + if !bytes.isEmpty { + result += String(bytes: bytes, encoding: .utf8) ?? "" + bytes.removeAll() + } result.append(text[i]) i = text.index(after: i) } + if !bytes.isEmpty { + result += String(bytes: bytes, encoding: .utf8) ?? "" + } + return result } } diff --git a/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift b/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift index 92df4ba..ecab69d 100644 --- a/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift +++ b/Sources/MarkBase/Tokenizer/HuggingFaceTokenizer.swift @@ -268,11 +268,11 @@ public final class HuggingFaceTokenizer: Tokenizer { /// Decode <0xXX> byte tokens back to characters private func decodeByteTokens(_ text: String) -> String { + var bytes: [UInt8] = [] var result = "" var i = text.startIndex while i < text.endIndex { - // Check for <0xXX> pattern if text[i] == "<" { let nextIndex = text.index(after: i) if nextIndex < text.endIndex && text[nextIndex] == "0" { @@ -283,8 +283,7 @@ public final class HuggingFaceTokenizer: Tokenizer { let hexStr = String(text[hexStart.. + bytes.append(byte) let afterHex = text.index(after: hexEnd) if afterHex < text.endIndex && text[afterHex] == ">" { i = text.index(after: afterHex) @@ -297,10 +296,18 @@ public final class HuggingFaceTokenizer: Tokenizer { } } + if !bytes.isEmpty { + result += String(bytes: bytes, encoding: .utf8) ?? "" + bytes.removeAll() + } result.append(text[i]) i = text.index(after: i) } + if !bytes.isEmpty { + result += String(bytes: bytes, encoding: .utf8) ?? "" + } + return result } }