8a66b9086a
- Started from ac75faa (initial E4B-MarkBase integration)
- Kept Sources/ (all engine code) + Package.swift + .gitignore
- Removed all ad-hoc tests, documentation, scripts, Python files
- Added Tests/00_Unit/ (MathTest, TokenizerTest, SamplerTest)
- Added .gitea/workflows/ci.yaml (build + unit tests + lint)
- Added Scripts/check_resources.sh (memory-aware test runner)
- Added Tests/Manifest.json (resource requirements for all tests)
- Focus: 4-bit quantized models only
58 lines
2.0 KiB
Swift
58 lines
2.0 KiB
Swift
import XCTest
|
|
@testable import MarkBase
|
|
|
|
/// Tests for the tokenizer loaded from the E4B-MarkBase model directory.
///
/// The tests need the model directory to exist on disk. When it is missing,
/// or the tokenizer could not be loaded, the tests are skipped rather than
/// failed; `testTokenizerAvailable` is the one test that fails when the
/// directory exists but loading did not succeed.
final class TokenizerTest: XCTestCase {

    // MARK: - Fixture configuration

    /// Environment variable that, when set to a non-empty value, overrides
    /// the model directory (useful on machines where the default path does
    /// not exist, e.g. CI runners).
    private static let modelDirEnvironmentKey = "MARKBASE_MODEL_DIR"

    /// Model directory used when no override is provided.
    private static let defaultModelDir = "/Users/accusys/MarkBaseEngine/models/E4B-MarkBase"

    /// Directory the tokenizer is loaded from for every test in this class.
    private static var modelDir: String {
        if let override = ProcessInfo.processInfo.environment[modelDirEnvironmentKey],
           !override.isEmpty {
            return override
        }
        return defaultModelDir
    }

    // MARK: - Fixture state

    /// Tokenizer under test; stays `nil` when the model directory is missing
    /// or loading failed.
    var tokenizer: Tokenizing!

    /// Error thrown by the last load attempt, kept so that
    /// `testTokenizerAvailable` can report why loading failed.
    private var loadError: Error?

    // MARK: - Lifecycle

    /// Attempts to load the tokenizer. Never fails the test by itself:
    /// a missing directory or a load error only leaves `tokenizer` as `nil`.
    override func setUp() {
        super.setUp()
        loadError = nil

        let modelDir = TokenizerTest.modelDir
        guard FileManager.default.fileExists(atPath: modelDir) else {
            return
        }

        do {
            tokenizer = try TokenizerFactory.load(modelDir: modelDir)
        } catch {
            // Keep the best-effort behavior (dependent tests skip), but
            // remember the reason instead of silently discarding it.
            tokenizer = nil
            loadError = error
        }
    }

    /// Releases the tokenizer after each test. XCTest keeps test case
    /// instances alive until the run finishes, so without this every
    /// instance would hold on to its own loaded tokenizer.
    override func tearDown() {
        tokenizer = nil
        loadError = nil
        super.tearDown()
    }

    // MARK: - Tests

    /// Skips when the model directory is absent; otherwise requires that the
    /// tokenizer was loaded in `setUp`.
    ///
    /// - Throws: `XCTSkip` when the model directory does not exist.
    func testTokenizerAvailable() throws {
        guard FileManager.default.fileExists(atPath: TokenizerTest.modelDir) else {
            throw XCTSkip("E4B-MarkBase model not found")
        }

        // Append the recorded load error, if any, to make failures actionable.
        let reason = loadError.map { ": \($0)" } ?? ""
        XCTAssertNotNil(tokenizer, "Tokenizer should load successfully\(reason)")
    }

    /// Encodes and decodes several sample strings and expects the decoded
    /// text to match the input. The comparison ignores letter case.
    ///
    /// - Throws: `XCTSkip` when the tokenizer is not available.
    func testEncodeDecodeRoundtrip() throws {
        try XCTSkipIf(tokenizer == nil, "Tokenizer not available")

        let inputs = ["Hello", "Hello World", "test", "123", "你好"]
        for input in inputs {
            let tokens = tokenizer.encode(text: input)
            let decoded = tokenizer.decode(tokens: tokens)
            XCTAssertEqual(decoded.lowercased(), input.lowercased(),
                           "Roundtrip failed for '\(input)': got '\(decoded)'")
        }
    }

    /// Expects a round trip of text containing a space to still contain a space.
    ///
    /// - Throws: `XCTSkip` when the tokenizer is not available.
    func testSpacePreservation() throws {
        try XCTSkipIf(tokenizer == nil, "Tokenizer not available")

        let input = "Hello World"
        let tokens = tokenizer.encode(text: input)
        let decoded = tokenizer.decode(tokens: tokens)
        XCTAssertTrue(decoded.contains(" "), "Spaces should be preserved in '\(decoded)'")
    }

    /// Expects encoding the empty string to yield at least one token.
    /// The test's intent is that this token is a prepended BOS token; only
    /// non-emptiness is actually checked here.
    ///
    /// - Throws: `XCTSkip` when the tokenizer is not available.
    func testSpecialTokens() throws {
        try XCTSkipIf(tokenizer == nil, "Tokenizer not available")

        // BOS token should exist
        let bosToken = tokenizer.encode(text: "")
        XCTAssertFalse(bosToken.isEmpty, "BOS token should be prepended")
    }

    /// Smoke test: encoding and then decoding the empty string must complete
    /// and produce a value. The decoded content itself is not checked.
    ///
    /// - Throws: `XCTSkip` when the tokenizer is not available.
    func testEmptyString() throws {
        try XCTSkipIf(tokenizer == nil, "Tokenizer not available")

        let tokens = tokenizer.encode(text: "")
        let decoded = tokenizer.decode(tokens: tokens)
        XCTAssertNotNil(decoded)
    }
}
|