//go:build linux && !cgo
// +build linux,!cgo

/*
Copyright 2024 The GoStor Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package numa
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"unsafe"
|
|
)
|
|
|
|
// Syscall numbers for x86_64 Linux.
//
// The values below come from the amd64 syscall table; other architectures
// number these calls differently.
// NOTE(review): the build constraint of this file does not restrict GOARCH;
// confirm that non-amd64 builds are handled elsewhere.
const (
	SYS_GETCPU        = 309 // getcpu(2)
	SYS_SET_MEMPOLICY = 238 // set_mempolicy(2)
	SYS_GET_MEMPOLICY = 239 // get_mempolicy(2)
	SYS_MBIND         = 237 // mbind(2)
	// migrate_pages(2). This was previously 238, which is set_mempolicy and
	// would have invoked the wrong system call.
	SYS_MIGRATE_PAGES = 256
)
|
|
|
|
// NUMA memory policies. The modes are numbered consecutively from zero.
const (
	MPOL_DEFAULT = iota
	MPOL_PREFERRED
	MPOL_BIND
	MPOL_INTERLEAVE
	MPOL_LOCAL
)

// Flag bits passed alongside the policies above.
const (
	// Flags for get_mempolicy
	MPOL_F_NODE = 0x1
	MPOL_F_ADDR = 0x2

	// Flags for mbind
	MPOL_MF_STRICT = 0x1
)
|
|
|
|
// runtime_GetCPU is declared without a body and is bound through go:linkname
// to the symbol runtime.getcpu. No code in the visible portion of this file
// calls it.
// NOTE(review): confirm that runtime.getcpu exists in the targeted Go
// toolchain before adding a caller; an unresolved linkname target would only
// surface at link time.
//
//go:noescape
//go:linkname runtime_GetCPU runtime.getcpu
func runtime_GetCPU() uint32
|
|
|
|
func detectLinuxTopology(topology *Topology) error {
|
|
nodes, err := detectNodesFromSys()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
topology.NumNodes = len(nodes)
|
|
|
|
for _, nodeID := range nodes {
|
|
nodeInfo := &NodeInfo{
|
|
ID: NodeID(nodeID),
|
|
}
|
|
|
|
// Get CPUs for this node
|
|
cpus, err := getCPUsForNodeNoCGO(nodeID)
|
|
if err == nil {
|
|
nodeInfo.CPUs = cpus
|
|
for _, cpu := range cpus {
|
|
topology.CPUToNodeMap[cpu] = NodeID(nodeID)
|
|
}
|
|
}
|
|
|
|
// Get memory info for this node
|
|
memInfo, err := getMemoryInfoForNodeNoCGO(nodeID)
|
|
if err == nil {
|
|
nodeInfo.TotalMemory = memInfo.total
|
|
nodeInfo.FreeMemory = memInfo.free
|
|
}
|
|
|
|
// Get distance matrix
|
|
distances, err := getDistancesForNodeNoCGO(nodeID, len(nodes))
|
|
if err == nil {
|
|
nodeInfo.DistanceToNode = distances
|
|
}
|
|
|
|
topology.Nodes[NodeID(nodeID)] = nodeInfo
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// detectNodesFromSys lists the NUMA node IDs exposed under
// /sys/devices/system/node.
//
// Only directory entries named "node<number>" are considered; entries whose
// suffix does not parse as an integer are skipped. The IDs are returned in
// the order os.ReadDir reports the entries. An error is returned when the
// directory cannot be read or when no node entry is found.
func detectNodesFromSys() ([]int, error) {
	const prefix = "node"

	dirEntries, err := os.ReadDir("/sys/devices/system/node")
	if err != nil {
		return nil, err
	}

	var found []int
	for _, de := range dirEntries {
		name := de.Name()
		if !de.IsDir() || !strings.HasPrefix(name, prefix) {
			continue
		}
		id, convErr := strconv.Atoi(name[len(prefix):])
		if convErr != nil {
			// Not a "node<N>" directory.
			continue
		}
		found = append(found, id)
	}

	if len(found) == 0 {
		return nil, fmt.Errorf("no NUMA nodes found")
	}
	return found, nil
}
|
|
|
|
// memoryInfo holds the memory counters of one NUMA node, in bytes.
type memoryInfo struct {
	total uint64 // MemTotal converted from kB to bytes
	free  uint64 // MemFree converted from kB to bytes
}

// getMemoryInfoForNodeNoCGO reads MemTotal and MemFree for the given node
// from /sys/devices/system/node/node<nodeID>/meminfo.
//
// Lines in the per-node file are prefixed with "Node <id>", for example
//
//	Node 0 MemTotal:       16327304 kB
//
// so the value is the field that follows the "MemTotal:" / "MemFree:" label,
// not the second field of the line (which is the node ID). Values are
// reported by the kernel in kB and are converted to bytes here.
//
// It returns an error when the file cannot be opened or read. Counters that
// are absent or unparsable are left at zero.
func getMemoryInfoForNodeNoCGO(nodeID int) (*memoryInfo, error) {
	file, err := os.Open(fmt.Sprintf("/sys/devices/system/node/node%d/meminfo", nodeID))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	info := &memoryInfo{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if kb, ok := meminfoValueKB(fields, "MemTotal:"); ok {
			info.total = kb * 1024
		} else if kb, ok := meminfoValueKB(fields, "MemFree:"); ok {
			info.free = kb * 1024
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return info, nil
}

// meminfoValueKB returns the numeric field that follows label in a
// whitespace-split meminfo line, and whether such a value was found.
func meminfoValueKB(fields []string, label string) (uint64, bool) {
	for i, f := range fields {
		if f != label {
			continue
		}
		if i+1 >= len(fields) {
			return 0, false
		}
		v, err := strconv.ParseUint(fields[i+1], 10, 64)
		if err != nil {
			return 0, false
		}
		return v, true
	}
	return 0, false
}
|
|
|
|
func getCPUsForNodeNoCGO(nodeID int) ([]int, error) {
|
|
data, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeID))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return parseCPUListNoCGO(strings.TrimSpace(string(data)))
|
|
}
|
|
|
|
// parseCPUListNoCGO expands a CPU list such as "0-3,8,10-11" into the
// individual CPU IDs, in the order they appear in the list.
//
// Each comma-separated entry is either a single non-negative number or an
// inclusive range "first-last" with first <= last. Whitespace around the
// list and around entries is ignored. An empty list yields an empty result
// and a nil error.
//
// A malformed entry (non-numeric text, an empty entry, a reversed range or a
// range with more than one dash) produces an error instead of being turned
// into CPU 0 or silently dropped.
func parseCPUListNoCGO(list string) ([]int, error) {
	var cpus []int

	list = strings.TrimSpace(list)
	if list == "" {
		return cpus, nil
	}

	for _, part := range strings.Split(list, ",") {
		part = strings.TrimSpace(part)

		// A single CPU is treated as the range "n-n".
		first, last := part, part
		if i := strings.Index(part, "-"); i >= 0 {
			first, last = part[:i], part[i+1:]
		}

		start, err := strconv.Atoi(first)
		if err != nil {
			return nil, fmt.Errorf("invalid CPU list entry %q: %w", part, err)
		}
		end, err := strconv.Atoi(last)
		if err != nil {
			return nil, fmt.Errorf("invalid CPU list entry %q: %w", part, err)
		}
		if start < 0 || end < start {
			return nil, fmt.Errorf("invalid CPU range %q", part)
		}

		for cpu := start; cpu <= end; cpu++ {
			cpus = append(cpus, cpu)
		}
	}

	return cpus, nil
}
|
|
|
|
// getDistancesForNodeNoCGO reads the distance vector of a node from
// /sys/devices/system/node/node<nodeID>/distance.
//
// The file holds whitespace-separated decimal numbers; the result contains
// one entry per number, in file order. numNodes is accepted for interface
// compatibility but is not used: the length of the result is determined by
// the file alone.
// NOTE(review): entry i presumably is the distance to node i; confirm how
// this lines up with sparse node IDs.
//
// An error is returned when the file cannot be read or when any field is not
// a valid 32-bit unsigned number (previously such fields silently became 0).
func getDistancesForNodeNoCGO(nodeID int, numNodes int) ([]uint32, error) {
	data, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/distance", nodeID))
	if err != nil {
		return nil, err
	}

	fields := strings.Fields(string(data))
	distances := make([]uint32, 0, len(fields))
	for _, field := range fields {
		val, err := strconv.ParseUint(field, 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parsing distance %q of node %d: %w", field, nodeID, err)
		}
		distances = append(distances, uint32(val))
	}

	return distances, nil
}
|
|
|
|
func getCurrentNodeImpl() (NodeID, error) {
|
|
var cpu, node uint32
|
|
|
|
// Use getcpu syscall
|
|
r1, _, errno := syscall.Syscall(SYS_GETCPU,
|
|
uintptr(unsafe.Pointer(&cpu)),
|
|
uintptr(unsafe.Pointer(&node)),
|
|
0)
|
|
|
|
if errno != 0 {
|
|
// Fallback: try to determine from CPU
|
|
return getNodeFromSchedGetCPU()
|
|
}
|
|
|
|
_ = r1 // suppress unused warning
|
|
return NodeID(node), nil
|
|
}
|
|
|
|
func getNodeFromSchedGetCPU() (NodeID, error) {
|
|
// Get current CPU
|
|
cpu := runtime.GOMAXPROCS(0)
|
|
|
|
// Look up in topology
|
|
topology := GetTopology()
|
|
node, ok := topology.GetNodeForCPU(cpu)
|
|
if !ok {
|
|
return 0, fmt.Errorf("cannot determine NUMA node for CPU %d", cpu)
|
|
}
|
|
return node, nil
|
|
}
|
|
|
|
func setPreferredNodeImpl(node NodeID) (*PreferredNode, error) {
|
|
mask := uint64(1) << uint64(node)
|
|
maxNode := uint64(node) + 1
|
|
|
|
_, _, errno := syscall.Syscall6(SYS_SET_MEMPOLICY,
|
|
uintptr(MPOL_PREFERRED),
|
|
uintptr(unsafe.Pointer(&mask)),
|
|
uintptr(maxNode),
|
|
0, 0, 0)
|
|
|
|
if errno != 0 {
|
|
return nil, fmt.Errorf("set_mempolicy failed: %v", errno)
|
|
}
|
|
|
|
return &PreferredNode{nodeID: node}, nil
|
|
}
|
|
|
|
func revertPreferredNodeImpl(p *PreferredNode) error {
|
|
_, _, errno := syscall.Syscall(SYS_SET_MEMPOLICY,
|
|
uintptr(MPOL_DEFAULT),
|
|
0, 0)
|
|
|
|
if errno != 0 {
|
|
return fmt.Errorf("set_mempolicy failed: %v", errno)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func setMemoryPolicyImpl(policy MemoryPolicy, nodes []NodeID) error {
|
|
var mode int
|
|
switch policy {
|
|
case MPDefault:
|
|
mode = MPOL_DEFAULT
|
|
case MPBind:
|
|
mode = MPOL_BIND
|
|
case MPPreferred:
|
|
mode = MPOL_PREFERRED
|
|
case MPInterleave:
|
|
mode = MPOL_INTERLEAVE
|
|
default:
|
|
return fmt.Errorf("unknown memory policy: %d", policy)
|
|
}
|
|
|
|
var mask uint64
|
|
for _, node := range nodes {
|
|
mask |= 1 << uint64(node)
|
|
}
|
|
|
|
maxNode := uint64(0)
|
|
for _, node := range nodes {
|
|
if uint64(node) >= maxNode {
|
|
maxNode = uint64(node) + 1
|
|
}
|
|
}
|
|
|
|
_, _, errno := syscall.Syscall6(SYS_SET_MEMPOLICY,
|
|
uintptr(mode),
|
|
uintptr(unsafe.Pointer(&mask)),
|
|
uintptr(maxNode),
|
|
0, 0, 0)
|
|
|
|
if errno != 0 {
|
|
return fmt.Errorf("set_mempolicy failed: %v", errno)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func allocateOnNodeImpl(size int, node NodeID) ([]byte, error) {
|
|
buf := make([]byte, size)
|
|
|
|
// Try to use mbind to bind memory to node
|
|
mask := uint64(1) << uint64(node)
|
|
maxNode := uint64(node) + 1
|
|
|
|
_, _, errno := syscall.Syscall6(SYS_MBIND,
|
|
uintptr(unsafe.Pointer(&buf[0])),
|
|
uintptr(size),
|
|
uintptr(MPOL_BIND),
|
|
uintptr(unsafe.Pointer(&mask)),
|
|
uintptr(maxNode),
|
|
uintptr(MPOL_MF_STRICT))
|
|
|
|
if errno != 0 {
|
|
// Fall back to regular allocation
|
|
return buf, nil
|
|
}
|
|
|
|
return buf, nil
|
|
}
|
|
|
|
// scheduleOnNodeImpl runs fn on the calling goroutine while its OS thread is
// restricted to the single CPU cpu.
//
// The goroutine is locked to its thread for the duration of the call. The
// thread's previous affinity mask is saved first and put back (best effort)
// before the thread is unlocked, so the single-CPU restriction does not leak
// to other goroutines that later run on the same thread.
//
// The affinity calls are made with raw system calls, because the standard
// syscall package does not provide CPUSet or SchedSetaffinity.
//
// An error is returned, and fn is not run, when cpu is outside [0, 1024) or
// when the kernel rejects the affinity change.
func scheduleOnNodeImpl(cpu int, fn func()) error {
	const (
		wordBits = 32 << (^uintptr(0) >> 63) // bits in one mask word
		maxCPUs  = 1024
	)
	type cpuMask [maxCPUs / wordBits]uintptr

	if cpu < 0 || cpu >= maxCPUs {
		return fmt.Errorf("sched_setaffinity failed: CPU %d out of range [0, %d)", cpu, maxCPUs)
	}

	var want, saved cpuMask
	want[cpu/wordBits] = 1 << (uint(cpu) % wordBits)

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// pid 0 addresses the calling thread. If the current mask cannot be
	// read, the restore step is skipped.
	_, _, getErrno := syscall.RawSyscall(syscall.SYS_SCHED_GETAFFINITY,
		0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved)))
	canRestore := getErrno == 0

	_, _, errno := syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
		0, unsafe.Sizeof(want), uintptr(unsafe.Pointer(&want)))
	if errno != 0 {
		return fmt.Errorf("sched_setaffinity failed: %v", errno)
	}

	if canRestore {
		// Runs before the deferred UnlockOSThread above. A failure here
		// cannot be reported meaningfully and is ignored.
		defer func() {
			syscall.RawSyscall(syscall.SYS_SCHED_SETAFFINITY,
				0, unsafe.Sizeof(saved), uintptr(unsafe.Pointer(&saved)))
		}()
	}

	fn()
	return nil
}
|
|
|
|
func getPreferredNodeForCurrentThreadImpl() NodeID {
|
|
var mode int
|
|
var node uint32
|
|
|
|
_, _, errno := syscall.Syscall6(SYS_GET_MEMPOLICY,
|
|
uintptr(unsafe.Pointer(&mode)),
|
|
0, 0,
|
|
uintptr(unsafe.Pointer(&node)),
|
|
uintptr(MPOL_F_NODE),
|
|
0)
|
|
|
|
if errno != 0 {
|
|
node, _ := getCurrentNodeImpl()
|
|
return node
|
|
}
|
|
|
|
if mode == MPOL_DEFAULT {
|
|
node, _ := getCurrentNodeImpl()
|
|
return node
|
|
}
|
|
|
|
return NodeID(node)
|
|
}
|
|
|
|
// PinThreadToNode pins the current goroutine's OS thread to a specific NUMA node
|
|
func PinThreadToNode(node NodeID) error {
|
|
topology := GetTopology()
|
|
nodeInfo, ok := topology.GetNode(node)
|
|
if !ok {
|
|
return fmt.Errorf("NUMA node %d not found", node)
|
|
}
|
|
|
|
if len(nodeInfo.CPUs) == 0 {
|
|
return fmt.Errorf("NUMA node %d has no CPUs", node)
|
|
}
|
|
|
|
runtime.LockOSThread()
|
|
|
|
var mask syscall.CPUSet
|
|
for _, cpu := range nodeInfo.CPUs {
|
|
mask.Set(cpu)
|
|
}
|
|
|
|
if err := syscall.SchedSetaffinity(0, &mask); err != nil {
|
|
runtime.UnlockOSThread()
|
|
return fmt.Errorf("sched_setaffinity failed: %v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// UnpinThread releases the current goroutine's OS thread from NUMA binding.
//
// It only calls runtime.UnlockOSThread. It does not issue any affinity system
// call itself, so a CPU mask installed earlier on the thread (for example by
// PinThreadToNode) is not changed by this function.
func UnpinThread() {
	runtime.UnlockOSThread()
}
|
|
|
|
// RunOnNode runs a function with the current goroutine pinned to a specific NUMA node
|
|
func RunOnNode(node NodeID, fn func()) error {
|
|
if err := PinThreadToNode(node); err != nil {
|
|
return err
|
|
}
|
|
defer UnpinThread()
|
|
|
|
fn()
|
|
return nil
|
|
}
|