From f457d63400f9859acdfff1853c53af13429acea5 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 5 Aug 2024 12:56:20 -0700 Subject: [PATCH] Implement linux NUMA detection If the system has multiple NUMA nodes, enable NUMA support in llama.cpp. If we detect numactl in the path, use that, else use the basic "distribute" mode. --- api/types.go | 2 -- gpu/cpu_common.go | 21 +++++++++++++++++++++ llm/server.go | 10 ++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/api/types.go b/api/types.go index c25296521..291522a39 100644 --- a/api/types.go +++ b/api/types.go @@ -231,7 +231,6 @@ type Options struct { // Runner options which must be set when the model is loaded into memory type Runner struct { - UseNUMA bool `json:"numa,omitempty"` NumCtx int `json:"num_ctx,omitempty"` NumBatch int `json:"num_batch,omitempty"` NumGPU int `json:"num_gpu,omitempty"` @@ -615,7 +614,6 @@ func DefaultOptions() Options { F16KV: true, UseMLock: false, UseMMap: nil, - UseNUMA: false, }, } } diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go index 63e88f25b..34edcdc5a 100644 --- a/gpu/cpu_common.go +++ b/gpu/cpu_common.go @@ -1,6 +1,11 @@ package gpu import ( + "os" + "path/filepath" + "runtime" + "strings" + "golang.org/x/sys/cpu" ) @@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability { // else LCD return CPUCapabilityNone } + +func IsNUMA() bool { + if runtime.GOOS != "linux" { + // numa support in llama.cpp is linux only + return false + } + ids := map[string]interface{}{} + packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id") + for _, packageId := range packageIds { + id, err := os.ReadFile(packageId) + if err == nil { + ids[strings.TrimSpace(string(id))] = struct{}{} + } + } + return len(ids) > 1 +} diff --git a/llm/server.go b/llm/server.go index 7abc3bd72..152b7582f 100644 --- a/llm/server.go +++ b/llm/server.go @@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock") } - if opts.UseNUMA { - params = append(params, "--numa") + if gpu.IsNUMA() { + numaMode := "distribute" + if runtime.GOOS == "linux" { + if _, err := exec.LookPath("numactl"); err == nil { + numaMode = "numactl" + } + } + params = append(params, "--numa", numaMode) } params = append(params, "--parallel", strconv.Itoa(numParallel))