From 539be43640edf4b643ae2139ad1f1ccc8c66570d Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 4 Dec 2024 11:30:40 +1100 Subject: [PATCH] llm: normalise kvct parameter handling (#7926) --- llm/memory.go | 2 +- llm/server.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llm/memory.go b/llm/memory.go index c5d861b6a..384e2dc60 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, var kvct string if fa { - requested := envconfig.KvCacheType() + requested := strings.ToLower(envconfig.KvCacheType()) if requested != "" && ggml.SupportsKVCacheType(requested) { kvct = requested } diff --git a/llm/server.go b/llm/server.go index 23caa9a0a..debdd35e8 100644 --- a/llm/server.go +++ b/llm/server.go @@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter fa = false } - kvct := envconfig.KvCacheType() + kvct := strings.ToLower(envconfig.KvCacheType()) if fa { slog.Info("enabling flash attention")