llm: normalise kvct parameter handling (#7926)

This commit is contained in:
Sam 2024-12-04 11:30:40 +11:00 committed by GitHub
parent 1bdab9fdb1
commit 539be43640
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 2 additions and 2 deletions

View File

@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
var kvct string
if fa {
-requested := envconfig.KvCacheType()
+requested := strings.ToLower(envconfig.KvCacheType())
if requested != "" && ggml.SupportsKVCacheType(requested) {
kvct = requested
}

View File

@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
fa = false
}
-kvct := envconfig.KvCacheType()
+kvct := strings.ToLower(envconfig.KvCacheType())
if fa {
slog.Info("enabling flash attention")