llm: normalise KV cache type (kvct) parameter handling (#7926)
This commit is contained in:
parent
1bdab9fdb1
commit
539be43640
@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
|
||||
|
||||
var kvct string
|
||||
if fa {
|
||||
requested := envconfig.KvCacheType()
|
||||
requested := strings.ToLower(envconfig.KvCacheType())
|
||||
if requested != "" && ggml.SupportsKVCacheType(requested) {
|
||||
kvct = requested
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
fa = false
|
||||
}
|
||||
|
||||
kvct := envconfig.KvCacheType()
|
||||
kvct := strings.ToLower(envconfig.KvCacheType())
|
||||
|
||||
if fa {
|
||||
slog.Info("enabling flash attention")
|
||||
|
Loading…
x
Reference in New Issue
Block a user