diff --git a/llm/memory.go b/llm/memory.go
index fdfe798f9..2f930d75f 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -338,7 +338,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 	return estimate
 }
 
-func (m MemoryEstimate) log() {
+func (m MemoryEstimate) LogValue() slog.Value {
 	overhead := envconfig.GpuOverhead()
 
 	log := slog.With()
@@ -352,8 +352,8 @@ func (m MemoryEstimate) log() {
 		)
 	}
 
-	log.Info(
-		"offload to "+m.inferenceLibrary,
+	return slog.GroupValue(
+		slog.String("library", m.inferenceLibrary),
 		slog.Group(
 			"layers",
 			// requested number of layers to offload
diff --git a/llm/server.go b/llm/server.go
index 134f5d8a3..dc38979ca 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -139,7 +139,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		}
 	}
 
-	estimate.log()
+	slog.Info("offload", "", estimate)
 
 	// Loop through potential servers
 	finalErr := errors.New("no suitable llama servers found")
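
The patch replaces the one-shot log() method with an implementation of the standard library's slog.LogValuer interface, so the estimate only expands into structured attributes when a record is actually emitted, and any caller can attach it to a message of its choosing. The empty key in slog.Info("offload", "", estimate) makes slog's built-in handlers inline the resolved group's attributes directly into the record. A minimal, self-contained sketch of that pattern follows; the struct and its fields are hypothetical stand-ins, not the real MemoryEstimate:

package main

import "log/slog"

// estimate is a hypothetical stand-in for MemoryEstimate; the real
// struct in llm/memory.go carries many more fields.
type estimate struct {
	library string
	layers  int
}

// LogValue implements slog.LogValuer. The value is resolved lazily by
// the handler, so the attributes are only built when a record is emitted.
func (e estimate) LogValue() slog.Value {
	return slog.GroupValue(
		slog.String("library", e.library),
		slog.Int("layers", e.layers),
	)
}

func main() {
	e := estimate{library: "cuda", layers: 33}
	// An empty group key inlines the resolved attributes into the
	// record, mirroring the diff's slog.Info("offload", "", estimate).
	slog.Info("offload", "", e)
}

With the default text handler this prints something like: msg=offload library=cuda layers=33.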