From 010313bb63e73cce2b42d1eebaf8cea3eb529567 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 14 Feb 2025 11:13:34 -0800 Subject: [PATCH] llamarunner: Init GGML before printing system info We currently print system info before the GGML backends are loaded. This results in only getting information about the default lowest common denominator runner. If we move up the GGML init then we can see what we are actually running. Before: time=2025-02-14T11:15:07.606-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | cgo(gcc)" threads=24 After: time=2025-02-14T11:16:02.936-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | CUDA : ARCHS = 890 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | LLAMAFILE = 1 | cgo(gcc)" threads=24 --- runner/llamarunner/runner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go index 93d6bfabe..72873ec4d 100644 --- a/runner/llamarunner/runner.go +++ b/runner/llamarunner/runner.go @@ -845,8 +845,6 @@ func (s *Server) loadModel( threads int, multiUserCache bool, ) { - llama.BackendInit() - var err error s.model, err = llama.LoadModelFromFile(mpath, params) if err != nil { @@ -932,6 +930,8 @@ func Execute(args []string) error { }) slog.SetDefault(slog.New(handler)) slog.Info("starting go runner") + + llama.BackendInit() slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads) server := &Server{