Compare commits

...

7 Commits

Author SHA1 Message Date
jmorganca
a0a1fb463a build: disable cuda compression 2025-05-05 11:20:57 -07:00
Jesse Gross
7073600797 ggml: Reduce log level of "key not found"
Most of the time this is not an error.
2025-05-05 11:17:32 -07:00
Daniel Hiltgen
b1c40138da win: lint fix (#10571) 2025-05-05 11:08:12 -07:00
Ashok Gelal
17466217e5 Hide empty terminal window (#8668)
This hides the blank LlamaServer terminal window when chatting outside of the terminal (for example, with an app like Msty). It has no other side effects when the server is invoked the regular way.
2025-05-05 09:06:46 -07:00
Jeffrey Morgan
1703d1472e server: fix panic when runner.Options is nil (#10566) 2025-05-05 09:01:33 -07:00
Jeffrey Morgan
913905028b all: fix cgo compiler warnings on windows (#10563) 2025-05-05 08:02:39 -07:00
湛露先生
7e5c8eee5c file close check and close. (#10554)
Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
2025-05-04 15:37:59 -07:00
10 changed files with 17 additions and 10 deletions

View File

@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
set(GGML_CUDA_FA ON)
-set(GGML_CUDA_COMPRESSION_MODE default)
+set(GGML_CUDA_COMPRESSION_MODE none)
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))

View File

@@ -130,6 +130,7 @@ func TestConvertModel(t *testing.T) {
if err != nil {
t.Fatal(err)
}
+defer expectFile.Close()
var expect map[string]string
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
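The added defer above is the usual Go idiom for this kind of leak: the test opened the expectations file but never closed it. A minimal sketch of the pattern, using a hypothetical helper and fixture path rather than the actual test code:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// readExpectations opens a JSON fixture and decodes it into a map.
// The deferred Close runs when the function returns, on the success
// path and on every error path below it.
func readExpectations(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var expect map[string]string
	if err := json.NewDecoder(f).Decode(&expect); err != nil {
		return nil, err
	}
	return expect, nil
}

func main() {
	expect, err := readExpectations("testdata/expect.json") // hypothetical path
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(expect), "entries")
}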

View File

@@ -58,7 +58,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
LOG(resp->ch.verbose, "cudaSetDevice err: %d\n", ret);
UNLOAD_LIBRARY(resp->ch.handle);
resp->ch.handle = NULL;
-if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
+if (ret == CUDART_ERROR_INSUFFICIENT_DRIVER) {
resp->err = strdup("your nvidia driver is too old or missing. If you have a CUDA GPU please upgrade to run ollama");
return;
}
@@ -168,9 +168,9 @@ void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
resp->free = memInfo.free;
resp->used = memInfo.used;
-LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
-LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
-LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
+LOG(h.verbose, "[%s] CUDA totalMem %llu\n", resp->gpu_id, resp->total);
+LOG(h.verbose, "[%s] CUDA freeMem %llu\n", resp->gpu_id, resp->free);
+LOG(h.verbose, "[%s] CUDA usedMem %llu\n", resp->gpu_id, resp->used);
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
}

View File

@@ -193,8 +193,8 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
resp->total = memInfo.total;
resp->free = memInfo.free;
-LOG(h.verbose, "[%s] CUDA totalMem %lu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
-LOG(h.verbose, "[%s] CUDA freeMem %lu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
+LOG(h.verbose, "[%s] CUDA totalMem %llu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
+LOG(h.verbose, "[%s] CUDA freeMem %llu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);

View File

@@ -149,7 +149,7 @@ func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ..
return val.(T)
}
-slog.Warn("key not found", "key", key, "default", defaultValue[0])
+slog.Debug("key not found", "key", key, "default", defaultValue[0])
return defaultValue[0]
}
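This matches the commit message above: a missing key with a supplied default is expected, so it should not surface at Warn. A minimal sketch, not the ollama code, of why the level matters: slog's default handler logs at Info and above, so Debug messages stay silent unless debug logging is explicitly enabled.

package main

import "log/slog"

// lookup returns kv[key] if present, otherwise a fallback. Missing keys are
// common and expected here, so they are logged at Debug rather than Warn.
func lookup(kv map[string]string, key, fallback string) string {
	if v, ok := kv[key]; ok {
		return v
	}
	slog.Debug("key not found", "key", key, "default", fallback)
	return fallback
}

func main() {
	kv := map[string]string{"general.architecture": "llama"}
	// With the default Info level this prints nothing; at Warn it would
	// have produced a log line for every optional key.
	_ = lookup(kv, "tokenizer.ggml.model", "gpt2")
}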

View File

@@ -217,6 +217,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
return
}
+defer fp.Close()
data, err := io.ReadAll(fp)
if err != nil {
slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)

View File

@@ -2,6 +2,7 @@ package llama
/*
#cgo CFLAGS: -std=c11
+#cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
#cgo CXXFLAGS: -std=c++17
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common
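The added directive is windows-scoped: a #cgo line may list build constraints before the flag name, so -Wno-dll-attribute-on-redeclaration reaches the C compiler only when building for GOOS=windows. A minimal, stand-alone sketch of that mechanism (not ollama's file):

package demo

/*
#cgo CFLAGS: -std=c11
#cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
#include <stdlib.h>
*/
import "C"

import "unsafe"

// cfree exists only to exercise the cgo block; the windows-qualified CFLAGS
// line above is ignored entirely on non-Windows builds.
func cfree(p unsafe.Pointer) { C.free(p) }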

View File

@@ -7,6 +7,7 @@ import (
const (
CREATE_DEFAULT_ERROR_MODE = 0x04000000
ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
+CREATE_NO_WINDOW = 0x08000000
)
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
@@ -18,5 +19,5 @@ var LlamaServerSysProcAttr = &syscall.SysProcAttr{
//
// Setting Above Normal priority class ensures when running as a "background service"
// with "programs" given best priority, we aren't starved of cpu cycles
-CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS,
+CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
}
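This is the change behind the "Hide empty terminal window" commit above: on Windows, a console-subsystem child started from a process that has no console of its own gets a fresh blank console unless CREATE_NO_WINDOW is set. A minimal Windows-only sketch of the same pattern, with the flags defined locally as in the diff (not ollama's actual launcher):

//go:build windows

package main

import (
	"os/exec"
	"syscall"
)

const (
	CREATE_DEFAULT_ERROR_MODE   = 0x04000000
	ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
	CREATE_NO_WINDOW            = 0x08000000
)

// startHidden launches a child process without popping up a console window
// and with a slightly elevated priority class.
func startHidden(path string, args ...string) (*exec.Cmd, error) {
	cmd := exec.Command(path, args...)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	return cmd, nil
}

func main() {
	if cmd, err := startHidden("C:\\Windows\\System32\\where.exe", "cmd"); err == nil {
		_ = cmd.Wait()
	}
}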

View File

@@ -3,6 +3,7 @@ package ggml
// #cgo CXXFLAGS: -std=c++17
// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
// #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
+// #cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
// #cgo windows LDFLAGS: -lmsvcrt -static -static-libgcc -static-libstdc++
// #include <stdlib.h>
// #include "ggml-backend.h"

View File

@@ -691,11 +691,13 @@ func (runner *runnerRef) LogValue() slog.Value {
attrs = append(attrs,
slog.String("size", format.HumanBytes2(runner.estimatedTotal)),
slog.String("vram", format.HumanBytes2(runner.estimatedVRAM)),
-slog.Int("num_ctx", runner.NumCtx),
slog.Int("parallel", runner.numParallel),
slog.Int("pid", runner.pid),
slog.String("model", runner.modelPath),
)
+if runner.Options != nil {
+attrs = append(attrs, slog.Int("num_ctx", runner.Options.NumCtx))
+}
return slog.GroupValue(attrs...)
}
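The hunk above is the fix from "server: fix panic when runner.Options is nil": LogValue is invoked by slog while it formats a record, so dereferencing a nil Options there turns an ordinary log statement into a crash. A minimal sketch of the guard with made-up types, not the actual scheduler:

package main

import "log/slog"

type Options struct{ NumCtx int }

type runnerRef struct {
	modelPath string
	Options   *Options // may still be nil while the runner is being set up
}

// LogValue implements slog.LogValuer; it must not assume Options is set.
func (r *runnerRef) LogValue() slog.Value {
	attrs := []slog.Attr{slog.String("model", r.modelPath)}
	if r.Options != nil {
		attrs = append(attrs, slog.Int("num_ctx", r.Options.NumCtx))
	}
	return slog.GroupValue(attrs...)
}

func main() {
	// Without the nil check, this log call would panic instead of printing.
	slog.Info("loaded runner", "runner", &runnerRef{modelPath: "model.gguf"})
}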