Compare commits

...

7 Commits

Author SHA1 Message Date
jmorganca
a0a1fb463a build: disable cuda compression 2025-05-05 11:20:57 -07:00
Jesse Gross
7073600797 ggml: Reduce log level of "key not found"
Most of the time this is not an error.
2025-05-05 11:17:32 -07:00
Daniel Hiltgen
b1c40138da win: lint fix (#10571) 2025-05-05 11:08:12 -07:00
Ashok Gelal
17466217e5 Hide empty terminal window (#8668)
This hides the blank LlamaServer terminal window when chatting outside of the terminal (for example, with an app like Msty). It has no other side effects when the server is invoked the regular way.
2025-05-05 09:06:46 -07:00
Jeffrey Morgan
1703d1472e server: fix panic when runner.Options is nil (#10566) 2025-05-05 09:01:33 -07:00
Jeffrey Morgan
913905028b all: fix cgo compiler warnings on windows (#10563) 2025-05-05 08:02:39 -07:00
湛露先生
7e5c8eee5c file close check and close. (#10554)
Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
2025-05-04 15:37:59 -07:00
10 changed files with 17 additions and 10 deletions

View File

@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
set(GGML_CUDA_FA ON)
-set(GGML_CUDA_COMPRESSION_MODE default)
+set(GGML_CUDA_COMPRESSION_MODE none)
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))

View File

@@ -130,6 +130,7 @@ func TestConvertModel(t *testing.T) {
if err != nil {
t.Fatal(err)
}
+defer expectFile.Close()
var expect map[string]string
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
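The added defer above is the usual Go idiom for this kind of leak: the test opened the expectations file but never closed it. A minimal sketch of the pattern, using a hypothetical helper and fixture path rather than the actual test code:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// readExpectations opens a JSON fixture and decodes it into a map.
// The deferred Close runs when the function returns, on the success
// path and on every error path below it.
func readExpectations(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var expect map[string]string
	if err := json.NewDecoder(f).Decode(&expect); err != nil {
		return nil, err
	}
	return expect, nil
}

func main() {
	expect, err := readExpectations("testdata/expect.json") // hypothetical path
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(expect), "entries")
}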

View File

@@ -58,7 +58,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
LOG(resp->ch.verbose, "cudaSetDevice err: %d\n", ret);
UNLOAD_LIBRARY(resp->ch.handle);
resp->ch.handle = NULL;
-if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
+if (ret == CUDART_ERROR_INSUFFICIENT_DRIVER) {
resp->err = strdup("your nvidia driver is too old or missing. If you have a CUDA GPU please upgrade to run ollama");
return;
}
@@ -168,9 +168,9 @@ void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
resp->free = memInfo.free;
resp->used = memInfo.used;
-LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
-LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
-LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
+LOG(h.verbose, "[%s] CUDA totalMem %llu\n", resp->gpu_id, resp->total);
+LOG(h.verbose, "[%s] CUDA freeMem %llu\n", resp->gpu_id, resp->free);
+LOG(h.verbose, "[%s] CUDA usedMem %llu\n", resp->gpu_id, resp->used);
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
}

View File

@@ -193,8 +193,8 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
resp->total = memInfo.total;
resp->free = memInfo.free;
-LOG(h.verbose, "[%s] CUDA totalMem %lu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
-LOG(h.verbose, "[%s] CUDA freeMem %lu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
+LOG(h.verbose, "[%s] CUDA totalMem %llu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
+LOG(h.verbose, "[%s] CUDA freeMem %llu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);

View File

@@ -149,7 +149,7 @@ func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ..
return val.(T)
}
-slog.Warn("key not found", "key", key, "default", defaultValue[0])
+slog.Debug("key not found", "key", key, "default", defaultValue[0])
return defaultValue[0]
}
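This matches the commit message above: a missing key with a supplied default is expected, so it should not surface at Warn. A minimal sketch, not the ollama code, of why the level matters: slog's default handler logs at Info and above, so Debug messages stay silent unless debug logging is explicitly enabled.

package main

import "log/slog"

// lookup returns kv[key] if present, otherwise a fallback. Missing keys are
// common and expected here, so they are logged at Debug rather than Warn.
func lookup(kv map[string]string, key, fallback string) string {
	if v, ok := kv[key]; ok {
		return v
	}
	slog.Debug("key not found", "key", key, "default", fallback)
	return fallback
}

func main() {
	kv := map[string]string{"general.architecture": "llama"}
	// With the default Info level this prints nothing; at Warn it would
	// have produced a log line for every optional key.
	_ = lookup(kv, "tokenizer.ggml.model", "gpt2")
}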

View File

@@ -217,6 +217,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
return
}
+defer fp.Close()
data, err := io.ReadAll(fp)
if err != nil {
slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)

View File

@@ -2,6 +2,7 @@ package llama
/*
#cgo CFLAGS: -std=c11
+#cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
#cgo CXXFLAGS: -std=c++17
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common
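The added directive is windows-scoped: a #cgo line may list build constraints before the flag name, so -Wno-dll-attribute-on-redeclaration reaches the C compiler only when building for GOOS=windows. A minimal, stand-alone sketch of that mechanism (not ollama's file):

package demo

/*
#cgo CFLAGS: -std=c11
#cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
#include <stdlib.h>
*/
import "C"

import "unsafe"

// cfree exists only to exercise the cgo block; the windows-qualified CFLAGS
// line above is ignored entirely on non-Windows builds.
func cfree(p unsafe.Pointer) { C.free(p) }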

View File

@@ -7,6 +7,7 @@ import (
const (
CREATE_DEFAULT_ERROR_MODE = 0x04000000
ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
+CREATE_NO_WINDOW = 0x08000000
)
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
@@ -18,5 +19,5 @@ var LlamaServerSysProcAttr = &syscall.SysProcAttr{
//
// Setting Above Normal priority class ensures when running as a "background service"
// with "programs" given best priority, we aren't starved of cpu cycles
-CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS,
+CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
}
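This is the change behind the "Hide empty terminal window" commit above: on Windows, a console-subsystem child started from a process that has no console of its own gets a fresh blank console unless CREATE_NO_WINDOW is set. A minimal Windows-only sketch of the same pattern, with the flags defined locally as in the diff (not ollama's actual launcher):

//go:build windows

package main

import (
	"os/exec"
	"syscall"
)

const (
	CREATE_DEFAULT_ERROR_MODE   = 0x04000000
	ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
	CREATE_NO_WINDOW            = 0x08000000
)

// startHidden launches a child process without popping up a console window
// and with a slightly elevated priority class.
func startHidden(path string, args ...string) (*exec.Cmd, error) {
	cmd := exec.Command(path, args...)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	return cmd, nil
}

func main() {
	if cmd, err := startHidden("C:\\Windows\\System32\\where.exe", "cmd"); err == nil {
		_ = cmd.Wait()
	}
}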

View File

@@ -3,6 +3,7 @@ package ggml
// #cgo CXXFLAGS: -std=c++17
// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
// #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
+// #cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
// #cgo windows LDFLAGS: -lmsvcrt -static -static-libgcc -static-libstdc++
// #include <stdlib.h>
// #include "ggml-backend.h"

View File

@@ -691,11 +691,13 @@ func (runner *runnerRef) LogValue() slog.Value {
attrs = append(attrs,
slog.String("size", format.HumanBytes2(runner.estimatedTotal)),
slog.String("vram", format.HumanBytes2(runner.estimatedVRAM)),
-slog.Int("num_ctx", runner.NumCtx),
slog.Int("parallel", runner.numParallel),
slog.Int("pid", runner.pid),
slog.String("model", runner.modelPath),
)
+if runner.Options != nil {
+attrs = append(attrs, slog.Int("num_ctx", runner.Options.NumCtx))
+}
return slog.GroupValue(attrs...)
}
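The hunk above is the fix from "server: fix panic when runner.Options is nil": LogValue is invoked by slog while it formats a record, so dereferencing a nil Options there turns an ordinary log statement into a crash. A minimal sketch of the guard with made-up types, not the actual scheduler:

package main

import "log/slog"

type Options struct{ NumCtx int }

type runnerRef struct {
	modelPath string
	Options   *Options // may still be nil while the runner is being set up
}

// LogValue implements slog.LogValuer; it must not assume Options is set.
func (r *runnerRef) LogValue() slog.Value {
	attrs := []slog.Attr{slog.String("model", r.modelPath)}
	if r.Options != nil {
		attrs = append(attrs, slog.Int("num_ctx", r.Options.NumCtx))
	}
	return slog.GroupValue(attrs...)
}

func main() {
	// Without the nil check, this log call would panic instead of printing.
	slog.Info("loaded runner", "runner", &runnerRef{modelPath: "model.gguf"})
}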