Compare commits
7 Commits
v0.6.8
...
jmorganca/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0a1fb463a | ||
|
|
7073600797 | ||
|
|
b1c40138da | ||
|
|
17466217e5 | ||
|
|
1703d1472e | ||
|
|
913905028b | ||
|
|
7e5c8eee5c |
@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
|
||||
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
|
||||
set(GGML_CUDA_GRAPHS ON)
|
||||
set(GGML_CUDA_FA ON)
|
||||
set(GGML_CUDA_COMPRESSION_MODE default)
|
||||
set(GGML_CUDA_COMPRESSION_MODE none)
|
||||
|
||||
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
|
||||
OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
|
||||
|
||||
@@ -130,6 +130,7 @@ func TestConvertModel(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer expectFile.Close()
|
||||
|
||||
var expect map[string]string
|
||||
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
|
||||
|
||||
@@ -58,7 +58,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
|
||||
LOG(resp->ch.verbose, "cudaSetDevice err: %d\n", ret);
|
||||
UNLOAD_LIBRARY(resp->ch.handle);
|
||||
resp->ch.handle = NULL;
|
||||
if (ret == CUDA_ERROR_INSUFFICIENT_DRIVER) {
|
||||
if (ret == CUDART_ERROR_INSUFFICIENT_DRIVER) {
|
||||
resp->err = strdup("your nvidia driver is too old or missing. If you have a CUDA GPU please upgrade to run ollama");
|
||||
return;
|
||||
}
|
||||
@@ -168,9 +168,9 @@ void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
|
||||
resp->free = memInfo.free;
|
||||
resp->used = memInfo.used;
|
||||
|
||||
LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
|
||||
LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
|
||||
LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
|
||||
LOG(h.verbose, "[%s] CUDA totalMem %llu\n", resp->gpu_id, resp->total);
|
||||
LOG(h.verbose, "[%s] CUDA freeMem %llu\n", resp->gpu_id, resp->free);
|
||||
LOG(h.verbose, "[%s] CUDA usedMem %llu\n", resp->gpu_id, resp->used);
|
||||
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
|
||||
}
|
||||
|
||||
|
||||
@@ -193,8 +193,8 @@ void nvcuda_bootstrap(nvcuda_handle_t h, int i, mem_info_t *resp) {
|
||||
resp->total = memInfo.total;
|
||||
resp->free = memInfo.free;
|
||||
|
||||
LOG(h.verbose, "[%s] CUDA totalMem %lu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
|
||||
LOG(h.verbose, "[%s] CUDA freeMem %lu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
|
||||
LOG(h.verbose, "[%s] CUDA totalMem %llu mb\n", resp->gpu_id, resp->total / 1024 / 1024);
|
||||
LOG(h.verbose, "[%s] CUDA freeMem %llu mb\n", resp->gpu_id, resp->free / 1024 / 1024);
|
||||
LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
|
||||
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ..
|
||||
return val.(T)
|
||||
}
|
||||
|
||||
slog.Warn("key not found", "key", key, "default", defaultValue[0])
|
||||
slog.Debug("key not found", "key", key, "default", defaultValue[0])
|
||||
return defaultValue[0]
|
||||
}
|
||||
|
||||
|
||||
@@ -217,6 +217,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
|
||||
slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
|
||||
return
|
||||
}
|
||||
defer fp.Close()
|
||||
data, err := io.ReadAll(fp)
|
||||
if err != nil {
|
||||
slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
|
||||
|
||||
@@ -2,6 +2,7 @@ package llama
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -std=c11
|
||||
#cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
|
||||
#cgo CXXFLAGS: -std=c++17
|
||||
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include
|
||||
#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
const (
|
||||
CREATE_DEFAULT_ERROR_MODE = 0x04000000
|
||||
ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
|
||||
CREATE_NO_WINDOW = 0x08000000
|
||||
)
|
||||
|
||||
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||
@@ -18,5 +19,5 @@ var LlamaServerSysProcAttr = &syscall.SysProcAttr{
|
||||
//
|
||||
// Setting Above Normal priority class ensures when running as a "background service"
|
||||
// with "programs" given best priority, we aren't starved of cpu cycles
|
||||
CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS,
|
||||
CreationFlags: CREATE_DEFAULT_ERROR_MODE | ABOVE_NORMAL_PRIORITY_CLASS | CREATE_NO_WINDOW,
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package ggml
|
||||
// #cgo CXXFLAGS: -std=c++17
|
||||
// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
|
||||
// #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
|
||||
// #cgo windows CFLAGS: -Wno-dll-attribute-on-redeclaration
|
||||
// #cgo windows LDFLAGS: -lmsvcrt -static -static-libgcc -static-libstdc++
|
||||
// #include <stdlib.h>
|
||||
// #include "ggml-backend.h"
|
||||
|
||||
@@ -691,11 +691,13 @@ func (runner *runnerRef) LogValue() slog.Value {
|
||||
attrs = append(attrs,
|
||||
slog.String("size", format.HumanBytes2(runner.estimatedTotal)),
|
||||
slog.String("vram", format.HumanBytes2(runner.estimatedVRAM)),
|
||||
slog.Int("num_ctx", runner.NumCtx),
|
||||
slog.Int("parallel", runner.numParallel),
|
||||
slog.Int("pid", runner.pid),
|
||||
slog.String("model", runner.modelPath),
|
||||
)
|
||||
if runner.Options != nil {
|
||||
attrs = append(attrs, slog.Int("num_ctx", runner.Options.NumCtx))
|
||||
}
|
||||
return slog.GroupValue(attrs...)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user