diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go
index 9985fc3f8..22e3de194 100644
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -4,20 +4,14 @@ import (
     "fmt"
     "log/slog"
     "os"
-    "path/filepath"
     "strconv"
     "strings"
 
     "github.com/ollama/ollama/envconfig"
+    "github.com/ollama/ollama/logutil"
 )
 
 func InitLogging() {
-    level := slog.LevelInfo
-
-    if envconfig.Debug() {
-        level = slog.LevelDebug
-    }
-
     var logFile *os.File
     var err error
     // Detect if we're a GUI app on windows, and if not, send logs to console
@@ -33,20 +27,8 @@ func InitLogging() {
             return
         }
     }
-    handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
-        Level:     level,
-        AddSource: true,
-        ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-            if attr.Key == slog.SourceKey {
-                source := attr.Value.Any().(*slog.Source)
-                source.File = filepath.Base(source.File)
-            }
-            return attr
-        },
-    })
-
-    slog.SetDefault(slog.New(handler))
+    slog.SetDefault(logutil.NewLogger(logFile, envconfig.LogLevel()))
 
     slog.Info("ollama app started")
 }
diff --git a/discover/gpu.go b/discover/gpu.go
index ba906a184..15bad4466 100644
--- a/discover/gpu.go
+++ b/discover/gpu.go
@@ -670,7 +670,7 @@ func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, e
 }
 
 func getVerboseState() C.uint16_t {
-    if envconfig.Debug() {
+    if envconfig.LogLevel() < slog.LevelInfo {
         return C.uint16_t(1)
     }
     return C.uint16_t(0)
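A note on the `getVerboseState` comparison above: `slog.Level` values are ordered so that more verbose levels are numerically smaller (DEBUG is -4, INFO is 0, WARN is 4, ERROR is 8), so `envconfig.LogLevel() < slog.LevelInfo` holds exactly when DEBUG or the new TRACE level is active. A minimal standalone sketch of that idiom (illustrative only, not part of the diff):

```go
package main

import (
	"fmt"
	"log/slog"
)

// LevelTrace mirrors the constant this diff introduces in logutil:
// one step more verbose than slog.LevelDebug.
const LevelTrace slog.Level = -8

func main() {
	for _, level := range []slog.Level{LevelTrace, slog.LevelDebug, slog.LevelInfo, slog.LevelWarn} {
		// Smaller levels are more verbose, so "is verbose logging on?"
		// reduces to a less-than comparison against INFO.
		fmt.Println(level, "verbose:", level < slog.LevelInfo)
	}
}
```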
diff --git a/envconfig/config.go b/envconfig/config.go
index a1f056155..9d7c2e218 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -149,9 +149,22 @@ func Bool(k string) func() bool {
     }
 }
 
+// LogLevel returns the log level for the application.
+// Values: 0 or false INFO (default), 1 or true DEBUG, 2 TRACE, -1 WARN, -2 ERROR
+func LogLevel() slog.Level {
+    level := slog.LevelInfo
+    if s := Var("OLLAMA_DEBUG"); s != "" {
+        if b, _ := strconv.ParseBool(s); b {
+            level = slog.LevelDebug
+        } else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
+            level = slog.Level(i * -4)
+        }
+    }
+
+    return level
+}
+
 var (
-    // Debug enabled additional debug information.
-    Debug = Bool("OLLAMA_DEBUG")
     // FlashAttention enables the experimental flash attention feature.
     FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
     // KvCacheType is the quantization type for the K/V cache.
@@ -236,7 +249,7 @@ type EnvVar struct {
 
 func AsMap() map[string]EnvVar {
     ret := map[string]EnvVar{
-        "OLLAMA_DEBUG":           {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+        "OLLAMA_DEBUG":           {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
         "OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
         "OLLAMA_KV_CACHE_TYPE":   {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
         "OLLAMA_GPU_OVERHEAD":    {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
diff --git a/envconfig/config_test.go b/envconfig/config_test.go
index 9e80645c7..f232f1cdc 100644
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -1,11 +1,13 @@
 package envconfig
 
 import (
+    "log/slog"
     "math"
     "testing"
     "time"
 
     "github.com/google/go-cmp/cmp"
+    "github.com/ollama/ollama/logutil"
 )
 
 func TestHost(t *testing.T) {
@@ -292,3 +294,34 @@ func TestContextLength(t *testing.T) {
         })
     }
 }
+
+func TestLogLevel(t *testing.T) {
+    cases := map[string]slog.Level{
+        // Default to INFO
+        "":      slog.LevelInfo,
+        "false": slog.LevelInfo,
+        "f":     slog.LevelInfo,
+        "0":     slog.LevelInfo,
+
+        // True values enable Debug
+        "true": slog.LevelDebug,
+        "t":    slog.LevelDebug,
+
+        // Positive values increase verbosity
+        "1": slog.LevelDebug,
+        "2": logutil.LevelTrace,
+
+        // Negative values decrease verbosity
+        "-1": slog.LevelWarn,
+        "-2": slog.LevelError,
+    }
+
+    for k, v := range cases {
+        t.Run(k, func(t *testing.T) {
+            t.Setenv("OLLAMA_DEBUG", k)
+            if i := LogLevel(); i != v {
+                t.Errorf("%s: expected %d, got %d", k, v, i)
+            }
+        })
+    }
+}
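The mapping in `LogLevel` leans on the fact that slog's built-in levels are spaced four apart, so multiplying the integer value of `OLLAMA_DEBUG` by -4 projects a signed verbosity count onto the level axis: 2 becomes -8 (TRACE), -1 becomes 4 (WARN). Note the parse order: `strconv.ParseBool` wins for `true`, `t`, and `1` (all yield DEBUG), and only non-boolean strings fall through to `strconv.ParseInt`. A quick standalone sketch of the arithmetic, for illustration:

```go
package main

import (
	"fmt"
	"log/slog"
	"strconv"
)

func main() {
	for _, s := range []string{"2", "1", "true", "0", "-1", "-2", ""} {
		level := slog.LevelInfo
		if s != "" {
			if b, _ := strconv.ParseBool(s); b {
				level = slog.LevelDebug // "true", "t", "1" all land here
			} else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
				level = slog.Level(i * -4) // 2 -> -8 (TRACE), -1 -> 4 (WARN)
			}
		}
		fmt.Printf("OLLAMA_DEBUG=%q -> %v\n", s, level)
	}
}
```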
diff --git a/llm/server.go b/llm/server.go
index 8884d105a..085e0980c 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -17,6 +17,7 @@ import (
     "os/exec"
     "path/filepath"
     "runtime"
+    "slices"
     "strconv"
     "strings"
     "sync"
@@ -30,9 +31,37 @@ import (
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/fs/ggml"
     "github.com/ollama/ollama/llama"
+    "github.com/ollama/ollama/logutil"
     "github.com/ollama/ollama/model"
 )
 
+type filteredEnv []string
+
+func (e filteredEnv) LogValue() slog.Value {
+    var attrs []slog.Attr
+    for _, env := range e {
+        if key, value, ok := strings.Cut(env, "="); ok {
+            switch {
+            case strings.HasPrefix(key, "OLLAMA_"),
+                strings.HasPrefix(key, "CUDA_"),
+                strings.HasPrefix(key, "ROCR_"),
+                strings.HasPrefix(key, "ROCM_"),
+                strings.HasPrefix(key, "HIP_"),
+                strings.HasPrefix(key, "GPU_"),
+                strings.HasPrefix(key, "HSA_"),
+                strings.HasPrefix(key, "GGML_"),
+                slices.Contains([]string{
+                    "PATH",
+                    "LD_LIBRARY_PATH",
+                    "DYLD_LIBRARY_PATH",
+                }, key):
+                attrs = append(attrs, slog.String(key, value))
+            }
+        }
+    }
+    return slog.GroupValue(attrs...)
+}
+
 type LlamaServer interface {
     Ping(ctx context.Context) error
     WaitUntilRunning(ctx context.Context) error
@@ -148,10 +177,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
         params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
     }
 
-    if envconfig.Debug() {
-        params = append(params, "--verbose")
-    }
-
     if opts.MainGPU > 0 {
         params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
     }
@@ -404,26 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
     }
 
     slog.Info("starting llama server", "cmd", s.cmd)
-    if envconfig.Debug() {
-        filteredEnv := []string{}
-        for _, ev := range s.cmd.Env {
-            if strings.HasPrefix(ev, "OLLAMA_") ||
-                strings.HasPrefix(ev, "CUDA_") ||
-                strings.HasPrefix(ev, "ROCR_") ||
-                strings.HasPrefix(ev, "ROCM_") ||
-                strings.HasPrefix(ev, "HIP_") ||
-                strings.HasPrefix(ev, "GPU_") ||
-                strings.HasPrefix(ev, "HSA_") ||
-                strings.HasPrefix(ev, "GGML_") ||
-                strings.HasPrefix(ev, "PATH=") ||
-                strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
-                strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
-                filteredEnv = append(filteredEnv, ev)
-            }
-        }
-        // Log at debug as the environment is inherited and might contain sensitive information
-        slog.Debug("subprocess", "environment", filteredEnv)
-    }
+    slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))
 
     if err = s.cmd.Start(); err != nil {
         var msg string
@@ -721,6 +727,9 @@ type CompletionResponse struct {
 }
 
 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
+    slog.Debug("completion request", "images", len(req.Images), "prompt", len(req.Prompt), "format", string(req.Format))
+    slog.Log(ctx, logutil.LevelTrace, "completion request", "prompt", req.Prompt)
+
     if len(req.Format) > 0 {
         switch string(req.Format) {
         case `null`, `""`:
@@ -884,6 +893,8 @@ type EmbeddingResponse struct {
 }
 
 func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
+    slog.Log(ctx, logutil.LevelTrace, "embedding request", "input", input)
+
     if err := s.sem.Acquire(ctx, 1); err != nil {
         if errors.Is(err, context.Canceled) {
             slog.Info("aborting embedding request due to client closing the connection")
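The `filteredEnv` type above relies on slog's `LogValuer` interface: `LogValue` is only resolved when a record is actually emitted, so the allow-list filtering (and any exposure of the environment) is skipped entirely unless debug logging is enabled; this replaces the old explicit `if envconfig.Debug()` guard. Passing it under the empty key `""` makes the handler inline the group's attrs into the record. A self-contained sketch of the pattern (the `redacted` type here is hypothetical):

```go
package main

import (
	"log/slog"
	"os"
)

type redacted string

// LogValue runs only when a handler actually emits the record, so the
// formatting below never executes for suppressed levels.
func (r redacted) LogValue() slog.Value {
	return slog.StringValue(string(r[:1]) + "***")
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
	logger.Debug("hidden", "secret", redacted("hunter2")) // below INFO: LogValue is never called
	logger.Info("shown", "secret", redacted("hunter2"))   // emits secret=h***
}
```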
diff --git a/logutil/logutil.go b/logutil/logutil.go
new file mode 100644
index 000000000..406caf540
--- /dev/null
+++ b/logutil/logutil.go
@@ -0,0 +1,29 @@
+package logutil
+
+import (
+    "io"
+    "log/slog"
+    "path/filepath"
+)
+
+const LevelTrace slog.Level = -8
+
+func NewLogger(w io.Writer, level slog.Level) *slog.Logger {
+    return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
+        Level:     level,
+        AddSource: true,
+        ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
+            switch attr.Key {
+            case slog.LevelKey:
+                switch attr.Value.Any().(slog.Level) {
+                case LevelTrace:
+                    attr.Value = slog.StringValue("TRACE")
+                }
+            case slog.SourceKey:
+                source := attr.Value.Any().(*slog.Source)
+                source.File = filepath.Base(source.File)
+            }
+            return attr
+        },
+    }))
+}
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index aace1335d..ff340561c 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -27,6 +27,7 @@ import (
     "github.com/ollama/ollama/format"
     "github.com/ollama/ollama/fs"
     fsggml "github.com/ollama/ollama/fs/ggml"
+    "github.com/ollama/ollama/logutil"
     "github.com/ollama/ollama/ml"
     ggml "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
     "golang.org/x/sync/errgroup"
@@ -222,7 +223,7 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend,
         tt := C.ggml_new_tensor(ctxs[bt], t.source.Kind, C.int(len(t.source.Shape)), (*C.int64_t)(unsafe.Pointer(&t.source.Shape[0])))
         C.ggml_set_name(tt, cname)
 
-        slog.Debug("created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
+        slog.Log(context.TODO(), logutil.LevelTrace, "created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
         //nolint:staticcheck // TODO: check if buffer type supports this tensor
         return tt
     }
diff --git a/model/model.go b/model/model.go
index ab96c4c70..7883b8517 100644
--- a/model/model.go
+++ b/model/model.go
@@ -19,6 +19,7 @@ import (
     "github.com/ollama/ollama/fs"
     fsggml "github.com/ollama/ollama/fs/ggml"
     "github.com/ollama/ollama/kvcache"
+    "github.com/ollama/ollama/logutil"
     "github.com/ollama/ollama/ml"
     _ "github.com/ollama/ollama/ml/backend"
     "github.com/ollama/ollama/model/input"
@@ -202,7 +203,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
             names := fn(tagsCopy)
             for _, name := range names {
                 if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
-                    slog.Debug("found tensor", "", tensor)
+                    slog.Log(context.TODO(), logutil.LevelTrace, "found tensor", "", tensor)
                     vv.Set(reflect.ValueOf(tensor))
                     break
                 }
diff --git a/model/process_text.go b/model/process_text.go
index 90b220a2e..7b87ccc33 100644
--- a/model/process_text.go
+++ b/model/process_text.go
@@ -2,6 +2,7 @@ package model
 
 import (
     "cmp"
+    "context"
     "iter"
     "log/slog"
     "slices"
@@ -10,6 +11,7 @@ import (
 
     "github.com/dlclark/regexp2"
     heap "github.com/emirpasic/gods/v2/trees/binaryheap"
+    "github.com/ollama/ollama/logutil"
 )
 
 type Special int32
@@ -322,6 +324,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
         }
     }
 
+    slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
     return ids, nil
 }
 
@@ -349,5 +352,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
         }
     }
 
+    slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
     return sb.String(), nil
 }
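slog provides `Debug`/`Info`/`Warn`/`Error` helpers but nothing for custom levels, which is why every trace call site above spells out `slog.Log(ctx, logutil.LevelTrace, ...)`, passing `context.TODO()` where no request context is in scope. A hedged sketch of how this wires together (the `trace` wrapper is hypothetical; the diff calls `slog.Log` directly at each site):

```go
package main

import (
	"context"
	"log/slog"
	"os"

	"github.com/ollama/ollama/logutil"
)

// trace is a convenience wrapper over slog.Log at the custom level.
func trace(msg string, args ...any) {
	slog.Log(context.TODO(), logutil.LevelTrace, msg, args...)
}

func main() {
	// Equivalent to what OLLAMA_DEBUG=2 selects via envconfig.LogLevel().
	// NewLogger's ReplaceAttr renders this level as "TRACE" instead of
	// slog's default "DEBUG-4" spelling.
	slog.SetDefault(logutil.NewLogger(os.Stderr, logutil.LevelTrace))
	trace("encoded", "ids", []int32{1, 2, 3})
}
```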
diff --git a/model/process_text_spm.go b/model/process_text_spm.go
index 446d5d604..b1cff7d27 100644
--- a/model/process_text_spm.go
+++ b/model/process_text_spm.go
@@ -2,10 +2,13 @@ package model
 
 import (
     "container/heap"
+    "context"
     "fmt"
     "log/slog"
     "strconv"
     "strings"
+
+    "github.com/ollama/ollama/logutil"
 )
 
 const spmWhitespaceSep = "▁"
@@ -22,7 +25,7 @@ func (spm SentencePieceModel) Vocabulary() *Vocabulary {
 }
 
 func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
-    slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
+    slog.Log(context.TODO(), logutil.LevelTrace, "Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
 
     counter := map[int]int{}
     var maxTokenLen int
@@ -36,7 +39,7 @@ func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
         }
     }
 
-    slog.Debug("Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
+    slog.Log(context.TODO(), logutil.LevelTrace, "Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
         "user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
         "max token len", maxTokenLen)
 
@@ -199,6 +202,7 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
         }
     }
 
+    slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
     return ids, nil
 }
 
@@ -257,5 +261,6 @@ func (spm SentencePieceModel) Decode(ids []int32) (string, error) {
         }
     }
 
+    slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
     return sb.String(), nil
 }
diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go
index 5341d4fb1..73e50ee0b 100644
--- a/runner/llamarunner/runner.go
+++ b/runner/llamarunner/runner.go
@@ -11,7 +11,6 @@ import (
     "net"
     "net/http"
     "os"
-    "path/filepath"
     "regexp"
     "runtime"
     "strconv"
@@ -23,8 +22,10 @@ import (
     "golang.org/x/sync/semaphore"
 
     "github.com/ollama/ollama/api"
+    "github.com/ollama/ollama/envconfig"
     "github.com/ollama/ollama/llama"
     "github.com/ollama/ollama/llm"
+    "github.com/ollama/ollama/logutil"
     "github.com/ollama/ollama/runner/common"
 )
 
@@ -680,8 +681,6 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
 
     w.Header().Set("Content-Type", "application/json")
 
-    slog.Debug("embedding request", "content", req.Content)
-
     seq, err := s.NewSequence(req.Content, nil, NewSequenceParams{embedding: true})
     if err != nil {
         http.Error(w, fmt.Sprintf("Failed to create new sequence: %v", err), http.StatusInternalServerError)
@@ -815,7 +814,7 @@ func Execute(args []string) error {
     kvCacheType := fs.String("kv-cache-type", "", "quantization type for KV cache (default: f16)")
     port := fs.Int("port", 8080, "Port to expose the server on")
     threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
-    verbose := fs.Bool("verbose", false, "verbose output (default: disabled)")
+    _ = fs.Bool("verbose", false, "verbose output (default: disabled)")
     noMmap := fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)")
     tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
     multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
@@ -830,22 +829,7 @@ func Execute(args []string) error {
     if err := fs.Parse(args); err != nil {
         return err
     }
-    level := slog.LevelInfo
-    if *verbose {
-        level = slog.LevelDebug
-    }
-    handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-        Level:     level,
-        AddSource: true,
-        ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-            if attr.Key == slog.SourceKey {
-                source := attr.Value.Any().(*slog.Source)
-                source.File = filepath.Base(source.File)
-            }
-            return attr
-        },
-    })
-    slog.SetDefault(slog.New(handler))
+    slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 
     slog.Info("starting go runner")
     llama.BackendInit()
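Both runners keep accepting `--verbose` so existing command lines still parse, but the value is discarded; verbosity now comes solely from `OLLAMA_DEBUG` in the runner's environment via `envconfig.LogLevel()`. A minimal sketch of that flag-compatibility pattern (standalone and illustrative, not the runner's actual entry point):

```go
package main

import (
	"flag"
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	fs := flag.NewFlagSet("runner", flag.ExitOnError)
	// Still defined so old invocations passing --verbose do not fail to
	// parse, but the value is intentionally unused.
	_ = fs.Bool("verbose", false, "verbose output (default: disabled)")
	if err := fs.Parse(os.Args[1:]); err != nil {
		return
	}
	fmt.Println("log level from OLLAMA_DEBUG:", envconfig.LogLevel())
}
```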
the server on") threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation") - verbose := fs.Bool("verbose", false, "verbose output (default: disabled)") + _ = fs.Bool("verbose", false, "verbose output (default: disabled)") _ = fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)") tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions") multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users") @@ -831,22 +832,7 @@ func Execute(args []string) error { if err := fs.Parse(args); err != nil { return err } - level := slog.LevelInfo - if *verbose { - level = slog.LevelDebug - } - handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ - Level: level, - AddSource: true, - ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { - if attr.Key == slog.SourceKey { - source := attr.Value.Any().(*slog.Source) - source.File = filepath.Base(source.File) - } - return attr - }, - }) - slog.SetDefault(slog.New(handler)) + slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel())) slog.Info("starting ollama engine") server := &Server{ diff --git a/server/routes.go b/server/routes.go index 8b0c7aca5..f1706f74b 100644 --- a/server/routes.go +++ b/server/routes.go @@ -17,7 +17,6 @@ import ( "net/netip" "os" "os/signal" - "path/filepath" "regexp" "slices" "strings" @@ -33,6 +32,7 @@ import ( "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/llm" + "github.com/ollama/ollama/logutil" "github.com/ollama/ollama/model/models/mllama" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/server/internal/client/ollama" @@ -295,8 +295,6 @@ func (s *Server) GenerateHandler(c *gin.Context) { prompt = b.String() } - slog.Debug("generate request", "images", len(images), "prompt", prompt) - ch := make(chan any) go func() { // TODO (jmorganca): avoid building the response twice both here and below @@ -1226,26 +1224,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) { } func Serve(ln net.Listener) error { - level := slog.LevelInfo - if envconfig.Debug() { - level = slog.LevelDebug - } - + slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel())) slog.Info("server config", "env", envconfig.Values()) - handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ - Level: level, - AddSource: true, - ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr { - if attr.Key == slog.SourceKey { - source := attr.Value.Any().(*slog.Source) - source.File = filepath.Base(source.File) - } - - return attr - }, - }) - - slog.SetDefault(slog.New(handler)) blobsDir, err := GetBlobsPath("") if err != nil { @@ -1521,8 +1501,6 @@ func (s *Server) ChatHandler(c *gin.Context) { return } - slog.Debug("chat request", "images", len(images), "prompt", prompt) - ch := make(chan any) go func() { defer close(ch)