parent 82a9e9462a
commit f95a1f2bef
@@ -4,20 +4,14 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"
 
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/logutil"
 )
 
 func InitLogging() {
-	level := slog.LevelInfo
-
-	if envconfig.Debug() {
-		level = slog.LevelDebug
-	}
-
 	var logFile *os.File
 	var err error
 	// Detect if we're a GUI app on windows, and if not, send logs to console
@@ -33,20 +27,8 @@ func InitLogging() {
 			return
 		}
 	}
-	handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
 
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(logFile, envconfig.LogLevel()))
 
 	slog.Info("ollama app started")
 }
 
@@ -670,7 +670,7 @@ func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, e
 }
 
 func getVerboseState() C.uint16_t {
-	if envconfig.Debug() {
+	if envconfig.LogLevel() < slog.LevelInfo {
 		return C.uint16_t(1)
 	}
 	return C.uint16_t(0)
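Reviewer note: this comparison works because slog levels are ordered integers (DEBUG is -4, INFO is 0, WARN is 4, ERROR is 8), so `envconfig.LogLevel() < slog.LevelInfo` is true for DEBUG and for the new TRACE level alike. A minimal standalone sketch of the ordering; the `levelTrace` constant here mirrors `logutil.LevelTrace`:

package main

import (
	"fmt"
	"log/slog"
)

const levelTrace slog.Level = -8 // mirrors logutil.LevelTrace

func main() {
	for _, l := range []slog.Level{levelTrace, slog.LevelDebug, slog.LevelInfo, slog.LevelWarn} {
		// Only levels below INFO count as "verbose".
		fmt.Println(l, l < slog.LevelInfo)
	}
}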
@@ -149,9 +149,22 @@ func Bool(k string) func() bool {
 	}
 }
 
+// LogLevel returns the log level for the application.
+// Values are 0 or false INFO (Default), 1 or true DEBUG, 2 TRACE
+func LogLevel() slog.Level {
+	level := slog.LevelInfo
+	if s := Var("OLLAMA_DEBUG"); s != "" {
+		if b, _ := strconv.ParseBool(s); b {
+			level = slog.LevelDebug
+		} else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
+			level = slog.Level(i * -4)
+		}
+	}
+
+	return level
+}
+
 var (
-	// Debug enabled additional debug information.
-	Debug = Bool("OLLAMA_DEBUG")
 	// FlashAttention enables the experimental flash attention feature.
 	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
 	// KvCacheType is the quantization type for the K/V cache.
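Reviewer note: the `i * -4` multiplier leans on slog spacing its named levels four apart, so `OLLAMA_DEBUG=2` lands at -8, one full step below DEBUG, which `logutil` names TRACE. A quick standalone sketch of the mapping:

package main

import (
	"fmt"
	"log/slog"
)

func main() {
	// Integer OLLAMA_DEBUG values under the i * -4 rule above:
	// 2 -> -8 (TRACE), 1 -> -4 (DEBUG), -1 -> 4 (WARN), -2 -> 8 (ERROR).
	for _, i := range []int64{2, 1, -1, -2} {
		fmt.Printf("OLLAMA_DEBUG=%d -> %v\n", i, slog.Level(i*-4))
	}
}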
@@ -236,7 +249,7 @@ type EnvVar struct {
 
 func AsMap() map[string]EnvVar {
 	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
 		"OLLAMA_KV_CACHE_TYPE":   {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
 		"OLLAMA_GPU_OVERHEAD":    {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
@@ -1,11 +1,13 @@
 package envconfig
 
 import (
+	"log/slog"
 	"math"
 	"testing"
 	"time"
 
 	"github.com/google/go-cmp/cmp"
+	"github.com/ollama/ollama/logutil"
 )
 
 func TestHost(t *testing.T) {
@@ -292,3 +294,34 @@ func TestContextLength(t *testing.T) {
 		})
 	}
 }
+
+func TestLogLevel(t *testing.T) {
+	cases := map[string]slog.Level{
+		// Default to INFO
+		"":      slog.LevelInfo,
+		"false": slog.LevelInfo,
+		"f":     slog.LevelInfo,
+		"0":     slog.LevelInfo,
+
+		// True values enable Debug
+		"true": slog.LevelDebug,
+		"t":    slog.LevelDebug,
+
+		// Positive values increase verbosity
+		"1": slog.LevelDebug,
+		"2": logutil.LevelTrace,
+
+		// Negative values decrease verbosity
+		"-1": slog.LevelWarn,
+		"-2": slog.LevelError,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_DEBUG", k)
+			if i := LogLevel(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}
@@ -17,6 +17,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -30,9 +31,37 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llama"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/model"
 )
 
+type filteredEnv []string
+
+func (e filteredEnv) LogValue() slog.Value {
+	var attrs []slog.Attr
+	for _, env := range e {
+		if key, value, ok := strings.Cut(env, "="); ok {
+			switch {
+			case strings.HasPrefix(key, "OLLAMA_"),
+				strings.HasPrefix(key, "CUDA_"),
+				strings.HasPrefix(key, "ROCR_"),
+				strings.HasPrefix(key, "ROCM_"),
+				strings.HasPrefix(key, "HIP_"),
+				strings.HasPrefix(key, "GPU_"),
+				strings.HasPrefix(key, "HSA_"),
+				strings.HasPrefix(key, "GGML_"),
+				slices.Contains([]string{
+					"PATH",
+					"LD_LIBRARY_PATH",
+					"DYLD_LIBRARY_PATH",
+				}, key):
+				attrs = append(attrs, slog.String(key, value))
+			}
+		}
+	}
+	return slog.GroupValue(attrs...)
+}
+
 type LlamaServer interface {
 	Ping(ctx context.Context) error
 	WaitUntilRunning(ctx context.Context) error
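Reviewer note: `filteredEnv` satisfies the standard `slog.LogValuer` interface, so the allow-list scan runs only when a record is actually emitted at an enabled level; at INFO and above, passing `filteredEnv(s.cmd.Env)` to a suppressed `slog.Debug` call costs almost nothing. A minimal standalone sketch of the same lazy-redaction pattern; the `redactedToken` type is illustrative, not from this change:

package main

import (
	"log/slog"
	"os"
)

// redactedToken resolves lazily: the redaction work happens only if
// the log record is actually handled.
type redactedToken string

func (t redactedToken) LogValue() slog.Value {
	if len(t) <= 4 {
		return slog.StringValue("****")
	}
	return slog.StringValue(string(t[:4]) + "****")
}

func main() {
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, nil)))
	slog.Info("auth", "token", redactedToken("s3cr3t-value"))
	// prints: ... msg=auth token=s3cr****
}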
@@ -148,10 +177,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
 	}
 
-	if envconfig.Debug() {
-		params = append(params, "--verbose")
-	}
-
 	if opts.MainGPU > 0 {
 		params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
 	}
@@ -404,26 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	}
 
 	slog.Info("starting llama server", "cmd", s.cmd)
-	if envconfig.Debug() {
-		filteredEnv := []string{}
-		for _, ev := range s.cmd.Env {
-			if strings.HasPrefix(ev, "OLLAMA_") ||
-				strings.HasPrefix(ev, "CUDA_") ||
-				strings.HasPrefix(ev, "ROCR_") ||
-				strings.HasPrefix(ev, "ROCM_") ||
-				strings.HasPrefix(ev, "HIP_") ||
-				strings.HasPrefix(ev, "GPU_") ||
-				strings.HasPrefix(ev, "HSA_") ||
-				strings.HasPrefix(ev, "GGML_") ||
-				strings.HasPrefix(ev, "PATH=") ||
-				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
-				strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
-				filteredEnv = append(filteredEnv, ev)
-			}
-		}
-		// Log at debug as the environment is inherited and might contain sensitive information
-		slog.Debug("subprocess", "environment", filteredEnv)
-	}
+	slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))
 
 	if err = s.cmd.Start(); err != nil {
 		var msg string
@@ -721,6 +727,9 @@ type CompletionResponse struct {
 }
 
 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
+	slog.Debug("completion request", "images", len(req.Images), "prompt", len(req.Prompt), "format", string(req.Format))
+	slog.Log(ctx, logutil.LevelTrace, "completion request", "prompt", req.Prompt)
+
 	if len(req.Format) > 0 {
 		switch string(req.Format) {
 		case `null`, `""`:
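Reviewer note: unlike the fixed-level helpers (`slog.Debug`, `slog.Info`), `slog.Log` takes an explicit context and an arbitrary `slog.Level`, which is what allows emitting at the custom TRACE level below DEBUG. A minimal standalone sketch, again mirroring `logutil.LevelTrace` as -8:

package main

import (
	"context"
	"log/slog"
	"os"
)

const levelTrace slog.Level = -8 // mirrors logutil.LevelTrace

func main() {
	// The handler must opt in to the lower level or the record is dropped.
	h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: levelTrace})
	slog.SetDefault(slog.New(h))
	slog.Log(context.TODO(), levelTrace, "very chatty detail", "step", 1)
}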
@@ -884,6 +893,8 @@ type EmbeddingResponse struct {
 }
 
 func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
+	slog.Log(ctx, logutil.LevelTrace, "embedding request", "input", input)
+
 	if err := s.sem.Acquire(ctx, 1); err != nil {
 		if errors.Is(err, context.Canceled) {
 			slog.Info("aborting embedding request due to client closing the connection")
logutil/logutil.go (new file, 29 lines)
@@ -0,0 +1,29 @@
+package logutil
+
+import (
+	"io"
+	"log/slog"
+	"path/filepath"
+)
+
+const LevelTrace slog.Level = -8
+
+func NewLogger(w io.Writer, level slog.Level) *slog.Logger {
+	return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
+		Level:     level,
+		AddSource: true,
+		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
+			switch attr.Key {
+			case slog.LevelKey:
+				switch attr.Value.Any().(slog.Level) {
+				case LevelTrace:
+					attr.Value = slog.StringValue("TRACE")
+				}
+			case slog.SourceKey:
+				source := attr.Value.Any().(*slog.Source)
+				source.File = filepath.Base(source.File)
+			}
+			return attr
+		},
+	}))
+}
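With the new package in place, every process configures logging the same way. A short usage sketch built from the identifiers in this diff:

package main

import (
	"log/slog"
	"os"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/logutil"
)

func main() {
	// One line replaces the hand-rolled slog.NewTextHandler blocks that
	// this commit deletes from the app, server, and both runners.
	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
	slog.Debug("visible when OLLAMA_DEBUG is 1 or true")
}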
@@ -27,6 +27,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs"
 	fsggml "github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	ggml "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
 	"golang.org/x/sync/errgroup"
@@ -222,7 +223,7 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend,
 			tt := C.ggml_new_tensor(ctxs[bt], t.source.Kind, C.int(len(t.source.Shape)), (*C.int64_t)(unsafe.Pointer(&t.source.Shape[0])))
 			C.ggml_set_name(tt, cname)
 
-			slog.Debug("created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
+			slog.Log(context.TODO(), logutil.LevelTrace, "created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
 			//nolint:staticcheck // TODO: check if buffer type supports this tensor
 			return tt
 		}
@@ -19,6 +19,7 @@ import (
 	"github.com/ollama/ollama/fs"
 	fsggml "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	_ "github.com/ollama/ollama/ml/backend"
 	"github.com/ollama/ollama/model/input"
@@ -202,7 +203,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 			names := fn(tagsCopy)
 			for _, name := range names {
 				if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
-					slog.Debug("found tensor", "", tensor)
+					slog.Log(context.TODO(), logutil.LevelTrace, "found tensor", "", tensor)
 					vv.Set(reflect.ValueOf(tensor))
 					break
 				}
@@ -2,6 +2,7 @@ package model
 
 import (
 	"cmp"
+	"context"
 	"iter"
 	"log/slog"
 	"slices"
@@ -10,6 +11,7 @@ import (
 
 	"github.com/dlclark/regexp2"
 	heap "github.com/emirpasic/gods/v2/trees/binaryheap"
+	"github.com/ollama/ollama/logutil"
 )
 
 type Special int32
@@ -322,6 +324,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
 		}
 	}
 
+	slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
 	return ids, nil
 }
 
@@ -349,5 +352,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
 		}
 	}
 
+	slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
 	return sb.String(), nil
 }
@@ -2,10 +2,13 @@ package model
 
 import (
 	"container/heap"
+	"context"
 	"fmt"
 	"log/slog"
 	"strconv"
 	"strings"
+
+	"github.com/ollama/ollama/logutil"
 )
 
 const spmWhitespaceSep = "▁"
@@ -22,7 +25,7 @@ func (spm SentencePieceModel) Vocabulary() *Vocabulary {
 }
 
 func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
-	slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
+	slog.Log(context.TODO(), logutil.LevelTrace, "Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
 
 	counter := map[int]int{}
 	var maxTokenLen int
@@ -36,7 +39,7 @@ func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
 		}
 	}
 
-	slog.Debug("Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
+	slog.Log(context.TODO(), logutil.LevelTrace, "Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
 		"user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
 		"max token len", maxTokenLen)
 
@@ -199,6 +202,7 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
 		}
 	}
 
+	slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
 	return ids, nil
 }
 
@@ -257,5 +261,6 @@ func (spm SentencePieceModel) Decode(ids []int32) (string, error) {
 		}
 	}
 
+	slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
 	return sb.String(), nil
 }
@@ -11,7 +11,6 @@ import (
 	"net"
 	"net/http"
 	"os"
-	"path/filepath"
 	"regexp"
 	"runtime"
 	"strconv"
@@ -23,8 +22,10 @@ import (
 	"golang.org/x/sync/semaphore"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/runner/common"
 )
 
@@ -680,8 +681,6 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
 
 	w.Header().Set("Content-Type", "application/json")
 
-	slog.Debug("embedding request", "content", req.Content)
-
 	seq, err := s.NewSequence(req.Content, nil, NewSequenceParams{embedding: true})
 	if err != nil {
 		http.Error(w, fmt.Sprintf("Failed to create new sequence: %v", err), http.StatusInternalServerError)
@@ -815,7 +814,7 @@ func Execute(args []string) error {
 	kvCacheType := fs.String("kv-cache-type", "", "quantization type for KV cache (default: f16)")
 	port := fs.Int("port", 8080, "Port to expose the server on")
 	threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
-	verbose := fs.Bool("verbose", false, "verbose output (default: disabled)")
+	_ = fs.Bool("verbose", false, "verbose output (default: disabled)")
 	noMmap := fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)")
 	tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
 	multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
@@ -830,22 +829,7 @@ func Execute(args []string) error {
 	if err := fs.Parse(args); err != nil {
 		return err
 	}
-	level := slog.LevelInfo
-	if *verbose {
-		level = slog.LevelDebug
-	}
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 	slog.Info("starting go runner")
 
 	llama.BackendInit()
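Reviewer note: `--verbose` is still parsed (`_ = fs.Bool(...)`) so existing command lines keep working, but it is now a no-op; the runner derives its level from `OLLAMA_DEBUG` via `envconfig.LogLevel()`. A hedged sketch of spawning the runner with trace logging from Go; the exact subcommand and flags here are illustrative assumptions, not taken from this diff:

package main

import (
	"os"
	"os/exec"
)

func main() {
	// Hypothetical invocation; subcommand and flags are illustrative.
	cmd := exec.Command("ollama", "runner", "--port", "8080")
	// The runner now reads OLLAMA_DEBUG itself; 2 maps to TRACE.
	cmd.Env = append(os.Environ(), "OLLAMA_DEBUG=2")
	cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
	_ = cmd.Run()
}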
@@ -12,7 +12,6 @@ import (
 	"net"
 	"net/http"
 	"os"
-	"path/filepath"
 	"regexp"
 	"runtime"
 	"strconv"
@@ -24,7 +23,9 @@ import (
 	"golang.org/x/sync/semaphore"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
@@ -816,7 +817,7 @@ func Execute(args []string) error {
 	kvCacheType := fs.String("kv-cache-type", "", "quantization type for KV cache (default: f16)")
 	port := fs.Int("port", 8080, "Port to expose the server on")
 	threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
-	verbose := fs.Bool("verbose", false, "verbose output (default: disabled)")
+	_ = fs.Bool("verbose", false, "verbose output (default: disabled)")
 	_ = fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)")
 	tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
 	multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
@@ -831,22 +832,7 @@ func Execute(args []string) error {
 	if err := fs.Parse(args); err != nil {
 		return err
 	}
-	level := slog.LevelInfo
-	if *verbose {
-		level = slog.LevelDebug
-	}
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 	slog.Info("starting ollama engine")
 
 	server := &Server{
@@ -17,7 +17,6 @@ import (
 	"net/netip"
 	"os"
 	"os/signal"
-	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
@@ -33,6 +32,7 @@ import (
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/model/models/mllama"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/server/internal/client/ollama"
@@ -295,8 +295,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		prompt = b.String()
 	}
 
-	slog.Debug("generate request", "images", len(images), "prompt", prompt)
-
 	ch := make(chan any)
 	go func() {
 		// TODO (jmorganca): avoid building the response twice both here and below
@@ -1226,26 +1224,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 }
 
 func Serve(ln net.Listener) error {
-	level := slog.LevelInfo
-	if envconfig.Debug() {
-		level = slog.LevelDebug
-	}
-
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 	slog.Info("server config", "env", envconfig.Values())
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-
-			return attr
-		},
-	})
-
-	slog.SetDefault(slog.New(handler))
 
 	blobsDir, err := GetBlobsPath("")
 	if err != nil {
@@ -1521,8 +1501,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}
 
-	slog.Debug("chat request", "images", len(images), "prompt", prompt)
-
 	ch := make(chan any)
 	go func() {
 		defer close(ch)