no runners

parent cef3cf353a
commit 67bcb55941
@@ -719,23 +719,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 
 func LibraryDirs() []string {
 	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
 	// This can be simplified once we no longer carry runners as payloads
-	paths := []string{}
-	appExe, err := os.Executable()
+	exe, err := os.Executable()
 	if err != nil {
 		slog.Warn("failed to lookup executable path", "error", err)
-	} else {
-		appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
-		if _, err := os.Stat(appRelative); err == nil {
-			paths = append(paths, appRelative)
-		}
+		return nil
 	}
-	rDir := runners.Locate()
-	if err != nil {
-		slog.Warn("unable to locate gpu dependency libraries", "error", err)
-	} else {
-		paths = append(paths, filepath.Dir(rDir))
-	}
-	return paths
+	lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
+	if _, err := os.Stat(lib); err != nil {
+		return nil
+	}
+	return []string{lib}
 }
 
 func GetSystemInfo() SystemInfo {
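The rewritten LibraryDirs resolves the bundled dependency directory purely relative to the running executable. A minimal standalone sketch of that lookup pattern; the relative segments below stand in for envconfig.LibRelativeToExe() plus "lib", "ollama" and are assumptions for illustration only:

package main

// Sketch of the "resolve the library directory relative to the running
// executable" pattern used by the new LibraryDirs. The "../lib/ollama"
// segment is a hypothetical stand-in for the real relative path helper.

import (
	"fmt"
	"os"
	"path/filepath"
)

func libraryDir() (string, error) {
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}
	lib := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
	if _, err := os.Stat(lib); err != nil {
		return "", err // no bundled library directory next to the binary
	}
	return lib, nil
}

func main() {
	if dir, err := libraryDir(); err == nil {
		fmt.Println("bundled libraries at:", dir)
	} else {
		fmt.Println("no bundled library directory:", err)
	}
}

Returning nil when the directory is missing lets callers treat "no bundled libraries" and "lookup failed" the same way, which is what the new function does.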
llm/server.go (345 lines changed)
@@ -30,7 +30,6 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llama"
-	"github.com/ollama/ollama/runners"
 )
 
 type LlamaServer interface {
@@ -91,25 +90,19 @@ func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
 // NewLlamaServer will run a server for the given GPUs
 // The gpu list must be a single family.
 func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
-	var err error
-	var cpuRunner string
-	var estimate MemoryEstimate
-	var systemTotalMemory uint64
-	var systemFreeMemory uint64
-	var systemSwapFreeMemory uint64
-
 	systemInfo := discover.GetSystemInfo()
-	systemTotalMemory = systemInfo.System.TotalMemory
-	systemFreeMemory = systemInfo.System.FreeMemory
-	systemSwapFreeMemory = systemInfo.System.FreeSwap
+	systemTotalMemory := systemInfo.System.TotalMemory
+	systemFreeMemory := systemInfo.System.FreeMemory
+	systemSwapFreeMemory := systemInfo.System.FreeSwap
 	slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
 
 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
 	if opts.NumGPU == 0 {
 		gpus = discover.GetCPUInfo()
 	}
 
+	var estimate MemoryEstimate
 	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		cpuRunner = runners.ServerForCpu()
 		estimate = EstimateGPULayers(gpus, f, projectors, opts)
 	} else {
 		estimate = EstimateGPULayers(gpus, f, projectors, opts)
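NewLlamaServer begins by logging total, free, and swap memory through slog in human-readable units. A standalone sketch of that logging pattern; humanBytes below is a simplified stand-in for the repository's format.HumanBytes2, and the memory figures are made up:

package main

// Sketch of structured memory logging as done at the top of NewLlamaServer.
// humanBytes is a hypothetical, simplified replacement for format.HumanBytes2.

import (
	"fmt"
	"log/slog"
)

func humanBytes(b uint64) string {
	const unit = 1024
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}
	div, exp := uint64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
}

func main() {
	var total, free, freeSwap uint64 = 32 << 30, 20 << 30, 8 << 30
	slog.Info("system memory",
		"total", humanBytes(total),
		"free", humanBytes(free),
		"free_swap", humanBytes(freeSwap))
}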
@@ -121,7 +114,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		opts.NumGPU = 0
 	case gpus[0].Library != "metal" && estimate.Layers == 0:
 		// Don't bother loading into the GPU if no layers can fit
-		cpuRunner = runners.ServerForCpu()
 		gpus = discover.GetCPUInfo()
 	case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
 		opts.NumGPU = estimate.Layers
@@ -141,36 +133,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 
 	slog.Info("offload", "", estimate)
 
-	// Loop through potential servers
-	finalErr := errors.New("no suitable llama servers found")
-
-	availableServers := runners.GetAvailableServers()
-
-	var servers []string
-	if cpuRunner != "" {
-		servers = []string{cpuRunner}
-	} else {
-		servers = runners.ServersForGpu(gpus[0].RunnerName()) // All GPUs in the list are matching Library and Variant
-	}
-	demandLib := envconfig.LLMLibrary()
-	if demandLib != "" {
-		serverPath := availableServers[demandLib]
-		if serverPath == "" {
-			slog.Info(fmt.Sprintf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib))
-		} else {
-			slog.Info("user override", "OLLAMA_LLM_LIBRARY", demandLib, "path", serverPath)
-			servers = []string{demandLib}
-			if strings.HasPrefix(demandLib, "cpu") || (!(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") && demandLib == runners.BuiltinName()) {
-				// Omit the GPU flag to silence the warning
-				opts.NumGPU = -1
-			}
-		}
-	}
-
-	if len(servers) == 0 {
-		return nil, fmt.Errorf("no servers found for %v", gpus)
-	}
-
 	params := []string{
 		"--model", model,
 		"--ctx-size", strconv.Itoa(opts.NumCtx),
@@ -271,164 +233,149 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, f *ggml.GGML, adapt
 		params = append(params, "--multiuser-cache")
 	}
 
-	for i := range servers {
-		builtin := servers[i] == runners.BuiltinName()
-		server := availableServers[servers[i]]
-		if server == "" {
-			// Shouldn't happen
-			finalErr = fmt.Errorf("[%d] server %s not listed in available servers %v", i, servers[i], availableServers)
-			slog.Error("server list inconsistent", "error", finalErr)
-			continue
-		}
-
-		if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) {
-			gpus = discover.GetCPUInfo()
-		}
-
-		// Find an availableServers port, retry on each iteration in case the failure was a port conflict race
-		port := 0
-		if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
-			var l *net.TCPListener
-			if l, err = net.ListenTCP("tcp", a); err == nil {
-				port = l.Addr().(*net.TCPAddr).Port
-				l.Close()
-			}
-		}
-		if port == 0 {
-			slog.Debug("ResolveTCPAddr failed ", "error", err)
-			port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
-		}
-		finalParams := []string{"runner"}
-		finalParams = append(finalParams, params...)
-		finalParams = append(finalParams, "--port", strconv.Itoa(port))
-
-		pathEnv := "LD_LIBRARY_PATH"
-		if runtime.GOOS == "windows" {
-			pathEnv = "PATH"
-		}
-		// Start with the server directory for the LD_LIBRARY_PATH/PATH
-		libraryPaths := []string{filepath.Dir(server)}
-
-		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
-			// favor our bundled library dependencies over system libraries
-			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
-		}
-
-		// Note: we always put the dependency path first
-		// since this was the exact version we compiled/linked against
-		if gpus[0].DependencyPath != nil {
-			// assume gpus from the same library have the same dependency path
-			libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
-		}
-
-		// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
-		s := &llmServer{
-			port:        port,
-			cmd:         exec.Command(server, finalParams...),
-			status:      NewStatusWriter(os.Stderr),
-			options:     opts,
-			modelPath:   model,
-			estimate:    estimate,
-			numParallel: numParallel,
-			sem:         semaphore.NewWeighted(int64(numParallel)),
-			totalLayers: f.KV().BlockCount() + 1,
-			gpus:        gpus,
-			done:        make(chan error, 1),
-		}
-
-		s.cmd.Env = os.Environ()
-		s.cmd.Stdout = os.Stdout
-		s.cmd.Stderr = s.status
-		s.cmd.SysProcAttr = LlamaServerSysProcAttr
-
-		envWorkarounds := [][2]string{}
-		for _, gpu := range gpus {
-			envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
-		}
-		visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
-		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
-
-		// Update or add the path and visible devices variable with our adjusted version
-		pathNeeded := true
-		devicesNeeded := visibleDevicesEnv != ""
-		for i := range s.cmd.Env {
-			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
-			if strings.EqualFold(cmp[0], pathEnv) {
-				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
-				pathNeeded = false
-			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
-				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
-				devicesNeeded = false
-			} else if len(envWorkarounds) != 0 {
-				for _, kv := range envWorkarounds {
-					if strings.EqualFold(cmp[0], kv[0]) {
-						s.cmd.Env[i] = kv[0] + "=" + kv[1]
-					}
-				}
-			}
-		}
-		if pathNeeded {
-			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
-		}
-		if devicesNeeded {
-			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
-		}
-
-		slog.Info("starting llama server", "cmd", s.cmd.String())
-		if envconfig.Debug() {
-			filteredEnv := []string{}
-			for _, ev := range s.cmd.Env {
-				if strings.HasPrefix(ev, "CUDA_") ||
-					strings.HasPrefix(ev, "ROCR_") ||
-					strings.HasPrefix(ev, "ROCM_") ||
-					strings.HasPrefix(ev, "HIP_") ||
-					strings.HasPrefix(ev, "GPU_") ||
-					strings.HasPrefix(ev, "HSA_") ||
-					strings.HasPrefix(ev, "GGML_") ||
-					strings.HasPrefix(ev, "PATH=") ||
-					strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
-					filteredEnv = append(filteredEnv, ev)
-				}
-			}
-			// Log at debug as the environment is inherited and might contain sensitive information
-			slog.Debug("subprocess", "environment", filteredEnv)
-		}
-
-		if err = s.cmd.Start(); err != nil {
-			// Detect permission denied and augment the message about noexec
-			if errors.Is(err, os.ErrPermission) {
-				finalErr = fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, server)
-				continue
-			}
-			msg := ""
-			if s.status != nil && s.status.LastErrMsg != "" {
-				msg = s.status.LastErrMsg
-			}
-			err = fmt.Errorf("error starting the external llama server: %v %s", err, msg)
-			finalErr = err
-			continue
-		}
-
-		// reap subprocess when it exits
-		go func() {
-			err := s.cmd.Wait()
-			// Favor a more detailed message over the process exit status
-			if err != nil && s.status != nil && s.status.LastErrMsg != "" {
-				slog.Debug("llama runner terminated", "error", err)
-				if strings.Contains(s.status.LastErrMsg, "unknown model") {
-					s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
-				}
-				s.done <- errors.New(s.status.LastErrMsg)
-			} else {
-				s.done <- err
-			}
-		}()
-
-		return s, nil
-	}
-
-	slog.Error("unable to load any llama server", "error", finalErr)
-	return nil, finalErr
+	exe, err := os.Executable()
+	if err != nil {
+		return nil, err
+	}
+
+	// Find an availableServers port, retry on each iteration in case the failure was a port conflict race
+	port := 0
+	if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
+		var l *net.TCPListener
+		if l, err = net.ListenTCP("tcp", a); err == nil {
+			port = l.Addr().(*net.TCPAddr).Port
+			l.Close()
+		}
+	}
+	if port == 0 {
+		slog.Debug("ResolveTCPAddr failed ", "error", err)
+		port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
+	}
+	finalParams := []string{"runner"}
+	finalParams = append(finalParams, params...)
+	finalParams = append(finalParams, "--port", strconv.Itoa(port))
+
+	pathEnv := "LD_LIBRARY_PATH"
+	if runtime.GOOS == "windows" {
+		pathEnv = "PATH"
+	}
+	// Start with the server directory for the LD_LIBRARY_PATH/PATH
+	libraryPaths := []string{filepath.Dir(exe)}
+
+	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
+		// favor our bundled library dependencies over system libraries
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
+	}
+
+	// Note: we always put the dependency path first
+	// since this was the exact version we compiled/linked against
+	if gpus[0].DependencyPath != nil {
+		// assume gpus from the same library have the same dependency path
+		libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
+	}
+
+	// TODO - once fully switched to the Go runner, load the model here for tokenize/detokenize cgo access
+	s := &llmServer{
+		port:        port,
+		cmd:         exec.Command(exe, finalParams...),
+		status:      NewStatusWriter(os.Stderr),
+		options:     opts,
+		modelPath:   model,
+		estimate:    estimate,
+		numParallel: numParallel,
+		sem:         semaphore.NewWeighted(int64(numParallel)),
+		totalLayers: f.KV().BlockCount() + 1,
+		gpus:        gpus,
+		done:        make(chan error, 1),
+	}
+
+	s.cmd.Env = os.Environ()
+	s.cmd.Stdout = os.Stdout
+	s.cmd.Stderr = s.status
+	s.cmd.SysProcAttr = LlamaServerSysProcAttr
+
+	envWorkarounds := [][2]string{}
+	for _, gpu := range gpus {
+		envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
+	}
+	visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		} else if len(envWorkarounds) != 0 {
+			for _, kv := range envWorkarounds {
+				if strings.EqualFold(cmp[0], kv[0]) {
+					s.cmd.Env[i] = kv[0] + "=" + kv[1]
+				}
+			}
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+	}
+
+	slog.Info("starting llama server", "cmd", s.cmd.String())
+	if envconfig.Debug() {
+		filteredEnv := []string{}
+		for _, ev := range s.cmd.Env {
+			if strings.HasPrefix(ev, "CUDA_") ||
+				strings.HasPrefix(ev, "ROCR_") ||
+				strings.HasPrefix(ev, "ROCM_") ||
+				strings.HasPrefix(ev, "HIP_") ||
+				strings.HasPrefix(ev, "GPU_") ||
+				strings.HasPrefix(ev, "HSA_") ||
+				strings.HasPrefix(ev, "GGML_") ||
+				strings.HasPrefix(ev, "PATH=") ||
+				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
+				filteredEnv = append(filteredEnv, ev)
+			}
+		}
+		// Log at debug as the environment is inherited and might contain sensitive information
+		slog.Debug("subprocess", "environment", filteredEnv)
+	}
+
+	if err = s.cmd.Start(); err != nil {
+		// Detect permission denied and augment the message about noexec
+		if errors.Is(err, os.ErrPermission) {
+			return nil, fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, exe)
+		}
+
+		msg := ""
+		if s.status != nil && s.status.LastErrMsg != "" {
+			msg = s.status.LastErrMsg
+		}
+		return nil, fmt.Errorf("error starting the external llama server: %v %s", err, msg)
+	}
+
+	// reap subprocess when it exits
+	go func() {
+		err := s.cmd.Wait()
+		// Favor a more detailed message over the process exit status
+		if err != nil && s.status != nil && s.status.LastErrMsg != "" {
+			slog.Debug("llama runner terminated", "error", err)
+			if strings.Contains(s.status.LastErrMsg, "unknown model") {
+				s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
+			}
+			s.done <- errors.New(s.status.LastErrMsg)
+		} else {
+			s.done <- err
+		}
+	}()
+
+	return s, nil
 }
 
 type ServerStatus int
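With the dynamic server list gone, the new code launches the llama runner by re-executing the server's own binary with a runner subcommand on a freshly reserved port. A self-contained sketch of that launch pattern, assuming only the standard library; the child branch here just prints its arguments where the real subcommand would start the runner:

package main

// Sketch: reserve a free TCP port, then re-exec the current binary as a
// child process with a "runner" subcommand. The child branch is a stand-in
// for the real runner loop and only echoes its arguments.

import (
	"fmt"
	"math/rand"
	"net"
	"os"
	"os/exec"
	"strconv"
)

func freePort() int {
	// Ask the kernel for an unused port by listening on port 0.
	if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
		if l, err := net.ListenTCP("tcp", a); err == nil {
			p := l.Addr().(*net.TCPAddr).Port
			l.Close()
			return p
		}
	}
	// Fall back to a random port in the ephemeral range.
	return rand.Intn(65535-49152) + 49152
}

func main() {
	if len(os.Args) > 1 && os.Args[1] == "runner" {
		fmt.Println("child runner started with args:", os.Args[2:])
		return
	}

	exe, err := os.Executable()
	if err != nil {
		panic(err)
	}
	cmd := exec.Command(exe, "runner", "--port", strconv.Itoa(freePort()))
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		panic(err)
	}
}

Listening on port 0 and immediately closing the listener mirrors the diff's port selection, with a random ephemeral port as the fallback when the resolve or listen step fails.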
@@ -1,17 +1,9 @@
 package runners
 
 import (
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"slices"
-	"strings"
 	"sync"
 
 	"golang.org/x/sys/cpu"
-
-	"github.com/ollama/ollama/envconfig"
 )
 
 var (
@@ -52,155 +44,3 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
-
-// Return the location where runners were located
-// empty string indicates only builtin is present
-func Locate() string {
-	once.Do(locateRunnersOnce)
-	return runnersDir
-}
-
-// searches for runners in a prioritized set of locations
-// 1. local build, with executable at the top of the tree
-// 2. lib directory relative to executable
-func locateRunnersOnce() {
-	exe, err := os.Executable()
-	if err != nil {
-		slog.Debug("runner locate", "error", err)
-	}
-
-	paths := []string{
-		filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
-		filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
-	}
-	for _, path := range paths {
-		if _, err := os.Stat(path); err == nil {
-			runnersDir = path
-			slog.Debug("runners located", "dir", runnersDir)
-			return
-		}
-	}
-	// Fall back to built-in
-	slog.Debug("no dynamic runners detected, using only built-in")
-	runnersDir = ""
-}
-
-// Return the well-known name of the builtin runner for the given platform
-func BuiltinName() string {
-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
-		return "metal"
-	}
-	return "cpu"
-}
-
-// directory names are the name of the runner and may contain an optional
-// variant prefixed with '_' as the separator. For example, "cuda_v11" and
-// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
-// lowest common denominator
-func GetAvailableServers() map[string]string {
-	once.Do(locateRunnersOnce)
-
-	servers := make(map[string]string)
-	exe, err := os.Executable()
-	if err == nil {
-		servers[BuiltinName()] = exe
-	}
-
-	if runnersDir == "" {
-		return servers
-	}
-
-	// glob runnersDir for files that start with ollama_
-	pattern := filepath.Join(runnersDir, "*", "ollama_*")
-
-	files, err := filepath.Glob(pattern)
-	if err != nil {
-		slog.Debug("could not glob", "pattern", pattern, "error", err)
-		return nil
-	}
-
-	for _, file := range files {
-		slog.Debug("availableServers : found", "file", file)
-		runnerName := filepath.Base(filepath.Dir(file))
-		// Special case for our GPU runners - if compiled with standard AVX flag
-		// detect incompatible system
-		// Custom builds will omit this and its up to the user to ensure compatibility
-		parsed := strings.Split(runnerName, "_")
-		if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
-			slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
-			continue
-		}
-		servers[runnerName] = file
-	}
-
-	return servers
-}
-
-// serversForGpu returns a list of compatible servers give the provided GPU library/variant
-func ServersForGpu(requested string) []string {
-	// glob workDir for files that start with ollama_
-	availableServers := GetAvailableServers()
-
-	// Short circuit if the only option is built-in
-	if _, ok := availableServers[BuiltinName()]; ok && len(availableServers) == 1 {
-		return []string{BuiltinName()}
-	}
-
-	bestCPUVariant := GetCPUCapability()
-	requestedLib := strings.Split(requested, "_")[0]
-	servers := []string{}
-
-	// exact match first
-	for a := range availableServers {
-		short := a
-		parsed := strings.Split(a, "_")
-		if len(parsed) == 3 {
-			// Strip off optional _avx for comparison
-			short = parsed[0] + "_" + parsed[1]
-		}
-		if a == requested || short == requested {
-			servers = []string{a}
-		}
-	}
-
-	// If no exact match, then try without variant
-	if len(servers) == 0 {
-		alt := []string{}
-		for a := range availableServers {
-			if requestedLib == strings.Split(a, "_")[0] && a != requested {
-				alt = append(alt, a)
-			}
-		}
-		slices.Sort(alt)
-		servers = append(servers, alt...)
-	}
-
-	// Finally append the best CPU option if found, then builtin
-	if bestCPUVariant != CPUCapabilityNone {
-		for cmp := range availableServers {
-			if cmp == "cpu_"+bestCPUVariant.String() {
-				servers = append(servers, cmp)
-				break
-			}
-		}
-	}
-	servers = append(servers, BuiltinName())
-	return servers
-}
-
-// Return the optimal server for this CPU architecture
-func ServerForCpu() string {
-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
-		return BuiltinName()
-	}
-	variant := GetCPUCapability()
-	availableServers := GetAvailableServers()
-	if variant != CPUCapabilityNone {
-		for cmp := range availableServers {
-			if cmp == "cpu_"+variant.String() {
-				return cmp
-			}
-		}
-	}
-	return BuiltinName()
-}
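What remains of the runners package keys its CPU capability check off golang.org/x/sys/cpu, the same mechanism the removed AVX compatibility check relied on. A small self-contained sketch of that feature-detection pattern; the variant names returned here are illustrative, not the package's exact mapping:

package main

// Sketch of CPU feature detection with golang.org/x/sys/cpu. The cpu.X86
// fields compile on all platforms and simply report false on non-x86 hosts.

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func cpuVariant() string {
	switch {
	case cpu.X86.HasAVX2:
		return "avx2"
	case cpu.X86.HasAVX:
		return "avx"
	default:
		return "" // lowest common denominator
	}
}

func main() {
	fmt.Println("best CPU variant:", cpuVariant())
}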
@@ -33,7 +33,6 @@ import (
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
-	"github.com/ollama/ollama/runners"
 	"github.com/ollama/ollama/server/imageproc"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
@@ -1269,14 +1268,6 @@ func Serve(ln net.Listener) error {
 		done()
 	}()
 
-	// Locate and log what runners are present at startup
-	var runnerNames []string
-	for v := range runners.GetAvailableServers() {
-		runnerNames = append(runnerNames, v)
-	}
-	slog.Info("Dynamic LLM libraries", "runners", runnerNames)
-	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
-
 	s.sched.Run(schedCtx)
 
 	// At startup we retrieve GPU information so we can get log messages before loading a model