Compare commits
22 Commits
v0.3.3
...
mxyng/api-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2fe945412a | ||
|
|
de4fc29773 | ||
|
|
e04c7012c2 | ||
|
|
d4a7216c82 | ||
|
|
a4fdd03c3b | ||
|
|
fc85f50a2b | ||
|
|
86b907f82a | ||
|
|
10d49bce70 | ||
|
|
7ed367419e | ||
|
|
50ee8b5f56 | ||
|
|
03bdac0595 | ||
|
|
f457d63400 | ||
|
|
39f2bc6bfc | ||
|
|
b73b0940ef | ||
|
|
6a07344786 | ||
|
|
8b920f35a4 | ||
|
|
4221e39867 | ||
|
|
a091fadfda | ||
|
|
77ccbf04dc | ||
|
|
4addf6b587 | ||
|
|
85c7f11170 | ||
|
|
b732beba6a |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +1,2 @@
|
||||
llm/ext_server/* linguist-vendored
|
||||
* text eol=lf
|
||||
|
||||
2
.github/workflows/test.yaml
vendored
2
.github/workflows/test.yaml
vendored
@@ -273,7 +273,7 @@ jobs:
|
||||
if: ${{ startsWith(matrix.os, 'macos-') }}
|
||||
- uses: golangci/golangci-lint-action@v6
|
||||
with:
|
||||
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
|
||||
args: --timeout 8m0s -v
|
||||
test:
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
@@ -7,22 +7,32 @@ linters:
|
||||
- bodyclose
|
||||
- containedctx
|
||||
- contextcheck
|
||||
- errcheck
|
||||
- exportloopref
|
||||
- gci
|
||||
- gocheckcompilerdirectives
|
||||
# conditionally enable this on linux/macos
|
||||
# - gofmt
|
||||
# - goimports
|
||||
- gofmt
|
||||
- gofumpt
|
||||
- gosimple
|
||||
- govet
|
||||
- ineffassign
|
||||
- intrange
|
||||
- makezero
|
||||
- misspell
|
||||
- nilerr
|
||||
- nolintlint
|
||||
- nosprintfhostport
|
||||
- staticcheck
|
||||
- tenv
|
||||
- testifylint
|
||||
- unconvert
|
||||
- unused
|
||||
- usestdlibvars
|
||||
- wastedassign
|
||||
- whitespace
|
||||
- usestdlibvars
|
||||
linters-settings:
|
||||
gci:
|
||||
sections: [standard, default, localmodule]
|
||||
severity:
|
||||
default-severity: error
|
||||
rules:
|
||||
|
||||
@@ -54,6 +54,7 @@ Here are some example models that can be downloaded:
|
||||
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
||||
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
|
||||
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
|
||||
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
|
||||
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
|
||||
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
|
||||
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
||||
@@ -300,6 +301,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
||||
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
|
||||
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
|
||||
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
|
||||
|
||||
### Terminal
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -172,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
|
||||
}
|
||||
|
||||
if errorResponse.Error != "" {
|
||||
return fmt.Errorf(errorResponse.Error)
|
||||
return errors.New(errorResponse.Error)
|
||||
}
|
||||
|
||||
if response.StatusCode >= http.StatusBadRequest {
|
||||
|
||||
@@ -231,7 +231,6 @@ type Options struct {
|
||||
|
||||
// Runner options which must be set when the model is loaded into memory
|
||||
type Runner struct {
|
||||
UseNUMA bool `json:"numa,omitempty"`
|
||||
NumCtx int `json:"num_ctx,omitempty"`
|
||||
NumBatch int `json:"num_batch,omitempty"`
|
||||
NumGPU int `json:"num_gpu,omitempty"`
|
||||
@@ -505,7 +504,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
||||
for key, val := range m {
|
||||
opt, ok := jsonOpts[key]
|
||||
if !ok {
|
||||
slog.Warn("invalid option provided", "option", opt.Name)
|
||||
slog.Warn("invalid option provided", "option", key)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
|
||||
F16KV: true,
|
||||
UseMLock: false,
|
||||
UseMMap: nil,
|
||||
UseNUMA: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"errors"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
|
||||
"use_mmap": {"foo"},
|
||||
},
|
||||
exp: nil,
|
||||
err: fmt.Errorf("invalid bool value [foo]"),
|
||||
err: errors.New("invalid bool value [foo]"),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
package lifecycle
|
||||
|
||||
import "fmt"
|
||||
import "errors"
|
||||
|
||||
func GetStarted() error {
|
||||
return fmt.Errorf("GetStarted not implemented")
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@ func GetStarted() error {
|
||||
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
|
||||
}
|
||||
proc, err := os.StartProcess(args[0], args, attrs)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to start getting started shell %w", err)
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ func InitLogging() {
|
||||
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
|
||||
} else {
|
||||
rotateLogs(AppLogFile)
|
||||
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
||||
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||
if err != nil {
|
||||
slog.Error(fmt.Sprintf("failed to create server log %v", err))
|
||||
return
|
||||
|
||||
@@ -5,5 +5,5 @@ package lifecycle
|
||||
import "log/slog"
|
||||
|
||||
func ShowLogs() {
|
||||
slog.Warn("ShowLogs not yet implemented")
|
||||
slog.Warn("not implemented")
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
|
||||
// No log exists
|
||||
rotateLogs(logFile)
|
||||
|
||||
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
|
||||
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
|
||||
assert.FileExists(t, logFile)
|
||||
// First rotation
|
||||
rotateLogs(logFile)
|
||||
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
|
||||
assert.NoFileExists(t, logFile)
|
||||
|
||||
for i := 2; i <= LogRotationCount+1; i++ {
|
||||
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
|
||||
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
|
||||
assert.FileExists(t, logFile)
|
||||
rotateLogs(logFile)
|
||||
assert.NoFileExists(t, logFile)
|
||||
|
||||
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
|
||||
}
|
||||
|
||||
rotateLogs(ServerLogFile)
|
||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create server log: %w", err)
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
query.Add("os", runtime.GOOS)
|
||||
query.Add("arch", runtime.GOARCH)
|
||||
query.Add("version", version.Version)
|
||||
query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
|
||||
query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
|
||||
|
||||
nonce, err := auth.NewNonce(rand.Reader, 16)
|
||||
if err != nil {
|
||||
|
||||
@@ -4,9 +4,9 @@ package lifecycle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
)
|
||||
|
||||
func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
return fmt.Errorf("DoUpgrade not yet implemented")
|
||||
return errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package lifecycle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
return fmt.Errorf("failed to lookup downloads: %s", err)
|
||||
}
|
||||
if len(files) == 0 {
|
||||
return fmt.Errorf("no update downloads found")
|
||||
return errors.New("no update downloads found")
|
||||
} else if len(files) > 1 {
|
||||
// Shouldn't happen
|
||||
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
|
||||
@@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||
}
|
||||
} else {
|
||||
// TODO - some details about why it didn't start, or is this a pedantic error case?
|
||||
return fmt.Errorf("installer process did not start")
|
||||
return errors.New("installer process did not start")
|
||||
}
|
||||
|
||||
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
package tray
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
||||
return nil, fmt.Errorf("NOT IMPLEMENTED YET")
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
@@ -11,9 +11,7 @@ import (
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
var (
|
||||
quitOnce sync.Once
|
||||
)
|
||||
var quitOnce sync.Once
|
||||
|
||||
func (t *winTray) Run() {
|
||||
nativeLoop()
|
||||
|
||||
@@ -13,8 +13,9 @@ import (
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
"golang.org/x/sys/windows"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
|
||||
@@ -414,7 +415,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
|
||||
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
|
||||
|
||||
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
|
||||
if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
|
||||
if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
@@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
parts := bytes.Split(publicKey, []byte(" "))
|
||||
if len(parts) < 2 {
|
||||
return "", fmt.Errorf("malformed public key")
|
||||
return "", errors.New("malformed public key")
|
||||
}
|
||||
|
||||
signedData, err := privateKey.Sign(rand.Reader, bts)
|
||||
|
||||
@@ -1160,7 +1160,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
|
||||
return err
|
||||
}
|
||||
if err := startApp(cmd.Context(), client); err != nil {
|
||||
return fmt.Errorf("could not connect to ollama app, is it running?")
|
||||
return errors.New("could not connect to ollama app, is it running?")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -604,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
|
||||
// Check if the file size exceeds 100MB
|
||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||
if info.Size() > maxSize {
|
||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
||||
return nil, errors.New("file size exceeds maximum limit (100MB)")
|
||||
}
|
||||
|
||||
buf = make([]byte, info.Size())
|
||||
|
||||
@@ -2,7 +2,7 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
||||
return err
|
||||
}
|
||||
if !strings.Contains(link, "Ollama.app") {
|
||||
return fmt.Errorf("could not find ollama app")
|
||||
return errors.New("could not find ollama app")
|
||||
}
|
||||
path := strings.Split(link, "Ollama.app")
|
||||
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
|
||||
|
||||
@@ -4,11 +4,11 @@ package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func startApp(ctx context.Context, client *api.Client) error {
|
||||
return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
|
||||
return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
||||
// Finally look in the path
|
||||
appExe, err = exec.LookPath(AppName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not locate ollama app")
|
||||
return errors.New("could not locate ollama app")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,9 +5,10 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type llama struct {
|
||||
|
||||
@@ -2,6 +2,7 @@ package convert
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
@@ -14,8 +15,9 @@ import (
|
||||
"slices"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
||||
@@ -99,7 +101,7 @@ func TestConvertFull(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
|
||||
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
}
|
||||
|
||||
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||
|
||||
@@ -10,8 +10,8 @@ import (
|
||||
)
|
||||
|
||||
type ZipReader struct {
|
||||
r *zip.Reader
|
||||
p string
|
||||
r *zip.Reader
|
||||
p string
|
||||
|
||||
// limit is the maximum size of a file that can be read directly
|
||||
// from the zip archive. Files larger than this size will be extracted
|
||||
|
||||
@@ -111,8 +111,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
for _, b := range u16s {
|
||||
f32s = append(f32s, float16.Frombits(b).Float32())
|
||||
f32s = make([]float32, len(u16s))
|
||||
for i := range u16s {
|
||||
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||
}
|
||||
|
||||
case "BF16":
|
||||
|
||||
142
docs/docker.md
142
docs/docker.md
@@ -1,71 +1,71 @@
|
||||
# Ollama Docker image
|
||||
|
||||
### CPU only
|
||||
|
||||
```bash
|
||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### Nvidia GPU
|
||||
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
||||
|
||||
#### Install with Apt
|
||||
1. Configure the repository
|
||||
```bash
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
```bash
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Install with Yum or Dnf
|
||||
1. Configure the repository
|
||||
|
||||
```bash
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
|
||||
```bash
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Configure Docker to use Nvidia driver
|
||||
```
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
#### Start the container
|
||||
|
||||
```bash
|
||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### AMD GPU
|
||||
|
||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||
|
||||
```
|
||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||
```
|
||||
|
||||
### Run model locally
|
||||
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
docker exec -it ollama ollama run llama3.1
|
||||
```
|
||||
|
||||
### Try different models
|
||||
|
||||
More models can be found on the [Ollama library](https://ollama.com/library).
|
||||
# Ollama Docker image
|
||||
|
||||
### CPU only
|
||||
|
||||
```bash
|
||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### Nvidia GPU
|
||||
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
||||
|
||||
#### Install with Apt
|
||||
1. Configure the repository
|
||||
```bash
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
```
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
```bash
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Install with Yum or Dnf
|
||||
1. Configure the repository
|
||||
|
||||
```bash
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
```
|
||||
|
||||
2. Install the NVIDIA Container Toolkit packages
|
||||
|
||||
```bash
|
||||
sudo yum install -y nvidia-container-toolkit
|
||||
```
|
||||
|
||||
#### Configure Docker to use Nvidia driver
|
||||
```
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
#### Start the container
|
||||
|
||||
```bash
|
||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||
```
|
||||
|
||||
### AMD GPU
|
||||
|
||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||
|
||||
```
|
||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||
```
|
||||
|
||||
### Run model locally
|
||||
|
||||
Now you can run a model:
|
||||
|
||||
```
|
||||
docker exec -it ollama ollama run llama3.1
|
||||
```
|
||||
|
||||
### Try different models
|
||||
|
||||
More models can be found on the [Ollama library](https://ollama.com/library).
|
||||
|
||||
125
docs/openai.md
125
docs/openai.md
@@ -28,13 +28,35 @@ chat_completion = client.chat.completions.create(
|
||||
model='llama3',
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="llava",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
max_tokens=300,
|
||||
)
|
||||
|
||||
completion = client.completions.create(
|
||||
model="llama3",
|
||||
prompt="Say this is a test",
|
||||
)
|
||||
|
||||
list_completion = client.models.list()
|
||||
|
||||
model = client.models.retrieve("llama3")
|
||||
|
||||
embeddings = client.embeddings.create(
|
||||
model="all-minilm",
|
||||
input=["why is the sky blue?", "why is the grass green?"]
|
||||
input=["why is the sky blue?", "why is the grass green?"],
|
||||
)
|
||||
```
|
||||
|
||||
@@ -51,23 +73,44 @@ const openai = new OpenAI({
|
||||
})
|
||||
|
||||
const chatCompletion = await openai.chat.completions.create({
|
||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||
model: 'llama3',
|
||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||
model: 'llama3',
|
||||
})
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model: "llava",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What's in this image?" },
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
const completion = await openai.completions.create({
|
||||
model: "llama3",
|
||||
prompt: "Say this is a test.",
|
||||
})
|
||||
|
||||
const listCompletion = await openai.models.list()
|
||||
|
||||
const model = await openai.models.retrieve("llama3");
|
||||
const model = await openai.models.retrieve("llama3")
|
||||
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "all-minilm",
|
||||
input: ["why is the sky blue?", "why is the grass green?"],
|
||||
});
|
||||
})
|
||||
```
|
||||
|
||||
### `curl`
|
||||
|
||||
```
|
||||
``` shell
|
||||
curl http://localhost:11434/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -84,6 +127,37 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
]
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "llava",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What'\''s in this image?"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"max_tokens": 300
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "llama3",
|
||||
"prompt": "Say this is a test"
|
||||
}'
|
||||
|
||||
curl http://localhost:11434/v1/models
|
||||
|
||||
curl http://localhost:11434/v1/models/llama3
|
||||
@@ -106,6 +180,7 @@ curl http://localhost:11434/v1/embeddings \
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [x] Vision
|
||||
- [x] Tools (streaming support coming soon)
|
||||
- [ ] Vision
|
||||
- [ ] Logprobs
|
||||
@@ -115,7 +190,10 @@ curl http://localhost:11434/v1/embeddings \
|
||||
- [x] `model`
|
||||
- [x] `messages`
|
||||
- [x] Text `content`
|
||||
- [ ] Array of `content` parts
|
||||
- [x] Image `content`
|
||||
- [x] Base64 encoded image
|
||||
- [ ] Image URL
|
||||
- [x] Array of `content` parts
|
||||
- [x] `frequency_penalty`
|
||||
- [x] `presence_penalty`
|
||||
- [x] `response_format`
|
||||
@@ -131,6 +209,39 @@ curl http://localhost:11434/v1/embeddings \
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
### `/v1/completions`
|
||||
|
||||
#### Supported features
|
||||
|
||||
- [x] Completions
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [ ] Logprobs
|
||||
|
||||
#### Supported request fields
|
||||
|
||||
- [x] `model`
|
||||
- [x] `prompt`
|
||||
- [x] `frequency_penalty`
|
||||
- [x] `presence_penalty`
|
||||
- [x] `seed`
|
||||
- [x] `stop`
|
||||
- [x] `stream`
|
||||
- [x] `temperature`
|
||||
- [x] `top_p`
|
||||
- [x] `max_tokens`
|
||||
- [x] `suffix`
|
||||
- [ ] `best_of`
|
||||
- [ ] `echo`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
#### Notes
|
||||
|
||||
- `prompt` currently only accepts a string
|
||||
|
||||
### `/v1/models`
|
||||
|
||||
#### Notes
|
||||
|
||||
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
|
||||
On **Linux** systems with systemd, the logs can be found with this command:
|
||||
|
||||
```shell
|
||||
journalctl -u ollama
|
||||
journalctl -u ollama --no-pager
|
||||
```
|
||||
|
||||
When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
|
||||
|
||||
@@ -3,6 +3,7 @@ package format
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
|
||||
case b >= Thousand:
|
||||
return fmt.Sprintf("%.0fK", float64(b)/Thousand)
|
||||
default:
|
||||
return fmt.Sprintf("%d", b)
|
||||
return strconv.FormatUint(b, 10)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"syscall"
|
||||
@@ -76,7 +77,7 @@ func (hl *HipLib) Release() {
|
||||
|
||||
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||
if hl.dll == 0 {
|
||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
||||
return 0, 0, errors.New("dll has been unloaded")
|
||||
}
|
||||
var version int
|
||||
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
||||
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
|
||||
|
||||
func (hl *HipLib) HipSetDevice(device int) error {
|
||||
if hl.dll == 0 {
|
||||
return fmt.Errorf("dll has been unloaded")
|
||||
return errors.New("dll has been unloaded")
|
||||
}
|
||||
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
||||
if status != hipSuccess {
|
||||
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
|
||||
|
||||
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
||||
if hl.dll == 0 {
|
||||
return nil, fmt.Errorf("dll has been unloaded")
|
||||
return nil, errors.New("dll has been unloaded")
|
||||
}
|
||||
var props hipDevicePropMinimal
|
||||
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
||||
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
|
||||
// free, total, err
|
||||
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
||||
if hl.dll == 0 {
|
||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
||||
return 0, 0, errors.New("dll has been unloaded")
|
||||
}
|
||||
var totalMemory uint64
|
||||
var freeMemory uint64
|
||||
|
||||
@@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {
|
||||
|
||||
// If we still haven't found a usable rocm, the user will have to install it on their own
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||
|
||||
@@ -2,7 +2,7 @@ package gpu
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
||||
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
||||
gfx := string(props.GcnArchName[:n])
|
||||
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
|
||||
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||
// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||
// TODO Why isn't props.iGPU accurate!?
|
||||
if strings.EqualFold(name, iGPUName) {
|
||||
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
|
||||
@@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {
|
||||
|
||||
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||
|
||||
@@ -42,7 +42,7 @@ func PayloadsDir() (string, error) {
|
||||
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
||||
}
|
||||
} else {
|
||||
err = os.MkdirAll(tmpDir, 0755)
|
||||
err = os.MkdirAll(tmpDir, 0o755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
|
||||
}
|
||||
@@ -54,7 +54,7 @@ func PayloadsDir() (string, error) {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
|
||||
if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/cpu"
|
||||
)
|
||||
|
||||
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
|
||||
// else LCD
|
||||
return CPUCapabilityNone
|
||||
}
|
||||
|
||||
func IsNUMA() bool {
|
||||
if runtime.GOOS != "linux" {
|
||||
// numa support in llama.cpp is linux only
|
||||
return false
|
||||
}
|
||||
ids := map[string]interface{}{}
|
||||
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
|
||||
for _, packageId := range packageIds {
|
||||
id, err := os.ReadFile(packageId)
|
||||
if err == nil {
|
||||
ids[strings.TrimSpace(string(id))] = struct{}{}
|
||||
}
|
||||
}
|
||||
return len(ids) > 1
|
||||
}
|
||||
|
||||
19
gpu/gpu.go
19
gpu/gpu.go
@@ -7,9 +7,9 @@ package gpu
|
||||
#cgo windows LDFLAGS: -lpthread
|
||||
|
||||
#include "gpu_info.h"
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
@@ -70,7 +70,6 @@ var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initCudaHandles() *cudaHandles {
|
||||
|
||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||
|
||||
cHandles := &cudaHandles{}
|
||||
@@ -211,14 +210,16 @@ func GetGPUInfo() GpuInfoList {
|
||||
if err != nil {
|
||||
slog.Warn("error looking up system memory", "error", err)
|
||||
}
|
||||
cpus = []CPUInfo{CPUInfo{
|
||||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability,
|
||||
ID: "0",
|
||||
cpus = []CPUInfo{
|
||||
{
|
||||
GpuInfo: GpuInfo{
|
||||
memInfo: mem,
|
||||
Library: "cpu",
|
||||
Variant: cpuCapability,
|
||||
ID: "0",
|
||||
},
|
||||
},
|
||||
}}
|
||||
}
|
||||
|
||||
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
||||
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
||||
|
||||
@@ -8,6 +8,7 @@ package gpu
|
||||
#include "gpu_info_darwin.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
|
||||
@@ -67,4 +67,4 @@ void cpu_check_ram(mem_info_t *resp);
|
||||
#include "gpu_info_oneapi.h"
|
||||
|
||||
#endif // __GPU_INFO_H__
|
||||
#endif // __APPLE__
|
||||
#endif // __APPLE__
|
||||
|
||||
@@ -43,10 +43,12 @@ var OneapiGlobs = []string{
|
||||
"/usr/lib*/libze_intel_gpu.so*",
|
||||
}
|
||||
|
||||
var CudartMgmtName = "libcudart.so*"
|
||||
var NvcudaMgmtName = "libcuda.so*"
|
||||
var NvmlMgmtName = "" // not currently wired on linux
|
||||
var OneapiMgmtName = "libze_intel_gpu.so"
|
||||
var (
|
||||
CudartMgmtName = "libcudart.so*"
|
||||
NvcudaMgmtName = "libcuda.so*"
|
||||
NvmlMgmtName = "" // not currently wired on linux
|
||||
OneapiMgmtName = "libze_intel_gpu.so"
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
var mem memInfo
|
||||
|
||||
@@ -40,10 +40,12 @@ var OneapiGlobs = []string{
|
||||
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
|
||||
}
|
||||
|
||||
var CudartMgmtName = "cudart64_*.dll"
|
||||
var NvcudaMgmtName = "nvcuda.dll"
|
||||
var NvmlMgmtName = "nvml.dll"
|
||||
var OneapiMgmtName = "ze_intel_gpu64.dll"
|
||||
var (
|
||||
CudartMgmtName = "cudart64_*.dll"
|
||||
NvcudaMgmtName = "nvcuda.dll"
|
||||
NvmlMgmtName = "nvml.dll"
|
||||
OneapiMgmtName = "ze_intel_gpu64.dll"
|
||||
)
|
||||
|
||||
func GetCPUMem() (memInfo, error) {
|
||||
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
||||
|
||||
@@ -5,6 +5,7 @@ package integration
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
@@ -13,7 +14,6 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
|
||||
},
|
||||
}
|
||||
resp = [2][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
{"sunlight"},
|
||||
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
}
|
||||
)
|
||||
var wg sync.WaitGroup
|
||||
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
|
||||
reqLimit := len(req)
|
||||
iterLimit := 5
|
||||
|
||||
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
|
||||
if vram != "" {
|
||||
max, err := strconv.ParseUint(vram, 10, 64)
|
||||
if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
|
||||
maxVram, err := strconv.ParseUint(s, 10, 64)
|
||||
require.NoError(t, err)
|
||||
// Don't hammer on small VRAM cards...
|
||||
if max < 4*1024*1024*1024 {
|
||||
if maxVram < 4*format.GibiByte {
|
||||
reqLimit = min(reqLimit, 2)
|
||||
iterLimit = 2
|
||||
}
|
||||
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
|
||||
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
|
||||
for i := 0; i < len(req); i++ {
|
||||
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
|
||||
if i > 1 && consumed > vram {
|
||||
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
|
||||
if i > 1 && consumed > maxVram {
|
||||
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||
break
|
||||
}
|
||||
consumed += chosenModels[i].size
|
||||
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
|
||||
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
|
||||
@@ -35,8 +35,8 @@ var (
|
||||
},
|
||||
}
|
||||
resp = [2][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims"},
|
||||
{"sunlight"},
|
||||
{"england", "english", "massachusetts", "pilgrims"},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
|
||||
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
||||
threadCount := 32
|
||||
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
|
||||
threadCount = maxQueue
|
||||
threadCount = int(maxQueue)
|
||||
} else {
|
||||
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
|
||||
}
|
||||
|
||||
@@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
// fmt.Print(".")
|
||||
if !stallTimer.Reset(stallDuration) {
|
||||
return fmt.Errorf("stall was detected, aborting status reporting")
|
||||
return errors.New("stall was detected, aborting status reporting")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
||||
|
||||
select {
|
||||
case <-stallTimer.C:
|
||||
return fmt.Errorf("download stalled")
|
||||
return errors.New("download stalled")
|
||||
case <-done:
|
||||
return pullError
|
||||
}
|
||||
@@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
|
||||
// fmt.Print(".")
|
||||
buf.Write([]byte(response.Response))
|
||||
if !stallTimer.Reset(streamTimeout) {
|
||||
return fmt.Errorf("stall was detected while streaming response, aborting")
|
||||
return errors.New("stall was detected while streaming response, aborting")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
|
||||
},
|
||||
},
|
||||
[][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"soil", "organic", "earth", "black", "tan"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
[]string{"fourth", "july", "declaration", "independence"},
|
||||
[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
{"sunlight"},
|
||||
{"soil", "organic", "earth", "black", "tan"},
|
||||
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
{"fourth", "july", "declaration", "independence"},
|
||||
{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
}
|
||||
}
|
||||
|
||||
24
llm/ext_server/CMakeLists.txt
vendored
24
llm/ext_server/CMakeLists.txt
vendored
@@ -1,13 +1,13 @@
|
||||
set(TARGET ollama_llama_server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (WIN32)
|
||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||
endif()
|
||||
set(TARGET ollama_llama_server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (WIN32)
|
||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||
endif()
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
20
llm/ext_server/server.cpp
vendored
20
llm/ext_server/server.cpp
vendored
@@ -44,6 +44,7 @@
|
||||
#include <errhandlingapi.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
@@ -402,7 +403,9 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
auto init_result = llama_init_from_gpt_params(params);
|
||||
model = init_result.model;
|
||||
ctx = init_result.context;
|
||||
if (model == nullptr)
|
||||
{
|
||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||
@@ -1220,6 +1223,7 @@ struct llama_server_context
|
||||
|
||||
res.result_json = json
|
||||
{
|
||||
{"id", res.id},
|
||||
{"embedding", std::vector<float>(embd, embd + n_embd)},
|
||||
{"timings", slot.get_formated_timings()},
|
||||
};
|
||||
@@ -2420,7 +2424,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
||||
params.lora_adapters.push_back({
|
||||
std::string(argv[i]),
|
||||
1.0,
|
||||
});
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--lora-scaled")
|
||||
@@ -2436,7 +2443,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
|
||||
params.lora_adapters.push_back({
|
||||
lora_adapter,
|
||||
std::stof(argv[i])
|
||||
});
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "-v" || arg == "--verbose")
|
||||
@@ -3203,6 +3213,10 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
responses = result.result_json.value("results", std::vector<json>{result.result_json});
|
||||
std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) {
|
||||
return a["id"] < b["id"];
|
||||
});
|
||||
|
||||
json embeddings = json::array();
|
||||
|
||||
int prompt_n = 0;
|
||||
|
||||
Submodule llm/llama.cpp updated: 6eeaeba126...1e6f6554aa
@@ -11,8 +11,9 @@ package llm
|
||||
// #include <stdlib.h>
|
||||
// #include "llama.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
@@ -33,7 +34,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
|
||||
params.ftype = ftype.Value()
|
||||
|
||||
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
||||
return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
||||
return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -6,10 +6,11 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
)
|
||||
|
||||
func TestEstimateGPULayers(t *testing.T) {
|
||||
|
||||
@@ -1,40 +1,32 @@
|
||||
diff --git a/common/common.cpp b/common/common.cpp
|
||||
index dbb724fb..c26fe6ee 100644
|
||||
index 2e8374d5..70d0afde 100644
|
||||
--- a/common/common.cpp
|
||||
+++ b/common/common.cpp
|
||||
@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||
for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
|
||||
const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
|
||||
float lora_scale = std::get<1>(params.lora_adapter[i]);
|
||||
+
|
||||
+ // try to load as gguf
|
||||
auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
|
||||
if (adapter == nullptr) {
|
||||
- fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
|
||||
@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
||||
loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
|
||||
if (loaded_la.adapter == nullptr) {
|
||||
fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
|
||||
- llama_free(lctx);
|
||||
- llama_free_model(model);
|
||||
- return std::make_tuple(nullptr, nullptr);
|
||||
+ fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
|
||||
- return iparams;
|
||||
+
|
||||
+ // if that fails, try loading as ggla for compatibility
|
||||
+ int err = llama_model_apply_lora_from_file(model,
|
||||
+ lora_adapter.c_str(),
|
||||
+ lora_scale,
|
||||
+ la.path.c_str(),
|
||||
+ la.scale,
|
||||
+ nullptr,
|
||||
+ params.n_threads);
|
||||
+ if (err != 0) {
|
||||
+ fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
|
||||
+ llama_free(lctx);
|
||||
+ llama_free_model(model);
|
||||
+ return std::make_tuple(nullptr, nullptr);
|
||||
+ return iparams;
|
||||
+ } else {
|
||||
+ break;
|
||||
+ }
|
||||
+ } else {
|
||||
+ llama_lora_adapter_set(lctx, adapter, lora_scale);
|
||||
}
|
||||
- llama_lora_adapter_set(lctx, adapter, lora_scale);
|
||||
iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
|
||||
}
|
||||
|
||||
if (params.ignore_eos) {
|
||||
diff --git a/include/llama.h b/include/llama.h
|
||||
index 93fd77ca..b0fb37a6 100644
|
||||
--- a/include/llama.h
|
||||
@@ -355,4 +347,4 @@ index 80a0dd0f..9d7b0e17 100644
|
||||
+ return 1;
|
||||
+ }
|
||||
+}
|
||||
\ No newline at end of file
|
||||
\ No newline at end of file
|
||||
@@ -1,20 +0,0 @@
|
||||
diff --git a/src/llama.cpp b/src/llama.cpp
|
||||
index a207451f..fba6b175 100644
|
||||
--- a/src/llama.cpp
|
||||
+++ b/src/llama.cpp
|
||||
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
|
||||
hparams.attn_soft_cap = true;
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
+ case 26: model.type = e_model::MODEL_2B; break;
|
||||
case 42: model.type = e_model::MODEL_9B; break;
|
||||
case 46: model.type = e_model::MODEL_27B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
@@ -11736,6 +11737,7 @@ struct llm_build_context {
|
||||
|
||||
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
|
||||
switch (model.type) {
|
||||
+ case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
||||
default: GGML_ABORT("fatal error");
|
||||
@@ -44,11 +44,12 @@ type LlamaServer interface {
|
||||
|
||||
// llmServer is an instance of the llama.cpp server
|
||||
type llmServer struct {
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
done chan error // Channel to signal when the process exits
|
||||
status *StatusWriter
|
||||
options api.Options
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
done chan error // Channel to signal when the process exits
|
||||
status *StatusWriter
|
||||
options api.Options
|
||||
numParallel int
|
||||
|
||||
estimate MemoryEstimate
|
||||
totalLayers uint64
|
||||
@@ -184,15 +185,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
|
||||
params := []string{
|
||||
"--model", model,
|
||||
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
|
||||
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
|
||||
"--ctx-size", strconv.Itoa(opts.NumCtx),
|
||||
"--batch-size", strconv.Itoa(opts.NumBatch),
|
||||
"--embedding",
|
||||
}
|
||||
|
||||
params = append(params, "--log-disable")
|
||||
|
||||
if opts.NumGPU >= 0 {
|
||||
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
|
||||
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
|
||||
}
|
||||
|
||||
if envconfig.Debug() {
|
||||
@@ -200,7 +201,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
if opts.MainGPU > 0 {
|
||||
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
|
||||
params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
|
||||
}
|
||||
|
||||
if len(adapters) > 0 {
|
||||
@@ -214,7 +215,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
if opts.NumThread > 0 {
|
||||
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
|
||||
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
||||
}
|
||||
|
||||
if !opts.F16KV {
|
||||
@@ -256,11 +257,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
params = append(params, "--mlock")
|
||||
}
|
||||
|
||||
if opts.UseNUMA {
|
||||
params = append(params, "--numa")
|
||||
if gpu.IsNUMA() {
|
||||
numaMode := "distribute"
|
||||
if runtime.GOOS == "linux" {
|
||||
if _, err := exec.LookPath("numactl"); err == nil {
|
||||
numaMode = "numactl"
|
||||
}
|
||||
}
|
||||
params = append(params, "--numa", numaMode)
|
||||
}
|
||||
|
||||
params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
|
||||
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
||||
|
||||
if estimate.TensorSplit != "" {
|
||||
params = append(params, "--tensor-split", estimate.TensorSplit)
|
||||
@@ -337,6 +344,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
status: NewStatusWriter(os.Stderr),
|
||||
options: opts,
|
||||
estimate: estimate,
|
||||
numParallel: numParallel,
|
||||
sem: semaphore.NewWeighted(int64(numParallel)),
|
||||
totalLayers: ggml.KV().BlockCount() + 1,
|
||||
gpus: gpus,
|
||||
@@ -425,7 +433,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
if strings.Contains(s.status.LastErrMsg, "unknown model") {
|
||||
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
|
||||
}
|
||||
s.done <- fmt.Errorf(s.status.LastErrMsg)
|
||||
s.done <- errors.New(s.status.LastErrMsg)
|
||||
} else {
|
||||
s.done <- err
|
||||
}
|
||||
@@ -884,11 +892,14 @@ type EmbedResponse struct {
|
||||
}
|
||||
|
||||
func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) {
|
||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||
// each input will use a slot, so we need to acquire the semaphore for
|
||||
// the number of inputs up to numParallel
|
||||
slots := int64(min(len(input), s.numParallel))
|
||||
if err := s.sem.Acquire(ctx, slots); err != nil {
|
||||
slog.Error("Failed to acquire semaphore", "error", err)
|
||||
return nil, err
|
||||
}
|
||||
defer s.sem.Release(1)
|
||||
defer s.sem.Release(slots)
|
||||
|
||||
// Make sure the server is ready
|
||||
status, err := s.getServerStatusRetry(ctx)
|
||||
|
||||
3
main.go
3
main.go
@@ -3,8 +3,9 @@ package main
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
@@ -14,6 +15,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
@@ -367,24 +369,24 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
for _, c := range content {
|
||||
data, ok := c.(map[string]any)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
switch data["type"] {
|
||||
case "text":
|
||||
text, ok := data["text"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
messages = append(messages, api.Message{Role: msg.Role, Content: text})
|
||||
case "image_url":
|
||||
var url string
|
||||
if urlMap, ok := data["image_url"].(map[string]any); ok {
|
||||
if url, ok = urlMap["url"].(string); !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
} else {
|
||||
if url, ok = data["image_url"].(string); !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -400,17 +402,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
}
|
||||
|
||||
if !valid {
|
||||
return nil, fmt.Errorf("invalid image input")
|
||||
return nil, errors.New("invalid image input")
|
||||
}
|
||||
|
||||
img, err := base64.StdEncoding.DecodeString(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
|
||||
messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
return nil, errors.New("invalid message format")
|
||||
}
|
||||
}
|
||||
default:
|
||||
@@ -423,7 +425,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
toolCalls[i].Function.Name = tc.Function.Name
|
||||
err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid tool call arguments")
|
||||
return nil, errors.New("invalid tool call arguments")
|
||||
}
|
||||
}
|
||||
messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
|
||||
@@ -737,14 +739,12 @@ func (w *RetrieveWriter) Write(data []byte) (int, error) {
|
||||
func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
|
||||
var embedResponse api.EmbedResponse
|
||||
err := json.Unmarshal(data, &embedResponse)
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
w.ResponseWriter.Header().Set("Content-Type", "application/json")
|
||||
err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse))
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -12,13 +12,16 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
const prefix = `data:image/jpeg;base64,`
|
||||
const image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
|
||||
const imageURL = prefix + image
|
||||
const (
|
||||
prefix = `data:image/jpeg;base64,`
|
||||
image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
|
||||
imageURL = prefix + image
|
||||
)
|
||||
|
||||
func prepareRequest(req *http.Request, body any) {
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
@@ -82,7 +82,7 @@ TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
}
|
||||
|
||||
func TestParseFileFrom(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -185,7 +185,7 @@ BADCOMMAND param1 value1
|
||||
}
|
||||
|
||||
func TestParseFileMessages(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -276,7 +276,7 @@ MESSAGE system`,
|
||||
}
|
||||
|
||||
func TestParseFileQuoted(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
multiline string
|
||||
expected []Command
|
||||
err error
|
||||
@@ -430,7 +430,7 @@ TEMPLATE """
|
||||
}
|
||||
|
||||
func TestParseFileParameters(t *testing.T) {
|
||||
var cases = map[string]struct {
|
||||
cases := map[string]struct {
|
||||
name, value string
|
||||
}{
|
||||
"numa true": {"numa", "true"},
|
||||
@@ -491,7 +491,7 @@ func TestParseFileParameters(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestParseFileComments(t *testing.T) {
|
||||
var cases = []struct {
|
||||
cases := []struct {
|
||||
input string
|
||||
expected []Command
|
||||
}{
|
||||
@@ -516,7 +516,7 @@ FROM foo
|
||||
}
|
||||
|
||||
func TestParseFileFormatParseFile(t *testing.T) {
|
||||
var cases = []string{
|
||||
cases := []string{
|
||||
`
|
||||
FROM foo
|
||||
ADAPTER adapter1
|
||||
|
||||
@@ -6,8 +6,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"golang.org/x/term"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
type Bar struct {
|
||||
|
||||
@@ -13,7 +13,7 @@ type Buffer struct {
|
||||
DisplayPos int
|
||||
Pos int
|
||||
Buf *arraylist.List
|
||||
//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
|
||||
// LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
|
||||
LineHasSpace *arraylist.List
|
||||
Prompt *Prompt
|
||||
LineWidth int
|
||||
@@ -56,7 +56,7 @@ func (b *Buffer) GetLineSpacing(line int) bool {
|
||||
|
||||
func (b *Buffer) MoveLeft() {
|
||||
if b.Pos > 0 {
|
||||
//asserts that we retrieve a rune
|
||||
// asserts that we retrieve a rune
|
||||
if e, ok := b.Buf.Get(b.Pos - 1); ok {
|
||||
if r, ok := e.(rune); ok {
|
||||
rLength := runewidth.RuneWidth(r)
|
||||
|
||||
@@ -4,9 +4,7 @@ import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInterrupt = errors.New("Interrupt")
|
||||
)
|
||||
var ErrInterrupt = errors.New("Interrupt")
|
||||
|
||||
type InterruptError struct {
|
||||
Line []rune
|
||||
|
||||
@@ -7,8 +7,10 @@ import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const tcgets = 0x5401
|
||||
const tcsets = 0x5402
|
||||
const (
|
||||
tcgets = 0x5401
|
||||
tcsets = 0x5402
|
||||
)
|
||||
|
||||
func getTermios(fd uintptr) (*Termios, error) {
|
||||
termios := new(Termios)
|
||||
|
||||
@@ -28,8 +28,10 @@ import (
|
||||
|
||||
const maxRetries = 6
|
||||
|
||||
var errMaxRetriesExceeded = errors.New("max retries exceeded")
|
||||
var errPartStalled = errors.New("part stalled")
|
||||
var (
|
||||
errMaxRetriesExceeded = errors.New("max retries exceeded")
|
||||
errPartStalled = errors.New("part stalled")
|
||||
)
|
||||
|
||||
var blobDownloadManager sync.Map
|
||||
|
||||
@@ -214,6 +216,9 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
if err := setSparse(file); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_ = file.Truncate(b.Total)
|
||||
|
||||
|
||||
@@ -828,7 +828,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
fn(api.ProgressResponse{Status: "retrieving manifest"})
|
||||
|
||||
if mp.ProtocolScheme == "http" && !regOpts.Insecure {
|
||||
return fmt.Errorf("insecure protocol http")
|
||||
return errors.New("insecure protocol http")
|
||||
}
|
||||
|
||||
manifest, _, err := GetManifest(mp)
|
||||
@@ -895,7 +895,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
}
|
||||
|
||||
if mp.ProtocolScheme == "http" && !regOpts.Insecure {
|
||||
return fmt.Errorf("insecure protocol http")
|
||||
return errors.New("insecure protocol http")
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "pulling manifest"})
|
||||
@@ -1010,7 +1010,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
|
||||
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
|
||||
}
|
||||
|
||||
var errUnauthorized = fmt.Errorf("unauthorized: access denied")
|
||||
var errUnauthorized = errors.New("unauthorized: access denied")
|
||||
|
||||
// getTokenSubject returns the subject of a JWT token, it does not validate the token
|
||||
func getTokenSubject(token string) string {
|
||||
|
||||
@@ -2,9 +2,9 @@ package server
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -19,6 +19,7 @@ type Manifest struct {
|
||||
Config *Layer `json:"config"`
|
||||
Layers []*Layer `json:"layers"`
|
||||
|
||||
name model.Name
|
||||
filepath string
|
||||
fi os.FileInfo
|
||||
digest string
|
||||
@@ -69,7 +70,6 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
|
||||
|
||||
p := filepath.Join(manifests, n.Filepath())
|
||||
|
||||
var m Manifest
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -81,14 +81,16 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var m Manifest
|
||||
sha256sum := sha256.New()
|
||||
if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m.name = n
|
||||
m.filepath = p
|
||||
m.fi = fi
|
||||
m.digest = fmt.Sprintf("%x", sha256sum.Sum(nil))
|
||||
m.digest = hex.EncodeToString(sha256sum.Sum(nil))
|
||||
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ func createManifest(t *testing.T, path, name string) {
|
||||
t.Helper()
|
||||
|
||||
p := filepath.Join(path, "manifests", name)
|
||||
if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil {
|
||||
if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/template"
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/template"
|
||||
)
|
||||
|
||||
164
server/routes.go
164
server/routes.go
@@ -55,8 +55,10 @@ func init() {
|
||||
gin.SetMode(mode)
|
||||
}
|
||||
|
||||
var errRequired = errors.New("is required")
|
||||
var errBadTemplate = errors.New("template error")
|
||||
var (
|
||||
errRequired = errors.New("is required")
|
||||
errBadTemplate = errors.New("template error")
|
||||
)
|
||||
|
||||
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
|
||||
opts := api.DefaultOptions()
|
||||
@@ -369,7 +371,6 @@ func (s *Server) EmbedHandler(c *gin.Context) {
|
||||
input[i] = s
|
||||
}
|
||||
embeddings, err := r.Embed(c.Request.Context(), input)
|
||||
|
||||
if err != nil {
|
||||
slog.Error("embedding generation failed", "error", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
||||
@@ -430,7 +431,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})
|
||||
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
||||
@@ -556,7 +556,7 @@ func checkNameExists(name model.Name) error {
|
||||
|
||||
for n := range names {
|
||||
if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
|
||||
return fmt.Errorf("a model with that name already exists")
|
||||
return errors.New("a model with that name already exists")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -703,6 +703,153 @@ func (s *Server) ShowModelHandler(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func manifestLayers(m *Manifest, exclude []string) (map[string]any, error) {
|
||||
r := map[string]any{
|
||||
"name": m.name.DisplayShortest(),
|
||||
"digest": m.digest,
|
||||
"size": m.Size(),
|
||||
"modified_at": m.fi.ModTime(),
|
||||
}
|
||||
|
||||
excludeAll := slices.Contains(exclude, "all")
|
||||
excludeDetails := slices.Contains(exclude, "details")
|
||||
|
||||
for _, layer := range m.Layers {
|
||||
var errExcludeKey = errors.New("exclude key")
|
||||
key, content, err := func() (string, any, error) {
|
||||
key := strings.TrimPrefix(layer.MediaType, "application/vnd.ollama.image.")
|
||||
if slices.Contains(exclude, key) || excludeAll {
|
||||
return "", nil, errExcludeKey
|
||||
}
|
||||
|
||||
f, err := layer.Open()
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
switch key {
|
||||
case "model", "projector", "adapter":
|
||||
ggml, _, err := llm.DecodeGGML(f, 0)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
content := map[string]any{
|
||||
"architecture": ggml.KV().Architecture(),
|
||||
"file_type": ggml.KV().FileType().String(),
|
||||
"parameter_count": ggml.KV().ParameterCount(),
|
||||
}
|
||||
|
||||
if !slices.Contains(exclude, key+".details") && !excludeAll && !excludeDetails {
|
||||
// exclude any extraneous or redundant fields
|
||||
delete(ggml.KV(), "general.basename")
|
||||
delete(ggml.KV(), "general.description")
|
||||
delete(ggml.KV(), "general.filename")
|
||||
delete(ggml.KV(), "general.finetune")
|
||||
delete(ggml.KV(), "general.languages")
|
||||
delete(ggml.KV(), "general.license")
|
||||
delete(ggml.KV(), "general.license.link")
|
||||
delete(ggml.KV(), "general.name")
|
||||
delete(ggml.KV(), "general.paramter_count")
|
||||
delete(ggml.KV(), "general.size_label")
|
||||
delete(ggml.KV(), "general.tags")
|
||||
delete(ggml.KV(), "general.type")
|
||||
delete(ggml.KV(), "general.quantization_version")
|
||||
delete(ggml.KV(), "tokenizer.chat_template")
|
||||
content["details"] = ggml.KV()
|
||||
}
|
||||
|
||||
return key, content, nil
|
||||
case "params", "messages":
|
||||
var content any
|
||||
if err := json.NewDecoder(f).Decode(&content); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
return key, content, nil
|
||||
case "template", "system", "license":
|
||||
bts, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
if key == "license" {
|
||||
return key, []any{string(bts)}, nil
|
||||
}
|
||||
|
||||
return key, string(bts), nil
|
||||
}
|
||||
|
||||
return layer.MediaType, nil, nil
|
||||
}()
|
||||
if errors.Is(err, errExcludeKey) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if s, ok := r[key].([]any); ok {
|
||||
r[key] = append(s, content)
|
||||
} else {
|
||||
r[key] = content
|
||||
}
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (s *Server) GetModelsHandler(c *gin.Context) {
|
||||
ms, err := Manifests()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
var rs []map[string]any
|
||||
for _, m := range ms {
|
||||
r, err := manifestLayers(m, c.QueryArray("exclude"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
rs = append(rs, r)
|
||||
}
|
||||
|
||||
slices.SortStableFunc(rs, func(i, j map[string]any) int {
|
||||
// most recently modified first
|
||||
return cmp.Compare(
|
||||
j["modified_at"].(time.Time).Unix(),
|
||||
i["modified_at"].(time.Time).Unix(),
|
||||
)
|
||||
})
|
||||
|
||||
c.JSON(http.StatusOK, rs)
|
||||
}
|
||||
|
||||
func (s *Server) GetModelHandler(c *gin.Context) {
|
||||
n := model.ParseName(strings.TrimPrefix(c.Param("model"), "/"))
|
||||
if !n.IsValid() {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
|
||||
return
|
||||
}
|
||||
|
||||
m, err := ParseNamedManifest(n)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
r, err := manifestLayers(m, c.QueryArray("exclude"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, r)
|
||||
}
|
||||
|
||||
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
m, err := GetModel(req.Model)
|
||||
if err != nil {
|
||||
@@ -729,7 +876,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
|
||||
n := model.ParseName(req.Model)
|
||||
if !n.IsValid() {
|
||||
return nil, fmt.Errorf("invalid model name")
|
||||
return nil, errors.New("invalid model name")
|
||||
}
|
||||
|
||||
manifest, err := ParseNamedManifest(n)
|
||||
@@ -993,7 +1140,7 @@ func allowedHost(host string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
var tlds = []string{
|
||||
tlds := []string{
|
||||
"localhost",
|
||||
"local",
|
||||
"internal",
|
||||
@@ -1090,6 +1237,9 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||
c.String(http.StatusOK, "Ollama is running")
|
||||
})
|
||||
|
||||
r.Handle(method, "/api/models", s.GetModelsHandler)
|
||||
r.Handle(method, "/api/models/*model", s.GetModelHandler)
|
||||
|
||||
r.Handle(method, "/api/tags", s.ListModelsHandler)
|
||||
r.Handle(method, "/api/version", func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"version": version.Version})
|
||||
|
||||
@@ -2,6 +2,7 @@ package server
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
@@ -52,6 +54,8 @@ func (t *responseRecorder) CloseNotify() <-chan bool {
|
||||
|
||||
func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder {
|
||||
t.Helper()
|
||||
// if OLLAMA_MODELS is not set, set it to the temp directory
|
||||
t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))
|
||||
|
||||
w := NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -489,7 +493,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
@@ -501,7 +505,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
@@ -513,7 +517,7 @@ func TestCreateTemplateSystem(t *testing.T) {
|
||||
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
|
||||
Stream: &stream,
|
||||
})
|
||||
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, actual %d", w.Code)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
|
||||
@@ -333,7 +333,6 @@ func Test_Routes(t *testing.T) {
|
||||
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
|
||||
}
|
||||
_, err := io.ReadAll(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
|
||||
// we'll back off down to 1 to try to get it to fit
|
||||
var defaultParallel = 4
|
||||
|
||||
var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
|
||||
var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")
|
||||
|
||||
func InitScheduler(ctx context.Context) *Scheduler {
|
||||
maxQueue := envconfig.MaxQueue()
|
||||
|
||||
@@ -3,23 +3,25 @@ package server
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/app/lifecycle"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func init() {
|
||||
func TestMain(m *testing.M) {
|
||||
os.Setenv("OLLAMA_DEBUG", "1")
|
||||
lifecycle.InitLogging()
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestInitScheduler(t *testing.T) {
|
||||
@@ -46,7 +48,7 @@ func TestLoad(t *testing.T) {
|
||||
}
|
||||
// Fail to load model first
|
||||
s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
|
||||
return nil, fmt.Errorf("something failed to load model blah")
|
||||
return nil, errors.New("something failed to load model blah")
|
||||
}
|
||||
gpus := gpu.GpuInfoList{}
|
||||
s.load(req, ggml, gpus, 0)
|
||||
@@ -75,7 +77,7 @@ func TestLoad(t *testing.T) {
|
||||
}
|
||||
|
||||
req.model.ModelPath = "dummy_model_path"
|
||||
server.waitResp = fmt.Errorf("wait failure")
|
||||
server.waitResp = errors.New("wait failure")
|
||||
s.load(req, ggml, gpus, 0)
|
||||
select {
|
||||
case err := <-req.errCh:
|
||||
@@ -600,7 +602,7 @@ func TestNeedsReload(t *testing.T) {
|
||||
resp = runner.needsReload(ctx, req)
|
||||
require.True(t, resp)
|
||||
req.opts.NumBatch = runner.Options.NumBatch
|
||||
llm.pingResp = fmt.Errorf("foo")
|
||||
llm.pingResp = errors.New("foo")
|
||||
resp = runner.needsReload(ctx, req)
|
||||
require.True(t, resp)
|
||||
llm.pingResp = nil
|
||||
@@ -724,15 +726,19 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
|
||||
func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
|
||||
return s.completionResp
|
||||
}
|
||||
|
||||
func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
|
||||
return s.embedResp, s.embedRespErr
|
||||
}
|
||||
|
||||
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
|
||||
return s.tokenizeResp, s.tokenizeRespErr
|
||||
}
|
||||
|
||||
func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) {
|
||||
return s.detokenizeResp, s.detonekizeRespErr
|
||||
}
|
||||
|
||||
func (s *mockLlm) Close() error {
|
||||
s.closeCalled = true
|
||||
return s.closeResp
|
||||
|
||||
9
server/sparse_common.go
Normal file
9
server/sparse_common.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build !windows
|
||||
|
||||
package server
|
||||
|
||||
import "os"
|
||||
|
||||
func setSparse(file *os.File) error {
|
||||
return nil
|
||||
}
|
||||
16
server/sparse_windows.go
Normal file
16
server/sparse_windows.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
func setSparse(file *os.File) error {
|
||||
return windows.DeviceIoControl(
|
||||
windows.Handle(file.Fd()), windows.FSCTL_SET_SPARSE,
|
||||
nil, 0,
|
||||
nil, 0,
|
||||
nil, nil,
|
||||
)
|
||||
}
|
||||
@@ -12,13 +12,15 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
var blobUploadManager sync.Map
|
||||
@@ -212,7 +214,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
|
||||
func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *registryOptions) error {
|
||||
headers := make(http.Header)
|
||||
headers.Set("Content-Type", "application/octet-stream")
|
||||
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
|
||||
headers.Set("Content-Length", strconv.FormatInt(part.Size, 10))
|
||||
|
||||
if method == http.MethodPatch {
|
||||
headers.Set("X-Redirect-Uploads", "1")
|
||||
|
||||
@@ -15,8 +15,9 @@ import (
|
||||
"text/template/parse"
|
||||
|
||||
"github.com/agnivade/levenshtein"
|
||||
"github.com/ollama/ollama/api"
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
//go:embed index.json
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
@@ -6,8 +6,10 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
const UnknownOllamaKeyErrMsg = "unknown ollama key"
|
||||
const InvalidModelNameErrMsg = "invalid model name"
|
||||
const (
|
||||
UnknownOllamaKeyErrMsg = "unknown ollama key"
|
||||
InvalidModelNameErrMsg = "invalid model name"
|
||||
)
|
||||
|
||||
// TODO: This should have a structured response from the API
|
||||
type UnknownOllamaKey struct {
|
||||
|
||||
@@ -258,7 +258,7 @@ func (n Name) IsValid() bool {
|
||||
// IsFullyQualified returns true if all parts of the name are present and
|
||||
// valid without the digest.
|
||||
func (n Name) IsFullyQualified() bool {
|
||||
var parts = []string{
|
||||
parts := []string{
|
||||
n.Host,
|
||||
n.Namespace,
|
||||
n.Model,
|
||||
|
||||
Reference in New Issue
Block a user