Compare commits: v0.3.3 ... mxyng/api-

22 commits:

2fe945412a, de4fc29773, e04c7012c2, d4a7216c82, a4fdd03c3b, fc85f50a2b, 86b907f82a, 10d49bce70, 7ed367419e, 50ee8b5f56, 03bdac0595, f457d63400, 39f2bc6bfc, b73b0940ef, 6a07344786, 8b920f35a4, 4221e39867, a091fadfda, 77ccbf04dc, 4addf6b587, 85c7f11170, b732beba6a
`.gitattributes` (vendored) — 1 change: normalize line endings repo-wide:

```diff
@@ -1 +1,2 @@
 llm/ext_server/* linguist-vendored
+* text eol=lf
```
`.github/workflows/test.yaml` (vendored) — 2 changes: run golangci-lint with identical args on every OS; the gofmt/goimports handling moves into the lint configuration:

```diff
@@ -273,7 +273,7 @@ jobs:
         if: ${{ startsWith(matrix.os, 'macos-') }}
       - uses: golangci/golangci-lint-action@v6
         with:
-          args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
+          args: --timeout 8m0s -v
   test:
     strategy:
       matrix:
```
@@ -7,22 +7,32 @@ linters:
|
|||||||
- bodyclose
|
- bodyclose
|
||||||
- containedctx
|
- containedctx
|
||||||
- contextcheck
|
- contextcheck
|
||||||
|
- errcheck
|
||||||
- exportloopref
|
- exportloopref
|
||||||
|
- gci
|
||||||
- gocheckcompilerdirectives
|
- gocheckcompilerdirectives
|
||||||
# conditionally enable this on linux/macos
|
- gofmt
|
||||||
# - gofmt
|
- gofumpt
|
||||||
# - goimports
|
- gosimple
|
||||||
|
- govet
|
||||||
|
- ineffassign
|
||||||
- intrange
|
- intrange
|
||||||
|
- makezero
|
||||||
- misspell
|
- misspell
|
||||||
- nilerr
|
- nilerr
|
||||||
- nolintlint
|
- nolintlint
|
||||||
- nosprintfhostport
|
- nosprintfhostport
|
||||||
|
- staticcheck
|
||||||
|
- tenv
|
||||||
- testifylint
|
- testifylint
|
||||||
- unconvert
|
- unconvert
|
||||||
- unused
|
- unused
|
||||||
|
- usestdlibvars
|
||||||
- wastedassign
|
- wastedassign
|
||||||
- whitespace
|
- whitespace
|
||||||
- usestdlibvars
|
linters-settings:
|
||||||
|
gci:
|
||||||
|
sections: [standard, default, localmodule]
|
||||||
severity:
|
severity:
|
||||||
default-severity: error
|
default-severity: error
|
||||||
rules:
|
rules:
|
||||||
|
|||||||
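The `gci` setting above enforces a three-group import order: standard library, third-party modules, then the local module. Several hunks later in this compare (the tray and convert files) are exactly this reordering. A minimal conforming import block as a sketch (package and imports chosen for illustration):

```go
package convert

import (
	// standard: Go standard library
	"fmt"
	"strings"

	// default: third-party modules
	"github.com/pdevine/tensor"

	// localmodule: this repository's own packages
	"github.com/ollama/ollama/llm"
)
```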
`README.md` — the example model table gains Gemma 2 2B:

```diff
@@ -54,6 +54,7 @@ Here are some example models that can be downloaded:
 | Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
 | Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
 | Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
+| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
 | Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
 | Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
 | Mistral | 7B | 4.1GB | `ollama run mistral` |
```
`README.md` — the community integrations list gains Harbor:

```diff
@@ -300,6 +301,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
+- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)

 ### Terminal
```
`api` package, client — use `errors.New` for a server-supplied error string:

```diff
@@ -18,6 +18,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -172,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	}

 	if errorResponse.Error != "" {
-		return fmt.Errorf(errorResponse.Error)
+		return errors.New(errorResponse.Error)
 	}

 	if response.StatusCode >= http.StatusBadRequest {
```
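The `fmt.Errorf` → `errors.New` conversions running through this compare are not just stylistic: passing a dynamic string as a format string means any `%` in it is parsed as a verb, which is what govet's printf check flags. A runnable sketch of the failure mode:

```go
package main

import (
	"errors"
	"fmt"
)

func main() {
	// A server-supplied message that happens to contain a '%'.
	msg := "unexpected token %s in input"

	// Old pattern: '%s' is treated as a verb with a missing operand.
	fmt.Println(fmt.Errorf(msg)) // unexpected token %!s(MISSING) in input

	// New pattern: the message passes through verbatim.
	fmt.Println(errors.New(msg)) // unexpected token %s in input
}
```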
`api` package, types — drop the unused `UseNUMA` runner option and log the offending key rather than the (empty) looked-up name:

```diff
@@ -231,7 +231,6 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA  bool `json:"numa,omitempty"`
 	NumCtx   int  `json:"num_ctx,omitempty"`
 	NumBatch int  `json:"num_batch,omitempty"`
 	NumGPU   int  `json:"num_gpu,omitempty"`
@@ -505,7 +504,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", opt.Name)
+			slog.Warn("invalid option provided", "option", key)
 			continue
 		}

@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:    true,
 			UseMLock: false,
 			UseMMap:  nil,
-			UseNUMA:  false,
 		},
 	}
 }
```
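The `FromMap` change fixes a real logging bug: when the comma-ok map lookup fails, `opt` is the zero value, so the old call always logged an empty option name. A distilled sketch of the pitfall (the map's value type here is a stand-in for whatever `jsonOpts` actually holds):

```go
package main

import "fmt"

func main() {
	jsonOpts := map[string]struct{ Name string }{
		"num_ctx": {Name: "NumCtx"},
	}

	key := "num_ctz" // a mistyped option from user input
	opt, ok := jsonOpts[key]
	if !ok {
		// opt.Name is "" here, so the old warning named nothing;
		// logging key identifies the actual offending input.
		fmt.Printf("invalid option provided: key=%q, opt.Name=%q\n", key, opt.Name)
	}
}
```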
`api` package, types test — match the `errors.New` convention:

```diff
@@ -2,7 +2,7 @@ package api

 import (
 	"encoding/json"
-	"fmt"
+	"errors"
 	"math"
 	"testing"
 	"time"
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
 			"use_mmap": {"foo"},
 		},
 		exp: nil,
-		err: fmt.Errorf("invalid bool value [foo]"),
+		err: errors.New("invalid bool value [foo]"),
 	},
 }
```
`app/lifecycle`, GetStarted stub (non-Windows):

```diff
@@ -2,8 +2,8 @@

 package lifecycle

-import "fmt"
+import "errors"

 func GetStarted() error {
-	return fmt.Errorf("GetStarted not implemented")
+	return errors.New("not implemented")
 }
```
`app/lifecycle`, GetStarted (Windows) — drop a blank line between the process start and its error check:

```diff
@@ -34,7 +34,6 @@ func GetStarted() error {
 		Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
-
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
```
`app/lifecycle`, logging — explicit octal prefix:

```diff
@@ -27,7 +27,7 @@ func InitLogging() {
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
 		rotateLogs(AppLogFile)
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
```
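The recurring `0755` → `0o755` edits are purely notational: Go has supported the explicit `0o` octal prefix since Go 1.13, and gofumpt rewrites bare leading-zero literals because `0o755` cannot be misread as a decimal. Both spellings are the same constant:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	fmt.Println(0755 == 0o755)               // true
	fmt.Println(os.FileMode(0o755).String()) // -rwxr-xr-x
}
```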
`app/lifecycle`, ShowLogs stub:

```diff
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"

 func ShowLogs() {
-	slog.Warn("ShowLogs not yet implemented")
+	slog.Warn("not implemented")
 }
```
`app/lifecycle`, logging test — octal prefix in both write sites:

```diff
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
 	// No log exists
 	rotateLogs(logFile)

-	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
+	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
 	assert.FileExists(t, logFile)
 	// First rotation
 	rotateLogs(logFile)
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
 	assert.NoFileExists(t, logFile)

 	for i := 2; i <= LogRotationCount+1; i++ {
-		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
+		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
 		assert.FileExists(t, logFile)
 		rotateLogs(logFile)
 		assert.NoFileExists(t, logFile)
```
`app/lifecycle`, server start — octal prefix:

```diff
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	}

 	rotateLogs(ServerLogFile)
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
```
`app/lifecycle`, updater — format the timestamp with `strconv` instead of `fmt.Sprintf`:

```diff
@@ -15,6 +15,7 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
+	"strconv"
 	"strings"
 	"time"

@@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
+	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))

 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
```
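`strconv.FormatInt` produces the same string as `fmt.Sprintf("%d", ...)` while skipping fmt's reflection-driven formatting; which linter prompted this particular rewrite isn't visible in the diff, but the equivalence is easy to check:

```go
package main

import (
	"fmt"
	"strconv"
	"time"
)

func main() {
	ts := time.Now().Unix()
	a := fmt.Sprintf("%d", ts)     // generic formatting machinery
	b := strconv.FormatInt(ts, 10) // direct integer conversion
	fmt.Println(a == b)            // true
}
```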
`app/lifecycle`, DoUpgrade stub (non-Windows):

```diff
@@ -4,9 +4,9 @@ package lifecycle

 import (
 	"context"
-	"fmt"
+	"errors"
 )

 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return fmt.Errorf("DoUpgrade not yet implemented")
+	return errors.New("not implemented")
 }
```
`app/lifecycle`, DoUpgrade (Windows) — static error strings:

```diff
@@ -2,6 +2,7 @@ package lifecycle

 import (
 	"context"
+	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return fmt.Errorf("no update downloads found")
+		return errors.New("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return fmt.Errorf("installer process did not start")
+		return errors.New("installer process did not start")
 	}

 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
```
`app/tray`, platform stub (non-Windows):

```diff
@@ -3,11 +3,11 @@
 package tray

 import (
-	"fmt"
+	"errors"

 	"github.com/ollama/ollama/app/tray/commontray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
+	return nil, errors.New("not implemented")
 }
```
`app/tray/wintray` — collapse a single-declaration var block:

```diff
@@ -11,9 +11,7 @@ import (
 	"golang.org/x/sys/windows"
 )

-var (
-	quitOnce sync.Once
-)
+var quitOnce sync.Once

 func (t *winTray) Run() {
 	nativeLoop()
```
`app/tray/wintray` — gci import grouping (local module last) and octal prefix:

```diff
@@ -13,8 +13,9 @@ import (
 	"sync"
 	"unsafe"

-	"github.com/ollama/ollama/app/tray/commontray"
 	"golang.org/x/sys/windows"
+
+	"github.com/ollama/ollama/app/tray/commontray"
 )

 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -414,7 +415,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)

 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
 			return "", err
 		}
 	}
```
`auth` package — static error string:

```diff
@@ -5,6 +5,7 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
 	parts := bytes.Split(publicKey, []byte(" "))
 	if len(parts) < 2 {
-		return "", fmt.Errorf("malformed public key")
+		return "", errors.New("malformed public key")
 	}

 	signedData, err := privateKey.Sign(rand.Reader, bts)
```
`cmd` package, server heartbeat:

```diff
@@ -1160,7 +1160,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return fmt.Errorf("could not connect to ollama app, is it running?")
+			return errors.New("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
```
`cmd` package, image size guard:

```diff
@@ -604,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
+		return nil, errors.New("file size exceeds maximum limit (100MB)")
 	}

 	buf = make([]byte, info.Size())
```
`cmd` package, startApp (darwin):

```diff
@@ -2,7 +2,7 @@ package cmd

 import (
 	"context"
-	"fmt"
+	"errors"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return fmt.Errorf("could not find ollama app")
+		return errors.New("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
```
`cmd` package, startApp (default stub):

```diff
@@ -4,11 +4,11 @@ package cmd

 import (
 	"context"
-	"fmt"
+	"errors"

 	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
-	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
+	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
 }
```
`cmd` package, startApp (windows):

```diff
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		// Finally look in the path
 		appExe, err = exec.LookPath(AppName)
 		if err != nil {
-			return fmt.Errorf("could not locate ollama app")
+			return errors.New("could not locate ollama app")
 		}
 	}
 }
```
`convert` package — gci import grouping (local module last):

```diff
@@ -5,9 +5,10 @@ import (
 	"fmt"
 	"strings"

-	"github.com/ollama/ollama/llm"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
+
+	"github.com/ollama/ollama/llm"
 )

 type llama struct {
```
`convert` package test — use `encoding/hex` for the tensor digests, plus gci grouping:

```diff
@@ -2,6 +2,7 @@ package convert

 import (
 	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -14,8 +15,9 @@ import (
 	"slices"
 	"testing"

-	"github.com/ollama/ollama/llm"
 	"golang.org/x/exp/maps"
+
+	"github.com/ollama/ollama/llm"
 )

 func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
@@ -99,7 +101,7 @@ func TestConvertFull(t *testing.T) {
 			t.Fatal(err)
 		}

-		actual[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
+		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
 	}

 	expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
```
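`hex.EncodeToString` and `fmt.Sprintf("%x", ...)` yield identical lowercase hex for a byte slice; the stdlib encoder just states the intent directly. A quick check:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

func main() {
	sum := sha256.Sum256([]byte("ollama"))
	viaSprintf := fmt.Sprintf("%x", sum[:])
	viaHex := hex.EncodeToString(sum[:])
	fmt.Println(viaSprintf == viaHex) // true
}
```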
`convert` package, ZipReader — the hunk at `@@ -10,8 +10,8 @@` carries identical text on both sides, so it is apparently whitespace-only (struct field realignment):

```diff
@@ -10,8 +10,8 @@ import (
 )

 type ZipReader struct {
 	r *zip.Reader
 	p string

 	// limit is the maximum size of a file that can be read directly
 	// from the zip archive. Files larger than this size will be extracted
```
`convert` package, safetensor — fix the F16 conversion loop, which appended instead of writing by index:

```diff
@@ -111,8 +111,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 			return 0, err
 		}

-		for _, b := range u16s {
-			f32s = append(f32s, float16.Frombits(b).Float32())
+		f32s = make([]float32, len(u16s))
+		for i := range u16s {
+			f32s[i] = float16.Frombits(u16s[i]).Float32()
 		}

 	case "BF16":
```
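This is the kind of bug the newly enabled `makezero` linter targets: appending to a slice that already has nonzero length leaves the zero-valued prefix in place. A self-contained reproduction, with the float16 decode replaced by a plain conversion to avoid the external dependency (the assumption, implied by the fix, is that `f32s` had been sized before the loop):

```go
package main

import "fmt"

func main() {
	u16s := []uint16{1, 2, 3}

	// Buggy shape: f32s already has len 3, so append grows it to len 6
	// with three zeros ahead of the converted values.
	f32s := make([]float32, len(u16s))
	for _, b := range u16s {
		f32s = append(f32s, float32(b))
	}
	fmt.Println(f32s) // [0 0 0 1 2 3]

	// Fixed shape: size once, write by index.
	f32s = make([]float32, len(u16s))
	for i := range u16s {
		f32s[i] = float32(u16s[i])
	}
	fmt.Println(f32s) // [1 2 3]
}
```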
`docs/openai.md` — 125 changes: document vision and completions usage of the OpenAI-compatible API in the Python, JavaScript, and curl examples, tidy trailing punctuation, and record `/v1/completions` support. The multi-kilobyte base64 image payloads in the examples are abbreviated to `iVBORw0KGgo...` below.

````diff
@@ -28,13 +28,35 @@ chat_completion = client.chat.completions.create(
     model='llama3',
 )

+response = client.chat.completions.create(
+    model="llava",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {
+                    "type": "image_url",
+                    "image_url": "iVBORw0KGgo...",
+                },
+            ],
+        }
+    ],
+    max_tokens=300,
+)
+
+completion = client.completions.create(
+    model="llama3",
+    prompt="Say this is a test",
+)
+
 list_completion = client.models.list()

 model = client.models.retrieve("llama3")

 embeddings = client.embeddings.create(
     model="all-minilm",
-    input=["why is the sky blue?", "why is the grass green?"]
+    input=["why is the sky blue?", "why is the grass green?"],
 )
 ```
@@ -51,23 +73,44 @@ const openai = new OpenAI({
 })

 const chatCompletion = await openai.chat.completions.create({
     messages: [{ role: 'user', content: 'Say this is a test' }],
     model: 'llama3',
 })
+
+const response = await openai.chat.completions.create({
+    model: "llava",
+    messages: [
+        {
+            role: "user",
+            content: [
+                { type: "text", text: "What's in this image?" },
+                {
+                    type: "image_url",
+                    image_url: "iVBORw0KGgo...",
+                },
+            ],
+        },
+    ],
+})
+
+const completion = await openai.completions.create({
+    model: "llama3",
+    prompt: "Say this is a test.",
+})

 const listCompletion = await openai.models.list()

-const model = await openai.models.retrieve("llama3");
+const model = await openai.models.retrieve("llama3")

 const embedding = await openai.embeddings.create({
     model: "all-minilm",
     input: ["why is the sky blue?", "why is the grass green?"],
-});
+})
 ```

 ### `curl`

-```
+```shell
 curl http://localhost:11434/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
@@ -84,6 +127,37 @@ curl http://localhost:11434/v1/chat/completions \
     ]
 }'

+curl http://localhost:11434/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "llava",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What'\''s in this image?"
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "iVBORw0KGgo..."
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 300
+    }'
+
+curl http://localhost:11434/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "llama3",
+        "prompt": "Say this is a test"
+    }'
+
 curl http://localhost:11434/v1/models

 curl http://localhost:11434/v1/models/llama3
@@ -106,6 +180,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
+- [x] Vision
 - [x] Tools (streaming support coming soon)
-- [ ] Vision
 - [ ] Logprobs
@@ -115,7 +190,10 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `model`
 - [x] `messages`
   - [x] Text `content`
-  - [ ] Array of `content` parts
+  - [x] Image `content`
+    - [x] Base64 encoded image
+    - [ ] Image URL
+  - [x] Array of `content` parts
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `response_format`
@@ -131,6 +209,39 @@ curl http://localhost:11434/v1/embeddings \
 - [ ] `user`
 - [ ] `n`

+### `/v1/completions`
+
+#### Supported features
+
+- [x] Completions
+- [x] Streaming
+- [x] JSON mode
+- [x] Reproducible outputs
+- [ ] Logprobs
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `prompt`
+- [x] `frequency_penalty`
+- [x] `presence_penalty`
+- [x] `seed`
+- [x] `stop`
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_tokens`
+- [x] `suffix`
+- [ ] `best_of`
+- [ ] `echo`
+- [ ] `logit_bias`
+- [ ] `user`
+- [ ] `n`
+
+#### Notes
+
+- `prompt` currently only accepts a string
+
 ### `/v1/models`

 #### Notes
````
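Since the documented `/v1` endpoints are plain HTTP, any language can call them, including Go. A hedged sketch of the text-only chat request from these docs (the request/response shapes below are minimal assumptions for illustration, not types exported by Ollama):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string    `json:"model"`
	Messages []message `json:"messages"`
}

func main() {
	body, err := json.Marshal(chatRequest{
		Model:    "llama3",
		Messages: []message{{Role: "user", Content: "Say this is a test"}},
	})
	if err != nil {
		panic(err)
	}

	resp, err := http.Post("http://localhost:11434/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["choices"])
}
```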
Troubleshooting docs — pipe-friendly journalctl invocation:

````diff
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:

 ```shell
-journalctl -u ollama
+journalctl -u ollama --no-pager
 ```

 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
````
`format` package, HumanNumber — `strconv` for the default case:

```diff
@@ -3,6 +3,7 @@ package format

 import (
 	"fmt"
 	"math"
+	"strconv"
 )

 const (
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
 	case b >= Thousand:
 		return fmt.Sprintf("%.0fK", float64(b)/Thousand)
 	default:
-		return fmt.Sprintf("%d", b)
+		return strconv.FormatUint(b, 10)
 	}
 }
```
`gpu` package, shared AMD helpers — static error string:

```diff
@@ -3,7 +3,7 @@
 package gpu

 import (
-	"fmt"
+	"errors"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
 		}
 	}

-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }
```
`gpu` package, HipLib (Windows ROCm bindings) — static error strings throughout:

```diff
@@ -1,6 +1,7 @@
 package gpu

 import (
+	"errors"
 	"fmt"
 	"log/slog"
 	"syscall"
@@ -76,7 +77,7 @@ func (hl *HipLib) Release() {

 func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	if hl.dll == 0 {
-		return 0, 0, fmt.Errorf("dll has been unloaded")
+		return 0, 0, errors.New("dll has been unloaded")
 	}
 	var version int
 	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {

 func (hl *HipLib) HipSetDevice(device int) error {
 	if hl.dll == 0 {
-		return fmt.Errorf("dll has been unloaded")
+		return errors.New("dll has been unloaded")
 	}
 	status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
 	if status != hipSuccess {
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {

 func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
 	if hl.dll == 0 {
-		return nil, fmt.Errorf("dll has been unloaded")
+		return nil, errors.New("dll has been unloaded")
 	}
 	var props hipDevicePropMinimal
 	status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
 // free, total, err
 func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
 	if hl.dll == 0 {
-		return 0, 0, fmt.Errorf("dll has been unloaded")
+		return 0, 0, errors.New("dll has been unloaded")
 	}
 	var totalMemory uint64
 	var freeMemory uint64
```
`gpu` package, AMD (Linux):

```diff
@@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {

 	// If we still haven't found a usable rocm, the user will have to install it on their own
 	slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }

 func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
```
`gpu` package, AMD (Windows) — static error strings and a comment-spacing fix:

```diff
@@ -2,7 +2,7 @@ package gpu

 import (
 	"bytes"
-	"fmt"
+	"errors"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		n = bytes.IndexByte(props.GcnArchName[:], 0)
 		gfx := string(props.GcnArchName[:n])
 		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
+		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
 		// TODO Why isn't props.iGPU accurate!?
 		if strings.EqualFold(name, iGPUName) {
 			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
@@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {

 	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
 	slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
-	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+	return "", errors.New("no suitable rocm found, falling back to CPU")
 }

 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
```
`gpu` package, payloads — octal prefix and `strconv.Itoa` for the PID file:

```diff
@@ -42,7 +42,7 @@ func PayloadsDir() (string, error) {
 			return "", fmt.Errorf("failed to generate tmp dir: %w", err)
 		}
 	} else {
-		err = os.MkdirAll(tmpDir, 0755)
+		err = os.MkdirAll(tmpDir, 0o755)
 		if err != nil {
 			return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
 		}
@@ -54,7 +54,7 @@ func PayloadsDir() (string, error) {
 	if err != nil {
 		return "", err
 	}
-	if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
+	if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
 		return "", err
 	}
```
`gpu` package, CPU capability file — new imports and a NUMA probe:

```diff
@@ -1,6 +1,11 @@
 package gpu

 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )

@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool {
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{}
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil {
+			ids[strings.TrimSpace(string(id))] = struct{}{}
+		}
+	}
+	return len(ids) > 1
+}
```
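`IsNUMA` treats more than one distinct `physical_package_id` under sysfs as evidence of a multi-socket host. A standalone sketch of the same probe plus a hypothetical consumer (the `--numa` flag wiring is an illustrative assumption, not code from this compare; the GOOS guard is dropped because the glob simply matches nothing off Linux):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// isNUMA mirrors the added gpu.IsNUMA: count distinct CPU package IDs.
func isNUMA() bool {
	ids := map[string]struct{}{}
	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
	for _, p := range paths {
		if b, err := os.ReadFile(p); err == nil {
			ids[strings.TrimSpace(string(b))] = struct{}{}
		}
	}
	return len(ids) > 1
}

func main() {
	args := []string{"--model", "model.gguf"}
	if isNUMA() {
		// Hypothetical: pass a NUMA hint to a llama.cpp-based runner.
		args = append(args, "--numa", "distribute")
	}
	fmt.Println(args)
}
```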
`gpu/gpu.go` — 19 changes: whitespace cleanup around `import "C"` (the first hunk carries identical text on both sides, so it is apparently whitespace-only), removal of a leading blank line in `initCudaHandles`, and an idiomatic reflow of the `CPUInfo` composite literal:

```diff
@@ -7,9 +7,9 @@ package gpu
 #cgo windows LDFLAGS: -lpthread

 #include "gpu_info.h"

 */
 import "C"

 import (
 	"fmt"
 	"log/slog"
@@ -70,7 +70,6 @@ var CudaTegra string = os.Getenv("JETSON_JETPACK")

 // Note: gpuMutex must already be held
 func initCudaHandles() *cudaHandles {
-
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

 	cHandles := &cudaHandles{}
@@ -211,14 +210,16 @@ func GetGPUInfo() GpuInfoList {
 	if err != nil {
 		slog.Warn("error looking up system memory", "error", err)
 	}
-	cpus = []CPUInfo{CPUInfo{
-		GpuInfo: GpuInfo{
-			memInfo: mem,
-			Library: "cpu",
-			Variant: cpuCapability,
-			ID:      "0",
+	cpus = []CPUInfo{
+		{
+			GpuInfo: GpuInfo{
+				memInfo: mem,
+				Library: "cpu",
+				Variant: cpuCapability,
+				ID:      "0",
+			},
 		},
-	}}
+	}

 	// Fallback to CPU mode if we're lacking required vector extensions on x86
 	if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
```
`gpu` package (darwin) — one line added; apparently a blank line between `import "C"` and the regular import block:

```diff
@@ -8,6 +8,7 @@ package gpu
 #include "gpu_info_darwin.h"
 */
 import "C"
+
 import (
 	"runtime"
```
`gpu` package (linux) — merge the management-library names into one var block:

```diff
@@ -43,10 +43,12 @@ var OneapiGlobs = []string{
 	"/usr/lib*/libze_intel_gpu.so*",
 }

-var CudartMgmtName = "libcudart.so*"
-var NvcudaMgmtName = "libcuda.so*"
-var NvmlMgmtName = "" // not currently wired on linux
-var OneapiMgmtName = "libze_intel_gpu.so"
+var (
+	CudartMgmtName = "libcudart.so*"
+	NvcudaMgmtName = "libcuda.so*"
+	NvmlMgmtName   = "" // not currently wired on linux
+	OneapiMgmtName = "libze_intel_gpu.so"
+)

 func GetCPUMem() (memInfo, error) {
 	var mem memInfo
```
`gpu` package (windows) — same var-block grouping:

```diff
@@ -40,10 +40,12 @@ var OneapiGlobs = []string{
 	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
 }

-var CudartMgmtName = "cudart64_*.dll"
-var NvcudaMgmtName = "nvcuda.dll"
-var NvmlMgmtName = "nvml.dll"
-var OneapiMgmtName = "ze_intel_gpu64.dll"
+var (
+	CudartMgmtName = "cudart64_*.dll"
+	NvcudaMgmtName = "nvcuda.dll"
+	NvmlMgmtName   = "nvml.dll"
+	OneapiMgmtName = "ze_intel_gpu64.dll"
+)

 func GetCPUMem() (memInfo, error) {
 	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
```
`integration` package — import shuffle: add `os`, drop the now-unused `envconfig`:

```diff
@@ -5,6 +5,7 @@ package integration
 import (
 	"context"
 	"log/slog"
+	"os"
 	"strconv"
 	"sync"
 	"testing"
@@ -13,7 +14,6 @@ import (
 	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )
```
`integration` package — elide redundant element types in composite literals:

```diff
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 		},
 	}
 	resp = [2][]string{
-		[]string{"sunlight"},
-		[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+		{"sunlight"},
+		{"england", "english", "massachusetts", "pilgrims", "british"},
 	}
 	)
 	var wg sync.WaitGroup
```
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	reqLimit := len(req)
 	iterLimit := 5

-	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
-	if vram != "" {
-		max, err := strconv.ParseUint(vram, 10, 64)
+	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
+		maxVram, err := strconv.ParseUint(s, 10, 64)
 		require.NoError(t, err)
 		// Don't hammer on small VRAM cards...
-		if max < 4*1024*1024*1024 {
+		if maxVram < 4*format.GibiByte {
 			reqLimit = min(reqLimit, 2)
 			iterLimit = 2
 		}
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
 	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > vram {
-			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+		if i > 1 && consumed > maxVram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))

 		wg.Add(1)
 		go func(i int) {
@@ -35,8 +35,8 @@ var (
 		},
 	}
 	resp = [2][]string{
-		[]string{"sunlight"},
-		[]string{"england", "english", "massachusetts", "pilgrims"},
+		{"sunlight"},
+		{"england", "english", "massachusetts", "pilgrims"},
 	}
 )

@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
 	if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
-		threadCount = maxQueue
+		threadCount = int(maxQueue)
 	} else {
 		t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
 	}
@@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	fn := func(resp api.ProgressResponse) error {
 		// fmt.Print(".")
 		if !stallTimer.Reset(stallDuration) {
-			return fmt.Errorf("stall was detected, aborting status reporting")
+			return errors.New("stall was detected, aborting status reporting")
 		}
 		return nil
 	}
@@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er

 	select {
 	case <-stallTimer.C:
-		return fmt.Errorf("download stalled")
+		return errors.New("download stalled")
 	case <-done:
 		return pullError
 	}
@@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 		// fmt.Print(".")
 		buf.Write([]byte(response.Response))
 		if !stallTimer.Reset(streamTimeout) {
-			return fmt.Errorf("stall was detected while streaming response, aborting")
+			return errors.New("stall was detected while streaming response, aborting")
 		}
 		return nil
 	}
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			[]string{"sunlight"},
-			[]string{"soil", "organic", "earth", "black", "tan"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
-			[]string{"fourth", "july", "declaration", "independence"},
-			[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
+			{"sunlight"},
+			{"soil", "organic", "earth", "black", "tan"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"fourth", "july", "declaration", "independence"},
+			{"nitrogen", "oxygen", "carbon", "dioxide"},
 		}
 }
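The `[]string{…}` → `{…}` rewrites in the test tables above are the standard `gofmt -s` simplification: inside a composite literal, the element type is already implied by the enclosing literal's type, so spelling it again is redundant. A minimal sketch of the two equivalent forms:

```go
package main

import "fmt"

func main() {
	// Element type spelled out on every entry (pre-change style):
	verbose := [][]string{
		[]string{"sunlight"},
		[]string{"soil", "organic"},
	}

	// gofmt -s form: the inner []string is implied by the outer type.
	concise := [][]string{
		{"sunlight"},
		{"soil", "organic"},
	}

	fmt.Println(len(verbose), len(concise)) // 2 2
}
```

Both compile to identical code; the linter changes enabled in this branch simply enforce the shorter spelling.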
20
llm/ext_server/server.cpp
vendored
@@ -44,6 +44,7 @@
 #include <errhandlingapi.h>
 #endif

+#include <algorithm>
 #include <cstddef>
 #include <thread>
 #include <chrono>
@@ -402,7 +403,9 @@ struct llama_server_context
         }
     }

-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    auto init_result = llama_init_from_gpt_params(params);
+    model = init_result.model;
+    ctx = init_result.context;
     if (model == nullptr)
     {
         LOG_ERROR("unable to load model", {{"model", params.model}});
@@ -1220,6 +1223,7 @@ struct llama_server_context

             res.result_json = json
             {
+                {"id", res.id},
                 {"embedding", std::vector<float>(embd, embd + n_embd)},
                 {"timings", slot.get_formated_timings()},
             };
@@ -2420,7 +2424,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(argv[i], 1.0f);
+            params.lora_adapters.push_back({
+                std::string(argv[i]),
+                1.0,
+            });
             params.use_mmap = false;
         }
         else if (arg == "--lora-scaled")
@@ -2436,7 +2443,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
+            params.lora_adapters.push_back({
+                lora_adapter,
+                std::stof(argv[i])
+            });
             params.use_mmap = false;
         }
         else if (arg == "-v" || arg == "--verbose")
@@ -3203,6 +3213,10 @@ int main(int argc, char **argv) {
             }

             responses = result.result_json.value("results", std::vector<json>{result.result_json});
+            std::sort(responses.begin(), responses.end(), [](const json& a, const json& b) {
+                return a["id"] < b["id"];
+            });

             json embeddings = json::array();

             int prompt_n = 0;
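The `server.cpp` hunks above attach an `id` to each embedding result and sort the collected responses by it before building the output array: with several slots serving one batched request, per-input results can complete out of order, and the `id` restores the original input order. A small Go sketch of the same reordering idea (the `result` type here is illustrative, not the server's actual payload):

```go
package main

import (
	"fmt"
	"sort"
)

// result stands in for one id-tagged embedding; illustrative only.
type result struct {
	ID        int
	Embedding []float32
}

func main() {
	// Parallel slots finish in arbitrary order, so results arrive shuffled.
	results := []result{{ID: 2}, {ID: 0}, {ID: 1}}

	// Sorting by the attached id restores input order, mirroring the
	// std::sort over result_json["id"] in the hunk above.
	sort.Slice(results, func(i, j int) bool { return results[i].ID < results[j].ID })

	for _, r := range results {
		fmt.Println(r.ID) // 0, 1, 2
	}
}
```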
Submodule llm/llama.cpp updated: 6eeaeba126...1e6f6554aa
@@ -11,8 +11,9 @@ package llm
 // #include <stdlib.h>
 // #include "llama.h"
 import "C"

 import (
-	"fmt"
+	"errors"
 	"unsafe"
 )

@@ -33,7 +34,7 @@ func Quantize(infile, outfile string, ftype fileType) error {
 	params.ftype = ftype.Value()

 	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
-		return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
+		return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
 	}

 	return nil
@@ -6,10 +6,11 @@ import (
 	"os"
 	"testing"

-	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/gpu"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/gpu"
 )

 func TestEstimateGPULayers(t *testing.T) {
@@ -1,40 +1,32 @@
 diff --git a/common/common.cpp b/common/common.cpp
-index dbb724fb..c26fe6ee 100644
+index 2e8374d5..70d0afde 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
-     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
-         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
-         float lora_scale = std::get<1>(params.lora_adapter[i]);
-+
-+        // try to load as gguf
-         auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
-         if (adapter == nullptr) {
--            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+         loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
+         if (loaded_la.adapter == nullptr) {
+             fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
 -            llama_free(lctx);
 -            llama_free_model(model);
--            return std::make_tuple(nullptr, nullptr);
-+            fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
+-            return iparams;
 +
 +            // if that fails, try loading as ggla for compatibility
 +            int err = llama_model_apply_lora_from_file(model,
-+                                         lora_adapter.c_str(),
-+                                         lora_scale,
++                                         la.path.c_str(),
++                                         la.scale,
 +                                         nullptr,
 +                                         params.n_threads);
 +            if (err != 0) {
 +                fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
 +                llama_free(lctx);
 +                llama_free_model(model);
--                return std::make_tuple(nullptr, nullptr);
++                return iparams;
++            } else {
++                break;
 +            }
-+        } else {
-+            llama_lora_adapter_set(lctx, adapter, lora_scale);
         }
--        llama_lora_adapter_set(lctx, adapter, lora_scale);
+        iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
     }

-    if (params.ignore_eos) {
 diff --git a/include/llama.h b/include/llama.h
 index 93fd77ca..b0fb37a6 100644
 --- a/include/llama.h
@@ -1,20 +0,0 @@
-diff --git a/src/llama.cpp b/src/llama.cpp
-index a207451f..fba6b175 100644
---- a/src/llama.cpp
-+++ b/src/llama.cpp
-@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
-                 hparams.attn_soft_cap = true;
-
-                 switch (hparams.n_layer) {
-+                    case 26: model.type = e_model::MODEL_2B; break;
-                     case 42: model.type = e_model::MODEL_9B; break;
-                     case 46: model.type = e_model::MODEL_27B; break;
-                     default: model.type = e_model::MODEL_UNKNOWN;
-@@ -11736,6 +11737,7 @@ struct llm_build_context {
-
-                // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
-                switch (model.type) {
-+                    case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
-                     case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
-                     case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
-                     default: GGML_ABORT("fatal error");
@@ -44,11 +44,12 @@ type LlamaServer interface {

 // llmServer is an instance of the llama.cpp server
 type llmServer struct {
 	port    int
 	cmd     *exec.Cmd
 	done    chan error // Channel to signal when the process exits
 	status  *StatusWriter
 	options api.Options
+	numParallel int

 	estimate    MemoryEstimate
 	totalLayers uint64
@@ -184,15 +185,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr

 	params := []string{
 		"--model", model,
-		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
-		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
+		"--ctx-size", strconv.Itoa(opts.NumCtx),
+		"--batch-size", strconv.Itoa(opts.NumBatch),
 		"--embedding",
 	}

 	params = append(params, "--log-disable")

 	if opts.NumGPU >= 0 {
-		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
+		params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
 	}

 	if envconfig.Debug() {
@@ -200,7 +201,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}

 	if opts.MainGPU > 0 {
-		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
+		params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
 	}

 	if len(adapters) > 0 {
@@ -214,7 +215,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}

 	if opts.NumThread > 0 {
-		params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
+		params = append(params, "--threads", strconv.Itoa(opts.NumThread))
 	}

 	if !opts.F16KV {
@@ -256,11 +257,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}

-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}

-	params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
+	params = append(params, "--parallel", strconv.Itoa(numParallel))

 	if estimate.TensorSplit != "" {
 		params = append(params, "--tensor-split", estimate.TensorSplit)
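`gpu.IsNUMA()` is referenced here but not defined in this hunk. As a sketch only — an assumption about how such a check could look, not necessarily the implementation this branch ships — a Linux-side test might count the NUMA node directories the kernel exposes under sysfs:

```go
package gpu

import (
	"path/filepath"
	"runtime"
)

// IsNUMA reports whether the host has more than one NUMA node.
// Hypothetical sketch: assumes Linux exposes one
// /sys/devices/system/node/nodeN directory per node.
func IsNUMA() bool {
	if runtime.GOOS != "linux" {
		return false
	}
	nodes, _ := filepath.Glob("/sys/devices/system/node/node[0-9]*")
	return len(nodes) > 1
}
```

llama.cpp's `--numa distribute` spreads execution evenly across nodes, while `--numa numactl` honors the CPU map numactl provides, which is why the change prefers it when the binary is present.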
@@ -337,6 +344,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		status:      NewStatusWriter(os.Stderr),
 		options:     opts,
 		estimate:    estimate,
+		numParallel: numParallel,
 		sem:         semaphore.NewWeighted(int64(numParallel)),
 		totalLayers: ggml.KV().BlockCount() + 1,
 		gpus:        gpus,
@@ -425,7 +433,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			if strings.Contains(s.status.LastErrMsg, "unknown model") {
 				s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
 			}
-			s.done <- fmt.Errorf(s.status.LastErrMsg)
+			s.done <- errors.New(s.status.LastErrMsg)
 		} else {
 			s.done <- err
 		}
@@ -884,11 +892,14 @@ type EmbedResponse struct {
 }

 func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) {
-	if err := s.sem.Acquire(ctx, 1); err != nil {
+	// each input will use a slot, so we need to acquire the semaphore for
+	// the number of inputs up to numParallel
+	slots := int64(min(len(input), s.numParallel))
+	if err := s.sem.Acquire(ctx, slots); err != nil {
 		slog.Error("Failed to acquire semaphore", "error", err)
 		return nil, err
 	}
-	defer s.sem.Release(1)
+	defer s.sem.Release(slots)

 	// Make sure the server is ready
 	status, err := s.getServerStatusRetry(ctx)
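Each input in a batched embedding request occupies one server slot, so the change above acquires `min(len(input), s.numParallel)` units from the weighted semaphore rather than a single unit, and releases the same count. A minimal self-contained sketch of that slot accounting, using the same `golang.org/x/sync/semaphore` package:

```go
package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/semaphore"
)

func main() {
	const numParallel = 4
	sem := semaphore.NewWeighted(numParallel) // one unit per parallel slot

	input := []string{"a", "b", "c", "d", "e", "f"}

	// A batch claims as many slots as it has inputs, capped at numParallel.
	slots := int64(min(len(input), numParallel))
	if err := sem.Acquire(context.Background(), slots); err != nil {
		panic(err)
	}
	defer sem.Release(slots)

	fmt.Printf("processing %d inputs on %d of %d slots\n", len(input), slots, numParallel)
}
```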
3
main.go
@@ -3,8 +3,9 @@ package main
 import (
 	"context"

-	"github.com/ollama/ollama/cmd"
 	"github.com/spf13/cobra"
+
+	"github.com/ollama/ollama/cmd"
 )

 func main() {
@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"encoding/base64"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -14,6 +15,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/types/model"
 )
@@ -367,24 +369,24 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 			for _, c := range content {
 				data, ok := c.(map[string]any)
 				if !ok {
-					return nil, fmt.Errorf("invalid message format")
+					return nil, errors.New("invalid message format")
 				}
 				switch data["type"] {
 				case "text":
 					text, ok := data["text"].(string)
 					if !ok {
-						return nil, fmt.Errorf("invalid message format")
+						return nil, errors.New("invalid message format")
 					}
 					messages = append(messages, api.Message{Role: msg.Role, Content: text})
 				case "image_url":
 					var url string
 					if urlMap, ok := data["image_url"].(map[string]any); ok {
 						if url, ok = urlMap["url"].(string); !ok {
-							return nil, fmt.Errorf("invalid message format")
+							return nil, errors.New("invalid message format")
 						}
 					} else {
 						if url, ok = data["image_url"].(string); !ok {
-							return nil, fmt.Errorf("invalid message format")
+							return nil, errors.New("invalid message format")
 						}
 					}

@@ -400,17 +402,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 					}

 					if !valid {
-						return nil, fmt.Errorf("invalid image input")
+						return nil, errors.New("invalid image input")
 					}

 					img, err := base64.StdEncoding.DecodeString(url)
 					if err != nil {
-						return nil, fmt.Errorf("invalid message format")
+						return nil, errors.New("invalid message format")
 					}

 					messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
 				default:
-					return nil, fmt.Errorf("invalid message format")
+					return nil, errors.New("invalid message format")
 				}
 			}
 		default:
@@ -423,7 +425,7 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 			toolCalls[i].Function.Name = tc.Function.Name
 			err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
 			if err != nil {
-				return nil, fmt.Errorf("invalid tool call arguments")
+				return nil, errors.New("invalid tool call arguments")
 			}
 		}
 		messages = append(messages, api.Message{Role: msg.Role, ToolCalls: toolCalls})
@@ -737,14 +739,12 @@ func (w *RetrieveWriter) Write(data []byte) (int, error) {
 func (w *EmbedWriter) writeResponse(data []byte) (int, error) {
 	var embedResponse api.EmbedResponse
 	err := json.Unmarshal(data, &embedResponse)
-
 	if err != nil {
 		return 0, err
 	}

 	w.ResponseWriter.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w.ResponseWriter).Encode(toEmbeddingList(w.model, embedResponse))
-
 	if err != nil {
 		return 0, err
 	}
@@ -12,13 +12,16 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/ollama/ollama/api"
 )

-const prefix = `data:image/jpeg;base64,`
-const image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
-const imageURL = prefix + image
+const (
+	prefix   = `data:image/jpeg;base64,`
+	image    = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
+	imageURL = prefix + image
+)

 func prepareRequest(req *http.Request, body any) {
 	bodyBytes, _ := json.Marshal(body)
@@ -82,7 +82,7 @@ TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
 }

 func TestParseFileFrom(t *testing.T) {
-	var cases = []struct {
+	cases := []struct {
 		input    string
 		expected []Command
 		err      error
@@ -185,7 +185,7 @@ BADCOMMAND param1 value1
 }

 func TestParseFileMessages(t *testing.T) {
-	var cases = []struct {
+	cases := []struct {
 		input    string
 		expected []Command
 		err      error
@@ -276,7 +276,7 @@ MESSAGE system`,
 }

 func TestParseFileQuoted(t *testing.T) {
-	var cases = []struct {
+	cases := []struct {
 		multiline string
 		expected  []Command
 		err       error
@@ -430,7 +430,7 @@ TEMPLATE """
 }

 func TestParseFileParameters(t *testing.T) {
-	var cases = map[string]struct {
+	cases := map[string]struct {
 		name, value string
 	}{
 		"numa true": {"numa", "true"},
@@ -491,7 +491,7 @@ func TestParseFileParameters(t *testing.T) {
 }

 func TestParseFileComments(t *testing.T) {
-	var cases = []struct {
+	cases := []struct {
 		input    string
 		expected []Command
 	}{
@@ -516,7 +516,7 @@ FROM foo
 }

 func TestParseFileFormatParseFile(t *testing.T) {
-	var cases = []string{
+	cases := []string{
 		`
FROM foo
ADAPTER adapter1
@@ -6,8 +6,9 @@ import (
 	"strings"
 	"time"

-	"github.com/ollama/ollama/format"
 	"golang.org/x/term"
+
+	"github.com/ollama/ollama/format"
 )

 type Bar struct {
@@ -13,7 +13,7 @@ type Buffer struct {
 	DisplayPos int
 	Pos        int
 	Buf        *arraylist.List
-	//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
+	// LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
 	LineHasSpace *arraylist.List
 	Prompt       *Prompt
 	LineWidth    int
@@ -56,7 +56,7 @@ func (b *Buffer) GetLineSpacing(line int) bool {

 func (b *Buffer) MoveLeft() {
 	if b.Pos > 0 {
-		//asserts that we retrieve a rune
+		// asserts that we retrieve a rune
 		if e, ok := b.Buf.Get(b.Pos - 1); ok {
 			if r, ok := e.(rune); ok {
 				rLength := runewidth.RuneWidth(r)
@@ -4,9 +4,7 @@ import (
 	"errors"
 )

-var (
-	ErrInterrupt = errors.New("Interrupt")
-)
+var ErrInterrupt = errors.New("Interrupt")

 type InterruptError struct {
 	Line []rune
@@ -7,8 +7,10 @@ import (
 	"unsafe"
 )

-const tcgets = 0x5401
-const tcsets = 0x5402
+const (
+	tcgets = 0x5401
+	tcsets = 0x5402
+)

 func getTermios(fd uintptr) (*Termios, error) {
 	termios := new(Termios)
@@ -28,8 +28,10 @@ import (

 const maxRetries = 6

-var errMaxRetriesExceeded = errors.New("max retries exceeded")
-var errPartStalled = errors.New("part stalled")
+var (
+	errMaxRetriesExceeded = errors.New("max retries exceeded")
+	errPartStalled        = errors.New("part stalled")
+)

 var blobDownloadManager sync.Map

@@ -214,6 +216,9 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
 		return err
 	}
 	defer file.Close()
+	if err := setSparse(file); err != nil {
+		return err
+	}

 	_ = file.Truncate(b.Total)

@@ -828,7 +828,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	fn(api.ProgressResponse{Status: "retrieving manifest"})

 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
-		return fmt.Errorf("insecure protocol http")
+		return errors.New("insecure protocol http")
 	}

 	manifest, _, err := GetManifest(mp)
@@ -895,7 +895,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}

 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
-		return fmt.Errorf("insecure protocol http")
+		return errors.New("insecure protocol http")
 	}

 	fn(api.ProgressResponse{Status: "pulling manifest"})
@@ -1010,7 +1010,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 	return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
 }

-var errUnauthorized = fmt.Errorf("unauthorized: access denied")
+var errUnauthorized = errors.New("unauthorized: access denied")

 // getTokenSubject returns the subject of a JWT token, it does not validate the token
 func getTokenSubject(token string) string {
@@ -2,9 +2,9 @@ package server

 import (
 	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
-	"fmt"
 	"io"
 	"log/slog"
 	"os"
@@ -19,6 +19,7 @@ type Manifest struct {
 	Config *Layer   `json:"config"`
 	Layers []*Layer `json:"layers"`

+	name     model.Name
 	filepath string
 	fi       os.FileInfo
 	digest   string
@@ -69,7 +70,6 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {

 	p := filepath.Join(manifests, n.Filepath())

-	var m Manifest
 	f, err := os.Open(p)
 	if err != nil {
 		return nil, err
@@ -81,14 +81,16 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 		return nil, err
 	}

+	var m Manifest
 	sha256sum := sha256.New()
 	if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&m); err != nil {
 		return nil, err
 	}

+	m.name = n
 	m.filepath = p
 	m.fi = fi
-	m.digest = fmt.Sprintf("%x", sha256sum.Sum(nil))
+	m.digest = hex.EncodeToString(sha256sum.Sum(nil))

 	return &m, nil
 }
@@ -14,7 +14,7 @@ func createManifest(t *testing.T, path, name string) {
 	t.Helper()

 	p := filepath.Join(path, "manifests", name)
-	if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil {
+	if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
 		t.Fatal(err)
 	}

@@ -9,6 +9,7 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/template"
 )
@@ -6,6 +6,7 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/template"
 )
164
server/routes.go
@@ -55,8 +55,10 @@ func init() {
 	gin.SetMode(mode)
 }

-var errRequired = errors.New("is required")
-var errBadTemplate = errors.New("template error")
+var (
+	errRequired    = errors.New("is required")
+	errBadTemplate = errors.New("template error")
+)

 func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
 	opts := api.DefaultOptions()
@@ -369,7 +371,6 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 		input[i] = s
 	}
 	embeddings, err := r.Embed(c.Request.Context(), input)
-
 	if err != nil {
 		slog.Error("embedding generation failed", "error", err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@@ -430,7 +431,6 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	}

 	embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})
-
 	if err != nil {
 		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@@ -556,7 +556,7 @@ func checkNameExists(name model.Name) error {

 	for n := range names {
 		if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
-			return fmt.Errorf("a model with that name already exists")
+			return errors.New("a model with that name already exists")
 		}
 	}

@@ -703,6 +703,153 @@ func (s *Server) ShowModelHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, resp)
 }

+func manifestLayers(m *Manifest, exclude []string) (map[string]any, error) {
+	r := map[string]any{
+		"name":        m.name.DisplayShortest(),
+		"digest":      m.digest,
+		"size":        m.Size(),
+		"modified_at": m.fi.ModTime(),
+	}
+
+	excludeAll := slices.Contains(exclude, "all")
+	excludeDetails := slices.Contains(exclude, "details")
+
+	for _, layer := range m.Layers {
+		var errExcludeKey = errors.New("exclude key")
+		key, content, err := func() (string, any, error) {
+			key := strings.TrimPrefix(layer.MediaType, "application/vnd.ollama.image.")
+			if slices.Contains(exclude, key) || excludeAll {
+				return "", nil, errExcludeKey
+			}
+
+			f, err := layer.Open()
+			if err != nil {
+				return "", nil, err
+			}
+			defer f.Close()
+
+			switch key {
+			case "model", "projector", "adapter":
+				ggml, _, err := llm.DecodeGGML(f, 0)
+				if err != nil {
+					return "", nil, err
+				}
+
+				content := map[string]any{
+					"architecture":    ggml.KV().Architecture(),
+					"file_type":       ggml.KV().FileType().String(),
+					"parameter_count": ggml.KV().ParameterCount(),
+				}
+
+				if !slices.Contains(exclude, key+".details") && !excludeAll && !excludeDetails {
+					// exclude any extraneous or redundant fields
+					delete(ggml.KV(), "general.basename")
+					delete(ggml.KV(), "general.description")
+					delete(ggml.KV(), "general.filename")
+					delete(ggml.KV(), "general.finetune")
+					delete(ggml.KV(), "general.languages")
+					delete(ggml.KV(), "general.license")
+					delete(ggml.KV(), "general.license.link")
+					delete(ggml.KV(), "general.name")
+					delete(ggml.KV(), "general.paramter_count")
+					delete(ggml.KV(), "general.size_label")
+					delete(ggml.KV(), "general.tags")
+					delete(ggml.KV(), "general.type")
+					delete(ggml.KV(), "general.quantization_version")
+					delete(ggml.KV(), "tokenizer.chat_template")
+					content["details"] = ggml.KV()
+				}
+
+				return key, content, nil
+			case "params", "messages":
+				var content any
+				if err := json.NewDecoder(f).Decode(&content); err != nil {
+					return "", nil, err
+				}
+
+				return key, content, nil
+			case "template", "system", "license":
+				bts, err := io.ReadAll(f)
+				if err != nil {
+					return "", nil, err
+				}
+
+				if key == "license" {
+					return key, []any{string(bts)}, nil
+				}
+
+				return key, string(bts), nil
+			}
+
+			return layer.MediaType, nil, nil
+		}()
+		if errors.Is(err, errExcludeKey) {
+			continue
+		} else if err != nil {
+			return nil, err
+		}
+
+		if s, ok := r[key].([]any); ok {
+			r[key] = append(s, content)
+		} else {
+			r[key] = content
+		}
+	}
+
+	return r, nil
+}
+
+func (s *Server) GetModelsHandler(c *gin.Context) {
+	ms, err := Manifests()
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
+	var rs []map[string]any
+	for _, m := range ms {
+		r, err := manifestLayers(m, c.QueryArray("exclude"))
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+
+		rs = append(rs, r)
+	}
+
+	slices.SortStableFunc(rs, func(i, j map[string]any) int {
+		// most recently modified first
+		return cmp.Compare(
+			j["modified_at"].(time.Time).Unix(),
+			i["modified_at"].(time.Time).Unix(),
+		)
+	})
+
+	c.JSON(http.StatusOK, rs)
+}
+
+func (s *Server) GetModelHandler(c *gin.Context) {
+	n := model.ParseName(strings.TrimPrefix(c.Param("model"), "/"))
+	if !n.IsValid() {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
+		return
+	}
+
+	m, err := ParseNamedManifest(n)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
+	r, err := manifestLayers(m, c.QueryArray("exclude"))
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, r)
+}
+
 func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	m, err := GetModel(req.Model)
 	if err != nil {
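Together with the route registrations in the `GenerateRoutes` hunk further down, this adds `GET /api/models` and `GET /api/models/*model`, with a repeatable `exclude` query parameter keyed by layer media type (`details`, `license`, `template`, or `all`). A usage sketch, assuming the default listen address `localhost:11434`:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// List all local models, trimming the heavier per-layer fields.
	resp, err := http.Get("http://localhost:11434/api/models?exclude=details&exclude=license")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // JSON array: name, digest, size, modified_at, ...
}
```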
@@ -729,7 +876,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	n := model.ParseName(req.Model)
 	if !n.IsValid() {
-		return nil, fmt.Errorf("invalid model name")
+		return nil, errors.New("invalid model name")
 	}

 	manifest, err := ParseNamedManifest(n)
@@ -993,7 +1140,7 @@ func allowedHost(host string) bool {
 		return true
 	}

-	var tlds = []string{
+	tlds := []string{
 		"localhost",
 		"local",
 		"internal",
@@ -1090,6 +1237,9 @@ func (s *Server) GenerateRoutes() http.Handler {
 		c.String(http.StatusOK, "Ollama is running")
 	})

+	r.Handle(method, "/api/models", s.GetModelsHandler)
+	r.Handle(method, "/api/models/*model", s.GetModelHandler)
+
 	r.Handle(method, "/api/tags", s.ListModelsHandler)
 	r.Handle(method, "/api/version", func(c *gin.Context) {
 		c.JSON(http.StatusOK, gin.H{"version": version.Version})
@@ -2,6 +2,7 @@ package server

 import (
 	"bytes"
+	"cmp"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -13,6 +14,7 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
@@ -52,6 +54,8 @@ func (t *responseRecorder) CloseNotify() <-chan bool {

 func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder {
 	t.Helper()
+	// if OLLAMA_MODELS is not set, set it to the temp directory
+	t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))

 	w := NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -9,6 +9,7 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/types/model"
 )
@@ -8,6 +8,7 @@ import (
 	"testing"

 	"github.com/gin-gonic/gin"
+
 	"github.com/ollama/ollama/api"
 )

@@ -333,7 +333,6 @@ func Test_Routes(t *testing.T) {
 			t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
 		}
 		_, err := io.ReadAll(resp.Body)
-
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -58,7 +58,7 @@ var defaultModelsPerGPU = 3
 // we'll back off down to 1 to try to get it to fit
 var defaultParallel = 4

-var ErrMaxQueue = fmt.Errorf("server busy, please try again. maximum pending requests exceeded")
+var ErrMaxQueue = errors.New("server busy, please try again. maximum pending requests exceeded")

 func InitScheduler(ctx context.Context) *Scheduler {
 	maxQueue := envconfig.MaxQueue()
@@ -3,23 +3,25 @@ package server
 import (
 	"bytes"
 	"context"
-	"fmt"
+	"errors"
 	"log/slog"
 	"os"
 	"testing"
 	"time"

+	"github.com/stretchr/testify/require"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/lifecycle"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
-	"github.com/stretchr/testify/require"
 )

-func init() {
+func TestMain(m *testing.M) {
 	os.Setenv("OLLAMA_DEBUG", "1")
 	lifecycle.InitLogging()
+	os.Exit(m.Run())
 }

 func TestInitScheduler(t *testing.T) {
@@ -46,7 +48,7 @@ func TestLoad(t *testing.T) {
 	}
 	// Fail to load model first
 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
-		return nil, fmt.Errorf("something failed to load model blah")
+		return nil, errors.New("something failed to load model blah")
 	}
 	gpus := gpu.GpuInfoList{}
 	s.load(req, ggml, gpus, 0)
@@ -75,7 +77,7 @@ func TestLoad(t *testing.T) {
 	}

 	req.model.ModelPath = "dummy_model_path"
-	server.waitResp = fmt.Errorf("wait failure")
+	server.waitResp = errors.New("wait failure")
 	s.load(req, ggml, gpus, 0)
 	select {
 	case err := <-req.errCh:
@@ -600,7 +602,7 @@ func TestNeedsReload(t *testing.T) {
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
 	req.opts.NumBatch = runner.Options.NumBatch
-	llm.pingResp = fmt.Errorf("foo")
+	llm.pingResp = errors.New("foo")
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
 	llm.pingResp = nil
@@ -724,15 +726,19 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
 func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
 	return s.completionResp
 }
+
 func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
 	return s.embedResp, s.embedRespErr
 }
+
 func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
 	return s.tokenizeResp, s.tokenizeRespErr
 }
+
 func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) {
 	return s.detokenizeResp, s.detonekizeRespErr
 }
+
 func (s *mockLlm) Close() error {
 	s.closeCalled = true
 	return s.closeResp
9
server/sparse_common.go
Normal file
@@ -0,0 +1,9 @@
+//go:build !windows
+
+package server
+
+import "os"
+
+func setSparse(file *os.File) error {
+	return nil
+}
16
server/sparse_windows.go
Normal file
@@ -0,0 +1,16 @@
+package server
+
+import (
+	"os"
+
+	"golang.org/x/sys/windows"
+)
+
+func setSparse(file *os.File) error {
+	return windows.DeviceIoControl(
+		windows.Handle(file.Fd()), windows.FSCTL_SET_SPARSE,
+		nil, 0,
+		nil, 0,
+		nil, nil,
+	)
+}
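`setSparse` supports the download path shown earlier in `server/download.go`: the partial blob is marked sparse, then `Truncate`d to its final size so out-of-order part writes can seek anywhere without the OS materializing runs of zeros. On non-Windows platforms sparse files need no opt-in, hence the no-op build. A self-contained sketch of the call pattern, with the no-op variant inlined for illustration:

```go
package main

import "os"

// setSparse mirrors the non-Windows variant above; the Windows build
// would issue FSCTL_SET_SPARSE via DeviceIoControl instead.
func setSparse(*os.File) error { return nil }

func main() {
	f, err := os.Create("blob.partial")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := setSparse(f); err != nil {
		panic(err)
	}
	// Reserve the full blob size logically, not physically.
	if err := f.Truncate(1 << 30); err != nil {
		panic(err)
	}
}
```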
@@ -12,13 +12,15 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"

+	"golang.org/x/sync/errgroup"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
-	"golang.org/x/sync/errgroup"
 )

 var blobUploadManager sync.Map

@@ -212,7 +214,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
 func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *registryOptions) error {
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
-	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
+	headers.Set("Content-Length", strconv.FormatInt(part.Size, 10))

 	if method == http.MethodPatch {
 		headers.Set("X-Redirect-Uploads", "1")
@@ -15,8 +15,9 @@ import (
 	"text/template/parse"

 	"github.com/agnivade/levenshtein"
-	"github.com/ollama/ollama/api"
 	"golang.org/x/exp/maps"
+
+	"github.com/ollama/ollama/api"
 )

 //go:embed index.json
@@ -12,6 +12,7 @@ import (
 	"testing"

 	"github.com/google/go-cmp/cmp"
+
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
@@ -6,8 +6,10 @@ import (
 	"strings"
 )

-const UnknownOllamaKeyErrMsg = "unknown ollama key"
-const InvalidModelNameErrMsg = "invalid model name"
+const (
+	UnknownOllamaKeyErrMsg = "unknown ollama key"
+	InvalidModelNameErrMsg = "invalid model name"
+)

 // TODO: This should have a structured response from the API
 type UnknownOllamaKey struct {
@@ -258,7 +258,7 @@ func (n Name) IsValid() bool {
 // IsFullyQualified returns true if all parts of the name are present and
 // valid without the digest.
 func (n Name) IsFullyQualified() bool {
-	var parts = []string{
+	parts := []string{
 		n.Host,
 		n.Namespace,
 		n.Model,