Compare commits

...

14 Commits

Author SHA1 Message Date
Josh Yan
0e1ec461f9 import 2024-08-28 11:18:23 -07:00
Josh Yan
52ef79bb7d last lint (hopefully) 2024-08-28 11:12:39 -07:00
Josh Yan
800edd7884 lint again 2024-08-28 11:10:03 -07:00
Josh Yan
01b20fe6f1 lint 2024-08-28 11:07:43 -07:00
Josh Yan
340162fbc3 convert progress 2024-08-28 10:54:52 -07:00
Patrick Devine
6c1c1ad6a9 throw an error when encountering unsupport tensor sizes (#6538) 2024-08-27 17:54:04 -07:00
Daniel Hiltgen
93ea9240ae Move ollama executable out of bin dir (#6535) 2024-08-27 16:19:00 -07:00
Patrick Devine
d13c3daa0b add safetensors to the modelfile docs (#6532) 2024-08-27 14:46:47 -07:00
Patrick Devine
1713eddcd0 Fix import image width (#6528) 2024-08-27 14:19:47 -07:00
Daniel Hiltgen
4e1c4f6e0b Update manual instructions with discrete ROCm bundle (#6445) 2024-08-27 13:42:28 -07:00
Sean Khatiri
397cae7962 llm: fix typo in comment (#6530) 2024-08-27 13:28:29 -07:00
Patrick Devine
1c70a00f71 adjust image sizes 2024-08-27 11:15:25 -07:00
Patrick Devine
ac80010db8 update the import docs (#6104) 2024-08-26 19:57:26 -07:00
Jeffrey Morgan
47fa0839b9 server: clean up route names for consistency (#6524) 2024-08-26 19:36:11 -07:00
27 changed files with 422 additions and 144 deletions

View File

@@ -87,7 +87,7 @@ DialogFontSize=12
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
@@ -99,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -134,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry]
Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
Check: NeedsAddPath('{app}\bin')
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
Check: NeedsAddPath('{app}')
[Code]

View File

@@ -124,6 +124,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bars := make(map[string]*progress.Bar)
var convertSpin *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -136,6 +137,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bar.Set(resp.Completed)
} else if strings.Contains(resp.Status, "converting") {
spinner.Stop()
if convertSpin != nil {
convertSpin.SetMessage(resp.Status)
} else {
status = resp.Status
convertSpin = progress.NewSpinner(resp.Status)
p.Add("convert", convertSpin)
}
} else if status != resp.Status {
spinner.Stop()

View File

@@ -9,6 +9,7 @@ import (
"log/slog"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
)
@@ -79,12 +80,12 @@ func (ModelParameters) specialTokenTypes() []string {
}
}
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
return llm.WriteGGUF(ws, kv, ts, fn)
}
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
return llm.WriteGGUF(ws, kv, ts, fn)
}
type ModelConverter interface {
@@ -99,7 +100,7 @@ type ModelConverter interface {
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
}
type moreParser interface {
@@ -115,10 +116,10 @@ type AdapterConverter interface {
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
}
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV, fn func(api.ProgressResponse)) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil {
return err
@@ -153,14 +154,17 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
return err
}
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting adapter 0%%"),
})
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts), fn)
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
func ConvertModel(fsys fs.FS, ws io.WriteSeeker, fn func(api.ProgressResponse)) error {
bts, err := fs.ReadFile(fsys, "config.json")
if err != nil {
return err
@@ -224,5 +228,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
return err
}
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting model 0%%"),
})
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts), fn)
}

View File

@@ -19,6 +19,7 @@ import (
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
)
@@ -31,7 +32,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
}
defer f.Close()
if err := ConvertModel(fsys, f); err != nil {
if err := ConvertModel(fsys, f, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}
@@ -140,6 +141,107 @@ func TestConvertFull(t *testing.T) {
}
}
func TestConvertInvalidDatatype(t *testing.T) {
f, err := os.CreateTemp(t.TempDir(), "testmodel")
if err != nil {
t.Fatal(err)
}
defer f.Close()
tempDir := t.TempDir()
generateSafetensorTestData(t, tempDir)
err = ConvertModel(os.DirFS(tempDir), f, func(api.ProgressResponse) {})
if err == nil || err.Error() != "unsupported safetensors model" {
t.Errorf("expected error but didn't get one")
}
}
func generateSafetensorTestData(t *testing.T, tempDir string) {
type tensorData struct {
Offsets []int `json:"data_offsets"`
Type string `json:"dtype"`
Shape []int `json:"shape"`
}
offset := 4096 * 14336
td := map[string]*tensorData{}
td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
Offsets: []int{0, offset},
Type: "I8",
Shape: []int{4096, 14336},
}
td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
Offsets: []int{offset, offset},
Type: "U8",
Shape: []int{},
}
data, err := json.Marshal(td)
if err != nil {
t.Fatal(err)
}
var buf bytes.Buffer
l := int64(len(data))
err = binary.Write(&buf, binary.LittleEndian, l)
if err != nil {
t.Fatal(err)
}
_, err = buf.Write(data)
if err != nil {
t.Fatal(err)
}
fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
if err != nil {
t.Fatal(err)
}
defer fdata.Close()
_, err = fdata.Write(buf.Bytes())
if err != nil {
t.Fatal(err)
}
configData := `
{
"architectures": [
"LlamaForCausalLM"
]
}
`
f, err := os.Create(filepath.Join(tempDir, "config.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(configData)
if err != nil {
t.Fatal(err)
}
tokenizerData := `
{
}
`
f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(tokenizerData)
if err != nil {
t.Fatal(err)
}
}
func TestConvertAdapter(t *testing.T) {
type AdapterCase struct {
Name string
@@ -186,7 +288,7 @@ func TestConvertAdapter(t *testing.T) {
tempDir := t.TempDir()
generateLoraTestData(t, tempDir)
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
@@ -50,6 +51,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
for _, key := range keys {
if value := headers[key]; value.Type != "" {
// bitsandbytes quantized models are unsupported
if len(value.Shape) == 0 {
return nil, errors.New("unsupported safetensors model")
}
ts = append(ts, safetensor{
fs: fsys,
path: p,

BIN
docs/images/ollama-keys.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

BIN
docs/images/signup.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

View File

@@ -1,44 +1,129 @@
# Import
# Importing a model
GGUF models and select Safetensors models can be imported directly into Ollama.
## Table of Contents
## Import GGUF
* [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
* [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
* [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
* [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
A binary GGUF file can be imported directly into Ollama through a Modelfile.
## Importing a fine tuned adapter from Safetensors weights
First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
```dockerfile
FROM /path/to/file.gguf
FROM <base model name>
ADAPTER /path/to/safetensors/adapter/directory
```
## Import Safetensors
Make sure that you use the same base model in the `FROM` command as you used to create the adapter otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
Now run `ollama create` from the directory where the `Modelfile` was created:
- LlamaForCausalLM
- MistralForCausalLM
- MixtralForCausalLM
- GemmaForCausalLM
- Phi3ForCausalLM
```bash
ollama create my-model
```
Lastly, test the model:
```bash
ollama run my-model
```
Ollama supports importing adapters based on several different model architectures including:
* Llama (including Llama 2, Llama 3, and Llama 3.1);
* Mistral (including Mistral 1, Mistral 2, and Mixtral); and
* Gemma (including Gemma 1 and Gemma 2)
You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
* Hugging Face [fine tuning framework] (https://huggingface.co/docs/transformers/en/training)
* [Unsloth](https://github.com/unslothai/unsloth)
* [MLX](https://github.com/ml-explore/mlx)
## Importing a model from Safetensors weights
First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
```dockerfile
FROM /path/to/safetensors/directory
```
For architectures not directly convertable by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
## Automatic Quantization
Now run the `ollama create` command from the directory where you created the `Modelfile`:
> [!NOTE]
> Automatic quantization requires v0.1.35 or higher.
```shell
ollama create my-model
```
Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
Lastly, test the model:
```shell
ollama run my-model
```
Ollama supports importing models for several different architectures including:
* Llama (including Llama 2, Llama 3, and Llama 3.1);
* Mistral (including Mistral 1, Mistral 2, and Mixtral);
* Gemma (including Gemma 1 and Gemma 2); and
* Phi3
This includes importing foundation models as well as any fine tuned models which which have been _fused_ with a foundation model.
## Importing a GGUF based model or adapter
If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
* converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp;
* converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
* downloading a model or adapter from a place such as HuggingFace
To import a GGUF model, create a `Modelfile` containg:
```dockerfile
FROM /path/to/file.gguf
```
For a GGUF adapter, create the `Modelfile` with:
```dockerfile
FROM <model name>
ADAPTER /path/to/file.gguf
```
When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
* a model from Ollama
* a GGUF file
* a Safetensors based model
Once you have created your `Modelfile`, use the `ollama create` command to build the model.
```shell
ollama create my-model
```
## Quantizing a Model
Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.
```dockerfile
FROM /path/to/my/gemma/f16/model
```
Use `ollama create` to then create the quantized model.
```shell
$ ollama create -q Q4_K_M mymodel
$ ollama create --quantize q4_K_M mymodel
transferring model data
quantizing F16 model to Q4_K_M
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
@@ -49,42 +134,53 @@ success
### Supported Quantizations
- `Q4_0`
- `Q4_1`
- `Q5_0`
- `Q5_1`
- `Q8_0`
- `q4_0`
- `q4_1`
- `q5_0`
- `q5_1`
- `q8_0`
#### K-means Quantizations
- `Q3_K_S`
- `Q3_K_M`
- `Q3_K_L`
- `Q4_K_S`
- `Q4_K_M`
- `Q5_K_S`
- `Q5_K_M`
- `Q6_K`
- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_K_S`
- `q4_K_M`
- `q5_K_S`
- `q5_K_M`
- `q6_K`
## Template Detection
> [!NOTE]
> Template detection requires v0.1.42 or higher.
## Sharing your model on ollama.com
Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
```dockerfile
FROM /path/to/my/gemma/model
```
First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
<img src="images/signup.png" alt="Sign-Up" width="40%">
The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
Follow the directions on the page to determine where your Ollama Public Key is located.
<img src="images/ollama-keys.png" alt="Ollama Keys" width="80%">
Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).
```shell
$ ollama create mymodel
transferring model data
using autodetected template gemma-instruct
creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
writing manifest
success
ollama cp mymodel myuser/mymodel
ollama push myuser/mymodel
```
Once your model has been pushed, other users can pull and run it by using the command:
```shell
ollama run myuser/mymodel
```
Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.

View File

@@ -28,6 +28,11 @@ Download and extract the Linux package:
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
```
If you have an AMD GPU, also download and extract the ROCm package into the same location
```bash
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz | sudo tar zx -C /usr
```
### Adding Ollama as a startup service (recommended)
Create a user for Ollama:

View File

@@ -11,8 +11,9 @@ A model file is the blueprint to create and share models with Ollama.
- [Examples](#examples)
- [Instructions](#instructions)
- [FROM (Required)](#from-required)
- [Build from llama3](#build-from-llama3)
- [Build from a bin file](#build-from-a-bin-file)
- [Build from llama3.1](#build-from-llama31)
- [Build from a Safetensors model](#build-from-a-safetensors-model)
- [Build from a GGUF file](#build-from-a-gguf-file)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template)
@@ -99,22 +100,39 @@ The `FROM` instruction defines the base model to use when creating a model.
FROM <model name>:<tag>
```
#### Build from llama3
#### Build from llama3.1
```modelfile
FROM llama3
FROM llama3.1
```
A list of available base models:
<https://github.com/ollama/ollama#model-library>
Additional models can be found at:
<https://ollama.com/library>
#### Build from a `bin` file
#### Build from a Safetensors model
```modelfile
FROM <model directory>
```
The model directory should contain the Safetensors weights for a supported architecture.
Currently supported model architectures:
* Llama (including Llama 2, Llama 3, and Llama 3.1)
* Mistral (including Mistral 1, Mistral 2, and Mixtral)
* Gemma (including Gemma 1 and Gemma 2)
* Phi3
#### Build from a GGUF file
```modelfile
FROM ./ollama-model.bin
```
This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
The GGUF bin file location should be specified as an absolute path or relative to the `Modelfile` location.
### PARAMETER
@@ -174,7 +192,20 @@ SYSTEM """<system message>"""
### ADAPTER
The `ADAPTER` instruction is an optional instruction that specifies any LoRA adapter that should apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from the behaviour will be erratic.
#### Safetensor adapter
```modelfile
ADAPTER <path to safetensor adapter>
```
Currently supported Safetensor adapters:
* Llama (including Llama 2, Llama 3, and Llama 3.1)
* Mistral (including Mistral 1, Mistral 2, and Mixtral)
* Gemma (including Gemma 1 and Gemma 2)
#### GGUF adapter
```modelfile
ADAPTER ./ollama-lora.bin

View File

@@ -190,7 +190,7 @@ func RunnersDir() (p string) {
}
var paths []string
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
paths = append(paths,
root,
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
@@ -282,3 +282,12 @@ func Values() map[string]string {
func Var(key string) string {
return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}
// On windows, we keep the binary at the top directory, but
// other platforms use a "bin" directory, so this returns ".."
func LibRelativeToExe() string {
if runtime.GOOS == "windows" {
return "."
}
return ".."
}

View File

@@ -9,6 +9,8 @@ import (
"path/filepath"
"runtime"
"strings"
"github.com/ollama/ollama/envconfig"
)
// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
@@ -54,7 +56,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary
exe, err := os.Executable()
if err == nil {
rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
return rocmTargetDir, nil

View File

@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location)
localAppData := os.Getenv("LOCALAPPDATA")
appDir := filepath.Join(localAppData, "Programs", "Ollama")
rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
return rocmTargetDir, nil

View File

@@ -653,7 +653,7 @@ func LibraryDir() string {
slog.Warn("failed to lookup working directory", "error", err)
}
// Scan for any of our dependeices, and pick first match
for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
libDep := filepath.Join("lib", "ollama")
if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
return filepath.Join(root, libDep)

View File

@@ -12,6 +12,8 @@ import (
"strings"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
)
type containerGGUF struct {
@@ -506,7 +508,7 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
return binary.Write(w, binary.LittleEndian, s)
}
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor, fn func(api.ProgressResponse)) error {
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
return err
}
@@ -552,7 +554,10 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
}
var alignment int64 = 32
for _, t := range ts {
for i, t := range ts {
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting model %d%%", 100*(i+1)/len(ts)),
})
if err := ggufWriteTensor(ws, t, alignment); err != nil {
return err
}

View File

@@ -41,7 +41,7 @@ func TestEstimateGPULayers(t *testing.T) {
"tokenizer.ggml.tokens": []string{" "},
"tokenizer.ggml.scores": []float32{0},
"tokenizer.ggml.token_type": []int32{0},
}, tensors)
}, tensors, func(api.ProgressResponse) {})
require.NoError(t, err)
ggml, err := LoadModel(f.Name(), 0)

View File

@@ -409,7 +409,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
if err = s.cmd.Start(); err != nil {
// Detect permission denied and augment them essage about noexec
// Detect permission denied and augment the message about noexec
if errors.Is(err, os.ErrPermission) {
finalErr = fmt.Errorf("unable to start server %w. %s may have noexec set. Set OLLAMA_TMPDIR for server to a writable executable directory", err, dir)
continue

View File

@@ -122,8 +122,8 @@ function buildOllama() {
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
}
function buildApp() {

View File

@@ -98,7 +98,6 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
}
defer os.RemoveAll(p)
fn(api.ProgressResponse{Status: "converting model"})
// TODO(mxyng): this should write directly into a layer
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
t, err := os.CreateTemp(p, "fp16")
@@ -123,13 +122,18 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
if baseModel == nil {
return nil, fmt.Errorf("no base model specified for the adapter")
}
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
fn(api.ProgressResponse{
Status: "converting adapter",
})
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV(), fn); err != nil {
return nil, err
}
layerType = "application/vnd.ollama.image.adapter"
case "model":
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
fn(api.ProgressResponse{
Status: "converting model",
})
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t, fn); err != nil {
return nil, err
}
layerType = "application/vnd.ollama.image.model"

View File

@@ -145,7 +145,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
t.Fatalf("failed to open file: %v", err)
}
defer file.Close()
if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse) {}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
@@ -197,7 +197,7 @@ func TestParseLayerFromCopy(t *testing.T) {
defer file2.Close()
for range 5 {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse) {}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
}

View File

@@ -463,7 +463,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
c.JSON(http.StatusOK, resp)
}
func (s *Server) PullModelHandler(c *gin.Context) {
func (s *Server) PullHandler(c *gin.Context) {
var req api.PullRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -513,7 +513,7 @@ func (s *Server) PullModelHandler(c *gin.Context) {
streamResponse(c, ch)
}
func (s *Server) PushModelHandler(c *gin.Context) {
func (s *Server) PushHandler(c *gin.Context) {
var req api.PushRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -577,7 +577,7 @@ func checkNameExists(name model.Name) error {
return nil
}
func (s *Server) CreateModelHandler(c *gin.Context) {
func (s *Server) CreateHandler(c *gin.Context) {
var r api.CreateRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -647,7 +647,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
streamResponse(c, ch)
}
func (s *Server) DeleteModelHandler(c *gin.Context) {
func (s *Server) DeleteHandler(c *gin.Context) {
var r api.DeleteRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -680,7 +680,7 @@ func (s *Server) DeleteModelHandler(c *gin.Context) {
}
}
func (s *Server) ShowModelHandler(c *gin.Context) {
func (s *Server) ShowHandler(c *gin.Context) {
var req api.ShowRequest
err := c.ShouldBindJSON(&req)
switch {
@@ -829,7 +829,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) {
return kv, nil
}
func (s *Server) ListModelsHandler(c *gin.Context) {
func (s *Server) ListHandler(c *gin.Context) {
ms, err := Manifests()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -879,7 +879,7 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{Models: models})
}
func (s *Server) CopyModelHandler(c *gin.Context) {
func (s *Server) CopyHandler(c *gin.Context) {
var r api.CopyRequest
if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -1081,33 +1081,33 @@ func (s *Server) GenerateRoutes() http.Handler {
allowedHostsMiddleware(s.addr),
)
r.POST("/api/pull", s.PullModelHandler)
r.POST("/api/pull", s.PullHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
r.POST("/api/create", s.CreateModelHandler)
r.POST("/api/push", s.PushModelHandler)
r.POST("/api/copy", s.CopyModelHandler)
r.DELETE("/api/delete", s.DeleteModelHandler)
r.POST("/api/show", s.ShowModelHandler)
r.POST("/api/create", s.CreateHandler)
r.POST("/api/push", s.PushHandler)
r.POST("/api/copy", s.CopyHandler)
r.DELETE("/api/delete", s.DeleteHandler)
r.POST("/api/show", s.ShowHandler)
r.POST("/api/blobs/:digest", s.CreateBlobHandler)
r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
r.GET("/api/ps", s.ProcessHandler)
r.GET("/api/ps", s.PsHandler)
// Compatibility endpoints
r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)
for _, method := range []string{http.MethodGet, http.MethodHead} {
r.Handle(method, "/", func(c *gin.Context) {
c.String(http.StatusOK, "Ollama is running")
})
r.Handle(method, "/api/tags", s.ListModelsHandler)
r.Handle(method, "/api/tags", s.ListHandler)
r.Handle(method, "/api/version", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"version": version.Version})
})
@@ -1269,7 +1269,7 @@ func streamResponse(c *gin.Context, ch chan any) {
})
}
func (s *Server) ProcessHandler(c *gin.Context) {
func (s *Server) PsHandler(c *gin.Context) {
models := []api.ProcessModelResponse{}
for _, v := range s.sched.loaded {

View File

@@ -30,7 +30,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
}
defer f.Close()
if err := llm.WriteGGUF(f, kv, ti); err != nil {
if err := llm.WriteGGUF(f, kv, ti, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}
@@ -93,7 +93,7 @@ func TestCreateFromBin(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -120,7 +120,7 @@ func TestCreateFromModel(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -134,7 +134,7 @@ func TestCreateFromModel(t *testing.T) {
filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test",
Stream: &stream,
@@ -162,7 +162,7 @@ func TestCreateRemovesLayers(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -182,7 +182,7 @@ func TestCreateRemovesLayers(t *testing.T) {
filepath.Join(p, "blobs", "sha256-bc80b03733773e0728011b2f4adf34c458b400e1aad48cb28d61170f3a2ad2d6"),
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -210,7 +210,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -230,7 +230,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
filepath.Join(p, "blobs", "sha256-f29e82a8284dbdf5910b1555580ff60b04238b8da9d5e51159ada67a4d0d5851"),
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -267,7 +267,7 @@ func TestCreateMergeParameters(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -288,7 +288,7 @@ func TestCreateMergeParameters(t *testing.T) {
})
// in order to merge parameters, the second model must be created FROM the first
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7",
Stream: &stream,
@@ -326,7 +326,7 @@ func TestCreateMergeParameters(t *testing.T) {
}
// slices are replaced
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7\nPARAMETER stop <|endoftext|>",
Stream: &stream,
@@ -371,7 +371,7 @@ func TestCreateReplacesMessages(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -391,7 +391,7 @@ func TestCreateReplacesMessages(t *testing.T) {
filepath.Join(p, "blobs", "sha256-e0e27d47045063ccb167ae852c51d49a98eab33fabaee4633fdddf97213e40b5"),
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: "FROM test\nMESSAGE assistant \"You're a test, Harry.\"\nMESSAGE user \"I-I'm a what?\"\nMESSAGE assistant \"A test. And a thumping good one at that, I'd wager.\"",
Stream: &stream,
@@ -448,7 +448,7 @@ func TestCreateTemplateSystem(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -488,7 +488,7 @@ func TestCreateTemplateSystem(t *testing.T) {
}
t.Run("incomplete template", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -500,7 +500,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})
t.Run("template with unclosed if", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -512,7 +512,7 @@ func TestCreateTemplateSystem(t *testing.T) {
})
t.Run("template with undefined function", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -531,7 +531,7 @@ func TestCreateLicenses(t *testing.T) {
t.Setenv("OLLAMA_MODELS", p)
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -579,7 +579,7 @@ func TestCreateDetectTemplate(t *testing.T) {
var s Server
t.Run("matched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
@@ -600,7 +600,7 @@ func TestCreateDetectTemplate(t *testing.T) {
})
t.Run("unmatched", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,

View File

@@ -22,7 +22,7 @@ func TestDelete(t *testing.T) {
var s Server
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
@@ -31,7 +31,7 @@ func TestDelete(t *testing.T) {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "test2",
Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
})
@@ -52,7 +52,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})
w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -68,7 +68,7 @@ func TestDelete(t *testing.T) {
filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
})
w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"})
w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test2"})
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -102,7 +102,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
t.Fatal(err)
}
w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
w := createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
if w.Code != http.StatusOK {
t.Errorf("expected status code 200, actual %d", w.Code)
}

View File

@@ -84,7 +84,7 @@ func TestGenerateChat(t *testing.T) {
go s.sched.Run(context.TODO())
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -144,7 +144,7 @@ func TestGenerateChat(t *testing.T) {
})
t.Run("missing capabilities chat", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -270,7 +270,7 @@ func TestGenerateChat(t *testing.T) {
checkChatResponse(t, w.Body, "test", "Hi!")
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -382,7 +382,7 @@ func TestGenerate(t *testing.T) {
go s.sched.Run(context.TODO())
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test",
Modelfile: fmt.Sprintf(`FROM %s
TEMPLATE """
@@ -442,7 +442,7 @@ func TestGenerate(t *testing.T) {
})
t.Run("missing capabilities generate", func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "bert",
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
"general.architecture": "bert",
@@ -583,7 +583,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test", "Hi!")
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-system",
Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
})
@@ -652,7 +652,7 @@ func TestGenerate(t *testing.T) {
checkGenerateResponse(t, w.Body, "test-system", "Abra kadabra!")
})
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-suffix",
Modelfile: `FROM test
TEMPLATE """{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>

View File

@@ -31,13 +31,13 @@ func TestList(t *testing.T) {
var s Server
for _, n := range expectNames {
createRequest(t, s.CreateModelHandler, api.CreateRequest{
createRequest(t, s.CreateHandler, api.CreateRequest{
Name: n,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
})
}
w := createRequest(t, s.ListModelsHandler, nil)
w := createRequest(t, s.ListHandler, nil)
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}

View File

@@ -318,7 +318,7 @@ func TestCase(t *testing.T) {
var s Server
for _, tt := range cases {
t.Run(tt, func(t *testing.T) {
w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Name: tt,
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -334,7 +334,7 @@ func TestCase(t *testing.T) {
}
t.Run("create", func(t *testing.T) {
w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
w = createRequest(t, s.CreateHandler, api.CreateRequest{
Name: strings.ToUpper(tt),
Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
Stream: &stream,
@@ -350,7 +350,7 @@ func TestCase(t *testing.T) {
})
t.Run("pull", func(t *testing.T) {
w := createRequest(t, s.PullModelHandler, api.PullRequest{
w := createRequest(t, s.PullHandler, api.PullRequest{
Name: strings.ToUpper(tt),
Stream: &stream,
})
@@ -365,7 +365,7 @@ func TestCase(t *testing.T) {
})
t.Run("copy", func(t *testing.T) {
w := createRequest(t, s.CopyModelHandler, api.CopyRequest{
w := createRequest(t, s.CopyHandler, api.CopyRequest{
Source: tt,
Destination: strings.ToUpper(tt),
})
@@ -387,7 +387,7 @@ func TestShow(t *testing.T) {
var s Server
createRequest(t, s.CreateModelHandler, api.CreateRequest{
createRequest(t, s.CreateHandler, api.CreateRequest{
Name: "show-model",
Modelfile: fmt.Sprintf(
"FROM %s\nFROM %s",
@@ -396,7 +396,7 @@ func TestShow(t *testing.T) {
),
})
w := createRequest(t, s.ShowModelHandler, api.ShowRequest{
w := createRequest(t, s.ShowHandler, api.ShowRequest{
Name: "show-model",
})

View File

@@ -128,7 +128,8 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
}, []llm.Tensor{
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
}))
},
func(api.ProgressResponse) {}))
require.NoError(t, err)
fname := f.Name()