Compare commits


6 Commits

Author          SHA1        Message                 Date
Patrick Devine  cb576a6b23  fix ref                 2024-08-26 19:59:33 -07:00
Patrick Devine  15b7ff3a89  more comments           2024-08-26 19:56:45 -07:00
Patrick Devine  3ad243466b  comments                2024-08-26 19:54:06 -07:00
Patrick Devine  a13e583c49  cleanup whitespace      2024-08-26 18:09:21 -07:00
Patrick Devine  3c1994d0ee  small change            2024-08-26 18:07:59 -07:00
Patrick Devine  1b2da3829d  update the import docs  2024-08-26 18:04:46 -07:00
23 changed files with 82 additions and 1466 deletions

.gitattributes
View File

@@ -1,4 +1,3 @@
 llm/ext_server/* linguist-vendored
-llm/*.h linguist-vendored
 * text=auto
 *.go text eol=lf

View File

@@ -87,7 +87,7 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
 Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
@@ -99,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
 [Run]
-Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
+Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
 [UninstallRun]
 ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -134,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
 [Registry]
 Root: HKCU; Subkey: "Environment"; \
-    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
-    Check: NeedsAddPath('{app}')
+    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
+    Check: NeedsAddPath('{app}\bin')
 [Code]

View File

@@ -124,7 +124,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}

 	bars := make(map[string]*progress.Bar)
-	var quantizeSpin *progress.Spinner
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
@@ -137,15 +136,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			}

 			bar.Set(resp.Completed)
-		} else if strings.Contains(resp.Status, "quantizing") {
-			spinner.Stop()
-
-			if quantizeSpin != nil {
-				quantizeSpin.SetMessage(resp.Status)
-			} else {
-				quantizeSpin = progress.NewSpinner(resp.Status)
-				p.Add("quantize", quantizeSpin)
-			}
 		} else if status != resp.Status {
 			spinner.Stop()

Binary image file changed (not shown): 150 KiB before, 141 KiB after.

View File

@@ -158,7 +158,7 @@ You can share any model you have created by pushing it to [ollama.com](https://o
 First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.

-<img src="images/signup.png" alt="Sign-Up" width="40%">
+![Sign-Up](images/signup.png)

 The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
@@ -166,7 +166,7 @@ Now that you have created an account and are signed-in, go to the [Ollama Keys S
 Follow the directions on the page to determine where your Ollama Public Key is located.

-<img src="images/ollama-keys.png" alt="Ollama Keys" width="80%">
+![Ollama Key](images/ollama-keys.png)

 Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
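
For readers scripting the push step described in these docs, the same flow can be driven through the repo's Go API client rather than the CLI. A minimal sketch, assuming the `api` package's `ClientFromEnvironment` and `Push` helpers (not part of this diff) and a hypothetical model name:

```go
// Sketch: push a model to ollama.com with the Go API client (assumes a local
// server is running and the model name is prefixed with your username).
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// "jmorganca/mymodel" is a placeholder; use <your-username>/<model>.
	req := &api.PushRequest{Name: "jmorganca/mymodel"}

	progress := func(resp api.ProgressResponse) error {
		fmt.Println(resp.Status) // streamed upload status lines
		return nil
	}

	if err := client.Push(context.Background(), req, progress); err != nil {
		log.Fatal(err)
	}
}
```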

View File

@@ -28,11 +28,6 @@ Download and extract the Linux package:
 curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
 ```
-
-If you have an AMD GPU, also download and extract the ROCm package into the same location
-```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz | sudo tar zx -C /usr
-```
 ### Adding Ollama as a startup service (recommended)

 Create a user for Ollama:

View File

@@ -11,9 +11,8 @@ A model file is the blueprint to create and share models with Ollama.
 - [Examples](#examples)
 - [Instructions](#instructions)
 - [FROM (Required)](#from-required)
-  - [Build from llama3.1](#build-from-llama31)
-  - [Build from a Safetensors model](#build-from-a-safetensors-model)
-  - [Build from a GGUF file](#build-from-a-gguf-file)
+  - [Build from llama3](#build-from-llama3)
+  - [Build from a bin file](#build-from-a-bin-file)
 - [PARAMETER](#parameter)
 - [Valid Parameters and Values](#valid-parameters-and-values)
 - [TEMPLATE](#template)
@@ -100,39 +99,22 @@ The `FROM` instruction defines the base model to use when creating a model.
 FROM <model name>:<tag>
 ```
-#### Build from llama3.1
+#### Build from llama3

 ```modelfile
-FROM llama3.1
+FROM llama3
 ```

 A list of available base models:
 <https://github.com/ollama/ollama#model-library>
-Additional models can be found at:
-<https://ollama.com/library>

-#### Build from a Safetensors model
-
-```modelfile
-FROM <model directory>
-```
-
-The model directory should contain the Safetensors weights for a supported architecture.
-
-Currently supported model architectures:
-  * Llama (including Llama 2, Llama 3, and Llama 3.1)
-  * Mistral (including Mistral 1, Mistral 2, and Mixtral)
-  * Gemma (including Gemma 1 and Gemma 2)
-  * Phi3
-
-#### Build from a GGUF file
+#### Build from a `bin` file

 ```modelfile
 FROM ./ollama-model.bin
 ```

-The GGUF bin file location should be specified as an absolute path or relative to the `Modelfile` location.
+This bin file location should be specified as an absolute path or relative to the `Modelfile` location.

 ### PARAMETER
@@ -192,20 +174,7 @@ SYSTEM """<system message>"""
 ### ADAPTER

-The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from the behaviour will be erratic.
-
-#### Safetensor adapter
-
-```modelfile
-ADAPTER <path to safetensor adapter>
-```
-
-Currently supported Safetensor adapters:
-  * Llama (including Llama 2, Llama 3, and Llama 3.1)
-  * Mistral (including Mistral 1, Mistral 2, and Mixtral)
-  * Gemma (including Gemma 1 and Gemma 2)
-
-#### GGUF adapter
+The `ADAPTER` instruction is an optional instruction that specifies any LoRA adapter that should apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.

 ```modelfile
 ADAPTER ./ollama-lora.bin
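
A Modelfile using the `FROM` and `ADAPTER` instructions documented above can also be submitted programmatically. A minimal sketch with the Go API client's `Create` helper; the request fields mirror the ones exercised by the server tests later in this compare, and the model and adapter names are hypothetical:

```go
// Sketch: create a model from an in-memory Modelfile via the Go API client.
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Base model plus a LoRA adapter, per the ADAPTER docs above.
	modelfile := "FROM llama3\nADAPTER ./ollama-lora.bin"

	req := &api.CreateRequest{Name: "my-adapted-model", Modelfile: modelfile}
	progress := func(resp api.ProgressResponse) error {
		fmt.Println(resp.Status) // e.g. "creating model layer", "success"
		return nil
	}

	if err := client.Create(context.Background(), req, progress); err != nil {
		log.Fatal(err)
	}
}
```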

View File

@@ -190,7 +190,7 @@ func RunnersDir() (p string) {
 	}

 	var paths []string
-	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
+	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
 		paths = append(paths,
 			root,
 			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
@@ -282,12 +282,3 @@ func Values() map[string]string {
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
-
-// On windows, we keep the binary at the top directory, but
-// other platforms use a "bin" directory, so this returns ".."
-func LibRelativeToExe() string {
-	if runtime.GOOS == "windows" {
-		return "."
-	}
-	return ".."
-}
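
The deleted helper exists because the two install layouts used to differ; on the right-hand side of this compare the Windows binary moves into `bin\`, so `".."` becomes correct on every platform. An illustrative sketch of the path math (install locations are hypothetical, and forward slashes are used so the example runs anywhere):

```go
// Illustrative only: how the lib directory is derived from the executable
// location under the old split layout that LibRelativeToExe papered over.
package main

import (
	"fmt"
	"path/filepath"
)

// libRelativeToExe mirrors the deleted helper: Windows kept ollama.exe at the
// install root, while other platforms put it in a bin/ directory.
func libRelativeToExe(goos string) string {
	if goos == "windows" {
		return "."
	}
	return ".."
}

func main() {
	// Hypothetical install locations (a real Windows path uses backslashes).
	exes := map[string]string{
		"windows": "C:/Users/me/AppData/Local/Programs/Ollama/ollama.exe",
		"linux":   "/usr/bin/ollama",
	}
	for goos, exe := range exes {
		// Same expression as commonAMDValidateLibDir in this diff.
		dir := filepath.Join(filepath.Dir(exe), libRelativeToExe(goos), "lib", "ollama")
		fmt.Printf("%s: %s\n", goos, dir)
	}
	// windows: C:/Users/me/AppData/Local/Programs/Ollama/lib/ollama
	// linux:   /usr/lib/ollama
}
```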

View File

@@ -9,8 +9,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
@@ -56,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
 	// Installer payload location if we're running the installed binary
 	exe, err := os.Executable()
 	if err == nil {
-		rocmTargetDir := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
+		rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
 		if rocmLibUsable(rocmTargetDir) {
 			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
 			return rocmTargetDir, nil

View File

@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
 	// Installer payload (if we're running from some other location)
 	localAppData := os.Getenv("LOCALAPPDATA")
 	appDir := filepath.Join(localAppData, "Programs", "Ollama")
-	rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
+	rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
 	if rocmLibUsable(rocmTargetDir) {
 		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
 		return rocmTargetDir, nil

View File

@@ -653,7 +653,7 @@ func LibraryDir() string {
 		slog.Warn("failed to lookup working directory", "error", err)
 	}
 	// Scan for any of our dependeices, and pick first match
-	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
+	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
 		libDep := filepath.Join("lib", "ollama")
 		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
 			return filepath.Join(root, libDep)

File diff suppressed because it is too large.

View File

@@ -1,6 +1,6 @@
 package llm

-// #cgo CPPFLAGS: -Illama.cpp/ggml/include
+// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
 // #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
 // #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
 // #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
@@ -9,24 +9,12 @@ package llm
 // #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
 // #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
 // #include <stdlib.h>
-// #include <stdatomic.h>
 // #include "llama.h"
-// bool update_quantize_progress(float progress, void* data) {
-//   atomic_int* atomicData = (atomic_int*)data;
-//   int intProgress = *((int*)&progress);
-//   atomic_store(atomicData, intProgress);
-//   return true;
-// }
 import "C"

 import (
 	"errors"
-	"fmt"
-	"sync/atomic"
-	"time"
 	"unsafe"
-
-	"github.com/ollama/ollama/api"
 )

 // SystemInfo is an unused example of calling llama.cpp functions using CGo
@@ -34,49 +22,17 @@ func SystemInfo() string {
 	return C.GoString(C.llama_print_system_info())
 }

-func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse), tensorCount int) error {
+func Quantize(infile, outfile string, ftype fileType) error {
 	cinfile := C.CString(infile)
 	defer C.free(unsafe.Pointer(cinfile))

 	coutfile := C.CString(outfile)
 	defer C.free(unsafe.Pointer(coutfile))

 	params := C.llama_model_quantize_default_params()
 	params.nthread = -1
 	params.ftype = ftype.Value()

-	// Initialize "global" to store progress
-	store := (*int32)(C.malloc(C.sizeof_int))
-	defer C.free(unsafe.Pointer(store))
-
-	// Initialize store value, e.g., setting initial progress to 0
-	atomic.StoreInt32(store, 0)
-	params.quantize_callback_data = unsafe.Pointer(store)
-	params.quantize_callback = (C.llama_progress_callback)(C.update_quantize_progress)
-
-	ticker := time.NewTicker(30 * time.Millisecond)
-	done := make(chan struct{})
-	defer close(done)
-
-	go func() {
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ticker.C:
-				progressInt := atomic.LoadInt32(store)
-				progress := *(*float32)(unsafe.Pointer(&progressInt))
-				fn(api.ProgressResponse{
-					Status: fmt.Sprintf("quantizing model %d%%", 100*int(progress)/tensorCount),
-				})
-			case <-done:
-				fn(api.ProgressResponse{
-					Status: fmt.Sprintf("quantizing model 100%%"),
-				})
-				return
-			}
-		}
-	}()
-
 	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
 		return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
 	}
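
The removed plumbing passes a float32 progress value across the cgo boundary by storing its raw bits in an atomic int (written by the C callback) and reinterpreting them on the Go side (read by the ticker goroutine). A standalone sketch of the same bit-punning using `math.Float32bits`/`Float32frombits`; the values are hypothetical:

```go
// Sketch of the bit-reinterpretation trick used by the removed progress code:
// one side stores a float32's bits in an atomic int32, the other reads them
// back, avoiding any locking between the C callback and the Go reporter.
package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

func main() {
	var store int32 // stands in for the C-allocated atomic_int

	// Writer (the C callback in the real code): store the float's raw bits.
	progress := float32(42) // tensors processed so far, hypothetical
	atomic.StoreInt32(&store, int32(math.Float32bits(progress)))

	// Reader (the ticker goroutine): load the bits and rebuild the float.
	got := math.Float32frombits(uint32(atomic.LoadInt32(&store)))

	tensorCount := 292 // hypothetical total, passed in from the server
	fmt.Printf("quantizing model %d%%\n", 100*int(got)/tensorCount)
}
```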

View File

@@ -1,52 +0,0 @@
-From ed941590d59fc07b1ad21d6aa458588e47d1e446 Mon Sep 17 00:00:00 2001
-From: Josh Yan <jyan00017@gmail.com>
-Date: Wed, 10 Jul 2024 13:39:39 -0700
-Subject: [PATCH] quantize progress
-
----
- include/llama.h | 3 +++
- src/llama.cpp   | 8 ++++++++
- 2 files changed, 11 insertions(+)
-
-diff --git a/include/llama.h b/include/llama.h
-index bb4b05ba..613db68e 100644
---- a/include/llama.h
-+++ b/include/llama.h
-@@ -349,6 +349,9 @@ extern "C" {
-         bool keep_split;      // quantize to the same number of shards
-         void * imatrix;       // pointer to importance matrix data
-         void * kv_overrides;  // pointer to vector containing overrides
-+
-+        llama_progress_callback quantize_callback; // callback to report quantization progress
-+        void * quantize_callback_data;             // user data for the callback
-     } llama_model_quantize_params;
-
-     // grammar types
-
-diff --git a/src/llama.cpp b/src/llama.cpp
-index 2b9ace28..ac640c02 100644
---- a/src/llama.cpp
-+++ b/src/llama.cpp
-@@ -18252,6 +18252,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
-     const auto tn = LLM_TN(model.arch);
-     new_ofstream(0);
-     for (int i = 0; i < ml.n_tensors; ++i) {
-+        if (params->quantize_callback){
-+            if (!params->quantize_callback(i, params->quantize_callback_data)) {
-+                return;
-+            }
-+        }
-+
-         auto weight = ml.get_weight(i);
-         struct ggml_tensor * tensor = weight->tensor;
-         if (weight->idx != cur_split && params->keep_split) {
-@@ -18789,6 +18795,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
-         /*.keep_split   =*/ false,
-         /*.imatrix      =*/ nullptr,
-         /*.kv_overrides =*/ nullptr,
-+        /*.quantize_callback      =*/ nullptr,
-+        /*.quantize_callback_data =*/ nullptr,
-     };
-
-     return result;
---
-2.39.3 (Apple Git-146)

View File

@@ -122,8 +122,8 @@ function buildOllama() {
 		/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
 		if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 	}
-	New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
-	cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
+	New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
+	cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
 }
 function buildApp() {
function buildApp() { function buildApp() {

View File

@@ -435,14 +435,11 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 			return err
 		}

-		tensorCount := len(baseLayer.GGML.Tensors().Items)
 		ft := baseLayer.GGML.KV().FileType()
 		if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
 			return errors.New("quantization is only supported for F16 and F32 models")
 		} else if want != ft {
-			fn(api.ProgressResponse{
-				Status: "quantizing model tensors",
-			})
+			fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})

 			blob, err := GetBlobsPath(baseLayer.Digest)
 			if err != nil {
@@ -456,7 +453,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 			defer temp.Close()
 			defer os.Remove(temp.Name())

-			if err := llm.Quantize(blob, temp.Name(), want, fn, tensorCount); err != nil {
+			if err := llm.Quantize(blob, temp.Name(), want); err != nil {
 				return err
 			}

View File

@@ -463,7 +463,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, resp)
 }

-func (s *Server) PullHandler(c *gin.Context) {
+func (s *Server) PullModelHandler(c *gin.Context) {
 	var req api.PullRequest
 	err := c.ShouldBindJSON(&req)
 	switch {
@@ -513,7 +513,7 @@ func (s *Server) PullHandler(c *gin.Context) {
 	streamResponse(c, ch)
 }

-func (s *Server) PushHandler(c *gin.Context) {
+func (s *Server) PushModelHandler(c *gin.Context) {
 	var req api.PushRequest
 	err := c.ShouldBindJSON(&req)
 	switch {
@@ -577,7 +577,7 @@ func checkNameExists(name model.Name) error {
 	return nil
 }

-func (s *Server) CreateHandler(c *gin.Context) {
+func (s *Server) CreateModelHandler(c *gin.Context) {
 	var r api.CreateRequest
 	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -647,7 +647,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
 	streamResponse(c, ch)
 }

-func (s *Server) DeleteHandler(c *gin.Context) {
+func (s *Server) DeleteModelHandler(c *gin.Context) {
 	var r api.DeleteRequest
 	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -680,7 +680,7 @@ func (s *Server) DeleteHandler(c *gin.Context) {
 	}
 }

-func (s *Server) ShowHandler(c *gin.Context) {
+func (s *Server) ShowModelHandler(c *gin.Context) {
 	var req api.ShowRequest
 	err := c.ShouldBindJSON(&req)
 	switch {
@@ -829,7 +829,7 @@ func getKVData(digest string, verbose bool) (llm.KV, error) {
 	return kv, nil
 }

-func (s *Server) ListHandler(c *gin.Context) {
+func (s *Server) ListModelsHandler(c *gin.Context) {
 	ms, err := Manifests()
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -879,7 +879,7 @@ func (s *Server) ListHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, api.ListResponse{Models: models})
 }

-func (s *Server) CopyHandler(c *gin.Context) {
+func (s *Server) CopyModelHandler(c *gin.Context) {
 	var r api.CopyRequest
 	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -1081,33 +1081,33 @@ func (s *Server) GenerateRoutes() http.Handler {
 		allowedHostsMiddleware(s.addr),
 	)

-	r.POST("/api/pull", s.PullHandler)
+	r.POST("/api/pull", s.PullModelHandler)
 	r.POST("/api/generate", s.GenerateHandler)
 	r.POST("/api/chat", s.ChatHandler)
 	r.POST("/api/embed", s.EmbedHandler)
 	r.POST("/api/embeddings", s.EmbeddingsHandler)
-	r.POST("/api/create", s.CreateHandler)
-	r.POST("/api/push", s.PushHandler)
-	r.POST("/api/copy", s.CopyHandler)
-	r.DELETE("/api/delete", s.DeleteHandler)
-	r.POST("/api/show", s.ShowHandler)
+	r.POST("/api/create", s.CreateModelHandler)
+	r.POST("/api/push", s.PushModelHandler)
+	r.POST("/api/copy", s.CopyModelHandler)
+	r.DELETE("/api/delete", s.DeleteModelHandler)
+	r.POST("/api/show", s.ShowModelHandler)
 	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
 	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
-	r.GET("/api/ps", s.PsHandler)
+	r.GET("/api/ps", s.ProcessHandler)

 	// Compatibility endpoints
 	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
 	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
 	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
-	r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
-	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)
+	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
+	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)

 	for _, method := range []string{http.MethodGet, http.MethodHead} {
 		r.Handle(method, "/", func(c *gin.Context) {
 			c.String(http.StatusOK, "Ollama is running")
 		})

-		r.Handle(method, "/api/tags", s.ListHandler)
+		r.Handle(method, "/api/tags", s.ListModelsHandler)
 		r.Handle(method, "/api/version", func(c *gin.Context) {
 			c.JSON(http.StatusOK, gin.H{"version": version.Version})
 		})
@@ -1269,7 +1269,7 @@ func streamResponse(c *gin.Context, ch chan any) {
 	})
 }

-func (s *Server) PsHandler(c *gin.Context) {
+func (s *Server) ProcessHandler(c *gin.Context) {
 	models := []api.ProcessModelResponse{}
 	for _, v := range s.sched.loaded {

View File

@@ -93,7 +93,7 @@ func TestCreateFromBin(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -120,7 +120,7 @@ func TestCreateFromModel(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -134,7 +134,7 @@ func TestCreateFromModel(t *testing.T) {
 		filepath.Join(p, "manifests", "registry.ollama.ai", "library", "test", "latest"),
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
 		Modelfile: "FROM test",
 		Stream:    &stream,
@@ -162,7 +162,7 @@ func TestCreateRemovesLayers(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -182,7 +182,7 @@ func TestCreateRemovesLayers(t *testing.T) {
 		filepath.Join(p, "blobs", "sha256-bc80b03733773e0728011b2f4adf34c458b400e1aad48cb28d61170f3a2ad2d6"),
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -210,7 +210,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM Say hi!", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -230,7 +230,7 @@ func TestCreateUnsetsSystem(t *testing.T) {
 		filepath.Join(p, "blobs", "sha256-f29e82a8284dbdf5910b1555580ff60b04238b8da9d5e51159ada67a4d0d5851"),
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nSYSTEM \"\"", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -267,7 +267,7 @@ func TestCreateMergeParameters(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nPARAMETER temperature 1\nPARAMETER top_k 10\nPARAMETER stop USER:\nPARAMETER stop ASSISTANT:", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -288,7 +288,7 @@ func TestCreateMergeParameters(t *testing.T) {
 	})

 	// in order to merge parameters, the second model must be created FROM the first
-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
 		Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7",
 		Stream:    &stream,
@@ -326,7 +326,7 @@ func TestCreateMergeParameters(t *testing.T) {
 	}

 	// slices are replaced
-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
 		Modelfile: "FROM test\nPARAMETER temperature 0.6\nPARAMETER top_p 0.7\nPARAMETER stop <|endoftext|>",
 		Stream:    &stream,
@@ -371,7 +371,7 @@ func TestCreateReplacesMessages(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nMESSAGE assistant \"What is my purpose?\"\nMESSAGE user \"You run tests.\"\nMESSAGE assistant \"Oh, my god.\"", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -391,7 +391,7 @@ func TestCreateReplacesMessages(t *testing.T) {
 		filepath.Join(p, "blobs", "sha256-e0e27d47045063ccb167ae852c51d49a98eab33fabaee4633fdddf97213e40b5"),
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
 		Modelfile: "FROM test\nMESSAGE assistant \"You're a test, Harry.\"\nMESSAGE user \"I-I'm a what?\"\nMESSAGE assistant \"A test. And a thumping good one at that, I'd wager.\"",
 		Stream:    &stream,
@@ -448,7 +448,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt }}\nSYSTEM Say hello!\nTEMPLATE {{ .System }} {{ .Prompt }}\nSYSTEM Say bye!", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -488,7 +488,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 	}

 	t.Run("incomplete template", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      "test",
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .Prompt", createBinFile(t, nil, nil)),
 			Stream:    &stream,
@@ -500,7 +500,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 	})

 	t.Run("template with unclosed if", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      "test",
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ if .Prompt }}", createBinFile(t, nil, nil)),
 			Stream:    &stream,
@@ -512,7 +512,7 @@ func TestCreateTemplateSystem(t *testing.T) {
 	})

 	t.Run("template with undefined function", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      "test",
 			Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ Prompt }}", createBinFile(t, nil, nil)),
 			Stream:    &stream,
@@ -531,7 +531,7 @@ func TestCreateLicenses(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", p)

 	var s Server
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s\nLICENSE MIT\nLICENSE Apache-2.0", createBinFile(t, nil, nil)),
 		Stream:    &stream,
@@ -579,7 +579,7 @@ func TestCreateDetectTemplate(t *testing.T) {
 	var s Server

 	t.Run("matched", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name: "test",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"tokenizer.chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
@@ -600,7 +600,7 @@ func TestCreateDetectTemplate(t *testing.T) {
 	})

 	t.Run("unmatched", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      "test",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 			Stream:    &stream,
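
These tests drive the handlers through a `createRequest` helper that is defined in the test package but never shown in this compare. A hypothetical sketch of what such a helper might look like; the real implementation may differ:

```go
// Hypothetical sketch of the createRequest test helper: marshal the request
// body, invoke the handler directly on a gin test context, and return the
// recorder so the test can assert on status and body.
package server

import (
	"bytes"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/gin-gonic/gin"
)

func createRequest(t *testing.T, fn gin.HandlerFunc, body any) *httptest.ResponseRecorder {
	t.Helper()

	b, err := json.Marshal(body)
	if err != nil {
		t.Fatal(err)
	}

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodPost, "/", bytes.NewReader(b))
	c.Request.Header.Set("Content-Type", "application/json")

	fn(c) // no HTTP server needed; the handler runs in-process
	return w
}
```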

View File

@@ -22,7 +22,7 @@ func TestDelete(t *testing.T) {
 	var s Server

-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test",
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 	})
@@ -31,7 +31,7 @@ func TestDelete(t *testing.T) {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
 	}

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name:      "test2",
 		Modelfile: fmt.Sprintf("FROM %s\nTEMPLATE {{ .System }} {{ .Prompt }}", createBinFile(t, nil, nil)),
 	})
@@ -52,7 +52,7 @@ func TestDelete(t *testing.T) {
 		filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
 	})

-	w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
+	w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -68,7 +68,7 @@ func TestDelete(t *testing.T) {
 		filepath.Join(p, "blobs", "sha256-fe7ac77b725cda2ccad03f88a880ecdfd7a33192d6cae08fce2c0ee1455991ed"),
 	})

-	w = createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test2"})
+	w = createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test2"})

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
@@ -102,7 +102,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
 		t.Fatal(err)
 	}

-	w := createRequest(t, s.DeleteHandler, api.DeleteRequest{Name: "test"})
+	w := createRequest(t, s.DeleteModelHandler, api.DeleteRequest{Name: "test"})
 	if w.Code != http.StatusOK {
 		t.Errorf("expected status code 200, actual %d", w.Code)
 	}

View File

@@ -84,7 +84,7 @@ func TestGenerateChat(t *testing.T) {
 	go s.sched.Run(context.TODO())

-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Model: "test",
 		Modelfile: fmt.Sprintf(`FROM %s
 TEMPLATE """
@@ -144,7 +144,7 @@ func TestGenerateChat(t *testing.T) {
 	})

 	t.Run("missing capabilities chat", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Model: "bert",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
@@ -270,7 +270,7 @@ func TestGenerateChat(t *testing.T) {
 		checkChatResponse(t, w.Body, "test", "Hi!")
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Model:     "test-system",
 		Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
 	})
@@ -382,7 +382,7 @@ func TestGenerate(t *testing.T) {
 	go s.sched.Run(context.TODO())

-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Model: "test",
 		Modelfile: fmt.Sprintf(`FROM %s
 TEMPLATE """
@@ -442,7 +442,7 @@ func TestGenerate(t *testing.T) {
 	})

 	t.Run("missing capabilities generate", func(t *testing.T) {
-		w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Model: "bert",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
@@ -583,7 +583,7 @@ func TestGenerate(t *testing.T) {
 		checkGenerateResponse(t, w.Body, "test", "Hi!")
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Model:     "test-system",
 		Modelfile: "FROM test\nSYSTEM You are a helpful assistant.",
 	})
@@ -652,7 +652,7 @@ func TestGenerate(t *testing.T) {
 		checkGenerateResponse(t, w.Body, "test-system", "Abra kadabra!")
 	})

-	w = createRequest(t, s.CreateHandler, api.CreateRequest{
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Model: "test-suffix",
 		Modelfile: `FROM test
 TEMPLATE """{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>

View File

@@ -31,13 +31,13 @@ func TestList(t *testing.T) {
 	var s Server

 	for _, n := range expectNames {
-		createRequest(t, s.CreateHandler, api.CreateRequest{
+		createRequest(t, s.CreateModelHandler, api.CreateRequest{
 			Name:      n,
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 		})
 	}

-	w := createRequest(t, s.ListHandler, nil)
+	w := createRequest(t, s.ListModelsHandler, nil)
 	if w.Code != http.StatusOK {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
 	}

View File

@@ -318,7 +318,7 @@ func TestCase(t *testing.T) {
 	var s Server
 	for _, tt := range cases {
 		t.Run(tt, func(t *testing.T) {
-			w := createRequest(t, s.CreateHandler, api.CreateRequest{
+			w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
 				Name:      tt,
 				Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 				Stream:    &stream,
@@ -334,7 +334,7 @@ func TestCase(t *testing.T) {
 			}

 			t.Run("create", func(t *testing.T) {
-				w = createRequest(t, s.CreateHandler, api.CreateRequest{
+				w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
 					Name:      strings.ToUpper(tt),
 					Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, nil, nil)),
 					Stream:    &stream,
@@ -350,7 +350,7 @@ func TestCase(t *testing.T) {
 			})

 			t.Run("pull", func(t *testing.T) {
-				w := createRequest(t, s.PullHandler, api.PullRequest{
+				w := createRequest(t, s.PullModelHandler, api.PullRequest{
 					Name:   strings.ToUpper(tt),
 					Stream: &stream,
 				})
@@ -365,7 +365,7 @@ func TestCase(t *testing.T) {
 			})

 			t.Run("copy", func(t *testing.T) {
-				w := createRequest(t, s.CopyHandler, api.CopyRequest{
+				w := createRequest(t, s.CopyModelHandler, api.CopyRequest{
 					Source:      tt,
 					Destination: strings.ToUpper(tt),
 				})
@@ -387,7 +387,7 @@ func TestShow(t *testing.T) {
 	var s Server

-	createRequest(t, s.CreateHandler, api.CreateRequest{
+	createRequest(t, s.CreateModelHandler, api.CreateRequest{
 		Name: "show-model",
 		Modelfile: fmt.Sprintf(
 			"FROM %s\nFROM %s",
@@ -396,7 +396,7 @@ func TestShow(t *testing.T) {
 		),
 	})

-	w := createRequest(t, s.ShowHandler, api.ShowRequest{
+	w := createRequest(t, s.ShowModelHandler, api.ShowRequest{
 		Name: "show-model",
 	})