From 269ed6e6a2cea822ab137d40d5c70c8bf09470f8 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 17 Jun 2024 10:38:55 -0700 Subject: [PATCH 1/7] update message processing --- server/images.go | 17 +- server/prompt.go | 241 ++++-------------- server/prompt_test.go | 317 ++++++++++++------------ server/routes.go | 508 ++++++++++++-------------------------- template/template.go | 169 ++++++++++++- template/template_test.go | 153 +++++++++++- 6 files changed, 685 insertions(+), 720 deletions(-) diff --git a/server/images.go b/server/images.go index a62991f16..688d5dcae 100644 --- a/server/images.go +++ b/server/images.go @@ -34,6 +34,8 @@ import ( "github.com/ollama/ollama/version" ) +var errCapabilityCompletion = errors.New("completion") + type Capability string const CapabilityCompletion = Capability("completion") @@ -62,7 +64,10 @@ type Model struct { Template *template.Template } -func (m *Model) Has(caps ...Capability) bool { +// CheckCapabilities checks if the model has the specified capabilities returning an error describing +// any missing or unknown capabilities +func (m *Model) CheckCapabilities(caps ...Capability) error { + var errs []error for _, cap := range caps { switch cap { case CapabilityCompletion: @@ -81,15 +86,19 @@ func (m *Model) Has(caps ...Capability) bool { } if _, ok := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]; ok { - return false + errs = append(errs, errCapabilityCompletion) } default: slog.Error("unknown capability", "capability", cap) - return false + return fmt.Errorf("unknown capability: %s", cap) } } - return true + if err := errors.Join(errs...); err != nil { + return fmt.Errorf("missing capabilities: %w", errors.Join(errs...)) + } + + return nil } func (m *Model) String() string { diff --git a/server/prompt.go b/server/prompt.go index bfc319a50..5016fbe14 100644 --- a/server/prompt.go +++ b/server/prompt.go @@ -1,217 +1,74 @@ package server import ( - "fmt" + "bytes" + "context" "log/slog" - "strings" - - "text/template/parse" + "slices" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/llm" "github.com/ollama/ollama/template" ) -// isResponseNode checks if the node contains .Response -func isResponseNode(node *parse.ActionNode) bool { - for _, cmd := range node.Pipe.Cmds { - for _, arg := range cmd.Args { - if fieldNode, ok := arg.(*parse.FieldNode); ok && len(fieldNode.Ident) > 0 { - if fieldNode.Ident[0] == "Response" { - return true - } - } +func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) { + // extract system messages which should always be included + var system []api.Message + msgs = slices.DeleteFunc(msgs, func(m api.Message) bool { + if m.Role == "system" { + system = append(system, m) + return true } - } - return false -} -// formatTemplateForResponse formats the template AST to: -// 1. remove all nodes after the first .Response (if generate=true) -// 2. add a .Response node to the end if it doesn't exist -// TODO(jmorganca): this should recursively cut the template before the first .Response -func formatTemplateForResponse(tmpl *template.Template, generate bool) { - var found bool - for i, node := range tmpl.Tree.Root.Nodes { - if actionNode, ok := node.(*parse.ActionNode); ok { - if isResponseNode(actionNode) { - found = true - if generate { - tmpl.Tree.Root.Nodes = tmpl.Tree.Root.Nodes[:i+1] - break - } - } + return false + }) + + if len(system) == 0 && r.model.System != "" { + // add model system prompt since it wasn't provided + system = append(system, api.Message{Role: "system", Content: r.model.System}) + } + + n := len(msgs) - 1 + for i := n - 1; i >= 0; i-- { + var b bytes.Buffer + if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil { + return "", nil, err } - } - if !found { - // add the response node if it doesn't exist - responseFieldNode := &parse.FieldNode{NodeType: parse.NodeField, Ident: []string{"Response"}} - responsePipeNode := &parse.PipeNode{NodeType: parse.NodePipe, Cmds: []*parse.CommandNode{{NodeType: parse.NodeCommand, Args: []parse.Node{responseFieldNode}}}} - responseActionNode := &parse.ActionNode{NodeType: parse.NodeAction, Pipe: responsePipeNode} - tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, responseActionNode) - } -} - -// Prompt renders a prompt from a template. If generate is set to true, -// the response and parts of the template following it are not rendered -func Prompt(tmpl *template.Template, system, prompt, response string, generate bool) (string, error) { - formatTemplateForResponse(tmpl, generate) - - vars := map[string]any{ - "System": system, - "Prompt": prompt, - "Response": response, - } - - var sb strings.Builder - if err := tmpl.Execute(&sb, vars); err != nil { - return "", err - } - - return sb.String(), nil -} - -func countTokens(tmpl *template.Template, system string, prompt string, response string, encode func(string) ([]int, error)) (int, error) { - rendered, err := Prompt(tmpl, system, prompt, response, false) - if err != nil { - return 0, err - } - - tokens, err := encode(rendered) - if err != nil { - slog.Error("failed to encode prompt", "err", err) - return 0, err - } - - return len(tokens), err -} - -// ChatPrompt builds up a prompt from a series of messages, truncating based on context window size -func ChatPrompt(tmpl *template.Template, messages []api.Message, window int, encode func(string) ([]int, error)) (string, error) { - type prompt struct { - System string - Prompt string - Response string - - images []int - tokens int - } - - var p prompt - - // iterate through messages to build up {system,user,response} prompts - var imgId int - var prompts []prompt - for _, msg := range messages { - switch strings.ToLower(msg.Role) { - case "system": - if p.System != "" || p.Prompt != "" || p.Response != "" { - prompts = append(prompts, p) - p = prompt{} - } - - p.System = msg.Content - case "user": - if p.Prompt != "" || p.Response != "" { - prompts = append(prompts, p) - p = prompt{} - } - - var sb strings.Builder - for range msg.Images { - fmt.Fprintf(&sb, "[img-%d] ", imgId) - p.images = append(p.images, imgId) - imgId += 1 - } - - sb.WriteString(msg.Content) - p.Prompt = sb.String() - case "assistant": - if p.Response != "" { - prompts = append(prompts, p) - p = prompt{} - } - - p.Response = msg.Content - default: - return "", fmt.Errorf("invalid role: %s, role must be one of [system, user, assistant]", msg.Role) - } - } - - // add final prompt - if p.System != "" || p.Prompt != "" || p.Response != "" { - prompts = append(prompts, p) - } - - // calculate token lengths for each prompt, estimating 768 tokens per images - for i, p := range prompts { - tokens, err := countTokens(tmpl, p.System, p.Prompt, p.Response, encode) + s, err := r.llama.Tokenize(ctx, b.String()) if err != nil { - return "", err + return "", nil, err } - prompts[i].tokens = tokens + len(prompts[i].images)*768 - } - - // truncate images and prompts starting from the beginning of the list - // until either one prompt remains or the total tokens fits the context window - // TODO (jmorganca): this doesn't account for the context window room required for the response - for { - var required int - for _, p := range prompts { - required += p.tokens + c := len(s) + if r.model.ProjectorPaths != nil { + for _, m := range msgs[i:] { + // TODO: get image embedding length from project metadata + c += 768 * len(m.Images) + } } - required += 1 // for bos token - - if required <= window { - slog.Debug("prompt now fits in context window", "required", required, "window", window) + if c > r.NumCtx { + slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:])) break + } else { + n = i } - - prompt := &prompts[0] - - if len(prompt.images) > 1 { - img := prompt.images[0] - slog.Debug("prompt longer than context window, removing image", "id", img, "required", required, "window", window) - prompt.images = prompt.images[1:] - prompt.Prompt = strings.Replace(prompt.Prompt, fmt.Sprintf(" [img-%d]", img), "", 1) - prompt.tokens -= 768 - continue - } - - if len(prompts) > 1 { - slog.Debug("required tokens longer than context window, removing first prompt", "prompt", prompts[0].tokens, "required", required, "window", window) - system := prompt.System - prompts = prompts[1:] - - if system != "" && prompts[0].System == "" { - prompts[0].System = system - - tokens, err := countTokens(tmpl, prompts[0].System, prompts[0].Prompt, prompts[0].Response, encode) - if err != nil { - return "", err - } - - prompts[0].tokens = tokens + len(prompts[0].images)*768 - } - - continue - } - - // stop truncating if there's only one prompt left - break } - var sb strings.Builder - for i, p := range prompts { - // last prompt should leave the response unrendered (for completion) - rendered, err := Prompt(tmpl, p.System, p.Prompt, p.Response, i == len(prompts)-1) - if err != nil { - return "", err - } - sb.WriteString(rendered) + var b bytes.Buffer + if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil { + return "", nil, err } - return sb.String(), nil + for _, m := range msgs[n:] { + for _, i := range m.Images { + images = append(images, llm.ImageData{ + ID: len(images), + Data: i, + }) + } + } + + return b.String(), images, nil } diff --git a/server/prompt_test.go b/server/prompt_test.go index 7df58d0bd..59288b46c 100644 --- a/server/prompt_test.go +++ b/server/prompt_test.go @@ -1,215 +1,214 @@ package server import ( + "bytes" + "context" "strings" "testing" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/llm" "github.com/ollama/ollama/template" ) -func TestPrompt(t *testing.T) { - tests := []struct { - name string - template string - system string - prompt string - response string - generate bool - want string - }{ - { - name: "simple prompt", - template: "[INST] {{ .System }} {{ .Prompt }} [/INST]", - system: "You are a Wizard.", - prompt: "What are the potion ingredients?", - want: "[INST] You are a Wizard. What are the potion ingredients? [/INST]", - }, - { - name: "implicit response", - template: "[INST] {{ .System }} {{ .Prompt }} [/INST]", - system: "You are a Wizard.", - prompt: "What are the potion ingredients?", - response: "I don't know.", - want: "[INST] You are a Wizard. What are the potion ingredients? [/INST]I don't know.", - }, - { - name: "response", - template: "[INST] {{ .System }} {{ .Prompt }} [/INST] {{ .Response }}", - system: "You are a Wizard.", - prompt: "What are the potion ingredients?", - response: "I don't know.", - want: "[INST] You are a Wizard. What are the potion ingredients? [/INST] I don't know.", - }, - { - name: "cut", - template: "{{ .System }}{{ .Prompt }}{{ .Response }}", - system: "You are a Wizard.", - prompt: "What are the potion ingredients?", - response: "I don't know.", - generate: true, - want: "You are a Wizard.What are the potion ingredients?I don't know.", - }, - { - name: "nocut", - template: "{{ .System }}{{ .Prompt }}{{ .Response }}", - system: "You are a Wizard.", - prompt: "What are the potion ingredients?", - response: "I don't know.", - want: "You are a Wizard.What are the potion ingredients?I don't know.", - }, +type mock struct { + llm.LlamaServer +} + +func (m mock) Tokenize(_ context.Context, s string) (tokens []int, err error) { + for range strings.Fields(s) { + tokens = append(tokens, len(tokens)) } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tmpl, err := template.Parse(tc.template) - if err != nil { - t.Fatal(err) - } - - got, err := Prompt(tmpl, tc.system, tc.prompt, tc.response, tc.generate) - if err != nil { - t.Errorf("error = %v", err) - } - - if got != tc.want { - t.Errorf("got = %v, want %v", got, tc.want) - } - }) - } + return } func TestChatPrompt(t *testing.T) { - tests := []struct { - name string - template string - messages []api.Message - window int - want string + type expect struct { + prompt string + images [][]byte + } + + cases := []struct { + name string + limit int + msgs []api.Message + expect }{ { - name: "simple prompt", - template: "[INST] {{ .Prompt }} [/INST]", - messages: []api.Message{ - {Role: "user", Content: "Hello"}, + name: "messages", + limit: 64, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager."}, + }, + expect: expect{ + prompt: "You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ", }, - window: 1024, - want: "[INST] Hello [/INST]", }, { - name: "with system message", - template: "[INST] {{ if .System }}<>{{ .System }}<> {{ end }}{{ .Prompt }} [/INST]", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello"}, + name: "truncate messages", + limit: 1, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager."}, + }, + expect: expect{ + prompt: "A test. And a thumping good one at that, I'd wager. ", }, - window: 1024, - want: "[INST] <>You are a Wizard.<> Hello [/INST]", }, { - name: "with response", - template: "[INST] {{ if .System }}<>{{ .System }}<> {{ end }}{{ .Prompt }} [/INST] {{ .Response }}", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello"}, - {Role: "assistant", Content: "I am?"}, + name: "truncate messages with image", + limit: 64, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("something")}}, + }, + expect: expect{ + prompt: "[img-0] A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("something"), + }, }, - window: 1024, - want: "[INST] <>You are a Wizard.<> Hello [/INST] I am?", }, { - name: "with implicit response", - template: "[INST] {{ if .System }}<>{{ .System }}<> {{ end }}{{ .Prompt }} [/INST]", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello"}, - {Role: "assistant", Content: "I am?"}, + name: "truncate messages with images", + limit: 64, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}}, + }, + expect: expect{ + prompt: "[img-0] A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("somethingelse"), + }, }, - window: 1024, - want: "[INST] <>You are a Wizard.<> Hello [/INST]I am?", }, { - name: "with conversation", - template: "[INST] {{ if .System }}<>{{ .System }}<> {{ end }}{{ .Prompt }} [/INST] {{ .Response }} ", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "What are the potion ingredients?"}, - {Role: "assistant", Content: "sugar"}, - {Role: "user", Content: "Anything else?"}, + name: "messages with images", + limit: 2048, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}}, + }, + expect: expect{ + prompt: "[img-0] You're a test, Harry! I-I'm a what? [img-1] A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("something"), + []byte("somethingelse"), + }, }, - window: 1024, - want: "[INST] <>You are a Wizard.<> What are the potion ingredients? [/INST] sugar [INST] Anything else? [/INST] ", }, { - name: "with truncation", - template: "{{ .System }} {{ .Prompt }} {{ .Response }} ", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello"}, - {Role: "assistant", Content: "I am?"}, - {Role: "user", Content: "Why is the sky blue?"}, - {Role: "assistant", Content: "The sky is blue from rayleigh scattering"}, + name: "message with image tag", + limit: 2048, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}}, + }, + expect: expect{ + prompt: "You're a test, Harry! [img-0] I-I'm a what? [img-1] A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("something"), + []byte("somethingelse"), + }, }, - window: 10, - want: "You are a Wizard. Why is the sky blue? The sky is blue from rayleigh scattering", }, { - name: "images", - template: "{{ .System }} {{ .Prompt }}", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("base64")}}, + name: "messages with interleaved images", + limit: 2048, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "user", Images: []api.ImageData{[]byte("something")}}, + {Role: "user", Images: []api.ImageData{[]byte("somethingelse")}}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager."}, + }, + expect: expect{ + prompt: "You're a test, Harry!\n\n[img-0]\n\n[img-1] I-I'm a what? A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("something"), + []byte("somethingelse"), + }, }, - window: 1024, - want: "You are a Wizard. [img-0] Hello", }, { - name: "images truncated", - template: "{{ .System }} {{ .Prompt }}", - messages: []api.Message{ - {Role: "system", Content: "You are a Wizard."}, - {Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("img1"), []byte("img2")}}, + name: "truncate message with interleaved images", + limit: 1024, + msgs: []api.Message{ + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "user", Images: []api.ImageData{[]byte("something")}}, + {Role: "user", Images: []api.ImageData{[]byte("somethingelse")}}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager."}, + }, + expect: expect{ + prompt: "[img-0] I-I'm a what? A test. And a thumping good one at that, I'd wager. ", + images: [][]byte{ + []byte("somethingelse"), + }, }, - window: 1024, - want: "You are a Wizard. [img-0] [img-1] Hello", }, { - name: "empty list", - template: "{{ .System }} {{ .Prompt }}", - messages: []api.Message{}, - window: 1024, - want: "", - }, - { - name: "empty prompt", - template: "[INST] {{ if .System }}<>{{ .System }}<> {{ end }}{{ .Prompt }} [/INST] {{ .Response }} ", - messages: []api.Message{ - {Role: "user", Content: ""}, + name: "message with system prompt", + limit: 2048, + msgs: []api.Message{ + {Role: "system", Content: "You are the Test Who Lived."}, + {Role: "user", Content: "You're a test, Harry!"}, + {Role: "assistant", Content: "I-I'm a what?"}, + {Role: "user", Content: "A test. And a thumping good one at that, I'd wager."}, + }, + expect: expect{ + prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ", }, - window: 1024, - want: "", }, } - encode := func(s string) ([]int, error) { - words := strings.Fields(s) - return make([]int, len(words)), nil + tmpl, err := template.Parse(` +{{- if .System }}{{ .System }} {{ end }} +{{- if .Prompt }}{{ .Prompt }} {{ end }} +{{- if .Response }}{{ .Response }} {{ end }}`) + if err != nil { + t.Fatal(err) } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tmpl, err := template.Parse(tc.template) + for _, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + r := runnerRef{ + llama: mock{}, + model: &Model{Template: tmpl, ProjectorPaths: []string{"vision"}}, + Options: &api.Options{}, + } + + r.NumCtx = tt.limit + prompt, images, err := chatPrompt(context.TODO(), &r, tt.msgs) if err != nil { t.Fatal(err) } - got, err := ChatPrompt(tmpl, tc.messages, tc.window, encode) - if err != nil { - t.Errorf("error = %v", err) + if tt.prompt != prompt { + t.Errorf("expected %q, got %q", tt.prompt, prompt) } - if got != tc.want { - t.Errorf("got: %q, want: %q", got, tc.want) + if len(images) != len(tt.images) { + t.Fatalf("expected %d images, got %d", len(tt.images), len(images)) + } + + for i := range images { + if images[i].ID != i { + t.Errorf("expected ID %d, got %d", i, images[i].ID) + } + + if !bytes.Equal(images[i].Data, tt.images[i]) { + t.Errorf("expected %q, got %q", tt.images[i], images[i]) + } } }) } diff --git a/server/routes.go b/server/routes.go index ac6b713a7..35e64511b 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1,13 +1,13 @@ package server import ( + "bytes" "cmp" "context" "encoding/json" "errors" "fmt" "io" - "io/fs" "log/slog" "net" "net/http" @@ -67,163 +67,140 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options return opts, nil } -func isSupportedImageType(image []byte) bool { - contentType := http.DetectContentType(image) - allowedTypes := []string{"image/jpeg", "image/jpg", "image/png"} - return slices.Contains(allowedTypes, contentType) +func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) { + if name == "" { + return nil, errors.New("model is required") + } + + model, err := GetModel(name) + if err != nil { + return nil, err + } + + if err := model.CheckCapabilities(caps...); err != nil { + return nil, fmt.Errorf("%s %w", name, err) + } + + opts, err := modelOptions(model, requestOpts) + if err != nil { + return nil, err + } + + runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive) + var runner *runnerRef + select { + case runner = <-runnerCh: + case err = <-errCh: + return nil, err + } + + return runner, nil } func (s *Server) GenerateHandler(c *gin.Context) { - checkpointStart := time.Now() var req api.GenerateRequest - err := c.ShouldBindJSON(&req) - - switch { - case errors.Is(err, io.EOF): + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) return - case err != nil: + } else if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } - // validate the request - switch { - case req.Model == "": - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"}) + if req.Format != "" && req.Format != "json" { + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""}) return - case len(req.Format) > 0 && req.Format != "json": - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"}) - return - case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0): + } else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"}) return } - for _, img := range req.Images { - if !isSupportedImageType(img) { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"}) - return - } - } - - model, err := GetModel(req.Model) - if err != nil { - var pErr *fs.PathError - if errors.As(err, &pErr) { - c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)}) - return - } - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + caps := []Capability{CapabilityCompletion} + r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) + if errors.Is(err, errCapabilityCompletion) { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)}) + return + } else if err != nil { + handleScheduleError(c, err) return } - if !model.Has(CapabilityCompletion) { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support generate", req.Model)}) - return + images := make([]llm.ImageData, len(req.Images)) + for i := range req.Images { + images[i] = llm.ImageData{ID: i, Data: req.Images[i]} } - opts, err := modelOptions(model, req.Options) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive) - var runner *runnerRef - select { - case runner = <-rCh: - case err = <-eCh: - handleErrorResponse(c, err) - return - } - - // an empty request loads the model - // note: for a short while template was used in lieu - // of `raw` mode so we need to check for it too - if req.Prompt == "" && req.Template == "" && req.System == "" { - c.JSON(http.StatusOK, api.GenerateResponse{ - CreatedAt: time.Now().UTC(), - Model: req.Model, - Done: true, - DoneReason: "load", - }) - return - } - - tmpl, err := template.Parse(req.Template) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - checkpointLoaded := time.Now() - - var prompt string - switch { - case req.Raw: - prompt = req.Prompt - case req.Prompt != "": - if req.Template == "" { - tmpl = model.Template + prompt := req.Prompt + if !req.Raw { + var msgs []api.Message + if req.System != "" { + msgs = append(msgs, api.Message{Role: "system", Content: req.System}) + } else if r.model.System != "" { + msgs = append(msgs, api.Message{Role: "system", Content: r.model.System}) } - if req.System == "" { - req.System = model.System + if req.Prompt != "" { + for _, i := range images { + msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)}) + } + + msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt}) } - slog.Debug("generate handler", "prompt", req.Prompt) - slog.Debug("generate handler", "template", req.Template) - slog.Debug("generate handler", "system", req.System) - - var sb strings.Builder - for i := range req.Images { - fmt.Fprintf(&sb, "[img-%d] ", i) - } - - sb.WriteString(req.Prompt) - - p, err := Prompt(tmpl, req.System, sb.String(), "", true) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + if len(msgs) == 0 { + c.JSON(http.StatusOK, api.GenerateResponse{ + Model: req.Model, + CreatedAt: time.Now().UTC(), + Done: true, + DoneReason: "load", + }) return } - sb.Reset() + tmpl := r.model.Template + if req.Template != "" { + tmpl, err = template.Parse(req.Template) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + } + + var b bytes.Buffer if req.Context != nil { - prev, err := runner.llama.Detokenize(c.Request.Context(), req.Context) + s, err := r.llama.Detokenize(c.Request.Context(), req.Context) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return } - sb.WriteString(prev) + b.WriteString(s) } - sb.WriteString(p) + if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } - prompt = sb.String() + prompt = b.String() } - slog.Debug("generate handler", "prompt", prompt) + slog.Debug("generate request", "prompt", prompt, "images", images) ch := make(chan any) - var generated strings.Builder go func() { defer close(ch) - - fn := func(r llm.CompletionResponse) { - // Build up the full response - if _, err := generated.WriteString(r.Content); err != nil { - ch <- gin.H{"error": err.Error()} - return - } - - resp := api.GenerateResponse{ + if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{ + Prompt: prompt, + Images: images, + Format: req.Format, + Options: *r.Options, + }, func(r llm.CompletionResponse) { + ch <- api.GenerateResponse{ Model: req.Model, CreatedAt: time.Now().UTC(), - Done: r.Done, Response: r.Content, + Done: r.Done, DoneReason: r.DoneReason, Metrics: api.Metrics{ PromptEvalCount: r.PromptEvalCount, @@ -232,77 +209,35 @@ func (s *Server) GenerateHandler(c *gin.Context) { EvalDuration: r.EvalDuration, }, } - - if r.Done { - resp.TotalDuration = time.Since(checkpointStart) - resp.LoadDuration = checkpointLoaded.Sub(checkpointStart) - - if !req.Raw { - p, err := Prompt(tmpl, req.System, req.Prompt, generated.String(), false) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - // TODO (jmorganca): encode() should not strip special tokens - tokens, err := runner.llama.Tokenize(c.Request.Context(), p) - if err != nil { - ch <- gin.H{"error": err.Error()} - return - } - - resp.Context = append(req.Context, tokens...) - } - } - - ch <- resp - } - - var images []llm.ImageData - for i := range req.Images { - images = append(images, llm.ImageData{ - ID: i, - Data: req.Images[i], - }) - } - - // Start prediction - req := llm.CompletionRequest{ - Prompt: prompt, - Format: req.Format, - Images: images, - Options: opts, - } - if err := runner.llama.Completion(c.Request.Context(), req, fn); err != nil { + }); err != nil { ch <- gin.H{"error": err.Error()} } }() if req.Stream != nil && !*req.Stream { - // Accumulate responses into the final response - var final api.GenerateResponse + var r api.GenerateResponse var sb strings.Builder - for resp := range ch { - switch r := resp.(type) { + for rr := range ch { + switch t := rr.(type) { case api.GenerateResponse: - sb.WriteString(r.Response) - final = r + sb.WriteString(t.Response) + r = t case gin.H: - if errorMsg, ok := r["error"].(string); ok { - c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg}) - return - } else { - c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"}) - return + msg, ok := t["error"].(string) + if !ok { + msg = "unexpected error format in response" } + + c.JSON(http.StatusInternalServerError, gin.H{"error": msg}) + return default: - c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"}) + c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"}) return } } - final.Response = sb.String() - c.JSON(http.StatusOK, final) + r.Response = sb.String() + c.JSON(http.StatusOK, r) return } @@ -311,44 +246,17 @@ func (s *Server) GenerateHandler(c *gin.Context) { func (s *Server) EmbeddingsHandler(c *gin.Context) { var req api.EmbeddingRequest - err := c.ShouldBindJSON(&req) - switch { - case errors.Is(err, io.EOF): + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) return - case err != nil: + } else if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } - if req.Model == "" { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"}) - return - } - - model, err := GetModel(req.Model) + r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) if err != nil { - var pErr *fs.PathError - if errors.As(err, &pErr) { - c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)}) - return - } - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - opts, err := modelOptions(model, req.Options) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive) - var runner *runnerRef - select { - case runner = <-rCh: - case err = <-eCh: - handleErrorResponse(c, err) + handleScheduleError(c, err) return } @@ -358,17 +266,14 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { return } - embedding, err := runner.llama.Embedding(c.Request.Context(), req.Prompt) + embedding, err := r.llama.Embedding(c.Request.Context(), req.Prompt) if err != nil { slog.Info(fmt.Sprintf("embedding generation failed: %v", err)) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) return } - resp := api.EmbeddingResponse{ - Embedding: embedding, - } - c.JSON(http.StatusOK, resp) + c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: embedding}) } func (s *Server) PullModelHandler(c *gin.Context) { @@ -649,9 +554,9 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { } } - msgs := make([]api.Message, 0) - for _, msg := range m.Messages { - msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content}) + msgs := make([]api.Message, len(m.Messages)) + for i, msg := range m.Messages { + msgs[i] = api.Message{Role: msg.Role, Content: msg.Content} } n := model.ParseName(req.Model) @@ -1214,132 +1119,55 @@ func (s *Server) ProcessHandler(c *gin.Context) { c.JSON(http.StatusOK, api.ProcessResponse{Models: models}) } -// ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model -func chatPrompt(ctx context.Context, runner *runnerRef, template *template.Template, messages []api.Message, numCtx int) (string, error) { - encode := func(s string) ([]int, error) { - return runner.llama.Tokenize(ctx, s) - } - - prompt, err := ChatPrompt(template, messages, numCtx, encode) - if err != nil { - return "", err - } - - return prompt, nil -} - func (s *Server) ChatHandler(c *gin.Context) { - checkpointStart := time.Now() - var req api.ChatRequest - err := c.ShouldBindJSON(&req) - switch { - case errors.Is(err, io.EOF): + if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"}) return - case err != nil: + } else if err != nil { c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } - // validate the request - switch { - case req.Model == "": - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"}) + caps := []Capability{CapabilityCompletion} + r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) + if errors.Is(err, errCapabilityCompletion) { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)}) return - case len(req.Format) > 0 && req.Format != "json": - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"}) + } else if err != nil { + handleScheduleError(c, err) return } - model, err := GetModel(req.Model) - if err != nil { - var pErr *fs.PathError - if errors.As(err, &pErr) { - c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)}) - return - } - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - if !model.Has(CapabilityCompletion) { - c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support chat", req.Model)}) - return - } - - opts, err := modelOptions(model, req.Options) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return - } - - rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive) - var runner *runnerRef - select { - case runner = <-rCh: - case err = <-eCh: - handleErrorResponse(c, err) - return - } - - checkpointLoaded := time.Now() - - // if the first message is not a system message, then add the model's default system message - if len(req.Messages) > 0 && req.Messages[0].Role != "system" { - req.Messages = append([]api.Message{ - { - Role: "system", - Content: model.System, - }, - }, req.Messages...) - } - - prompt, err := chatPrompt(c.Request.Context(), runner, model.Template, req.Messages, opts.NumCtx) - if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - // an empty request loads the model - if len(req.Messages) == 0 || prompt == "" { - resp := api.ChatResponse{ - CreatedAt: time.Now().UTC(), + if len(req.Messages) == 0 { + c.JSON(http.StatusOK, api.ChatResponse{ Model: req.Model, + CreatedAt: time.Now().UTC(), + Message: api.Message{Role: "assistant"}, Done: true, DoneReason: "load", - Message: api.Message{Role: "assistant"}, - } - c.JSON(http.StatusOK, resp) + }) return } - // only send images that are in the prompt - var i int - var images []llm.ImageData - for _, m := range req.Messages { - for _, img := range m.Images { - if !isSupportedImageType(img) { - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"}) - return - } - - if strings.Contains(prompt, fmt.Sprintf("[img-%d]", i)) { - images = append(images, llm.ImageData{Data: img, ID: i}) - } - i += 1 - } + prompt, images, err := chatPrompt(c.Request.Context(), r, req.Messages) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return } - slog.Debug("chat handler", "prompt", prompt, "images", len(images)) + slog.Debug("chat request", "images", len(images), "prompt", prompt) ch := make(chan any) - go func() { defer close(ch) - - fn := func(r llm.CompletionResponse) { - resp := api.ChatResponse{ + if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{ + Prompt: prompt, + Images: images, + Format: req.Format, + Options: *r.Options, + }, func(r llm.CompletionResponse) { + ch <- api.ChatResponse{ Model: req.Model, CreatedAt: time.Now().UTC(), Message: api.Message{Role: "assistant", Content: r.Content}, @@ -1352,64 +1180,48 @@ func (s *Server) ChatHandler(c *gin.Context) { EvalDuration: r.EvalDuration, }, } - - if r.Done { - resp.TotalDuration = time.Since(checkpointStart) - resp.LoadDuration = checkpointLoaded.Sub(checkpointStart) - } - - ch <- resp - } - - if err := runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{ - Prompt: prompt, - Format: req.Format, - Images: images, - Options: opts, - }, fn); err != nil { + }); err != nil { ch <- gin.H{"error": err.Error()} } }() if req.Stream != nil && !*req.Stream { - // Accumulate responses into the final response - var final api.ChatResponse + var r api.ChatResponse var sb strings.Builder - for resp := range ch { - switch r := resp.(type) { + for rr := range ch { + switch t := rr.(type) { case api.ChatResponse: - sb.WriteString(r.Message.Content) - final = r + sb.WriteString(t.Message.Content) + r = t case gin.H: - if errorMsg, ok := r["error"].(string); ok { - c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg}) - return - } else { - c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"}) - return + msg, ok := t["error"].(string) + if !ok { + msg = "unexpected error format in response" } + + c.JSON(http.StatusInternalServerError, gin.H{"error": msg}) + return default: - c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"}) + c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"}) return } } - final.Message = api.Message{Role: "assistant", Content: sb.String()} - c.JSON(http.StatusOK, final) + r.Message.Content = sb.String() + c.JSON(http.StatusOK, r) return } streamResponse(c, ch) } -func handleErrorResponse(c *gin.Context, err error) { - if errors.Is(err, context.Canceled) { +func handleScheduleError(c *gin.Context, err error) { + switch { + case errors.Is(err, context.Canceled): c.JSON(499, gin.H{"error": "request canceled"}) - return - } - if errors.Is(err, ErrMaxQueue) { + case errors.Is(err, ErrMaxQueue): c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()}) - return + default: + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) } - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) } diff --git a/template/template.go b/template/template.go index d15f7156f..cfba5a238 100644 --- a/template/template.go +++ b/template/template.go @@ -5,6 +5,7 @@ import ( "embed" "encoding/json" "errors" + "fmt" "io" "math" "slices" @@ -14,6 +15,7 @@ import ( "text/template/parse" "github.com/agnivade/levenshtein" + "github.com/ollama/ollama/api" "golang.org/x/exp/maps" ) @@ -74,30 +76,78 @@ func Named(s string) (*named, error) { return nil, errors.New("no matching template found") } +var DefaultTemplate, _ = Parse("{{ .Prompt }}") + type Template struct { *template.Template raw string } +var response = parse.ActionNode{ + NodeType: parse.NodeAction, + Pipe: &parse.PipeNode{ + NodeType: parse.NodePipe, + Cmds: []*parse.CommandNode{ + { + NodeType: parse.NodeCommand, + Args: []parse.Node{ + &parse.FieldNode{ + NodeType: parse.NodeField, + Ident: []string{"Response"}, + }, + }, + }, + }, + }, +} + +func Parse(s string) (*Template, error) { + tmpl := template.New("").Option("missingkey=zero").Funcs(template.FuncMap{ + "toJson": func(v any) string { + b, err := json.Marshal(v) + if err != nil { + return "" + } + + return string(b) + }, + "isLastMessage": func(s []*api.Message, m *api.Message) bool { + for i := len(s) - 1; i >= 0; i-- { + if m.Role != s[i].Role { + continue + } + + return m == s[i] + } + + return false + }, + }) + + tmpl, err := tmpl.Parse(s) + if err != nil { + return nil, err + } + + t := Template{Template: tmpl, raw: s} + if vars := t.Vars(); !slices.Contains(vars, "messages") && !slices.Contains(vars, "response") { + // touch up the template and append {{ .Response }} + tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, &response) + } + + return &t, nil +} + func (t *Template) String() string { return t.raw } -var DefaultTemplate, _ = Parse("{{ .Prompt }}") - -func Parse(s string) (*Template, error) { - t, err := template.New("").Option("missingkey=zero").Parse(s) - if err != nil { - return nil, err - } - - return &Template{Template: t, raw: s}, nil -} - func (t *Template) Vars() []string { var vars []string - for _, n := range t.Tree.Root.Nodes { - vars = append(vars, parseNode(n)...) + for _, tt := range t.Templates() { + for _, n := range tt.Root.Nodes { + vars = append(vars, parseNode(n)...) + } } set := make(map[string]struct{}) @@ -110,6 +160,97 @@ func (t *Template) Vars() []string { return vars } +type Values struct { + Messages []api.Message +} + +func (t *Template) Execute(w io.Writer, v Values) error { + system, collated := collate(v.Messages) + if slices.Contains(t.Vars(), "messages") { + return t.Template.Execute(w, map[string]any{ + "System": system, + "Messages": collated, + }) + } + + var b bytes.Buffer + var prompt, response string + for i, m := range collated { + if m.Role == "user" { + prompt = m.Content + } else { + response = m.Content + } + + if i != len(collated)-1 && prompt != "" && response != "" { + if err := t.Template.Execute(&b, map[string]any{ + "System": "", + "Prompt": prompt, + "Response": response, + }); err != nil { + return err + } + + prompt = "" + response = "" + } + } + + var cut bool + tree := t.Template.Copy() + // for the last message, cut everything after "{{ .Response }}" + tree.Root.Nodes = slices.DeleteFunc(tree.Root.Nodes, func(n parse.Node) bool { + if slices.Contains(parseNode(n), "Response") { + cut = true + } + + return cut + }) + + if err := template.Must(template.New("").AddParseTree("", tree)).Execute(&b, map[string]any{ + "System": system, + "Prompt": prompt, + }); err != nil { + return err + } + + _, err := io.Copy(w, &b) + return err +} + +func collate(msgs []api.Message) (system string, collated []*api.Message) { + var n int + for i := range msgs { + msg := msgs[i] + if msg.Role == "system" { + if system != "" { + system += "\n\n" + } + + system += msg.Content + continue + } + + for range msg.Images { + imageTag := fmt.Sprintf("[img-%d]", n) + if !strings.Contains(msg.Content, "[img]") { + msg.Content = strings.TrimSpace("[img] " + msg.Content) + } + + msg.Content = strings.Replace(msg.Content, "[img]", imageTag, 1) + n++ + } + + if len(collated) > 0 && collated[len(collated)-1].Role == msg.Role { + collated[len(collated)-1].Content += "\n\n" + msg.Content + } else { + collated = append(collated, &msg) + } + } + + return +} + func parseNode(n parse.Node) []string { switch n := n.(type) { case *parse.ActionNode: @@ -152,6 +293,8 @@ func parseNode(n parse.Node) []string { return names case *parse.FieldNode: return n.Ident + case *parse.TemplateNode: + return parseNode(n.Pipe) } return nil diff --git a/template/template_test.go b/template/template_test.go index eda4634f4..5d5dad4b2 100644 --- a/template/template_test.go +++ b/template/template_test.go @@ -11,6 +11,7 @@ import ( "testing" "text/template" + "github.com/ollama/ollama/api" "github.com/ollama/ollama/llm" ) @@ -64,13 +65,12 @@ func TestParse(t *testing.T) { template string vars []string }{ - {"{{ .Prompt }}", []string{"prompt"}}, - {"{{ .System }} {{ .Prompt }}", []string{"prompt", "system"}}, + {"{{ .Prompt }}", []string{"prompt", "response"}}, + {"{{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system"}}, {"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}}, - {"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "system", "tools"}}, + {"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}}, {"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}}, {"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}}, - {"{{ .Prompt }} {{ .Suffix }}", []string{"prompt", "suffix"}}, } for _, tt := range cases { @@ -87,3 +87,148 @@ func TestParse(t *testing.T) { }) } } + +func TestExecuteWithMessages(t *testing.T) { + cases := []struct { + templates []string + values Values + expected string + }{ + { + []string{ + `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `, + `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`, + `{{- range .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }} +{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} +{{- end }} +{{- end }}`, + }, + Values{ + Messages: []api.Message{ + {Role: "user", Content: "Hello friend!"}, + {Role: "assistant", Content: "Hello human!"}, + {Role: "user", Content: "Yay!"}, + }, + }, + `[INST] Hello friend![/INST] Hello human![INST] Yay![/INST] `, + }, + { + []string{ + `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `, + `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`, + ` +{{- range .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }} +{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} +{{- end }} +{{- end }}`, + }, + Values{ + Messages: []api.Message{ + {Role: "system", Content: "You are a helpful assistant!"}, + {Role: "user", Content: "Hello friend!"}, + {Role: "assistant", Content: "Hello human!"}, + {Role: "user", Content: "Yay!"}, + }, + }, + `[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant! + +Yay![/INST] `, + }, + { + []string{ + `{{ if .System }}<|im_start|>system +{{ .System }}<|im_end|> +{{ end }}{{ if .Prompt }}<|im_start|>user +{{ .Prompt }}<|im_end|> +{{ end }}<|im_start|>assistant +{{ .Response }}<|im_end|> +`, + ` +{{- range .Messages }} +{{- if and (eq .Role "user") (isLastMessage $.Messages .) $.System }}<|im_start|>system +{{ $.System }}<|im_end|>{{ print "\n" }} +{{- end }}<|im_start|>{{ .Role }} +{{ .Content }}<|im_end|>{{ print "\n" }} +{{- end }}<|im_start|>assistant +`, + }, + Values{ + Messages: []api.Message{ + {Role: "system", Content: "You are a helpful assistant!"}, + {Role: "user", Content: "Hello friend!"}, + {Role: "assistant", Content: "Hello human!"}, + {Role: "user", Content: "Yay!"}, + }, + }, + `<|im_start|>user +Hello friend!<|im_end|> +<|im_start|>assistant +Hello human!<|im_end|> +<|im_start|>system +You are a helpful assistant!<|im_end|> +<|im_start|>user +Yay!<|im_end|> +<|im_start|>assistant +`, + }, + { + []string{ + `{{ if .Prompt }}Question: {{ .Prompt }} + +{{ end }}Answer: {{ .Response }} + +`, + ` +{{- range .Messages }} +{{- if eq .Role "user" }}Question: {{ .Content }}{{ print "\n\n" }} +{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ print "\n\n" }} +{{- end }} +{{- end }}Answer: `, + }, + Values{ + Messages: []api.Message{ + {Role: "user", Content: "What's in this image?", Images: []api.ImageData{[]byte("")}}, + {Role: "assistant", Content: "It's a hot dog."}, + {Role: "user", Content: "What's in _this_ image?"}, + {Role: "user", Images: []api.ImageData{[]byte("")}}, + {Role: "user", Content: "Is it a hot dog?"}, + }, + }, + `Question: [img-0] What's in this image? + +Answer: It's a hot dog. + +Question: What's in _this_ image? + +[img-1] + +Is it a hot dog? + +Answer: `, + }, + } + + for _, tt := range cases { + t.Run("", func(t *testing.T) { + for _, tmpl := range tt.templates { + t.Run("", func(t *testing.T) { + tmpl, err := Parse(tmpl) + if err != nil { + t.Fatal(err) + } + + var b bytes.Buffer + if err := tmpl.Execute(&b, tt.values); err != nil { + t.Fatal(err) + } + + if b.String() != tt.expected { + t.Errorf("expected\n%s,\ngot\n%s", tt.expected, b.String()) + } + }) + } + }) + } +} From 2c3fe1fd972b7810091120f844afc35bc98accbd Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 20 Jun 2024 11:00:08 -0700 Subject: [PATCH 2/7] comments --- server/prompt.go | 29 +++--- server/prompt_test.go | 34 +++---- server/routes.go | 46 +++++----- template/template.go | 48 +++++----- template/template_test.go | 180 ++++++++++++++++++++++++++++++-------- 5 files changed, 224 insertions(+), 113 deletions(-) diff --git a/server/prompt.go b/server/prompt.go index 5016fbe14..51d691a9f 100644 --- a/server/prompt.go +++ b/server/prompt.go @@ -11,8 +11,13 @@ import ( "github.com/ollama/ollama/template" ) -func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) { - // extract system messages which should always be included +type tokenizeFunc func(context.Context, string) ([]int, error) + +// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn. +// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the +// latest message and 2) system messages +func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) { + // pull out any system messages which should always be included in the prompt var system []api.Message msgs = slices.DeleteFunc(msgs, func(m api.Message) bool { if m.Role == "system" { @@ -23,32 +28,35 @@ func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt s return false }) - if len(system) == 0 && r.model.System != "" { + if len(system) == 0 && m.System != "" { // add model system prompt since it wasn't provided - system = append(system, api.Message{Role: "system", Content: r.model.System}) + system = append(system, api.Message{Role: "system", Content: m.System}) } + // always include the last message n := len(msgs) - 1 + // in reverse, find all messages that fit into context window for i := n - 1; i >= 0; i-- { var b bytes.Buffer - if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil { + if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil { return "", nil, err } - s, err := r.llama.Tokenize(ctx, b.String()) + s, err := tokenize(ctx, b.String()) if err != nil { return "", nil, err } c := len(s) - if r.model.ProjectorPaths != nil { + if m.ProjectorPaths != nil { for _, m := range msgs[i:] { - // TODO: get image embedding length from project metadata + // images are represented as 768 sized embeddings + // TODO: get embedding length from project metadata c += 768 * len(m.Images) } } - if c > r.NumCtx { + if c > opts.NumCtx { slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:])) break } else { @@ -56,8 +64,9 @@ func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt s } } + // truncate any messages that do not fit into the context window var b bytes.Buffer - if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil { + if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil { return "", nil, err } diff --git a/server/prompt_test.go b/server/prompt_test.go index 59288b46c..d4cee98c2 100644 --- a/server/prompt_test.go +++ b/server/prompt_test.go @@ -7,15 +7,10 @@ import ( "testing" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/llm" "github.com/ollama/ollama/template" ) -type mock struct { - llm.LlamaServer -} - -func (m mock) Tokenize(_ context.Context, s string) (tokens []int, err error) { +func tokenize(_ context.Context, s string) (tokens []int, err error) { for range strings.Fields(s) { tokens = append(tokens, len(tokens)) } @@ -48,7 +43,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "truncate messages", + name: "truncate messages", limit: 1, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!"}, @@ -60,7 +55,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "truncate messages with image", + name: "truncate messages with image", limit: 64, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!"}, @@ -75,7 +70,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "truncate messages with images", + name: "truncate messages with images", limit: 64, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}}, @@ -90,7 +85,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "messages with images", + name: "messages with images", limit: 2048, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}}, @@ -106,7 +101,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "message with image tag", + name: "message with image tag", limit: 2048, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}}, @@ -122,7 +117,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "messages with interleaved images", + name: "messages with interleaved images", limit: 2048, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!"}, @@ -140,7 +135,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "truncate message with interleaved images", + name: "truncate message with interleaved images", limit: 1024, msgs: []api.Message{ {Role: "user", Content: "You're a test, Harry!"}, @@ -157,7 +152,7 @@ func TestChatPrompt(t *testing.T) { }, }, { - name: "message with system prompt", + name: "message with system prompt", limit: 2048, msgs: []api.Message{ {Role: "system", Content: "You are the Test Who Lived."}, @@ -181,14 +176,9 @@ func TestChatPrompt(t *testing.T) { for _, tt := range cases { t.Run(tt.name, func(t *testing.T) { - r := runnerRef{ - llama: mock{}, - model: &Model{Template: tmpl, ProjectorPaths: []string{"vision"}}, - Options: &api.Options{}, - } - - r.NumCtx = tt.limit - prompt, images, err := chatPrompt(context.TODO(), &r, tt.msgs) + model := Model{Template: tmpl, ProjectorPaths: []string{"vision"}} + opts := api.Options{Runner: api.Runner{NumCtx: tt.limit}} + prompt, images, err := chatPrompt(context.TODO(), &model, tokenize, &opts, tt.msgs) if err != nil { t.Fatal(err) } diff --git a/server/routes.go b/server/routes.go index 35e64511b..1a93e9770 100644 --- a/server/routes.go +++ b/server/routes.go @@ -54,6 +54,8 @@ func init() { gin.SetMode(mode) } +var errRequired = errors.New("is required") + func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) { opts := api.DefaultOptions() if err := opts.FromMap(model.Options); err != nil { @@ -69,7 +71,7 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) { if name == "" { - return nil, errors.New("model is required") + return nil, fmt.Errorf("model %w", errRequired) } model, err := GetModel(name) @@ -121,7 +123,17 @@ func (s *Server) GenerateHandler(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)}) return } else if err != nil { - handleScheduleError(c, err) + handleScheduleError(c, req.Model, err) + return + } + + if req.Prompt == "" { + c.JSON(http.StatusOK, api.GenerateResponse{ + Model: req.Model, + CreatedAt: time.Now().UTC(), + Done: true, + DoneReason: "load", + }) return } @@ -139,23 +151,11 @@ func (s *Server) GenerateHandler(c *gin.Context) { msgs = append(msgs, api.Message{Role: "system", Content: r.model.System}) } - if req.Prompt != "" { - for _, i := range images { - msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)}) - } - - msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt}) + for _, i := range images { + msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)}) } - if len(msgs) == 0 { - c.JSON(http.StatusOK, api.GenerateResponse{ - Model: req.Model, - CreatedAt: time.Now().UTC(), - Done: true, - DoneReason: "load", - }) - return - } + msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt}) tmpl := r.model.Template if req.Template != "" { @@ -256,7 +256,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) if err != nil { - handleScheduleError(c, err) + handleScheduleError(c, req.Model, err) return } @@ -1135,7 +1135,7 @@ func (s *Server) ChatHandler(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)}) return } else if err != nil { - handleScheduleError(c, err) + handleScheduleError(c, req.Model, err) return } @@ -1150,7 +1150,7 @@ func (s *Server) ChatHandler(c *gin.Context) { return } - prompt, images, err := chatPrompt(c.Request.Context(), r, req.Messages) + prompt, images, err := chatPrompt(c.Request.Context(), r.model, r.llama.Tokenize, r.Options, req.Messages) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -1215,12 +1215,16 @@ func (s *Server) ChatHandler(c *gin.Context) { streamResponse(c, ch) } -func handleScheduleError(c *gin.Context, err error) { +func handleScheduleError(c *gin.Context, name string, err error) { switch { + case errors.Is(err, errRequired): + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) case errors.Is(err, context.Canceled): c.JSON(499, gin.H{"error": "request canceled"}) case errors.Is(err, ErrMaxQueue): c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()}) + case errors.Is(err, os.ErrNotExist): + c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)}) default: c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) } diff --git a/template/template.go b/template/template.go index cfba5a238..c8f8f6d0d 100644 --- a/template/template.go +++ b/template/template.go @@ -83,6 +83,7 @@ type Template struct { raw string } +// response is a template node that can be added to templates that don't already have one var response = parse.ActionNode{ NodeType: parse.NodeAction, Pipe: &parse.PipeNode{ @@ -101,28 +102,25 @@ var response = parse.ActionNode{ }, } +var funcs = template.FuncMap{ + "toJson": func(v any) string { + b, err := json.Marshal(v) + if err != nil { + return "" + } + + return string(b) + }, + "add": func(a, b int) int { + return a + b + }, + "sub": func(a, b int) int { + return a - b + }, +} + func Parse(s string) (*Template, error) { - tmpl := template.New("").Option("missingkey=zero").Funcs(template.FuncMap{ - "toJson": func(v any) string { - b, err := json.Marshal(v) - if err != nil { - return "" - } - - return string(b) - }, - "isLastMessage": func(s []*api.Message, m *api.Message) bool { - for i := len(s) - 1; i >= 0; i-- { - if m.Role != s[i].Role { - continue - } - - return m == s[i] - } - - return false - }, - }) + tmpl := template.New("").Option("missingkey=zero").Funcs(funcs) tmpl, err := tmpl.Parse(s) if err != nil { @@ -218,7 +216,13 @@ func (t *Template) Execute(w io.Writer, v Values) error { return err } -func collate(msgs []api.Message) (system string, collated []*api.Message) { +type messages []*api.Message + +// collate messages based on role. consecutive messages of the same role are merged +// into a single message. collate also pulls out and merges messages with Role == "system" +// which are templated separately. As a side effect, it mangles message content adding image +// tags ([img-%d]) as needed +func collate(msgs []api.Message) (system string, collated messages) { var n int for i := range msgs { msg := msgs[i] diff --git a/template/template_test.go b/template/template_test.go index 5d5dad4b2..ac92bf489 100644 --- a/template/template_test.go +++ b/template/template_test.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "slices" + "strconv" "testing" "text/template" @@ -15,6 +16,98 @@ import ( "github.com/ollama/ollama/llm" ) +func TestFuncs(t *testing.T) { + t.Run("toJson", func(t *testing.T) { + cases := []struct { + input any + expected string + }{ + {nil, "null"}, + {true, "true"}, + {false, "false"}, + {0, "0"}, + {1, "1"}, + {1.0, "1"}, + {1.1, "1.1"}, + {"", `""`}, + {"hello", `"hello"`}, + {[]int{1, 2, 3}, "[1,2,3]"}, + {[]string{"a", "b", "c"}, `["a","b","c"]`}, + {map[string]int{"a": 1, "b": 2}, `{"a":1,"b":2}`}, + {map[string]string{"a": "b", "c": "d"}, `{"a":"b","c":"d"}`}, + } + + for _, tt := range cases { + t.Run(tt.expected, func(t *testing.T) { + toJson, ok := funcs["toJson"].(func(any) string) + if !ok { + t.Fatal("toJson is not a function") + } + + if s := toJson(tt.input); s != tt.expected { + t.Errorf("expected %q, got %q", tt.expected, s) + } + }) + } + }) + + t.Run("add", func(t *testing.T) { + cases := []struct { + a, b int + expected int + }{ + {0, 0, 0}, + {0, 1, 1}, + {1, 0, 1}, + {1, 1, 2}, + {1, -1, 0}, + {-1, 1, 0}, + {-1, -1, -2}, + } + + for _, tt := range cases { + t.Run(strconv.Itoa(tt.expected), func(t *testing.T) { + add, ok := funcs["add"].(func(int, int) int) + if !ok { + t.Fatal("add is not a function") + } + + if n := add(tt.a, tt.b); n != tt.expected { + t.Errorf("expected %d, got %d", tt.expected, n) + } + }) + } + }) + + t.Run("sub", func(t *testing.T) { + cases := []struct { + a, b int + expected int + }{ + {0, 0, 0}, + {0, 1, -1}, + {1, 0, 1}, + {1, 1, 0}, + {1, -1, 2}, + {-1, 1, -2}, + {-1, -1, 0}, + } + + for _, tt := range cases { + t.Run(strconv.Itoa(tt.expected), func(t *testing.T) { + sub, ok := funcs["sub"].(func(int, int) int) + if !ok { + t.Fatal("sub is not a function") + } + + if n := sub(tt.a, tt.b); n != tt.expected { + t.Errorf("expected %d, got %d", tt.expected, n) + } + }) + } + }) +} + func TestNamed(t *testing.T) { f, err := os.Open(filepath.Join("testdata", "templates.jsonl")) if err != nil { @@ -89,77 +182,86 @@ func TestParse(t *testing.T) { } func TestExecuteWithMessages(t *testing.T) { + type template struct { + name string + template string + } cases := []struct { - templates []string + name string + templates []template values Values expected string }{ { - []string{ - `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `, - `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`, - `{{- range .Messages }} -{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }} + "mistral", + []template{ + {"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `}, + {"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`}, + {"messages", `{{- range .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }} {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} {{- end }} -{{- end }}`, +{{- end }}`}, }, Values{ Messages: []api.Message{ {Role: "user", Content: "Hello friend!"}, {Role: "assistant", Content: "Hello human!"}, - {Role: "user", Content: "Yay!"}, + {Role: "user", Content: "What is your name?"}, }, }, - `[INST] Hello friend![/INST] Hello human![INST] Yay![/INST] `, + `[INST] Hello friend![/INST] Hello human![INST] What is your name?[/INST] `, }, { - []string{ - `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `, - `[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`, - ` + "mistral system", + []template{ + {"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `}, + {"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`}, + {"messages", ` {{- range .Messages }} -{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }} +{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }} {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} {{- end }} -{{- end }}`, +{{- end }}`}, }, Values{ Messages: []api.Message{ {Role: "system", Content: "You are a helpful assistant!"}, {Role: "user", Content: "Hello friend!"}, {Role: "assistant", Content: "Hello human!"}, - {Role: "user", Content: "Yay!"}, + {Role: "user", Content: "What is your name?"}, }, }, `[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant! -Yay![/INST] `, +What is your name?[/INST] `, }, { - []string{ - `{{ if .System }}<|im_start|>system + "chatml", + []template{ + // this does not have a "no response" test because it's impossible to render the same output + {"response", `{{ if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant {{ .Response }}<|im_end|> -`, - ` +`}, + {"messages", ` {{- range .Messages }} -{{- if and (eq .Role "user") (isLastMessage $.Messages .) $.System }}<|im_start|>system -{{ $.System }}<|im_end|>{{ print "\n" }} +{{- if and (eq .Role "user") (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}<|im_start|>system +{{ $.System }}<|im_end|>{{ "\n" }} {{- end }}<|im_start|>{{ .Role }} -{{ .Content }}<|im_end|>{{ print "\n" }} +{{ .Content }}<|im_end|>{{ "\n" }} {{- end }}<|im_start|>assistant -`, +`}, }, Values{ Messages: []api.Message{ {Role: "system", Content: "You are a helpful assistant!"}, {Role: "user", Content: "Hello friend!"}, {Role: "assistant", Content: "Hello human!"}, - {Role: "user", Content: "Yay!"}, + {Role: "user", Content: "What is your name?"}, }, }, `<|im_start|>user @@ -169,23 +271,25 @@ Hello human!<|im_end|> <|im_start|>system You are a helpful assistant!<|im_end|> <|im_start|>user -Yay!<|im_end|> +What is your name?<|im_end|> <|im_start|>assistant `, }, { - []string{ - `{{ if .Prompt }}Question: {{ .Prompt }} + "moondream", + []template{ + // this does not have a "no response" test because it's impossible to render the same output + {"response", `{{ if .Prompt }}Question: {{ .Prompt }} {{ end }}Answer: {{ .Response }} -`, - ` +`}, + {"messages", ` {{- range .Messages }} -{{- if eq .Role "user" }}Question: {{ .Content }}{{ print "\n\n" }} -{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ print "\n\n" }} +{{- if eq .Role "user" }}Question: {{ .Content }}{{ "\n\n" }} +{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ "\n\n" }} {{- end }} -{{- end }}Answer: `, +{{- end }}Answer: `}, }, Values{ Messages: []api.Message{ @@ -211,10 +315,10 @@ Answer: `, } for _, tt := range cases { - t.Run("", func(t *testing.T) { - for _, tmpl := range tt.templates { - t.Run("", func(t *testing.T) { - tmpl, err := Parse(tmpl) + t.Run(tt.name, func(t *testing.T) { + for _, ttt := range tt.templates { + t.Run(ttt.name, func(t *testing.T) { + tmpl, err := Parse(ttt.template) if err != nil { t.Fatal(err) } From ac7a842e550721fbc00e36e416e7cf6606993149 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 3 Jul 2024 09:00:07 -0700 Subject: [PATCH 3/7] fix model reloading ensure runtime model changes (template, system prompt, messages, options) are captured on model updates without needing to reload the server --- llm/server.go | 2 +- server/routes.go | 42 ++++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/llm/server.go b/llm/server.go index 206f9e391..229d61e4a 100644 --- a/llm/server.go +++ b/llm/server.go @@ -679,7 +679,7 @@ type CompletionRequest struct { Prompt string Format string Images []ImageData - Options api.Options + Options *api.Options } type CompletionResponse struct { diff --git a/server/routes.go b/server/routes.go index 1a93e9770..4059c7c52 100644 --- a/server/routes.go +++ b/server/routes.go @@ -69,23 +69,25 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options return opts, nil } -func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) { +// scheduleRunner schedules a runner after validating inputs such as capabilities and model options. +// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise. +func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) { if name == "" { - return nil, fmt.Errorf("model %w", errRequired) + return nil, nil, nil, fmt.Errorf("model %w", errRequired) } model, err := GetModel(name) if err != nil { - return nil, err + return nil, nil, nil, err } if err := model.CheckCapabilities(caps...); err != nil { - return nil, fmt.Errorf("%s %w", name, err) + return nil, nil, nil, fmt.Errorf("%s %w", name, err) } opts, err := modelOptions(model, requestOpts) if err != nil { - return nil, err + return nil, nil, nil, err } runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive) @@ -93,10 +95,10 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil select { case runner = <-runnerCh: case err = <-errCh: - return nil, err + return nil, nil, nil, err } - return runner, nil + return runner.llama, model, &opts, nil } func (s *Server) GenerateHandler(c *gin.Context) { @@ -118,7 +120,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { } caps := []Capability{CapabilityCompletion} - r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) + r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) if errors.Is(err, errCapabilityCompletion) { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)}) return @@ -147,8 +149,8 @@ func (s *Server) GenerateHandler(c *gin.Context) { var msgs []api.Message if req.System != "" { msgs = append(msgs, api.Message{Role: "system", Content: req.System}) - } else if r.model.System != "" { - msgs = append(msgs, api.Message{Role: "system", Content: r.model.System}) + } else if m.System != "" { + msgs = append(msgs, api.Message{Role: "system", Content: m.System}) } for _, i := range images { @@ -157,7 +159,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt}) - tmpl := r.model.Template + tmpl := m.Template if req.Template != "" { tmpl, err = template.Parse(req.Template) if err != nil { @@ -168,7 +170,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { var b bytes.Buffer if req.Context != nil { - s, err := r.llama.Detokenize(c.Request.Context(), req.Context) + s, err := r.Detokenize(c.Request.Context(), req.Context) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -190,11 +192,11 @@ func (s *Server) GenerateHandler(c *gin.Context) { ch := make(chan any) go func() { defer close(ch) - if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{ + if err := r.Completion(c.Request.Context(), llm.CompletionRequest{ Prompt: prompt, Images: images, Format: req.Format, - Options: *r.Options, + Options: opts, }, func(r llm.CompletionResponse) { ch <- api.GenerateResponse{ Model: req.Model, @@ -254,7 +256,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { return } - r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) + r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) if err != nil { handleScheduleError(c, req.Model, err) return @@ -266,7 +268,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) { return } - embedding, err := r.llama.Embedding(c.Request.Context(), req.Prompt) + embedding, err := r.Embedding(c.Request.Context(), req.Prompt) if err != nil { slog.Info(fmt.Sprintf("embedding generation failed: %v", err)) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) @@ -1130,7 +1132,7 @@ func (s *Server) ChatHandler(c *gin.Context) { } caps := []Capability{CapabilityCompletion} - r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) + r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive) if errors.Is(err, errCapabilityCompletion) { c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)}) return @@ -1150,7 +1152,7 @@ func (s *Server) ChatHandler(c *gin.Context) { return } - prompt, images, err := chatPrompt(c.Request.Context(), r.model, r.llama.Tokenize, r.Options, req.Messages) + prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) return @@ -1161,11 +1163,11 @@ func (s *Server) ChatHandler(c *gin.Context) { ch := make(chan any) go func() { defer close(ch) - if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{ + if err := r.Completion(c.Request.Context(), llm.CompletionRequest{ Prompt: prompt, Images: images, Format: req.Format, - Options: *r.Options, + Options: opts, }, func(r llm.CompletionResponse) { ch <- api.ChatResponse{ Model: req.Model, From 326363b3a72d9e2972a019dfc4c6147ea901f501 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 3 Jul 2024 13:49:14 -0700 Subject: [PATCH 4/7] no funcs --- template/template.go | 19 +------ template/template_test.go | 105 +++----------------------------------- 2 files changed, 7 insertions(+), 117 deletions(-) diff --git a/template/template.go b/template/template.go index c8f8f6d0d..b133b97e9 100644 --- a/template/template.go +++ b/template/template.go @@ -102,25 +102,8 @@ var response = parse.ActionNode{ }, } -var funcs = template.FuncMap{ - "toJson": func(v any) string { - b, err := json.Marshal(v) - if err != nil { - return "" - } - - return string(b) - }, - "add": func(a, b int) int { - return a + b - }, - "sub": func(a, b int) int { - return a - b - }, -} - func Parse(s string) (*Template, error) { - tmpl := template.New("").Option("missingkey=zero").Funcs(funcs) + tmpl := template.New("").Option("missingkey=zero") tmpl, err := tmpl.Parse(s) if err != nil { diff --git a/template/template_test.go b/template/template_test.go index ac92bf489..ac16bd606 100644 --- a/template/template_test.go +++ b/template/template_test.go @@ -8,7 +8,6 @@ import ( "os" "path/filepath" "slices" - "strconv" "testing" "text/template" @@ -16,98 +15,6 @@ import ( "github.com/ollama/ollama/llm" ) -func TestFuncs(t *testing.T) { - t.Run("toJson", func(t *testing.T) { - cases := []struct { - input any - expected string - }{ - {nil, "null"}, - {true, "true"}, - {false, "false"}, - {0, "0"}, - {1, "1"}, - {1.0, "1"}, - {1.1, "1.1"}, - {"", `""`}, - {"hello", `"hello"`}, - {[]int{1, 2, 3}, "[1,2,3]"}, - {[]string{"a", "b", "c"}, `["a","b","c"]`}, - {map[string]int{"a": 1, "b": 2}, `{"a":1,"b":2}`}, - {map[string]string{"a": "b", "c": "d"}, `{"a":"b","c":"d"}`}, - } - - for _, tt := range cases { - t.Run(tt.expected, func(t *testing.T) { - toJson, ok := funcs["toJson"].(func(any) string) - if !ok { - t.Fatal("toJson is not a function") - } - - if s := toJson(tt.input); s != tt.expected { - t.Errorf("expected %q, got %q", tt.expected, s) - } - }) - } - }) - - t.Run("add", func(t *testing.T) { - cases := []struct { - a, b int - expected int - }{ - {0, 0, 0}, - {0, 1, 1}, - {1, 0, 1}, - {1, 1, 2}, - {1, -1, 0}, - {-1, 1, 0}, - {-1, -1, -2}, - } - - for _, tt := range cases { - t.Run(strconv.Itoa(tt.expected), func(t *testing.T) { - add, ok := funcs["add"].(func(int, int) int) - if !ok { - t.Fatal("add is not a function") - } - - if n := add(tt.a, tt.b); n != tt.expected { - t.Errorf("expected %d, got %d", tt.expected, n) - } - }) - } - }) - - t.Run("sub", func(t *testing.T) { - cases := []struct { - a, b int - expected int - }{ - {0, 0, 0}, - {0, 1, -1}, - {1, 0, 1}, - {1, 1, 0}, - {1, -1, 2}, - {-1, 1, -2}, - {-1, -1, 0}, - } - - for _, tt := range cases { - t.Run(strconv.Itoa(tt.expected), func(t *testing.T) { - sub, ok := funcs["sub"].(func(int, int) int) - if !ok { - t.Fatal("sub is not a function") - } - - if n := sub(tt.a, tt.b); n != tt.expected { - t.Errorf("expected %d, got %d", tt.expected, n) - } - }) - } - }) -} - func TestNamed(t *testing.T) { f, err := os.Open(filepath.Join("testdata", "templates.jsonl")) if err != nil { @@ -197,8 +104,8 @@ func TestExecuteWithMessages(t *testing.T) { []template{ {"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `}, {"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`}, - {"messages", `{{- range .Messages }} -{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }} + {"messages", `{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }} {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} {{- end }} {{- end }}`}, @@ -218,8 +125,8 @@ func TestExecuteWithMessages(t *testing.T) { {"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `}, {"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`}, {"messages", ` -{{- range .Messages }} -{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }} +{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }} {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }} {{- end }} {{- end }}`}, @@ -248,8 +155,8 @@ What is your name?[/INST] `, {{ .Response }}<|im_end|> `}, {"messages", ` -{{- range .Messages }} -{{- if and (eq .Role "user") (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}<|im_start|>system +{{- range $index, $_ := .Messages }} +{{- if and (eq .Role "user") (eq (len (slice $.Messages $index)) 1) $.System }}<|im_start|>system {{ $.System }}<|im_end|>{{ "\n" }} {{- end }}<|im_start|>{{ .Role }} {{ .Content }}<|im_end|>{{ "\n" }} From fb6cbc02fbe0ff8d791413a81558a1fe9725b778 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 27 Jun 2024 14:15:17 -0700 Subject: [PATCH 5/7] update named templates --- go.mod | 3 +- server/routes_create_test.go | 4 +- template/alfred.gotmpl | 9 ++- template/alpaca.gotmpl | 14 +++- template/chatml.gotmpl | 11 ++- template/chatqa.gotmpl | 14 +++- template/codellama-70b-instruct.gotmpl | 13 +++- template/falcon-instruct.gotmpl | 12 +++- template/gemma-instruct.gotmpl | 14 +++- template/granite-instruct.gotmpl | 16 ++++- template/llama2-chat.gotmpl | 15 +++- template/llama3-instruct.gotmpl | 14 +++- template/magicoder.gotmpl | 15 +++- template/mistral-instruct.gotmpl | 15 ++-- template/openchat.gotmpl | 12 +++- template/phi-3.gotmpl | 11 ++- template/solar-instruct.gotmpl | 16 ++++- template/starcoder2-instruct.gotmpl | 15 ++++ template/template_test.go | 69 ++++++++++++++++++- .../alfred.gotmpl/system-user-assistant-user | 1 + template/testdata/alfred.gotmpl/user | 1 + .../alfred.gotmpl/user-assistant-user | 1 + .../alpaca.gotmpl/system-user-assistant-user | 10 +++ template/testdata/alpaca.gotmpl/user | 4 ++ .../alpaca.gotmpl/user-assistant-user | 10 +++ .../chatml.gotmpl/system-user-assistant-user | 9 +++ template/testdata/chatml.gotmpl/user | 3 + .../chatml.gotmpl/user-assistant-user | 7 ++ .../chatqa.gotmpl/system-user-assistant-user | 9 +++ template/testdata/chatqa.gotmpl/user | 3 + .../chatqa.gotmpl/user-assistant-user | 7 ++ .../system-user-assistant-user | 11 +++ .../codellama-70b-instruct.gotmpl/user | 5 ++ .../user-assistant-user | 9 +++ .../system-user-assistant-user | 8 +++ template/testdata/falcon-instruct.gotmpl/user | 3 + .../user-assistant-user | 7 ++ .../system-user-assistant-user | 8 +++ template/testdata/gemma-instruct.gotmpl/user | 3 + .../gemma-instruct.gotmpl/user-assistant-user | 7 ++ .../system-user-assistant-user | 13 ++++ .../testdata/granite-instruct.gotmpl/user | 4 ++ .../user-assistant-user | 10 +++ .../system-user-assistant-user | 5 ++ template/testdata/llama2-chat.gotmpl/user | 3 + .../llama2-chat.gotmpl/user-assistant-user | 3 + .../system-user-assistant-user | 10 +++ template/testdata/llama3-instruct.gotmpl/user | 4 ++ .../user-assistant-user | 8 +++ .../system-user-assistant-user | 12 ++++ template/testdata/magicoder.gotmpl/user | 4 ++ .../magicoder.gotmpl/user-assistant-user | 10 +++ .../system-user-assistant-user | 2 + .../testdata/mistral-instruct.gotmpl/user | 1 + .../user-assistant-user | 1 + .../system-user-assistant-user | 1 + template/testdata/openchat.gotmpl/user | 1 + .../openchat.gotmpl/user-assistant-user | 1 + .../phi-3.gotmpl/system-user-assistant-user | 9 +++ template/testdata/phi-3.gotmpl/user | 3 + .../testdata/phi-3.gotmpl/user-assistant-user | 7 ++ .../system-user-assistant-user | 13 ++++ template/testdata/solar-instruct.gotmpl/user | 4 ++ .../solar-instruct.gotmpl/user-assistant-user | 10 +++ .../system-user-assistant-user | 12 ++++ .../testdata/starcoder2-instruct.gotmpl/user | 4 ++ .../user-assistant-user | 10 +++ .../vicuna.gotmpl/system-user-assistant-user | 6 ++ template/testdata/vicuna.gotmpl/user | 2 + .../vicuna.gotmpl/user-assistant-user | 4 ++ .../zephyr.gotmpl/system-user-assistant-user | 9 +++ template/testdata/zephyr.gotmpl/user | 3 + .../zephyr.gotmpl/user-assistant-user | 7 ++ template/vicuna.gotmpl | 13 +++- template/zephyr.gotmpl | 11 ++- 75 files changed, 611 insertions(+), 27 deletions(-) create mode 100644 template/testdata/alfred.gotmpl/system-user-assistant-user create mode 100644 template/testdata/alfred.gotmpl/user create mode 100644 template/testdata/alfred.gotmpl/user-assistant-user create mode 100644 template/testdata/alpaca.gotmpl/system-user-assistant-user create mode 100644 template/testdata/alpaca.gotmpl/user create mode 100644 template/testdata/alpaca.gotmpl/user-assistant-user create mode 100644 template/testdata/chatml.gotmpl/system-user-assistant-user create mode 100644 template/testdata/chatml.gotmpl/user create mode 100644 template/testdata/chatml.gotmpl/user-assistant-user create mode 100644 template/testdata/chatqa.gotmpl/system-user-assistant-user create mode 100644 template/testdata/chatqa.gotmpl/user create mode 100644 template/testdata/chatqa.gotmpl/user-assistant-user create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/user create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/falcon-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/falcon-instruct.gotmpl/user create mode 100644 template/testdata/falcon-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/gemma-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/gemma-instruct.gotmpl/user create mode 100644 template/testdata/gemma-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/granite-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/granite-instruct.gotmpl/user create mode 100644 template/testdata/granite-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/llama2-chat.gotmpl/system-user-assistant-user create mode 100644 template/testdata/llama2-chat.gotmpl/user create mode 100644 template/testdata/llama2-chat.gotmpl/user-assistant-user create mode 100644 template/testdata/llama3-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/llama3-instruct.gotmpl/user create mode 100644 template/testdata/llama3-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/magicoder.gotmpl/system-user-assistant-user create mode 100644 template/testdata/magicoder.gotmpl/user create mode 100644 template/testdata/magicoder.gotmpl/user-assistant-user create mode 100644 template/testdata/mistral-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/mistral-instruct.gotmpl/user create mode 100644 template/testdata/mistral-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/openchat.gotmpl/system-user-assistant-user create mode 100644 template/testdata/openchat.gotmpl/user create mode 100644 template/testdata/openchat.gotmpl/user-assistant-user create mode 100644 template/testdata/phi-3.gotmpl/system-user-assistant-user create mode 100644 template/testdata/phi-3.gotmpl/user create mode 100644 template/testdata/phi-3.gotmpl/user-assistant-user create mode 100644 template/testdata/solar-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/solar-instruct.gotmpl/user create mode 100644 template/testdata/solar-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user create mode 100644 template/testdata/starcoder2-instruct.gotmpl/user create mode 100644 template/testdata/starcoder2-instruct.gotmpl/user-assistant-user create mode 100644 template/testdata/vicuna.gotmpl/system-user-assistant-user create mode 100644 template/testdata/vicuna.gotmpl/user create mode 100644 template/testdata/vicuna.gotmpl/user-assistant-user create mode 100644 template/testdata/zephyr.gotmpl/system-user-assistant-user create mode 100644 template/testdata/zephyr.gotmpl/user create mode 100644 template/testdata/zephyr.gotmpl/user-assistant-user diff --git a/go.mod b/go.mod index 6807b9b48..2e0c6614c 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( require ( github.com/agnivade/levenshtein v1.1.1 github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 + github.com/google/go-cmp v0.6.0 github.com/mattn/go-runewidth v0.0.14 github.com/nlpodyssey/gopickle v0.3.0 github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c @@ -71,7 +72,7 @@ require ( golang.org/x/net v0.25.0 // indirect golang.org/x/sys v0.20.0 golang.org/x/term v0.20.0 - golang.org/x/text v0.15.0 // indirect + golang.org/x/text v0.15.0 google.golang.org/protobuf v1.34.1 gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/server/routes_create_test.go b/server/routes_create_test.go index 340612822..269a0ba12 100644 --- a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -545,9 +545,9 @@ func TestCreateDetectTemplate(t *testing.T) { } checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{ - filepath.Join(p, "blobs", "sha256-2f8e594e6f34b1b4d36a246628eeb3365ce442303d656f1fcc69e821722acea0"), - filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"), filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"), + filepath.Join(p, "blobs", "sha256-9512c372dfc7d84d6065b8dd2b601aeed8cc1a78e7a7aa784a42fff37f5524b7"), + filepath.Join(p, "blobs", "sha256-b8b78cb8c6eefd14c06f1af042e6161255bf87bbf2dd14fce57cdac893db8139"), }) }) diff --git a/template/alfred.gotmpl b/template/alfred.gotmpl index cecb9d2c8..44284f04c 100644 --- a/template/alfred.gotmpl +++ b/template/alfred.gotmpl @@ -1 +1,8 @@ -{{ if .System }}{{ .System }}{{ end }}{{ if .Prompt }}{{ .Prompt }}{{ end }}{{ .Response }} \ No newline at end of file +{{- if .Messages }} +{{- if .System }}{{ .System }} +{{- end }} +{{- range .Messages }}{{ .Content }} +{{- end }} +{{- else }} +{{ if .System }}{{ .System }}{{ end }}{{ if .Prompt }}{{ .Prompt }}{{ end }}{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/alpaca.gotmpl b/template/alpaca.gotmpl index 440d06627..c1f69dc92 100644 --- a/template/alpaca.gotmpl +++ b/template/alpaca.gotmpl @@ -1,7 +1,19 @@ +{{- if .Messages }} +{{- if .System }}{{ .System }} +{{- end }} +{{- range .Messages }} +{{- if eq .Role "user" }}### Instruction: +{{- else if eq .Role "assistant" }}### Response: +{{- end }} +{{ .Content }} + +{{ end }}### Response: +{{ else }} {{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Instruction: {{ .Prompt }} {{ end }}### Response: -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/chatml.gotmpl b/template/chatml.gotmpl index dcf172853..d945547c7 100644 --- a/template/chatml.gotmpl +++ b/template/chatml.gotmpl @@ -1,6 +1,15 @@ +{{- if .Messages }} +{{- if .System }}<|im_start|>system +{{ .System }}<|im_end|> +{{ end }} +{{- range .Messages }}<|im_start|>{{ .Role }} +{{ .Content }}<|im_end|> +{{ end }}<|im_start|>assistant +{{ else }} {{ if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant -{{ .Response }}<|im_end|> \ No newline at end of file +{{ .Response }}<|im_end|> +{{- end }} \ No newline at end of file diff --git a/template/chatqa.gotmpl b/template/chatqa.gotmpl index 1ede6227f..7022c4790 100644 --- a/template/chatqa.gotmpl +++ b/template/chatqa.gotmpl @@ -1,5 +1,17 @@ +{{- if .Messages }} +{{- if .System }}System: {{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}User: +{{- else if eq .Role "assistant" }}Assistant: +{{- end }} {{ .Content }} + +{{ end }}Assistant: +{{- else }} {{ if .System }}System: {{ .System }} {{ end }}{{ if .Prompt }}User: {{ .Prompt }} -{{ end }}Assistant: <|begin_of_text|>{{ .Response }} \ No newline at end of file +{{ end }}Assistant: <|begin_of_text|>{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/codellama-70b-instruct.gotmpl b/template/codellama-70b-instruct.gotmpl index 3196bd6fd..392d839eb 100644 --- a/template/codellama-70b-instruct.gotmpl +++ b/template/codellama-70b-instruct.gotmpl @@ -1,3 +1,13 @@ +{{- if .Messages }} +{{- if .System }}Source: system + + {{ .System }} {{ end }} +{{- range .Messages }}Source: {{ .Role }} + + {{ .Content }} {{ end }}Source: assistant +Destination: user + +{{ else }} {{ if .System }} Source: system {{ .System }} {{ end }} Source: user @@ -5,4 +15,5 @@ {{ .Prompt }} Source: assistant Destination: user - {{ .Response }} \ No newline at end of file + {{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/falcon-instruct.gotmpl b/template/falcon-instruct.gotmpl index 2309a1c5d..99d67f93c 100644 --- a/template/falcon-instruct.gotmpl +++ b/template/falcon-instruct.gotmpl @@ -1,3 +1,13 @@ +{{- if .Messages }} +{{- if .System }}System: {{ .System }} +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}User: +{{ else if eq .Role "assistant" }}Falcon: +{{ end }}{{ .Content }} +{{ end }}Falcon: +{{ else }} {{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}User: {{ .Prompt }} -{{ end }}Assistant: {{ .Response }} \ No newline at end of file +{{ end }}Assistant: {{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/gemma-instruct.gotmpl b/template/gemma-instruct.gotmpl index 91b9883a1..870a8f2e2 100644 --- a/template/gemma-instruct.gotmpl +++ b/template/gemma-instruct.gotmpl @@ -1,4 +1,16 @@ +{{- if .Messages }} +{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }}user +{{- if and $.System (eq $index 0) }} +{{ $.System }} +{{- end }} +{{- else if eq .Role "assistant" }}model +{{- end }} +{{ .Content }} +{{ end }}model +{{ else }} user {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} model -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/granite-instruct.gotmpl b/template/granite-instruct.gotmpl index 2ede647f5..327ff3eef 100644 --- a/template/granite-instruct.gotmpl +++ b/template/granite-instruct.gotmpl @@ -1,3 +1,16 @@ +{{- if .Messages }} +{{- if .System }}System: +{{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}Question: +{{- else if eq .Role "assistant" }}Answer: +{{- end }} +{{ .Content }} + +{{ end }}Answer: +{{ else }} {{ if .System }} System: {{ .System }} @@ -6,4 +19,5 @@ System: {{ .Prompt }} {{ end }}Answer: -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/llama2-chat.gotmpl b/template/llama2-chat.gotmpl index a739f6908..6327d5812 100644 --- a/template/llama2-chat.gotmpl +++ b/template/llama2-chat.gotmpl @@ -1,3 +1,16 @@ +{{- if .Messages }} +{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }}[INST] {{ if eq $index 0 }}<> +{{- if $.System }} +{{ $.System }} +{{ end }}<> + +{{ end }}{{ .Content }} +{{- else }} [/INST] {{ .Content }} +{{- end }} +{{- end }} [/INST] +{{- else }} [INST] <>{{ .System }}<> -{{ .Prompt }} [/INST] {{ .Response }} \ No newline at end of file +{{ .Prompt }} [/INST] {{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/llama3-instruct.gotmpl b/template/llama3-instruct.gotmpl index 36d0218b6..9c81a9535 100644 --- a/template/llama3-instruct.gotmpl +++ b/template/llama3-instruct.gotmpl @@ -1,7 +1,19 @@ +{{- if .Messages }} +{{- if .System }}<|start_header_id|>system<|end_header_id|> + +{{ .System }}<|eot_id|> +{{- end }} +{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|> + +{{ .Content }}<|eot_id|> +{{- end }}<|start_header_id|>assistant<|end_header_id|> + +{{ else }} {{ if .System }}<|start_header_id|>system<|end_header_id|> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> -{{ .Response }}<|eot_id|> \ No newline at end of file +{{ .Response }}<|eot_id|> +{{- end }} \ No newline at end of file diff --git a/template/magicoder.gotmpl b/template/magicoder.gotmpl index 306972ecc..73a58127c 100644 --- a/template/magicoder.gotmpl +++ b/template/magicoder.gotmpl @@ -1,7 +1,20 @@ +{{- if .Messages }} +{{- if .System }}{{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}@@ Instruction +{{- else if eq .Role "assistant" }}@@ Response +{{- end }} +{{ .Content }} + +{{ end }}@@ Response +{{ else }} {{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}@@ Instruction {{ .Prompt }} {{ end }}@@ Response -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/mistral-instruct.gotmpl b/template/mistral-instruct.gotmpl index dcf172853..eb3d5ced2 100644 --- a/template/mistral-instruct.gotmpl +++ b/template/mistral-instruct.gotmpl @@ -1,6 +1,9 @@ -{{ if .System }}<|im_start|>system -{{ .System }}<|im_end|> -{{ end }}{{ if .Prompt }}<|im_start|>user -{{ .Prompt }}<|im_end|> -{{ end }}<|im_start|>assistant -{{ .Response }}<|im_end|> \ No newline at end of file +{{- if .Messages }} +{{- range $index, $_ := .Messages }} +{{- if eq .Role "user" }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }} +{{ end }}{{ .Content }} +{{- else if eq .Role "assistant" }}[/INST] {{ .Content }} +{{- end }} +{{- end }}[/INST] +{{- else }}[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST] {{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/openchat.gotmpl b/template/openchat.gotmpl index d2ca38685..d5e1cbb0d 100644 --- a/template/openchat.gotmpl +++ b/template/openchat.gotmpl @@ -1 +1,11 @@ -{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|> \ No newline at end of file +{{- if .Messages }} +{{- if .System }}GPT Correct System: {{ .System }}<|end_of_turn|> +{{- end }} +{{- range .Messages }}GPT Correct +{{- if eq .Role "user" }} User: +{{- else if eq .Role "assistant" }} Assistant: +{{- end }} {{ .Content }}<|end_of_turn|> +{{- end }}GPT Correct Assistant: +{{- else }} +{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|> +{{- end }} \ No newline at end of file diff --git a/template/phi-3.gotmpl b/template/phi-3.gotmpl index bf26dcee2..a3558d2b7 100644 --- a/template/phi-3.gotmpl +++ b/template/phi-3.gotmpl @@ -1,6 +1,15 @@ +{{- if .Messages }} +{{- if .System }}<|system|> +{{ .System }}<|end|> +{{ end }} +{{- range .Messages }}<|{{ .Role }}|> +{{ .Content }}<|end|> +{{ end }}<|assistant|> +{{ else }} {{ if .System }}<|system|> {{ .System }}<|end|> {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }}<|end|> {{ end }}<|assistant|> -{{ .Response }}<|end|> \ No newline at end of file +{{ .Response }}<|end|> +{{- end }} \ No newline at end of file diff --git a/template/solar-instruct.gotmpl b/template/solar-instruct.gotmpl index c275a26a3..caa6e8e77 100644 --- a/template/solar-instruct.gotmpl +++ b/template/solar-instruct.gotmpl @@ -1,3 +1,16 @@ +{{- if .Messages }} +{{- if .System }}### System: +{{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}### User: +{{ .Content }} +{{ else if eq .Role "assistant" }}### Assistant: +{{ .Content }} +{{ end }} +{{ end }}### Assistant: +{{ else }} {{ if .System }}### System: {{ .System }} @@ -5,4 +18,5 @@ {{ .Prompt }} {{ end }}### Assistant: -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/starcoder2-instruct.gotmpl b/template/starcoder2-instruct.gotmpl index 33357e54e..7d7ff9326 100644 --- a/template/starcoder2-instruct.gotmpl +++ b/template/starcoder2-instruct.gotmpl @@ -1,3 +1,17 @@ +{{- if .Messages }} +{{- if .System }}{{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}### Instruction +{{ .Content }} + +{{ else if eq .Role "assistant" }}### Response +{{ .Content }}<|endoftext|> + +{{ end }} +{{- end }}### Response +{{ else }} {{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Instruction @@ -7,3 +21,4 @@ {{ end }}### Response {{ .Response }}<|endoftext|> +{{- end }} \ No newline at end of file diff --git a/template/template_test.go b/template/template_test.go index ac16bd606..428cdc77c 100644 --- a/template/template_test.go +++ b/template/template_test.go @@ -8,9 +8,10 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" - "text/template" + "github.com/google/go-cmp/cmp" "github.com/ollama/ollama/api" "github.com/ollama/ollama/llm" ) @@ -47,7 +48,7 @@ func TestNamed(t *testing.T) { t.Fatal(err) } - tmpl, err := template.New(s).Parse(b.String()) + tmpl, err := Parse(b.String()) if err != nil { t.Fatal(err) } @@ -60,6 +61,70 @@ func TestNamed(t *testing.T) { } } +func TestTemplate(t *testing.T) { + cases := make(map[string][]api.Message) + for _, mm := range [][]api.Message{ + { + {Role: "user", Content: "Hello, how are you?"}, + }, + { + {Role: "user", Content: "Hello, how are you?"}, + {Role: "assistant", Content: "I'm doing great. How can I help you today?"}, + {Role: "user", Content: "I'd like to show off how chat templating works!"}, + }, + { + {Role: "system", Content: "You are a helpful assistant."}, + {Role: "user", Content: "Hello, how are you?"}, + {Role: "assistant", Content: "I'm doing great. How can I help you today?"}, + {Role: "user", Content: "I'd like to show off how chat templating works!"}, + }, + } { + var roles []string + for _, m := range mm { + roles = append(roles, m.Role) + } + + cases[strings.Join(roles, "-")] = mm + } + + matches, err := filepath.Glob("*.gotmpl") + if err != nil { + t.Fatal(err) + } + + for _, match := range matches { + t.Run(match, func(t *testing.T) { + bts, err := os.ReadFile(match) + if err != nil { + t.Fatal(err) + } + + tmpl, err := Parse(string(bts)) + if err != nil { + t.Fatal(err) + } + + for n, tt := range cases { + t.Run(n, func(t *testing.T) { + var actual bytes.Buffer + if err := tmpl.Execute(&actual, Values{Messages: tt}); err != nil { + t.Fatal(err) + } + + expect, err := os.ReadFile(filepath.Join("testdata", match, n)) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(actual.Bytes(), expect); diff != "" { + t.Errorf("mismatch (-got +want):\n%s", diff) + } + }) + } + }) + } +} + func TestParse(t *testing.T) { cases := []struct { template string diff --git a/template/testdata/alfred.gotmpl/system-user-assistant-user b/template/testdata/alfred.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..03e23ea9c --- /dev/null +++ b/template/testdata/alfred.gotmpl/system-user-assistant-user @@ -0,0 +1 @@ +You are a helpful assistant.Hello, how are you?I'm doing great. How can I help you today?I'd like to show off how chat templating works! \ No newline at end of file diff --git a/template/testdata/alfred.gotmpl/user b/template/testdata/alfred.gotmpl/user new file mode 100644 index 000000000..7c884a6f0 --- /dev/null +++ b/template/testdata/alfred.gotmpl/user @@ -0,0 +1 @@ +Hello, how are you? \ No newline at end of file diff --git a/template/testdata/alfred.gotmpl/user-assistant-user b/template/testdata/alfred.gotmpl/user-assistant-user new file mode 100644 index 000000000..a60701ed7 --- /dev/null +++ b/template/testdata/alfred.gotmpl/user-assistant-user @@ -0,0 +1 @@ +Hello, how are you?I'm doing great. How can I help you today?I'd like to show off how chat templating works! \ No newline at end of file diff --git a/template/testdata/alpaca.gotmpl/system-user-assistant-user b/template/testdata/alpaca.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..20182d829 --- /dev/null +++ b/template/testdata/alpaca.gotmpl/system-user-assistant-user @@ -0,0 +1,10 @@ +You are a helpful assistant.### Instruction: +Hello, how are you? + +### Response: +I'm doing great. How can I help you today? + +### Instruction: +I'd like to show off how chat templating works! + +### Response: diff --git a/template/testdata/alpaca.gotmpl/user b/template/testdata/alpaca.gotmpl/user new file mode 100644 index 000000000..a0ce5dec7 --- /dev/null +++ b/template/testdata/alpaca.gotmpl/user @@ -0,0 +1,4 @@ +### Instruction: +Hello, how are you? + +### Response: diff --git a/template/testdata/alpaca.gotmpl/user-assistant-user b/template/testdata/alpaca.gotmpl/user-assistant-user new file mode 100644 index 000000000..6c5e23ff5 --- /dev/null +++ b/template/testdata/alpaca.gotmpl/user-assistant-user @@ -0,0 +1,10 @@ +### Instruction: +Hello, how are you? + +### Response: +I'm doing great. How can I help you today? + +### Instruction: +I'd like to show off how chat templating works! + +### Response: diff --git a/template/testdata/chatml.gotmpl/system-user-assistant-user b/template/testdata/chatml.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..8b013fcfb --- /dev/null +++ b/template/testdata/chatml.gotmpl/system-user-assistant-user @@ -0,0 +1,9 @@ +<|im_start|>system +You are a helpful assistant.<|im_end|> +<|im_start|>user +Hello, how are you?<|im_end|> +<|im_start|>assistant +I'm doing great. How can I help you today?<|im_end|> +<|im_start|>user +I'd like to show off how chat templating works!<|im_end|> +<|im_start|>assistant diff --git a/template/testdata/chatml.gotmpl/user b/template/testdata/chatml.gotmpl/user new file mode 100644 index 000000000..aa9e597a4 --- /dev/null +++ b/template/testdata/chatml.gotmpl/user @@ -0,0 +1,3 @@ +<|im_start|>user +Hello, how are you?<|im_end|> +<|im_start|>assistant diff --git a/template/testdata/chatml.gotmpl/user-assistant-user b/template/testdata/chatml.gotmpl/user-assistant-user new file mode 100644 index 000000000..a7cba4de0 --- /dev/null +++ b/template/testdata/chatml.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +<|im_start|>user +Hello, how are you?<|im_end|> +<|im_start|>assistant +I'm doing great. How can I help you today?<|im_end|> +<|im_start|>user +I'd like to show off how chat templating works!<|im_end|> +<|im_start|>assistant diff --git a/template/testdata/chatqa.gotmpl/system-user-assistant-user b/template/testdata/chatqa.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..98fd59bfa --- /dev/null +++ b/template/testdata/chatqa.gotmpl/system-user-assistant-user @@ -0,0 +1,9 @@ +System: You are a helpful assistant. + +User: Hello, how are you? + +Assistant: I'm doing great. How can I help you today? + +User: I'd like to show off how chat templating works! + +Assistant: \ No newline at end of file diff --git a/template/testdata/chatqa.gotmpl/user b/template/testdata/chatqa.gotmpl/user new file mode 100644 index 000000000..9e7cf702d --- /dev/null +++ b/template/testdata/chatqa.gotmpl/user @@ -0,0 +1,3 @@ +User: Hello, how are you? + +Assistant: \ No newline at end of file diff --git a/template/testdata/chatqa.gotmpl/user-assistant-user b/template/testdata/chatqa.gotmpl/user-assistant-user new file mode 100644 index 000000000..405bbe12c --- /dev/null +++ b/template/testdata/chatqa.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +User: Hello, how are you? + +Assistant: I'm doing great. How can I help you today? + +User: I'd like to show off how chat templating works! + +Assistant: \ No newline at end of file diff --git a/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..fdd0fc8b4 --- /dev/null +++ b/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,11 @@ +Source: system + + You are a helpful assistant. Source: user + + Hello, how are you? Source: assistant + + I'm doing great. How can I help you today? Source: user + + I'd like to show off how chat templating works! Source: assistant +Destination: user + diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user b/template/testdata/codellama-70b-instruct.gotmpl/user new file mode 100644 index 000000000..9e7174a84 --- /dev/null +++ b/template/testdata/codellama-70b-instruct.gotmpl/user @@ -0,0 +1,5 @@ +Source: user + + Hello, how are you? Source: assistant +Destination: user + diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..b4ba1736b --- /dev/null +++ b/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user @@ -0,0 +1,9 @@ +Source: user + + Hello, how are you? Source: assistant + + I'm doing great. How can I help you today? Source: user + + I'd like to show off how chat templating works! Source: assistant +Destination: user + diff --git a/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user b/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..16e45e5b6 --- /dev/null +++ b/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,8 @@ +System: You are a helpful assistant. +User: +Hello, how are you? +Falcon: +I'm doing great. How can I help you today? +User: +I'd like to show off how chat templating works! +Falcon: diff --git a/template/testdata/falcon-instruct.gotmpl/user b/template/testdata/falcon-instruct.gotmpl/user new file mode 100644 index 000000000..110831a2c --- /dev/null +++ b/template/testdata/falcon-instruct.gotmpl/user @@ -0,0 +1,3 @@ +User: +Hello, how are you? +Falcon: diff --git a/template/testdata/falcon-instruct.gotmpl/user-assistant-user b/template/testdata/falcon-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..b49639ea5 --- /dev/null +++ b/template/testdata/falcon-instruct.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +User: +Hello, how are you? +Falcon: +I'm doing great. How can I help you today? +User: +I'd like to show off how chat templating works! +Falcon: diff --git a/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user b/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..5f6c37324 --- /dev/null +++ b/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,8 @@ +user +You are a helpful assistant. +Hello, how are you? +model +I'm doing great. How can I help you today? +user +I'd like to show off how chat templating works! +model diff --git a/template/testdata/gemma-instruct.gotmpl/user b/template/testdata/gemma-instruct.gotmpl/user new file mode 100644 index 000000000..dc8b30b68 --- /dev/null +++ b/template/testdata/gemma-instruct.gotmpl/user @@ -0,0 +1,3 @@ +user +Hello, how are you? +model diff --git a/template/testdata/gemma-instruct.gotmpl/user-assistant-user b/template/testdata/gemma-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..1185924b0 --- /dev/null +++ b/template/testdata/gemma-instruct.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +user +Hello, how are you? +model +I'm doing great. How can I help you today? +user +I'd like to show off how chat templating works! +model diff --git a/template/testdata/granite-instruct.gotmpl/system-user-assistant-user b/template/testdata/granite-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..a732a77fb --- /dev/null +++ b/template/testdata/granite-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,13 @@ +System: +You are a helpful assistant. + +Question: +Hello, how are you? + +Answer: +I'm doing great. How can I help you today? + +Question: +I'd like to show off how chat templating works! + +Answer: diff --git a/template/testdata/granite-instruct.gotmpl/user b/template/testdata/granite-instruct.gotmpl/user new file mode 100644 index 000000000..7abd2ea35 --- /dev/null +++ b/template/testdata/granite-instruct.gotmpl/user @@ -0,0 +1,4 @@ +Question: +Hello, how are you? + +Answer: diff --git a/template/testdata/granite-instruct.gotmpl/user-assistant-user b/template/testdata/granite-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..da5e43eae --- /dev/null +++ b/template/testdata/granite-instruct.gotmpl/user-assistant-user @@ -0,0 +1,10 @@ +Question: +Hello, how are you? + +Answer: +I'm doing great. How can I help you today? + +Question: +I'd like to show off how chat templating works! + +Answer: diff --git a/template/testdata/llama2-chat.gotmpl/system-user-assistant-user b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..fc2679bf0 --- /dev/null +++ b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user @@ -0,0 +1,5 @@ +[INST] <> +You are a helpful assistant. +<> + +Hello, how are you? [/INST] I'm doing great. How can I help you today?[INST] I'd like to show off how chat templating works! [/INST] \ No newline at end of file diff --git a/template/testdata/llama2-chat.gotmpl/user b/template/testdata/llama2-chat.gotmpl/user new file mode 100644 index 000000000..ceef9bdbb --- /dev/null +++ b/template/testdata/llama2-chat.gotmpl/user @@ -0,0 +1,3 @@ +[INST] <><> + +Hello, how are you? [/INST] \ No newline at end of file diff --git a/template/testdata/llama2-chat.gotmpl/user-assistant-user b/template/testdata/llama2-chat.gotmpl/user-assistant-user new file mode 100644 index 000000000..42b4c5294 --- /dev/null +++ b/template/testdata/llama2-chat.gotmpl/user-assistant-user @@ -0,0 +1,3 @@ +[INST] <><> + +Hello, how are you? [/INST] I'm doing great. How can I help you today?[INST] I'd like to show off how chat templating works! [/INST] \ No newline at end of file diff --git a/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user b/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..6740bcb4d --- /dev/null +++ b/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,10 @@ +<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I'm doing great. How can I help you today?<|eot_id|><|start_header_id|>user<|end_header_id|> + +I'd like to show off how chat templating works!<|eot_id|><|start_header_id|>assistant<|end_header_id|> + diff --git a/template/testdata/llama3-instruct.gotmpl/user b/template/testdata/llama3-instruct.gotmpl/user new file mode 100644 index 000000000..470aa028f --- /dev/null +++ b/template/testdata/llama3-instruct.gotmpl/user @@ -0,0 +1,4 @@ +<|start_header_id|>user<|end_header_id|> + +Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + diff --git a/template/testdata/llama3-instruct.gotmpl/user-assistant-user b/template/testdata/llama3-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..6dd768af5 --- /dev/null +++ b/template/testdata/llama3-instruct.gotmpl/user-assistant-user @@ -0,0 +1,8 @@ +<|start_header_id|>user<|end_header_id|> + +Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I'm doing great. How can I help you today?<|eot_id|><|start_header_id|>user<|end_header_id|> + +I'd like to show off how chat templating works!<|eot_id|><|start_header_id|>assistant<|end_header_id|> + diff --git a/template/testdata/magicoder.gotmpl/system-user-assistant-user b/template/testdata/magicoder.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..c966a861d --- /dev/null +++ b/template/testdata/magicoder.gotmpl/system-user-assistant-user @@ -0,0 +1,12 @@ +You are a helpful assistant. + +@@ Instruction +Hello, how are you? + +@@ Response +I'm doing great. How can I help you today? + +@@ Instruction +I'd like to show off how chat templating works! + +@@ Response diff --git a/template/testdata/magicoder.gotmpl/user b/template/testdata/magicoder.gotmpl/user new file mode 100644 index 000000000..ccfb02bd2 --- /dev/null +++ b/template/testdata/magicoder.gotmpl/user @@ -0,0 +1,4 @@ +@@ Instruction +Hello, how are you? + +@@ Response diff --git a/template/testdata/magicoder.gotmpl/user-assistant-user b/template/testdata/magicoder.gotmpl/user-assistant-user new file mode 100644 index 000000000..3aea6dab9 --- /dev/null +++ b/template/testdata/magicoder.gotmpl/user-assistant-user @@ -0,0 +1,10 @@ +@@ Instruction +Hello, how are you? + +@@ Response +I'm doing great. How can I help you today? + +@@ Instruction +I'd like to show off how chat templating works! + +@@ Response diff --git a/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..b6b4bf93e --- /dev/null +++ b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,2 @@ +[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?[INST] You are a helpful assistant. +I'd like to show off how chat templating works![/INST] \ No newline at end of file diff --git a/template/testdata/mistral-instruct.gotmpl/user b/template/testdata/mistral-instruct.gotmpl/user new file mode 100644 index 000000000..b04871e5d --- /dev/null +++ b/template/testdata/mistral-instruct.gotmpl/user @@ -0,0 +1 @@ +[INST] Hello, how are you?[/INST] \ No newline at end of file diff --git a/template/testdata/mistral-instruct.gotmpl/user-assistant-user b/template/testdata/mistral-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..b473e0df0 --- /dev/null +++ b/template/testdata/mistral-instruct.gotmpl/user-assistant-user @@ -0,0 +1 @@ +[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?[INST] I'd like to show off how chat templating works![/INST] \ No newline at end of file diff --git a/template/testdata/openchat.gotmpl/system-user-assistant-user b/template/testdata/openchat.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..1214c1264 --- /dev/null +++ b/template/testdata/openchat.gotmpl/system-user-assistant-user @@ -0,0 +1 @@ +GPT Correct System: You are a helpful assistant.<|end_of_turn|>GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant: \ No newline at end of file diff --git a/template/testdata/openchat.gotmpl/user b/template/testdata/openchat.gotmpl/user new file mode 100644 index 000000000..611daa83e --- /dev/null +++ b/template/testdata/openchat.gotmpl/user @@ -0,0 +1 @@ +GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: \ No newline at end of file diff --git a/template/testdata/openchat.gotmpl/user-assistant-user b/template/testdata/openchat.gotmpl/user-assistant-user new file mode 100644 index 000000000..f97b02b9c --- /dev/null +++ b/template/testdata/openchat.gotmpl/user-assistant-user @@ -0,0 +1 @@ +GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant: \ No newline at end of file diff --git a/template/testdata/phi-3.gotmpl/system-user-assistant-user b/template/testdata/phi-3.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..6109a9a24 --- /dev/null +++ b/template/testdata/phi-3.gotmpl/system-user-assistant-user @@ -0,0 +1,9 @@ +<|system|> +You are a helpful assistant.<|end|> +<|user|> +Hello, how are you?<|end|> +<|assistant|> +I'm doing great. How can I help you today?<|end|> +<|user|> +I'd like to show off how chat templating works!<|end|> +<|assistant|> diff --git a/template/testdata/phi-3.gotmpl/user b/template/testdata/phi-3.gotmpl/user new file mode 100644 index 000000000..feb96e7c9 --- /dev/null +++ b/template/testdata/phi-3.gotmpl/user @@ -0,0 +1,3 @@ +<|user|> +Hello, how are you?<|end|> +<|assistant|> diff --git a/template/testdata/phi-3.gotmpl/user-assistant-user b/template/testdata/phi-3.gotmpl/user-assistant-user new file mode 100644 index 000000000..db79d01c1 --- /dev/null +++ b/template/testdata/phi-3.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +<|user|> +Hello, how are you?<|end|> +<|assistant|> +I'm doing great. How can I help you today?<|end|> +<|user|> +I'd like to show off how chat templating works!<|end|> +<|assistant|> diff --git a/template/testdata/solar-instruct.gotmpl/system-user-assistant-user b/template/testdata/solar-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..28c1730ab --- /dev/null +++ b/template/testdata/solar-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,13 @@ +### System: +You are a helpful assistant. + +### User: +Hello, how are you? + +### Assistant: +I'm doing great. How can I help you today? + +### User: +I'd like to show off how chat templating works! + +### Assistant: diff --git a/template/testdata/solar-instruct.gotmpl/user b/template/testdata/solar-instruct.gotmpl/user new file mode 100644 index 000000000..3a43382af --- /dev/null +++ b/template/testdata/solar-instruct.gotmpl/user @@ -0,0 +1,4 @@ +### User: +Hello, how are you? + +### Assistant: diff --git a/template/testdata/solar-instruct.gotmpl/user-assistant-user b/template/testdata/solar-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..8553e73ba --- /dev/null +++ b/template/testdata/solar-instruct.gotmpl/user-assistant-user @@ -0,0 +1,10 @@ +### User: +Hello, how are you? + +### Assistant: +I'm doing great. How can I help you today? + +### User: +I'd like to show off how chat templating works! + +### Assistant: diff --git a/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..5b718b3ec --- /dev/null +++ b/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user @@ -0,0 +1,12 @@ +You are a helpful assistant. + +### Instruction +Hello, how are you? + +### Response +I'm doing great. How can I help you today?<|endoftext|> + +### Instruction +I'd like to show off how chat templating works! + +### Response diff --git a/template/testdata/starcoder2-instruct.gotmpl/user b/template/testdata/starcoder2-instruct.gotmpl/user new file mode 100644 index 000000000..11b0be1fe --- /dev/null +++ b/template/testdata/starcoder2-instruct.gotmpl/user @@ -0,0 +1,4 @@ +### Instruction +Hello, how are you? + +### Response diff --git a/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user new file mode 100644 index 000000000..d99feabb0 --- /dev/null +++ b/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user @@ -0,0 +1,10 @@ +### Instruction +Hello, how are you? + +### Response +I'm doing great. How can I help you today?<|endoftext|> + +### Instruction +I'd like to show off how chat templating works! + +### Response diff --git a/template/testdata/vicuna.gotmpl/system-user-assistant-user b/template/testdata/vicuna.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..50d2f92c1 --- /dev/null +++ b/template/testdata/vicuna.gotmpl/system-user-assistant-user @@ -0,0 +1,6 @@ +You are a helpful assistant. + +USER: Hello, how are you? +ASSISTANT: I'm doing great. How can I help you today? +USER: I'd like to show off how chat templating works! +ASSISTANT: \ No newline at end of file diff --git a/template/testdata/vicuna.gotmpl/user b/template/testdata/vicuna.gotmpl/user new file mode 100644 index 000000000..cbe5ef709 --- /dev/null +++ b/template/testdata/vicuna.gotmpl/user @@ -0,0 +1,2 @@ +USER: Hello, how are you? +ASSISTANT: \ No newline at end of file diff --git a/template/testdata/vicuna.gotmpl/user-assistant-user b/template/testdata/vicuna.gotmpl/user-assistant-user new file mode 100644 index 000000000..9172547e3 --- /dev/null +++ b/template/testdata/vicuna.gotmpl/user-assistant-user @@ -0,0 +1,4 @@ +USER: Hello, how are you? +ASSISTANT: I'm doing great. How can I help you today? +USER: I'd like to show off how chat templating works! +ASSISTANT: \ No newline at end of file diff --git a/template/testdata/zephyr.gotmpl/system-user-assistant-user b/template/testdata/zephyr.gotmpl/system-user-assistant-user new file mode 100644 index 000000000..03d43fc34 --- /dev/null +++ b/template/testdata/zephyr.gotmpl/system-user-assistant-user @@ -0,0 +1,9 @@ +<|system|> +You are a helpful assistant. +<|user|> +Hello, how are you? +<|assistant|> +I'm doing great. How can I help you today? +<|user|> +I'd like to show off how chat templating works! +<|assistant|> diff --git a/template/testdata/zephyr.gotmpl/user b/template/testdata/zephyr.gotmpl/user new file mode 100644 index 000000000..6cefdaa0f --- /dev/null +++ b/template/testdata/zephyr.gotmpl/user @@ -0,0 +1,3 @@ +<|user|> +Hello, how are you? +<|assistant|> diff --git a/template/testdata/zephyr.gotmpl/user-assistant-user b/template/testdata/zephyr.gotmpl/user-assistant-user new file mode 100644 index 000000000..3937b006a --- /dev/null +++ b/template/testdata/zephyr.gotmpl/user-assistant-user @@ -0,0 +1,7 @@ +<|user|> +Hello, how are you? +<|assistant|> +I'm doing great. How can I help you today? +<|user|> +I'd like to show off how chat templating works! +<|assistant|> diff --git a/template/vicuna.gotmpl b/template/vicuna.gotmpl index 174c1a353..2e13e990d 100644 --- a/template/vicuna.gotmpl +++ b/template/vicuna.gotmpl @@ -1,3 +1,14 @@ +{{- if .Messages }} +{{- if .System }}{{ .System }} + +{{ end }} +{{- range .Messages }} +{{- if eq .Role "user" }}USER: {{ .Content }} +{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }} +{{ end }} +{{- end }}ASSISTANT: +{{- else }} {{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}USER: {{ .Prompt }} -{{ end }}ASSISTANT: {{ .Response }} \ No newline at end of file +{{ end }}ASSISTANT: {{ .Response }} +{{- end }} \ No newline at end of file diff --git a/template/zephyr.gotmpl b/template/zephyr.gotmpl index aac0c7a1f..e66688480 100644 --- a/template/zephyr.gotmpl +++ b/template/zephyr.gotmpl @@ -1,6 +1,15 @@ +{{- if .Messages }} +{{- if .System }}<|system|> +{{ .System }} +{{ end }} +{{- range .Messages }}<|{{ .Role }}|> +{{ .Content }} +{{ end }}<|assistant|> +{{ else }} {{ if .System }}<|system|> {{ .System }} {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }} {{ end }}<|assistant|> -{{ .Response }} \ No newline at end of file +{{ .Response }} +{{- end }} \ No newline at end of file From e4ff73297db2f53f1ea4b603df5670c5bde6a944 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Mon, 8 Jul 2024 22:32:15 -0700 Subject: [PATCH 6/7] server: fix model reloads when setting `OLLAMA_NUM_PARALLEL` (#5560) * server: fix unneeded model reloads when setting `OLLAMA_NUM_PARALLEL` * remove whitespace change * undo some changes --- server/sched.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/server/sched.go b/server/sched.go index 9dff2ae07..48047bfec 100644 --- a/server/sched.go +++ b/server/sched.go @@ -133,10 +133,6 @@ func (s *Scheduler) processPending(ctx context.Context) { numParallel = 1 slog.Warn("multimodal models don't support parallel requests yet") } - // Keep NumCtx and numParallel in sync - if numParallel > 1 { - pending.opts.NumCtx = pending.origNumCtx * numParallel - } for { cpus := s.getCpuFn() @@ -234,9 +230,10 @@ func (s *Scheduler) processPending(ctx context.Context) { // simplifying assumption of defaultParallel when in CPU mode if numParallel <= 0 { numParallel = defaultParallel - pending.opts.NumCtx = pending.origNumCtx * numParallel } + pending.opts.NumCtx = pending.origNumCtx * numParallel + if loadedCount == 0 { slog.Debug("cpu mode with first model, loading") s.loadFn(pending, ggml, gpus, numParallel) From b51e3b63ac7bc995e99f3a8f7c1b507a1f8fb5d9 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 9 Jul 2024 11:17:44 -0700 Subject: [PATCH 7/7] Statically link c++ and thread lib This makes sure we statically link the c++ and thread library on windows to avoid unnecessary runtime dependencies on non-standard DLLs --- .github/workflows/release.yaml | 5 ----- llm/llm.go | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 0005c69d3..61ca3c433 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -304,11 +304,6 @@ jobs: write-host "Installing plugin" & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet write-host "plugin installed" - - name: remove unwanted mingw dll.a files - run: | - Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libpthread.dll.a" -File | Remove-Item -Force - Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libwinpthread.dll.a" -File | Remove-Item -Force - Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libstdc++.dll.a" -File | Remove-Item -Force - uses: actions/setup-go@v5 with: go-version-file: go.mod diff --git a/llm/llm.go b/llm/llm.go index 88c0258d6..f2a5e557a 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -4,8 +4,8 @@ package llm // #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread // #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal // #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src -// #cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src -// #cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src +// #cgo windows,amd64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src +// #cgo windows,arm64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src // #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src // #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src // #include