From 269ed6e6a2cea822ab137d40d5c70c8bf09470f8 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 17 Jun 2024 10:38:55 -0700
Subject: [PATCH 1/7] update message processing

---
 server/images.go          |  17 +-
 server/prompt.go          | 241 ++++--------------
 server/prompt_test.go     | 317 ++++++++++++------------
 server/routes.go          | 508 ++++++++++++--------------------------
 template/template.go      | 169 ++++++++++++-
 template/template_test.go | 153 +++++++++++-
 6 files changed, 685 insertions(+), 720 deletions(-)

diff --git a/server/images.go b/server/images.go
index a62991f16..688d5dcae 100644
--- a/server/images.go
+++ b/server/images.go
@@ -34,6 +34,8 @@ import (
 	"github.com/ollama/ollama/version"
 )
 
+var errCapabilityCompletion = errors.New("completion")
+
 type Capability string
 
 const CapabilityCompletion = Capability("completion")
@@ -62,7 +64,10 @@ type Model struct {
 	Template *template.Template
 }
 
-func (m *Model) Has(caps ...Capability) bool {
+// CheckCapabilities checks if the model has the specified capabilities, returning an error
+// describing any missing or unknown capabilities.
+func (m *Model) CheckCapabilities(caps ...Capability) error {
+	var errs []error
 	for _, cap := range caps {
 		switch cap {
 		case CapabilityCompletion:
@@ -81,15 +86,19 @@ func (m *Model) Has(caps ...Capability) bool {
 			}
 
 			if _, ok := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]; ok {
-				return false
+				errs = append(errs, errCapabilityCompletion)
 			}
 		default:
 			slog.Error("unknown capability", "capability", cap)
-			return false
+			return fmt.Errorf("unknown capability: %s", cap)
 		}
 	}
 
-	return true
+	if err := errors.Join(errs...); err != nil {
+		return fmt.Errorf("missing capabilities: %w", err)
+	}
+
+	return nil
 }
 
 func (m *Model) String() string {
diff --git a/server/prompt.go b/server/prompt.go
index bfc319a50..5016fbe14 100644
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -1,217 +1,74 @@
 package server
 
 import (
-	"fmt"
+	"bytes"
+	"context"
 	"log/slog"
-	"strings"
-
-	"text/template/parse"
+	"slices"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/template"
 )
 
-// isResponseNode checks if the node contains .Response
-func isResponseNode(node *parse.ActionNode) bool {
-	for _, cmd := range node.Pipe.Cmds {
-		for _, arg := range cmd.Args {
-			if fieldNode, ok := arg.(*parse.FieldNode); ok && len(fieldNode.Ident) > 0 {
-				if fieldNode.Ident[0] == "Response" {
-					return true
-				}
-			}
+func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) {
+	// extract system messages which should always be included
+	var system []api.Message
+	msgs = slices.DeleteFunc(msgs, func(m api.Message) bool {
+		if m.Role == "system" {
+			system = append(system, m)
+			return true
 		}
-	}
-	return false
-}
 
-// formatTemplateForResponse formats the template AST to:
-// 1. remove all nodes after the first .Response (if generate=true)
-// 2. add a .Response node to the end if it doesn't exist
-// TODO(jmorganca): this should recursively cut the template before the first .Response
-func formatTemplateForResponse(tmpl *template.Template, generate bool) {
-	var found bool
-	for i, node := range tmpl.Tree.Root.Nodes {
-		if actionNode, ok := node.(*parse.ActionNode); ok {
-			if isResponseNode(actionNode) {
-				found = true
-				if generate {
-					tmpl.Tree.Root.Nodes = tmpl.Tree.Root.Nodes[:i+1]
-					break
-				}
-			}
+		return false
+	})
+
+	if len(system) == 0 && r.model.System != "" {
+		// add model system prompt since it wasn't provided
+		system = append(system, api.Message{Role: "system", Content: r.model.System})
+	}
+
+	n := max(len(msgs)-1, 0) // clamp to 0 so msgs[n:] stays valid when only system messages were supplied
+	for i := n - 1; i >= 0; i-- {
+		var b bytes.Buffer
+		if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil {
+			return "", nil, err
 		}
-	}
 
-	if !found {
-		// add the response node if it doesn't exist
-		responseFieldNode := &parse.FieldNode{NodeType: parse.NodeField, Ident: []string{"Response"}}
-		responsePipeNode := &parse.PipeNode{NodeType: parse.NodePipe, Cmds: []*parse.CommandNode{{NodeType: parse.NodeCommand, Args: []parse.Node{responseFieldNode}}}}
-		responseActionNode := &parse.ActionNode{NodeType: parse.NodeAction, Pipe: responsePipeNode}
-		tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, responseActionNode)
-	}
-}
-
-// Prompt renders a prompt from a template. If generate is set to true,
-// the response and parts of the template following it are not rendered
-func Prompt(tmpl *template.Template, system, prompt, response string, generate bool) (string, error) {
-	formatTemplateForResponse(tmpl, generate)
-
-	vars := map[string]any{
-		"System":   system,
-		"Prompt":   prompt,
-		"Response": response,
-	}
-
-	var sb strings.Builder
-	if err := tmpl.Execute(&sb, vars); err != nil {
-		return "", err
-	}
-
-	return sb.String(), nil
-}
-
-func countTokens(tmpl *template.Template, system string, prompt string, response string, encode func(string) ([]int, error)) (int, error) {
-	rendered, err := Prompt(tmpl, system, prompt, response, false)
-	if err != nil {
-		return 0, err
-	}
-
-	tokens, err := encode(rendered)
-	if err != nil {
-		slog.Error("failed to encode prompt", "err", err)
-		return 0, err
-	}
-
-	return len(tokens), err
-}
-
-// ChatPrompt builds up a prompt from a series of messages, truncating based on context window size
-func ChatPrompt(tmpl *template.Template, messages []api.Message, window int, encode func(string) ([]int, error)) (string, error) {
-	type prompt struct {
-		System   string
-		Prompt   string
-		Response string
-
-		images []int
-		tokens int
-	}
-
-	var p prompt
-
-	// iterate through messages to build up {system,user,response} prompts
-	var imgId int
-	var prompts []prompt
-	for _, msg := range messages {
-		switch strings.ToLower(msg.Role) {
-		case "system":
-			if p.System != "" || p.Prompt != "" || p.Response != "" {
-				prompts = append(prompts, p)
-				p = prompt{}
-			}
-
-			p.System = msg.Content
-		case "user":
-			if p.Prompt != "" || p.Response != "" {
-				prompts = append(prompts, p)
-				p = prompt{}
-			}
-
-			var sb strings.Builder
-			for range msg.Images {
-				fmt.Fprintf(&sb, "[img-%d] ", imgId)
-				p.images = append(p.images, imgId)
-				imgId += 1
-			}
-
-			sb.WriteString(msg.Content)
-			p.Prompt = sb.String()
-		case "assistant":
-			if p.Response != "" {
-				prompts = append(prompts, p)
-				p = prompt{}
-			}
-
-			p.Response = msg.Content
-		default:
-			return "", fmt.Errorf("invalid role: %s, role must be one of [system, user, assistant]", msg.Role)
-		}
-	}
-
-	// add final prompt
-	if p.System != "" || p.Prompt != "" || p.Response != "" {
-		prompts = append(prompts, p)
-	}
-
-	// calculate token lengths for each prompt, estimating 768 tokens per images
-	for i, p := range prompts {
-		tokens, err := countTokens(tmpl, p.System, p.Prompt, p.Response, encode)
+		s, err := r.llama.Tokenize(ctx, b.String())
 		if err != nil {
-			return "", err
+			return "", nil, err
 		}
 
-		prompts[i].tokens = tokens + len(prompts[i].images)*768
-	}
-
-	// truncate images and prompts starting from the beginning of the list
-	// until either one prompt remains or the total tokens fits the context window
-	// TODO (jmorganca): this doesn't account for the context window room required for the response
-	for {
-		var required int
-		for _, p := range prompts {
-			required += p.tokens
+		c := len(s)
+		if r.model.ProjectorPaths != nil {
+			for _, m := range msgs[i:] {
+				// TODO: get image embedding length from projector metadata
+				c += 768 * len(m.Images)
+			}
 		}
 
-		required += 1 // for bos token
-
-		if required <= window {
-			slog.Debug("prompt now fits in context window", "required", required, "window", window)
+		if c > r.NumCtx {
+			slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
 			break
+		} else {
+			n = i
 		}
-
-		prompt := &prompts[0]
-
-		if len(prompt.images) > 1 {
-			img := prompt.images[0]
-			slog.Debug("prompt longer than context window, removing image", "id", img, "required", required, "window", window)
-			prompt.images = prompt.images[1:]
-			prompt.Prompt = strings.Replace(prompt.Prompt, fmt.Sprintf(" [img-%d]", img), "", 1)
-			prompt.tokens -= 768
-			continue
-		}
-
-		if len(prompts) > 1 {
-			slog.Debug("required tokens longer than context window, removing first prompt", "prompt", prompts[0].tokens, "required", required, "window", window)
-			system := prompt.System
-			prompts = prompts[1:]
-
-			if system != "" && prompts[0].System == "" {
-				prompts[0].System = system
-
-				tokens, err := countTokens(tmpl, prompts[0].System, prompts[0].Prompt, prompts[0].Response, encode)
-				if err != nil {
-					return "", err
-				}
-
-				prompts[0].tokens = tokens + len(prompts[0].images)*768
-			}
-
-			continue
-		}
-
-		// stop truncating if there's only one prompt left
-		break
 	}
 
-	var sb strings.Builder
-	for i, p := range prompts {
-		// last prompt should leave the response unrendered (for completion)
-		rendered, err := Prompt(tmpl, p.System, p.Prompt, p.Response, i == len(prompts)-1)
-		if err != nil {
-			return "", err
-		}
-		sb.WriteString(rendered)
+	var b bytes.Buffer
+	if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil {
+		return "", nil, err
 	}
 
-	return sb.String(), nil
+	for _, m := range msgs[n:] {
+		for _, i := range m.Images {
+			images = append(images, llm.ImageData{
+				ID:   len(images),
+				Data: i,
+			})
+		}
+	}
+
+	return b.String(), images, nil
 }
diff --git a/server/prompt_test.go b/server/prompt_test.go
index 7df58d0bd..59288b46c 100644
--- a/server/prompt_test.go
+++ b/server/prompt_test.go
@@ -1,215 +1,214 @@
 package server
 
 import (
+	"bytes"
+	"context"
 	"strings"
 	"testing"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/template"
 )
 
-func TestPrompt(t *testing.T) {
-	tests := []struct {
-		name     string
-		template string
-		system   string
-		prompt   string
-		response string
-		generate bool
-		want     string
-	}{
-		{
-			name:     "simple prompt",
-			template: "[INST] {{ .System }} {{ .Prompt }} [/INST]",
-			system:   "You are a Wizard.",
-			prompt:   "What are the potion ingredients?",
-			want:     "[INST] You are a Wizard. What are the potion ingredients? [/INST]",
-		},
-		{
-			name:     "implicit response",
-			template: "[INST] {{ .System }} {{ .Prompt }} [/INST]",
-			system:   "You are a Wizard.",
-			prompt:   "What are the potion ingredients?",
-			response: "I don't know.",
-			want:     "[INST] You are a Wizard. What are the potion ingredients? [/INST]I don't know.",
-		},
-		{
-			name:     "response",
-			template: "[INST] {{ .System }} {{ .Prompt }} [/INST] {{ .Response }}",
-			system:   "You are a Wizard.",
-			prompt:   "What are the potion ingredients?",
-			response: "I don't know.",
-			want:     "[INST] You are a Wizard. What are the potion ingredients? [/INST] I don't know.",
-		},
-		{
-			name:     "cut",
-			template: "<system>{{ .System }}</system><user>{{ .Prompt }}</user><assistant>{{ .Response }}</assistant>",
-			system:   "You are a Wizard.",
-			prompt:   "What are the potion ingredients?",
-			response: "I don't know.",
-			generate: true,
-			want:     "<system>You are a Wizard.</system><user>What are the potion ingredients?</user><assistant>I don't know.",
-		},
-		{
-			name:     "nocut",
-			template: "<system>{{ .System }}</system><user>{{ .Prompt }}</user><assistant>{{ .Response }}</assistant>",
-			system:   "You are a Wizard.",
-			prompt:   "What are the potion ingredients?",
-			response: "I don't know.",
-			want:     "<system>You are a Wizard.</system><user>What are the potion ingredients?</user><assistant>I don't know.</assistant>",
-		},
+type mock struct {
+	llm.LlamaServer
+}
+
+func (m mock) Tokenize(_ context.Context, s string) (tokens []int, err error) {
+	for range strings.Fields(s) {
+		tokens = append(tokens, len(tokens))
 	}
 
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			tmpl, err := template.Parse(tc.template)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			got, err := Prompt(tmpl, tc.system, tc.prompt, tc.response, tc.generate)
-			if err != nil {
-				t.Errorf("error = %v", err)
-			}
-
-			if got != tc.want {
-				t.Errorf("got = %v, want %v", got, tc.want)
-			}
-		})
-	}
+	return
 }
 
 func TestChatPrompt(t *testing.T) {
-	tests := []struct {
-		name     string
-		template string
-		messages []api.Message
-		window   int
-		want     string
+	type expect struct {
+		prompt string
+		images [][]byte
+	}
+
+	cases := []struct {
+		name  string
+		limit int
+		msgs  []api.Message
+		expect
 	}{
 		{
-			name:     "simple prompt",
-			template: "[INST] {{ .Prompt }} [/INST]",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello"},
+			name:  "messages",
+			limit: 64,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
+			},
+			expect: expect{
+				prompt: "You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
 			},
-			window: 1024,
-			want:   "[INST] Hello [/INST]",
 		},
 		{
-			name:     "with system message",
-			template: "[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>> {{ end }}{{ .Prompt }} [/INST]",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello"},
+			name:  "truncate messages",
+			limit: 1,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
+			},
+			expect: expect{
+				prompt: "A test. And a thumping good one at that, I'd wager. ",
 			},
-			window: 1024,
-			want:   "[INST] <<SYS>>You are a Wizard.<</SYS>> Hello [/INST]",
 		},
 		{
-			name:     "with response",
-			template: "[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>> {{ end }}{{ .Prompt }} [/INST] {{ .Response }}",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello"},
-				{Role: "assistant", Content: "I am?"},
+			name:  "truncate messages with image",
+			limit: 64,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("something")}},
+			},
+			expect: expect{
+				prompt: "[img-0] A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("something"),
+				},
 			},
-			window: 1024,
-			want:   "[INST] <<SYS>>You are a Wizard.<</SYS>> Hello [/INST] I am?",
 		},
 		{
-			name:     "with implicit response",
-			template: "[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>> {{ end }}{{ .Prompt }} [/INST]",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello"},
-				{Role: "assistant", Content: "I am?"},
+			name:  "truncate messages with images",
+			limit: 64,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}},
+			},
+			expect: expect{
+				prompt: "[img-0] A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("somethingelse"),
+				},
 			},
-			window: 1024,
-			want:   "[INST] <<SYS>>You are a Wizard.<</SYS>> Hello [/INST]I am?",
 		},
 		{
-			name:     "with conversation",
-			template: "[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>> {{ end }}{{ .Prompt }} [/INST] {{ .Response }} ",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "What are the potion ingredients?"},
-				{Role: "assistant", Content: "sugar"},
-				{Role: "user", Content: "Anything else?"},
+			name:  "messages with images",
+			limit: 2048,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}},
+			},
+			expect: expect{
+				prompt: "[img-0] You're a test, Harry! I-I'm a what? [img-1] A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("something"),
+					[]byte("somethingelse"),
+				},
 			},
-			window: 1024,
-			want:   "[INST] <<SYS>>You are a Wizard.<</SYS>> What are the potion ingredients? [/INST] sugar [INST] Anything else? [/INST] ",
 		},
 		{
-			name:     "with truncation",
-			template: "{{ .System }} {{ .Prompt }} {{ .Response }} ",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello"},
-				{Role: "assistant", Content: "I am?"},
-				{Role: "user", Content: "Why is the sky blue?"},
-				{Role: "assistant", Content: "The sky is blue from rayleigh scattering"},
+			name:  "message with image tag",
+			limit: 2048,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{[]byte("somethingelse")}},
+			},
+			expect: expect{
+				prompt: "You're a test, Harry! [img-0] I-I'm a what? [img-1] A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("something"),
+					[]byte("somethingelse"),
+				},
 			},
-			window: 10,
-			want:   "You are a Wizard. Why is the sky blue? The sky is blue from rayleigh scattering",
 		},
 		{
-			name:     "images",
-			template: "{{ .System }} {{ .Prompt }}",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("base64")}},
+			name:  "messages with interleaved images",
+			limit: 2048,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "user", Images: []api.ImageData{[]byte("something")}},
+				{Role: "user", Images: []api.ImageData{[]byte("somethingelse")}},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
+			},
+			expect: expect{
+				prompt: "You're a test, Harry!\n\n[img-0]\n\n[img-1] I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("something"),
+					[]byte("somethingelse"),
+				},
 			},
-			window: 1024,
-			want:   "You are a Wizard. [img-0] Hello",
 		},
 		{
-			name:     "images truncated",
-			template: "{{ .System }} {{ .Prompt }}",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a Wizard."},
-				{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("img1"), []byte("img2")}},
+			name:  "truncate message with interleaved images",
+			limit: 1024,
+			msgs: []api.Message{
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "user", Images: []api.ImageData{[]byte("something")}},
+				{Role: "user", Images: []api.ImageData{[]byte("somethingelse")}},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
+			},
+			expect: expect{
+				prompt: "[img-0] I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
+				images: [][]byte{
+					[]byte("somethingelse"),
+				},
 			},
-			window: 1024,
-			want:   "You are a Wizard. [img-0] [img-1] Hello",
 		},
 		{
-			name:     "empty list",
-			template: "{{ .System }} {{ .Prompt }}",
-			messages: []api.Message{},
-			window:   1024,
-			want:     "",
-		},
-		{
-			name:     "empty prompt",
-			template: "[INST] {{ if .System }}<<SYS>>{{ .System }}<</SYS>> {{ end }}{{ .Prompt }} [/INST] {{ .Response }} ",
-			messages: []api.Message{
-				{Role: "user", Content: ""},
+			name:  "message with system prompt",
+			limit: 2048,
+			msgs: []api.Message{
+				{Role: "system", Content: "You are the Test Who Lived."},
+				{Role: "user", Content: "You're a test, Harry!"},
+				{Role: "assistant", Content: "I-I'm a what?"},
+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
+			},
+			expect: expect{
+				prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
 			},
-			window: 1024,
-			want:   "",
 		},
 	}
 
-	encode := func(s string) ([]int, error) {
-		words := strings.Fields(s)
-		return make([]int, len(words)), nil
+	tmpl, err := template.Parse(`
+{{- if .System }}{{ .System }} {{ end }}
+{{- if .Prompt }}{{ .Prompt }} {{ end }}
+{{- if .Response }}{{ .Response }} {{ end }}`)
+	if err != nil {
+		t.Fatal(err)
 	}
 
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			tmpl, err := template.Parse(tc.template)
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			r := runnerRef{
+				llama:   mock{},
+				model:   &Model{Template: tmpl, ProjectorPaths: []string{"vision"}},
+				Options: &api.Options{},
+			}
+
+			r.NumCtx = tt.limit
+			prompt, images, err := chatPrompt(context.TODO(), &r, tt.msgs)
 			if err != nil {
 				t.Fatal(err)
 			}
 
-			got, err := ChatPrompt(tmpl, tc.messages, tc.window, encode)
-			if err != nil {
-				t.Errorf("error = %v", err)
+			if tt.prompt != prompt {
+				t.Errorf("expected %q, got %q", tt.prompt, prompt)
 			}
 
-			if got != tc.want {
-				t.Errorf("got: %q, want: %q", got, tc.want)
+			if len(images) != len(tt.images) {
+				t.Fatalf("expected %d images, got %d", len(tt.images), len(images))
+			}
+
+			for i := range images {
+				if images[i].ID != i {
+					t.Errorf("expected ID %d, got %d", i, images[i].ID)
+				}
+
+				if !bytes.Equal(images[i].Data, tt.images[i]) {
+					t.Errorf("expected %q, got %q", tt.images[i], images[i].Data)
+				}
 			}
 		})
 	}
diff --git a/server/routes.go b/server/routes.go
index ac6b713a7..35e64511b 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1,13 +1,13 @@
 package server
 
 import (
+	"bytes"
 	"cmp"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
-	"io/fs"
 	"log/slog"
 	"net"
 	"net/http"
@@ -67,163 +67,140 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options
 	return opts, nil
 }
 
-func isSupportedImageType(image []byte) bool {
-	contentType := http.DetectContentType(image)
-	allowedTypes := []string{"image/jpeg", "image/jpg", "image/png"}
-	return slices.Contains(allowedTypes, contentType)
+func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) {
+	if name == "" {
+		return nil, errors.New("model is required")
+	}
+
+	model, err := GetModel(name)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := model.CheckCapabilities(caps...); err != nil {
+		return nil, fmt.Errorf("%s %w", name, err)
+	}
+
+	opts, err := modelOptions(model, requestOpts)
+	if err != nil {
+		return nil, err
+	}
+
+	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
+	var runner *runnerRef
+	select {
+	case runner = <-runnerCh:
+	case err = <-errCh:
+		return nil, err
+	}
+
+	return runner, nil
 }
 
 func (s *Server) GenerateHandler(c *gin.Context) {
-	checkpointStart := time.Now()
 	var req api.GenerateRequest
-	err := c.ShouldBindJSON(&req)
-
-	switch {
-	case errors.Is(err, io.EOF):
+	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	case err != nil:
+	} else if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
 
-	// validate the request
-	switch {
-	case req.Model == "":
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
+	if req.Format != "" && req.Format != "json" {
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
 		return
-	case len(req.Format) > 0 && req.Format != "json":
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
-		return
-	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
+	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
 		return
 	}
 
-	for _, img := range req.Images {
-		if !isSupportedImageType(img) {
-			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
-			return
-		}
-	}
-
-	model, err := GetModel(req.Model)
-	if err != nil {
-		var pErr *fs.PathError
-		if errors.As(err, &pErr) {
-			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
-			return
-		}
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+	caps := []Capability{CapabilityCompletion}
+	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
+	if errors.Is(err, errCapabilityCompletion) {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
+		return
+	} else if err != nil {
+		handleScheduleError(c, err)
 		return
 	}
 
-	if !model.Has(CapabilityCompletion) {
-		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support generate", req.Model)})
-		return
+	images := make([]llm.ImageData, len(req.Images))
+	for i := range req.Images {
+		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
 	}
 
-	opts, err := modelOptions(model, req.Options)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
-	var runner *runnerRef
-	select {
-	case runner = <-rCh:
-	case err = <-eCh:
-		handleErrorResponse(c, err)
-		return
-	}
-
-	// an empty request loads the model
-	// note: for a short while template was used in lieu
-	// of `raw` mode so we need to check for it too
-	if req.Prompt == "" && req.Template == "" && req.System == "" {
-		c.JSON(http.StatusOK, api.GenerateResponse{
-			CreatedAt:  time.Now().UTC(),
-			Model:      req.Model,
-			Done:       true,
-			DoneReason: "load",
-		})
-		return
-	}
-
-	tmpl, err := template.Parse(req.Template)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	checkpointLoaded := time.Now()
-
-	var prompt string
-	switch {
-	case req.Raw:
-		prompt = req.Prompt
-	case req.Prompt != "":
-		if req.Template == "" {
-			tmpl = model.Template
+	prompt := req.Prompt
+	if !req.Raw {
+		var msgs []api.Message
+		if req.System != "" {
+			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
+		} else if r.model.System != "" {
+			msgs = append(msgs, api.Message{Role: "system", Content: r.model.System})
 		}
 
-		if req.System == "" {
-			req.System = model.System
+		if req.Prompt != "" {
+			for _, i := range images {
+				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
+			}
+
+			msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
 		}
 
-		slog.Debug("generate handler", "prompt", req.Prompt)
-		slog.Debug("generate handler", "template", req.Template)
-		slog.Debug("generate handler", "system", req.System)
-
-		var sb strings.Builder
-		for i := range req.Images {
-			fmt.Fprintf(&sb, "[img-%d] ", i)
-		}
-
-		sb.WriteString(req.Prompt)
-
-		p, err := Prompt(tmpl, req.System, sb.String(), "", true)
-		if err != nil {
-			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		if len(msgs) == 0 {
+			c.JSON(http.StatusOK, api.GenerateResponse{
+				Model:      req.Model,
+				CreatedAt:  time.Now().UTC(),
+				Done:       true,
+				DoneReason: "load",
+			})
 			return
 		}
 
-		sb.Reset()
+		tmpl := r.model.Template
+		if req.Template != "" {
+			tmpl, err = template.Parse(req.Template)
+			if err != nil {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+				return
+			}
+		}
+
+		var b bytes.Buffer
 		if req.Context != nil {
-			prev, err := runner.llama.Detokenize(c.Request.Context(), req.Context)
+			s, err := r.llama.Detokenize(c.Request.Context(), req.Context)
 			if err != nil {
 				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 				return
 			}
 
-			sb.WriteString(prev)
+			b.WriteString(s)
 		}
 
-		sb.WriteString(p)
+		if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
 
-		prompt = sb.String()
+		prompt = b.String()
 	}
 
-	slog.Debug("generate handler", "prompt", prompt)
+	slog.Debug("generate request", "prompt", prompt, "images", images)
 
 	ch := make(chan any)
-	var generated strings.Builder
 	go func() {
 		defer close(ch)
-
-		fn := func(r llm.CompletionResponse) {
-			// Build up the full response
-			if _, err := generated.WriteString(r.Content); err != nil {
-				ch <- gin.H{"error": err.Error()}
-				return
-			}
-
-			resp := api.GenerateResponse{
+		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
+			Prompt:  prompt,
+			Images:  images,
+			Format:  req.Format,
+			Options: *r.Options,
+		}, func(r llm.CompletionResponse) {
+			ch <- api.GenerateResponse{
 				Model:      req.Model,
 				CreatedAt:  time.Now().UTC(),
-				Done:       r.Done,
 				Response:   r.Content,
+				Done:       r.Done,
 				DoneReason: r.DoneReason,
 				Metrics: api.Metrics{
 					PromptEvalCount:    r.PromptEvalCount,
@@ -232,77 +209,35 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 					EvalDuration:       r.EvalDuration,
 				},
 			}
-
-			if r.Done {
-				resp.TotalDuration = time.Since(checkpointStart)
-				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
-
-				if !req.Raw {
-					p, err := Prompt(tmpl, req.System, req.Prompt, generated.String(), false)
-					if err != nil {
-						c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-						return
-					}
-
-					// TODO (jmorganca): encode() should not strip special tokens
-					tokens, err := runner.llama.Tokenize(c.Request.Context(), p)
-					if err != nil {
-						ch <- gin.H{"error": err.Error()}
-						return
-					}
-
-					resp.Context = append(req.Context, tokens...)
-				}
-			}
-
-			ch <- resp
-		}
-
-		var images []llm.ImageData
-		for i := range req.Images {
-			images = append(images, llm.ImageData{
-				ID:   i,
-				Data: req.Images[i],
-			})
-		}
-
-		// Start prediction
-		req := llm.CompletionRequest{
-			Prompt:  prompt,
-			Format:  req.Format,
-			Images:  images,
-			Options: opts,
-		}
-		if err := runner.llama.Completion(c.Request.Context(), req, fn); err != nil {
+		}); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
 
 	if req.Stream != nil && !*req.Stream {
-		// Accumulate responses into the final response
-		var final api.GenerateResponse
+		var r api.GenerateResponse
 		var sb strings.Builder
-		for resp := range ch {
-			switch r := resp.(type) {
+		for rr := range ch {
+			switch t := rr.(type) {
 			case api.GenerateResponse:
-				sb.WriteString(r.Response)
-				final = r
+				sb.WriteString(t.Response)
+				r = t
 			case gin.H:
-				if errorMsg, ok := r["error"].(string); ok {
-					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
-					return
-				} else {
-					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
-					return
+				msg, ok := t["error"].(string)
+				if !ok {
+					msg = "unexpected error format in response"
 				}
+
+				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
+				return
 			default:
-				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
 				return
 			}
 		}
 
-		final.Response = sb.String()
-		c.JSON(http.StatusOK, final)
+		r.Response = sb.String()
+		c.JSON(http.StatusOK, r)
 		return
 	}
 
@@ -311,44 +246,17 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 
 func (s *Server) EmbeddingsHandler(c *gin.Context) {
 	var req api.EmbeddingRequest
-	err := c.ShouldBindJSON(&req)
-	switch {
-	case errors.Is(err, io.EOF):
+	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	case err != nil:
+	} else if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
 
-	if req.Model == "" {
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
-		return
-	}
-
-	model, err := GetModel(req.Model)
+	r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
 	if err != nil {
-		var pErr *fs.PathError
-		if errors.As(err, &pErr) {
-			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
-			return
-		}
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	opts, err := modelOptions(model, req.Options)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
-	var runner *runnerRef
-	select {
-	case runner = <-rCh:
-	case err = <-eCh:
-		handleErrorResponse(c, err)
+		handleScheduleError(c, err)
 		return
 	}
 
@@ -358,17 +266,14 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 		return
 	}
 
-	embedding, err := runner.llama.Embedding(c.Request.Context(), req.Prompt)
+	embedding, err := r.llama.Embedding(c.Request.Context(), req.Prompt)
 	if err != nil {
 		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
 		return
 	}
 
-	resp := api.EmbeddingResponse{
-		Embedding: embedding,
-	}
-	c.JSON(http.StatusOK, resp)
+	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: embedding})
 }
 
 func (s *Server) PullModelHandler(c *gin.Context) {
@@ -649,9 +554,9 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		}
 	}
 
-	msgs := make([]api.Message, 0)
-	for _, msg := range m.Messages {
-		msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
+	msgs := make([]api.Message, len(m.Messages))
+	for i, msg := range m.Messages {
+		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
 	}
 
 	n := model.ParseName(req.Model)
@@ -1214,132 +1119,55 @@ func (s *Server) ProcessHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
 }
 
-// ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
-func chatPrompt(ctx context.Context, runner *runnerRef, template *template.Template, messages []api.Message, numCtx int) (string, error) {
-	encode := func(s string) ([]int, error) {
-		return runner.llama.Tokenize(ctx, s)
-	}
-
-	prompt, err := ChatPrompt(template, messages, numCtx, encode)
-	if err != nil {
-		return "", err
-	}
-
-	return prompt, nil
-}
-
 func (s *Server) ChatHandler(c *gin.Context) {
-	checkpointStart := time.Now()
-
 	var req api.ChatRequest
-	err := c.ShouldBindJSON(&req)
-	switch {
-	case errors.Is(err, io.EOF):
+	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
 		return
-	case err != nil:
+	} else if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
 
-	// validate the request
-	switch {
-	case req.Model == "":
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
+	caps := []Capability{CapabilityCompletion}
+	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
+	if errors.Is(err, errCapabilityCompletion) {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
 		return
-	case len(req.Format) > 0 && req.Format != "json":
-		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
+	} else if err != nil {
+		handleScheduleError(c, err)
 		return
 	}
 
-	model, err := GetModel(req.Model)
-	if err != nil {
-		var pErr *fs.PathError
-		if errors.As(err, &pErr) {
-			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
-			return
-		}
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	if !model.Has(CapabilityCompletion) {
-		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support chat", req.Model)})
-		return
-	}
-
-	opts, err := modelOptions(model, req.Options)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
-	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
-	var runner *runnerRef
-	select {
-	case runner = <-rCh:
-	case err = <-eCh:
-		handleErrorResponse(c, err)
-		return
-	}
-
-	checkpointLoaded := time.Now()
-
-	// if the first message is not a system message, then add the model's default system message
-	if len(req.Messages) > 0 && req.Messages[0].Role != "system" {
-		req.Messages = append([]api.Message{
-			{
-				Role:    "system",
-				Content: model.System,
-			},
-		}, req.Messages...)
-	}
-
-	prompt, err := chatPrompt(c.Request.Context(), runner, model.Template, req.Messages, opts.NumCtx)
-	if err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-		return
-	}
-
-	// an empty request loads the model
-	if len(req.Messages) == 0 || prompt == "" {
-		resp := api.ChatResponse{
-			CreatedAt:  time.Now().UTC(),
+	if len(req.Messages) == 0 {
+		c.JSON(http.StatusOK, api.ChatResponse{
 			Model:      req.Model,
+			CreatedAt:  time.Now().UTC(),
+			Message:    api.Message{Role: "assistant"},
 			Done:       true,
 			DoneReason: "load",
-			Message:    api.Message{Role: "assistant"},
-		}
-		c.JSON(http.StatusOK, resp)
+		})
 		return
 	}
 
-	// only send images that are in the prompt
-	var i int
-	var images []llm.ImageData
-	for _, m := range req.Messages {
-		for _, img := range m.Images {
-			if !isSupportedImageType(img) {
-				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
-				return
-			}
-
-			if strings.Contains(prompt, fmt.Sprintf("[img-%d]", i)) {
-				images = append(images, llm.ImageData{Data: img, ID: i})
-			}
-			i += 1
-		}
+	prompt, images, err := chatPrompt(c.Request.Context(), r, req.Messages)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
 	}
 
-	slog.Debug("chat handler", "prompt", prompt, "images", len(images))
+	slog.Debug("chat request", "images", len(images), "prompt", prompt)
 
 	ch := make(chan any)
-
 	go func() {
 		defer close(ch)
-
-		fn := func(r llm.CompletionResponse) {
-			resp := api.ChatResponse{
+		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
+			Prompt:  prompt,
+			Images:  images,
+			Format:  req.Format,
+			Options: *r.Options,
+		}, func(r llm.CompletionResponse) {
+			ch <- api.ChatResponse{
 				Model:      req.Model,
 				CreatedAt:  time.Now().UTC(),
 				Message:    api.Message{Role: "assistant", Content: r.Content},
@@ -1352,64 +1180,48 @@ func (s *Server) ChatHandler(c *gin.Context) {
 					EvalDuration:       r.EvalDuration,
 				},
 			}
-
-			if r.Done {
-				resp.TotalDuration = time.Since(checkpointStart)
-				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
-			}
-
-			ch <- resp
-		}
-
-		if err := runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{
-			Prompt:  prompt,
-			Format:  req.Format,
-			Images:  images,
-			Options: opts,
-		}, fn); err != nil {
+		}); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
 
 	if req.Stream != nil && !*req.Stream {
-		// Accumulate responses into the final response
-		var final api.ChatResponse
+		var r api.ChatResponse
 		var sb strings.Builder
-		for resp := range ch {
-			switch r := resp.(type) {
+		for rr := range ch {
+			switch t := rr.(type) {
 			case api.ChatResponse:
-				sb.WriteString(r.Message.Content)
-				final = r
+				sb.WriteString(t.Message.Content)
+				r = t
 			case gin.H:
-				if errorMsg, ok := r["error"].(string); ok {
-					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
-					return
-				} else {
-					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
-					return
+				msg, ok := t["error"].(string)
+				if !ok {
+					msg = "unexpected error format in response"
 				}
+
+				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
+				return
 			default:
-				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
 				return
 			}
 		}
 
-		final.Message = api.Message{Role: "assistant", Content: sb.String()}
-		c.JSON(http.StatusOK, final)
+		r.Message.Content = sb.String()
+		c.JSON(http.StatusOK, r)
 		return
 	}
 
 	streamResponse(c, ch)
 }
 
-func handleErrorResponse(c *gin.Context, err error) {
-	if errors.Is(err, context.Canceled) {
+func handleScheduleError(c *gin.Context, err error) {
+	switch {
+	case errors.Is(err, context.Canceled):
 		c.JSON(499, gin.H{"error": "request canceled"})
-		return
-	}
-	if errors.Is(err, ErrMaxQueue) {
+	case errors.Is(err, ErrMaxQueue):
 		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
-		return
+	default:
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
-	c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 }
diff --git a/template/template.go b/template/template.go
index d15f7156f..cfba5a238 100644
--- a/template/template.go
+++ b/template/template.go
@@ -5,6 +5,7 @@ import (
 	"embed"
 	"encoding/json"
 	"errors"
+	"fmt"
 	"io"
 	"math"
 	"slices"
@@ -14,6 +15,7 @@ import (
 	"text/template/parse"
 
 	"github.com/agnivade/levenshtein"
+	"github.com/ollama/ollama/api"
 	"golang.org/x/exp/maps"
 )
 
@@ -74,30 +76,78 @@ func Named(s string) (*named, error) {
 	return nil, errors.New("no matching template found")
 }
 
+var DefaultTemplate, _ = Parse("{{ .Prompt }}")
+
 type Template struct {
 	*template.Template
 	raw string
 }
 
+var response = parse.ActionNode{
+	NodeType: parse.NodeAction,
+	Pipe: &parse.PipeNode{
+		NodeType: parse.NodePipe,
+		Cmds: []*parse.CommandNode{
+			{
+				NodeType: parse.NodeCommand,
+				Args: []parse.Node{
+					&parse.FieldNode{
+						NodeType: parse.NodeField,
+						Ident:    []string{"Response"},
+					},
+				},
+			},
+		},
+	},
+}
+
+func Parse(s string) (*Template, error) {
+	tmpl := template.New("").Option("missingkey=zero").Funcs(template.FuncMap{
+		"toJson": func(v any) string {
+			b, err := json.Marshal(v)
+			if err != nil {
+				return ""
+			}
+
+			return string(b)
+		},
+		"isLastMessage": func(s []*api.Message, m *api.Message) bool {
+			for i := len(s) - 1; i >= 0; i-- {
+				if m.Role != s[i].Role {
+					continue
+				}
+
+				return m == s[i]
+			}
+
+			return false
+		},
+	})
+
+	tmpl, err := tmpl.Parse(s)
+	if err != nil {
+		return nil, err
+	}
+
+	t := Template{Template: tmpl, raw: s}
+	if vars := t.Vars(); !slices.Contains(vars, "messages") && !slices.Contains(vars, "response") {
+		// touch up the template and append {{ .Response }}
+		tmpl.Tree.Root.Nodes = append(tmpl.Tree.Root.Nodes, &response)
+	}
+
+	return &t, nil
+}
+
 func (t *Template) String() string {
 	return t.raw
 }
 
-var DefaultTemplate, _ = Parse("{{ .Prompt }}")
-
-func Parse(s string) (*Template, error) {
-	t, err := template.New("").Option("missingkey=zero").Parse(s)
-	if err != nil {
-		return nil, err
-	}
-
-	return &Template{Template: t, raw: s}, nil
-}
-
 func (t *Template) Vars() []string {
 	var vars []string
-	for _, n := range t.Tree.Root.Nodes {
-		vars = append(vars, parseNode(n)...)
+	for _, tt := range t.Templates() {
+		for _, n := range tt.Root.Nodes {
+			vars = append(vars, parseNode(n)...)
+		}
 	}
 
 	set := make(map[string]struct{})
@@ -110,6 +160,97 @@ func (t *Template) Vars() []string {
 	return vars
 }
 
+type Values struct {
+	Messages []api.Message
+}
+
+func (t *Template) Execute(w io.Writer, v Values) error {
+	system, collated := collate(v.Messages)
+	if slices.Contains(t.Vars(), "messages") {
+		return t.Template.Execute(w, map[string]any{
+			"System":   system,
+			"Messages": collated,
+		})
+	}
+
+	var b bytes.Buffer
+	var prompt, response string
+	for i, m := range collated {
+		if m.Role == "user" {
+			prompt = m.Content
+		} else {
+			response = m.Content
+		}
+
+		if i != len(collated)-1 && prompt != "" && response != "" {
+			if err := t.Template.Execute(&b, map[string]any{
+				"System":   "",
+				"Prompt":   prompt,
+				"Response": response,
+			}); err != nil {
+				return err
+			}
+
+			prompt = ""
+			response = ""
+		}
+	}
+
+	var cut bool
+	tree := t.Template.Copy()
+	// for the last message, cut "{{ .Response }}" and everything after it
+	tree.Root.Nodes = slices.DeleteFunc(tree.Root.Nodes, func(n parse.Node) bool {
+		if slices.Contains(parseNode(n), "Response") {
+			cut = true
+		}
+
+		return cut
+	})
+
+	if err := template.Must(template.New("").AddParseTree("", tree)).Execute(&b, map[string]any{
+		"System": system,
+		"Prompt": prompt,
+	}); err != nil {
+		return err
+	}
+
+	_, err := io.Copy(w, &b)
+	return err
+}
+
+func collate(msgs []api.Message) (system string, collated []*api.Message) {
+	var n int
+	for i := range msgs {
+		msg := msgs[i]
+		if msg.Role == "system" {
+			if system != "" {
+				system += "\n\n"
+			}
+
+			system += msg.Content
+			continue
+		}
+
+		for range msg.Images {
+			imageTag := fmt.Sprintf("[img-%d]", n)
+			if !strings.Contains(msg.Content, "[img]") {
+				msg.Content = strings.TrimSpace("[img] " + msg.Content)
+			}
+
+			msg.Content = strings.Replace(msg.Content, "[img]", imageTag, 1)
+			n++
+		}
+
+		if len(collated) > 0 && collated[len(collated)-1].Role == msg.Role {
+			collated[len(collated)-1].Content += "\n\n" + msg.Content
+		} else {
+			collated = append(collated, &msg)
+		}
+	}
+
+	return
+}
+
 func parseNode(n parse.Node) []string {
 	switch n := n.(type) {
 	case *parse.ActionNode:
@@ -152,6 +293,8 @@ func parseNode(n parse.Node) []string {
 		return names
 	case *parse.FieldNode:
 		return n.Ident
+	case *parse.TemplateNode:
+		return parseNode(n.Pipe)
 	}
 
 	return nil
diff --git a/template/template_test.go b/template/template_test.go
index eda4634f4..5d5dad4b2 100644
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -11,6 +11,7 @@ import (
 	"testing"
 	"text/template"
 
+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
 
@@ -64,13 +65,12 @@ func TestParse(t *testing.T) {
 		template string
 		vars     []string
 	}{
-		{"{{ .Prompt }}", []string{"prompt"}},
-		{"{{ .System }} {{ .Prompt }}", []string{"prompt", "system"}},
+		{"{{ .Prompt }}", []string{"prompt", "response"}},
+		{"{{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system"}},
 		{"{{ .System }} {{ .Prompt }} {{ .Response }}", []string{"prompt", "response", "system"}},
-		{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "system", "tools"}},
+		{"{{ with .Tools }}{{ . }}{{ end }} {{ .System }} {{ .Prompt }}", []string{"prompt", "response", "system", "tools"}},
 		{"{{ range .Messages }}{{ .Role }} {{ .Content }}{{ end }}", []string{"content", "messages", "role"}},
 		{"{{ range .Messages }}{{ if eq .Role \"system\" }}SYSTEM: {{ .Content }}{{ else if eq .Role \"user\" }}USER: {{ .Content }}{{ else if eq .Role \"assistant\" }}ASSISTANT: {{ .Content }}{{ end }}{{ end }}", []string{"content", "messages", "role"}},
-		{"{{ .Prompt }} {{ .Suffix }}", []string{"prompt", "suffix"}},
 	}
 
 	for _, tt := range cases {
@@ -87,3 +87,148 @@ func TestParse(t *testing.T) {
 		})
 	}
 }
+
+func TestExecuteWithMessages(t *testing.T) {
+	cases := []struct {
+		templates []string
+		values    Values
+		expected  string
+	}{
+		{
+			[]string{
+				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `,
+				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`,
+				`{{- range .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }}
+{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
+{{- end }}
+{{- end }}`,
+			},
+			Values{
+				Messages: []api.Message{
+					{Role: "user", Content: "Hello friend!"},
+					{Role: "assistant", Content: "Hello human!"},
+					{Role: "user", Content: "Yay!"},
+				},
+			},
+			`[INST] Hello friend![/INST] Hello human![INST] Yay![/INST] `,
+		},
+		{
+			[]string{
+				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `,
+				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`,
+				`
+{{- range .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }}
+{{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
+{{- end }}
+{{- end }}`,
+			},
+			Values{
+				Messages: []api.Message{
+					{Role: "system", Content: "You are a helpful assistant!"},
+					{Role: "user", Content: "Hello friend!"},
+					{Role: "assistant", Content: "Hello human!"},
+					{Role: "user", Content: "Yay!"},
+				},
+			},
+			`[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant!
+
+Yay![/INST] `,
+		},
+		{
+			[]string{
+				`{{ if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+{{ .Response }}<|im_end|>
+`,
+				`
+{{- range .Messages }}
+{{- if and (eq .Role "user") (isLastMessage $.Messages .) $.System }}<|im_start|>system
+{{ $.System }}<|im_end|>{{ print "\n" }}
+{{- end }}<|im_start|>{{ .Role }}
+{{ .Content }}<|im_end|>{{ print "\n" }}
+{{- end }}<|im_start|>assistant
+`,
+			},
+			Values{
+				Messages: []api.Message{
+					{Role: "system", Content: "You are a helpful assistant!"},
+					{Role: "user", Content: "Hello friend!"},
+					{Role: "assistant", Content: "Hello human!"},
+					{Role: "user", Content: "Yay!"},
+				},
+			},
+			`<|im_start|>user
+Hello friend!<|im_end|>
+<|im_start|>assistant
+Hello human!<|im_end|>
+<|im_start|>system
+You are a helpful assistant!<|im_end|>
+<|im_start|>user
+Yay!<|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{
+			[]string{
+				`{{ if .Prompt }}Question: {{ .Prompt }}
+
+{{ end }}Answer: {{ .Response }}
+
+`,
+				`
+{{- range .Messages }}
+{{- if eq .Role "user" }}Question: {{ .Content }}{{ print "\n\n" }}
+{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ print "\n\n" }}
+{{- end }}
+{{- end }}Answer: `,
+			},
+			Values{
+				Messages: []api.Message{
+					{Role: "user", Content: "What's in this image?", Images: []api.ImageData{[]byte("")}},
+					{Role: "assistant", Content: "It's a hot dog."},
+					{Role: "user", Content: "What's in _this_ image?"},
+					{Role: "user", Images: []api.ImageData{[]byte("")}},
+					{Role: "user", Content: "Is it a hot dog?"},
+				},
+			},
+			`Question: [img-0] What's in this image?
+
+Answer: It's a hot dog.
+
+Question: What's in _this_ image?
+
+[img-1]
+
+Is it a hot dog?
+
+Answer: `,
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run("", func(t *testing.T) {
+			for _, tmpl := range tt.templates {
+				t.Run("", func(t *testing.T) {
+					tmpl, err := Parse(tmpl)
+					if err != nil {
+						t.Fatal(err)
+					}
+
+					var b bytes.Buffer
+					if err := tmpl.Execute(&b, tt.values); err != nil {
+						t.Fatal(err)
+					}
+
+					if b.String() != tt.expected {
+						t.Errorf("expected\n%s,\ngot\n%s", tt.expected, b.String())
+					}
+				})
+			}
+		})
+	}
+}

From 2c3fe1fd972b7810091120f844afc35bc98accbd Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 20 Jun 2024 11:00:08 -0700
Subject: [PATCH 2/7] comments

---
 server/prompt.go          |  29 +++---
 server/prompt_test.go     |  34 +++----
 server/routes.go          |  46 +++++-----
 template/template.go      |  48 +++++-----
 template/template_test.go | 180 ++++++++++++++++++++++++++++++--------
 5 files changed, 224 insertions(+), 113 deletions(-)

diff --git a/server/prompt.go b/server/prompt.go
index 5016fbe14..51d691a9f 100644
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -11,8 +11,13 @@ import (
 	"github.com/ollama/ollama/template"
 )
 
-func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) {
-	// extract system messages which should always be included
+type tokenizeFunc func(context.Context, string) ([]int, error)
+
+// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.
+// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the
+// latest message and 2) system messages
+func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message) (prompt string, images []llm.ImageData, _ error) {
+	// pull out any system messages which should always be included in the prompt
 	var system []api.Message
 	msgs = slices.DeleteFunc(msgs, func(m api.Message) bool {
 		if m.Role == "system" {
@@ -23,32 +28,35 @@ func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt s
 		return false
 	})
 
-	if len(system) == 0 && r.model.System != "" {
+	if len(system) == 0 && m.System != "" {
 		// add model system prompt since it wasn't provided
-		system = append(system, api.Message{Role: "system", Content: r.model.System})
+		system = append(system, api.Message{Role: "system", Content: m.System})
 	}
 
+	// always include the last message
 	n := len(msgs) - 1
+	// in reverse, find all messages that fit into context window
 	for i := n - 1; i >= 0; i-- {
 		var b bytes.Buffer
-		if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil {
+		if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...)}); err != nil {
 			return "", nil, err
 		}
 
-		s, err := r.llama.Tokenize(ctx, b.String())
+		s, err := tokenize(ctx, b.String())
 		if err != nil {
 			return "", nil, err
 		}
 
 		c := len(s)
-		if r.model.ProjectorPaths != nil {
+		if m.ProjectorPaths != nil {
 			for _, m := range msgs[i:] {
-				// TODO: get image embedding length from project metadata
+				// images are represented as 768 sized embeddings
+				// TODO: get embedding length from projector metadata
 				c += 768 * len(m.Images)
 			}
 		}
 
-		if c > r.NumCtx {
+		if c > opts.NumCtx {
 			slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
 			break
 		} else {
@@ -56,8 +64,9 @@ func chatPrompt(ctx context.Context, r *runnerRef, msgs []api.Message) (prompt s
 		}
 	}
 
+	// truncate any messages that do not fit into the context window
 	var b bytes.Buffer
-	if err := r.model.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil {
+	if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[n:]...)}); err != nil {
 		return "", nil, err
 	}
 
diff --git a/server/prompt_test.go b/server/prompt_test.go
index 59288b46c..d4cee98c2 100644
--- a/server/prompt_test.go
+++ b/server/prompt_test.go
@@ -7,15 +7,10 @@ import (
 	"testing"
 
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/template"
 )
 
-type mock struct {
-	llm.LlamaServer
-}
-
-func (m mock) Tokenize(_ context.Context, s string) (tokens []int, err error) {
+func tokenize(_ context.Context, s string) (tokens []int, err error) {
 	for range strings.Fields(s) {
 		tokens = append(tokens, len(tokens))
 	}
@@ -48,7 +43,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "truncate messages",
+			name:  "truncate messages",
 			limit: 1,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!"},
@@ -60,7 +55,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "truncate messages with image",
+			name:  "truncate messages with image",
 			limit: 64,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!"},
@@ -75,7 +70,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "truncate messages with images",
+			name:  "truncate messages with images",
 			limit: 64,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
@@ -90,7 +85,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "messages with images",
+			name:  "messages with images",
 			limit: 2048,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
@@ -106,7 +101,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "message with image tag",
+			name:  "message with image tag",
 			limit: 2048,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}},
@@ -122,7 +117,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "messages with interleaved images",
+			name:  "messages with interleaved images",
 			limit: 2048,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!"},
@@ -140,7 +135,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "truncate message with interleaved images",
+			name:  "truncate message with interleaved images",
 			limit: 1024,
 			msgs: []api.Message{
 				{Role: "user", Content: "You're a test, Harry!"},
@@ -157,7 +152,7 @@ func TestChatPrompt(t *testing.T) {
 			},
 		},
 		{
-			name: "message with system prompt",
+			name:  "message with system prompt",
 			limit: 2048,
 			msgs: []api.Message{
 				{Role: "system", Content: "You are the Test Who Lived."},
@@ -181,14 +176,9 @@ func TestChatPrompt(t *testing.T) {
 
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
-			r := runnerRef{
-				llama:   mock{},
-				model:   &Model{Template: tmpl, ProjectorPaths: []string{"vision"}},
-				Options: &api.Options{},
-			}
-
-			r.NumCtx = tt.limit
-			prompt, images, err := chatPrompt(context.TODO(), &r, tt.msgs)
+			model := Model{Template: tmpl, ProjectorPaths: []string{"vision"}}
+			opts := api.Options{Runner: api.Runner{NumCtx: tt.limit}}
+			prompt, images, err := chatPrompt(context.TODO(), &model, tokenize, &opts, tt.msgs)
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/server/routes.go b/server/routes.go
index 35e64511b..1a93e9770 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -54,6 +54,8 @@ func init() {
 	gin.SetMode(mode)
 }
 
+var errRequired = errors.New("is required")
+
 func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
 	opts := api.DefaultOptions()
 	if err := opts.FromMap(model.Options); err != nil {
@@ -69,7 +71,7 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options
 
 func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) {
 	if name == "" {
-		return nil, errors.New("model is required")
+		return nil, fmt.Errorf("model %w", errRequired)
 	}
 
 	model, err := GetModel(name)
@@ -121,7 +123,17 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
 		return
 	} else if err != nil {
-		handleScheduleError(c, err)
+		handleScheduleError(c, req.Model, err)
+		return
+	}
+
+	if req.Prompt == "" {
+		c.JSON(http.StatusOK, api.GenerateResponse{
+			Model:      req.Model,
+			CreatedAt:  time.Now().UTC(),
+			Done:       true,
+			DoneReason: "load",
+		})
 		return
 	}
 
@@ -139,23 +151,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			msgs = append(msgs, api.Message{Role: "system", Content: r.model.System})
 		}
 
-		if req.Prompt != "" {
-			for _, i := range images {
-				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
-			}
-
-			msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
+		for _, i := range images {
+			msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
 		}
 
-		if len(msgs) == 0 {
-			c.JSON(http.StatusOK, api.GenerateResponse{
-				Model:      req.Model,
-				CreatedAt:  time.Now().UTC(),
-				Done:       true,
-				DoneReason: "load",
-			})
-			return
-		}
+		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
 
 		tmpl := r.model.Template
 		if req.Template != "" {
@@ -256,7 +256,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 
 	r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
 	if err != nil {
-		handleScheduleError(c, err)
+		handleScheduleError(c, req.Model, err)
 		return
 	}
 
@@ -1135,7 +1135,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
 		return
 	} else if err != nil {
-		handleScheduleError(c, err)
+		handleScheduleError(c, req.Model, err)
 		return
 	}
 
@@ -1150,7 +1150,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}
 
-	prompt, images, err := chatPrompt(c.Request.Context(), r, req.Messages)
+	prompt, images, err := chatPrompt(c.Request.Context(), r.model, r.llama.Tokenize, r.Options, req.Messages)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1215,12 +1215,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	streamResponse(c, ch)
 }
 
-func handleScheduleError(c *gin.Context, err error) {
+func handleScheduleError(c *gin.Context, name string, err error) {
 	switch {
+	case errors.Is(err, errRequired):
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 	case errors.Is(err, context.Canceled):
 		c.JSON(499, gin.H{"error": "request canceled"})
 	case errors.Is(err, ErrMaxQueue):
 		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
+	case errors.Is(err, os.ErrNotExist):
+		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
 	default:
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 	}
diff --git a/template/template.go b/template/template.go
index cfba5a238..c8f8f6d0d 100644
--- a/template/template.go
+++ b/template/template.go
@@ -83,6 +83,7 @@ type Template struct {
 	raw string
 }
 
+// response is a template node that can be added to templates that don't already have one
 var response = parse.ActionNode{
 	NodeType: parse.NodeAction,
 	Pipe: &parse.PipeNode{
@@ -101,28 +102,25 @@ var response = parse.ActionNode{
 	},
 }
 
+var funcs = template.FuncMap{
+	"toJson": func(v any) string {
+		b, err := json.Marshal(v)
+		if err != nil {
+			return ""
+		}
+
+		return string(b)
+	},
+	"add": func(a, b int) int {
+		return a + b
+	},
+	"sub": func(a, b int) int {
+		return a - b
+	},
+}
+
 func Parse(s string) (*Template, error) {
-	tmpl := template.New("").Option("missingkey=zero").Funcs(template.FuncMap{
-		"toJson": func(v any) string {
-			b, err := json.Marshal(v)
-			if err != nil {
-				return ""
-			}
-
-			return string(b)
-		},
-		"isLastMessage": func(s []*api.Message, m *api.Message) bool {
-			for i := len(s) - 1; i >= 0; i-- {
-				if m.Role != s[i].Role {
-					continue
-				}
-
-				return m == s[i]
-			}
-
-			return false
-		},
-	})
+	tmpl := template.New("").Option("missingkey=zero").Funcs(funcs)
 
 	tmpl, err := tmpl.Parse(s)
 	if err != nil {
@@ -218,7 +216,13 @@ func (t *Template) Execute(w io.Writer, v Values) error {
 	return err
 }
 
-func collate(msgs []api.Message) (system string, collated []*api.Message) {
+type messages []*api.Message
+
+// collate messages based on role. consecutive messages of the same role are merged
+// into a single message. collate also pulls out and merges messages with Role == "system"
+// which are templated separately. As a side effect, it mangles message content adding image
+// tags ([img-%d]) as needed
+func collate(msgs []api.Message) (system string, collated messages) {
 	var n int
 	for i := range msgs {
 		msg := msgs[i]
diff --git a/template/template_test.go b/template/template_test.go
index 5d5dad4b2..ac92bf489 100644
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
+	"strconv"
 	"testing"
 	"text/template"
 
@@ -15,6 +16,98 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
+func TestFuncs(t *testing.T) {
+	t.Run("toJson", func(t *testing.T) {
+		cases := []struct {
+			input    any
+			expected string
+		}{
+			{nil, "null"},
+			{true, "true"},
+			{false, "false"},
+			{0, "0"},
+			{1, "1"},
+			{1.0, "1"},
+			{1.1, "1.1"},
+			{"", `""`},
+			{"hello", `"hello"`},
+			{[]int{1, 2, 3}, "[1,2,3]"},
+			{[]string{"a", "b", "c"}, `["a","b","c"]`},
+			{map[string]int{"a": 1, "b": 2}, `{"a":1,"b":2}`},
+			{map[string]string{"a": "b", "c": "d"}, `{"a":"b","c":"d"}`},
+		}
+
+		for _, tt := range cases {
+			t.Run(tt.expected, func(t *testing.T) {
+				toJson, ok := funcs["toJson"].(func(any) string)
+				if !ok {
+					t.Fatal("toJson is not a function")
+				}
+
+				if s := toJson(tt.input); s != tt.expected {
+					t.Errorf("expected %q, got %q", tt.expected, s)
+				}
+			})
+		}
+	})
+
+	t.Run("add", func(t *testing.T) {
+		cases := []struct {
+			a, b     int
+			expected int
+		}{
+			{0, 0, 0},
+			{0, 1, 1},
+			{1, 0, 1},
+			{1, 1, 2},
+			{1, -1, 0},
+			{-1, 1, 0},
+			{-1, -1, -2},
+		}
+
+		for _, tt := range cases {
+			t.Run(strconv.Itoa(tt.expected), func(t *testing.T) {
+				add, ok := funcs["add"].(func(int, int) int)
+				if !ok {
+					t.Fatal("add is not a function")
+				}
+
+				if n := add(tt.a, tt.b); n != tt.expected {
+					t.Errorf("expected %d, got %d", tt.expected, n)
+				}
+			})
+		}
+	})
+
+	t.Run("sub", func(t *testing.T) {
+		cases := []struct {
+			a, b     int
+			expected int
+		}{
+			{0, 0, 0},
+			{0, 1, -1},
+			{1, 0, 1},
+			{1, 1, 0},
+			{1, -1, 2},
+			{-1, 1, -2},
+			{-1, -1, 0},
+		}
+
+		for _, tt := range cases {
+			t.Run(strconv.Itoa(tt.expected), func(t *testing.T) {
+				sub, ok := funcs["sub"].(func(int, int) int)
+				if !ok {
+					t.Fatal("sub is not a function")
+				}
+
+				if n := sub(tt.a, tt.b); n != tt.expected {
+					t.Errorf("expected %d, got %d", tt.expected, n)
+				}
+			})
+		}
+	})
+}
+
 func TestNamed(t *testing.T) {
 	f, err := os.Open(filepath.Join("testdata", "templates.jsonl"))
 	if err != nil {
@@ -89,77 +182,86 @@ func TestParse(t *testing.T) {
 }
 
 func TestExecuteWithMessages(t *testing.T) {
+	type template struct {
+		name     string
+		template string
+	}
 	cases := []struct {
-		templates []string
+		name      string
+		templates []template
 		values    Values
 		expected  string
 	}{
 		{
-			[]string{
-				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `,
-				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`,
-				`{{- range .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }}
+			"mistral",
+			[]template{
+				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
+				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
+				{"messages", `{{- range .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }}
 {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
 {{- end }}
-{{- end }}`,
+{{- end }}`},
 			},
 			Values{
 				Messages: []api.Message{
 					{Role: "user", Content: "Hello friend!"},
 					{Role: "assistant", Content: "Hello human!"},
-					{Role: "user", Content: "Yay!"},
+					{Role: "user", Content: "What is your name?"},
 				},
 			},
-			`[INST] Hello friend![/INST] Hello human![INST] Yay![/INST] `,
+			`[INST] Hello friend![/INST] Hello human![INST] What is your name?[/INST] `,
 		},
 		{
-			[]string{
-				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `,
-				`[INST] {{ if .System }}{{ .System }}{{ print "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`,
-				`
+			"mistral system",
+			[]template{
+				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
+				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
+				{"messages", `
 {{- range .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (isLastMessage $.Messages .) $.System }}{{ $.System }}{{ print "\n\n" }}
+{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }}
 {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
 {{- end }}
-{{- end }}`,
+{{- end }}`},
 			},
 			Values{
 				Messages: []api.Message{
 					{Role: "system", Content: "You are a helpful assistant!"},
 					{Role: "user", Content: "Hello friend!"},
 					{Role: "assistant", Content: "Hello human!"},
-					{Role: "user", Content: "Yay!"},
+					{Role: "user", Content: "What is your name?"},
 				},
 			},
 			`[INST] Hello friend![/INST] Hello human![INST] You are a helpful assistant!
 
-Yay![/INST] `,
+What is your name?[/INST] `,
 		},
 		{
-			[]string{
-				`{{ if .System }}<|im_start|>system
+			"chatml",
+			[]template{
+				// this does not have a "no response" test because it's impossible to render the same output
+				{"response", `{{ if .System }}<|im_start|>system
 {{ .System }}<|im_end|>
 {{ end }}{{ if .Prompt }}<|im_start|>user
 {{ .Prompt }}<|im_end|>
 {{ end }}<|im_start|>assistant
 {{ .Response }}<|im_end|>
-`,
-				`
+`},
+				{"messages", `
 {{- range .Messages }}
-{{- if and (eq .Role "user") (isLastMessage $.Messages .) $.System }}<|im_start|>system
-{{ $.System }}<|im_end|>{{ print "\n" }}
+{{- if and (eq .Role "user") (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}<|im_start|>system
+{{ $.System }}<|im_end|>{{ "\n" }}
 {{- end }}<|im_start|>{{ .Role }}
-{{ .Content }}<|im_end|>{{ print "\n" }}
+{{ .Content }}<|im_end|>{{ "\n" }}
 {{- end }}<|im_start|>assistant
-`,
+`},
 			},
 			Values{
 				Messages: []api.Message{
 					{Role: "system", Content: "You are a helpful assistant!"},
 					{Role: "user", Content: "Hello friend!"},
 					{Role: "assistant", Content: "Hello human!"},
-					{Role: "user", Content: "Yay!"},
+					{Role: "user", Content: "What is your name?"},
 				},
 			},
 			`<|im_start|>user
@@ -169,23 +271,25 @@ Hello human!<|im_end|>
 <|im_start|>system
 You are a helpful assistant!<|im_end|>
 <|im_start|>user
-Yay!<|im_end|>
+What is your name?<|im_end|>
 <|im_start|>assistant
 `,
 		},
 		{
-			[]string{
-				`{{ if .Prompt }}Question: {{ .Prompt }}
+			"moondream",
+			[]template{
+				// this does not have a "no response" test because it's impossible to render the same output
+				{"response", `{{ if .Prompt }}Question: {{ .Prompt }}
 
 {{ end }}Answer: {{ .Response }}
 
-`,
-				`
+`},
+				{"messages", `
 {{- range .Messages }}
-{{- if eq .Role "user" }}Question: {{ .Content }}{{ print "\n\n" }}
-{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ print "\n\n" }}
+{{- if eq .Role "user" }}Question: {{ .Content }}{{ "\n\n" }}
+{{- else if eq .Role "assistant" }}Answer: {{ .Content }}{{ "\n\n" }}
 {{- end }}
-{{- end }}Answer: `,
+{{- end }}Answer: `},
 			},
 			Values{
 				Messages: []api.Message{
@@ -211,10 +315,10 @@ Answer: `,
 	}
 
 	for _, tt := range cases {
-		t.Run("", func(t *testing.T) {
-			for _, tmpl := range tt.templates {
-				t.Run("", func(t *testing.T) {
-					tmpl, err := Parse(tmpl)
+		t.Run(tt.name, func(t *testing.T) {
+			for _, ttt := range tt.templates {
+				t.Run(ttt.name, func(t *testing.T) {
+					tmpl, err := Parse(ttt.template)
 					if err != nil {
 						t.Fatal(err)
 					}

From ac7a842e550721fbc00e36e416e7cf6606993149 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 3 Jul 2024 09:00:07 -0700
Subject: [PATCH 3/7] fix model reloading

Ensure runtime model changes (template, system prompt, messages,
options) are picked up when a model is updated, without needing to
reload the server.
---
 llm/server.go    |  2 +-
 server/routes.go | 42 ++++++++++++++++++++++--------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/llm/server.go b/llm/server.go
index 206f9e391..229d61e4a 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -679,7 +679,7 @@ type CompletionRequest struct {
 	Prompt  string
 	Format  string
 	Images  []ImageData
-	Options api.Options
+	Options *api.Options
 }
 
 type CompletionResponse struct {
diff --git a/server/routes.go b/server/routes.go
index 1a93e9770..4059c7c52 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -69,23 +69,25 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options
 	return opts, nil
 }
 
-func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) {
+// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
+// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
+func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
 	if name == "" {
-		return nil, fmt.Errorf("model %w", errRequired)
+		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
 	}
 
 	model, err := GetModel(name)
 	if err != nil {
-		return nil, err
+		return nil, nil, nil, err
 	}
 
 	if err := model.CheckCapabilities(caps...); err != nil {
-		return nil, fmt.Errorf("%s %w", name, err)
+		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
 	}
 
 	opts, err := modelOptions(model, requestOpts)
 	if err != nil {
-		return nil, err
+		return nil, nil, nil, err
 	}
 
 	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
@@ -93,10 +95,10 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil
 	select {
 	case runner = <-runnerCh:
 	case err = <-errCh:
-		return nil, err
+		return nil, nil, nil, err
 	}
 
-	return runner, nil
+	return runner.llama, model, &opts, nil
 }
 
 func (s *Server) GenerateHandler(c *gin.Context) {
@@ -118,7 +120,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	}
 
 	caps := []Capability{CapabilityCompletion}
-	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
+	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
 	if errors.Is(err, errCapabilityCompletion) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
 		return
@@ -147,8 +149,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		var msgs []api.Message
 		if req.System != "" {
 			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
-		} else if r.model.System != "" {
-			msgs = append(msgs, api.Message{Role: "system", Content: r.model.System})
+		} else if m.System != "" {
+			msgs = append(msgs, api.Message{Role: "system", Content: m.System})
 		}
 
 		for _, i := range images {
@@ -157,7 +159,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 
 		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
 
-		tmpl := r.model.Template
+		tmpl := m.Template
 		if req.Template != "" {
 			tmpl, err = template.Parse(req.Template)
 			if err != nil {
@@ -168,7 +170,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 
 		var b bytes.Buffer
 		if req.Context != nil {
-			s, err := r.llama.Detokenize(c.Request.Context(), req.Context)
+			s, err := r.Detokenize(c.Request.Context(), req.Context)
 			if err != nil {
 				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 				return
@@ -190,11 +192,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
-		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
+		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
 			Prompt:  prompt,
 			Images:  images,
 			Format:  req.Format,
-			Options: *r.Options,
+			Options: opts,
 		}, func(r llm.CompletionResponse) {
 			ch <- api.GenerateResponse{
 				Model:      req.Model,
@@ -254,7 +256,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 		return
 	}
 
-	r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
+	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
 	if err != nil {
 		handleScheduleError(c, req.Model, err)
 		return
@@ -266,7 +268,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
 		return
 	}
 
-	embedding, err := r.llama.Embedding(c.Request.Context(), req.Prompt)
+	embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
 	if err != nil {
 		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
@@ -1130,7 +1132,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	}
 
 	caps := []Capability{CapabilityCompletion}
-	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
+	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
 	if errors.Is(err, errCapabilityCompletion) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
 		return
@@ -1150,7 +1152,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}
 
-	prompt, images, err := chatPrompt(c.Request.Context(), r.model, r.llama.Tokenize, r.Options, req.Messages)
+	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1161,11 +1163,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
-		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
+		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
 			Prompt:  prompt,
 			Images:  images,
 			Format:  req.Format,
-			Options: *r.Options,
+			Options: opts,
 		}, func(r llm.CompletionResponse) {
 			ch <- api.ChatResponse{
 				Model:      req.Model,

From 326363b3a72d9e2972a019dfc4c6147ea901f501 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 3 Jul 2024 13:49:14 -0700
Subject: [PATCH 4/7] remove custom template funcs

---
 template/template.go      |  19 +------
 template/template_test.go | 105 +++-----------------------------------
 2 files changed, 7 insertions(+), 117 deletions(-)

diff --git a/template/template.go b/template/template.go
index c8f8f6d0d..b133b97e9 100644
--- a/template/template.go
+++ b/template/template.go
@@ -102,25 +102,8 @@ var response = parse.ActionNode{
 	},
 }
 
-var funcs = template.FuncMap{
-	"toJson": func(v any) string {
-		b, err := json.Marshal(v)
-		if err != nil {
-			return ""
-		}
-
-		return string(b)
-	},
-	"add": func(a, b int) int {
-		return a + b
-	},
-	"sub": func(a, b int) int {
-		return a - b
-	},
-}
-
 func Parse(s string) (*Template, error) {
-	tmpl := template.New("").Option("missingkey=zero").Funcs(funcs)
+	tmpl := template.New("").Option("missingkey=zero")
 
 	tmpl, err := tmpl.Parse(s)
 	if err != nil {
diff --git a/template/template_test.go b/template/template_test.go
index ac92bf489..ac16bd606 100644
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -8,7 +8,6 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
-	"strconv"
 	"testing"
 	"text/template"
 
@@ -16,98 +15,6 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-func TestFuncs(t *testing.T) {
-	t.Run("toJson", func(t *testing.T) {
-		cases := []struct {
-			input    any
-			expected string
-		}{
-			{nil, "null"},
-			{true, "true"},
-			{false, "false"},
-			{0, "0"},
-			{1, "1"},
-			{1.0, "1"},
-			{1.1, "1.1"},
-			{"", `""`},
-			{"hello", `"hello"`},
-			{[]int{1, 2, 3}, "[1,2,3]"},
-			{[]string{"a", "b", "c"}, `["a","b","c"]`},
-			{map[string]int{"a": 1, "b": 2}, `{"a":1,"b":2}`},
-			{map[string]string{"a": "b", "c": "d"}, `{"a":"b","c":"d"}`},
-		}
-
-		for _, tt := range cases {
-			t.Run(tt.expected, func(t *testing.T) {
-				toJson, ok := funcs["toJson"].(func(any) string)
-				if !ok {
-					t.Fatal("toJson is not a function")
-				}
-
-				if s := toJson(tt.input); s != tt.expected {
-					t.Errorf("expected %q, got %q", tt.expected, s)
-				}
-			})
-		}
-	})
-
-	t.Run("add", func(t *testing.T) {
-		cases := []struct {
-			a, b     int
-			expected int
-		}{
-			{0, 0, 0},
-			{0, 1, 1},
-			{1, 0, 1},
-			{1, 1, 2},
-			{1, -1, 0},
-			{-1, 1, 0},
-			{-1, -1, -2},
-		}
-
-		for _, tt := range cases {
-			t.Run(strconv.Itoa(tt.expected), func(t *testing.T) {
-				add, ok := funcs["add"].(func(int, int) int)
-				if !ok {
-					t.Fatal("add is not a function")
-				}
-
-				if n := add(tt.a, tt.b); n != tt.expected {
-					t.Errorf("expected %d, got %d", tt.expected, n)
-				}
-			})
-		}
-	})
-
-	t.Run("sub", func(t *testing.T) {
-		cases := []struct {
-			a, b     int
-			expected int
-		}{
-			{0, 0, 0},
-			{0, 1, -1},
-			{1, 0, 1},
-			{1, 1, 0},
-			{1, -1, 2},
-			{-1, 1, -2},
-			{-1, -1, 0},
-		}
-
-		for _, tt := range cases {
-			t.Run(strconv.Itoa(tt.expected), func(t *testing.T) {
-				sub, ok := funcs["sub"].(func(int, int) int)
-				if !ok {
-					t.Fatal("sub is not a function")
-				}
-
-				if n := sub(tt.a, tt.b); n != tt.expected {
-					t.Errorf("expected %d, got %d", tt.expected, n)
-				}
-			})
-		}
-	})
-}
-
 func TestNamed(t *testing.T) {
 	f, err := os.Open(filepath.Join("testdata", "templates.jsonl"))
 	if err != nil {
@@ -197,8 +104,8 @@ func TestExecuteWithMessages(t *testing.T) {
 			[]template{
 				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
 				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
-				{"messages", `{{- range .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }}
+				{"messages", `{{- range $index, $_ := .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
 {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
 {{- end }}
 {{- end }}`},
@@ -218,8 +125,8 @@ func TestExecuteWithMessages(t *testing.T) {
 				{"no response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] `},
 				{"response", `[INST] {{ if .System }}{{ .System }}{{ "\n\n" }}{{ end }}{{ .Prompt }}[/INST] {{ .Response }}`},
 				{"messages", `
-{{- range .Messages }}
-{{- if eq .Role "user" }}[INST] {{ if and (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}{{ $.System }}{{ "\n\n" }}
+{{- range $index, $_ := .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}{{ "\n\n" }}
 {{- end }}{{ .Content }}[/INST] {{ else if eq .Role "assistant" }}{{ .Content }}
 {{- end }}
 {{- end }}`},
@@ -248,8 +155,8 @@ What is your name?[/INST] `,
 {{ .Response }}<|im_end|>
 `},
 				{"messages", `
-{{- range .Messages }}
-{{- if and (eq .Role "user") (eq (index $.Messages (sub (len $.Messages) 1)) .) $.System }}<|im_start|>system
+{{- range $index, $_ := .Messages }}
+{{- if and (eq .Role "user") (eq (len (slice $.Messages $index)) 1) $.System }}<|im_start|>system
 {{ $.System }}<|im_end|>{{ "\n" }}
 {{- end }}<|im_start|>{{ .Role }}
 {{ .Content }}<|im_end|>{{ "\n" }}

From fb6cbc02fbe0ff8d791413a81558a1fe9725b778 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 27 Jun 2024 14:15:17 -0700
Subject: [PATCH 5/7] update named templates

---
 go.mod                                        |  3 +-
 server/routes_create_test.go                  |  4 +-
 template/alfred.gotmpl                        |  9 ++-
 template/alpaca.gotmpl                        | 14 +++-
 template/chatml.gotmpl                        | 11 ++-
 template/chatqa.gotmpl                        | 14 +++-
 template/codellama-70b-instruct.gotmpl        | 13 +++-
 template/falcon-instruct.gotmpl               | 12 +++-
 template/gemma-instruct.gotmpl                | 14 +++-
 template/granite-instruct.gotmpl              | 16 ++++-
 template/llama2-chat.gotmpl                   | 15 +++-
 template/llama3-instruct.gotmpl               | 14 +++-
 template/magicoder.gotmpl                     | 15 +++-
 template/mistral-instruct.gotmpl              | 15 ++--
 template/openchat.gotmpl                      | 12 +++-
 template/phi-3.gotmpl                         | 11 ++-
 template/solar-instruct.gotmpl                | 16 ++++-
 template/starcoder2-instruct.gotmpl           | 15 ++++
 template/template_test.go                     | 69 ++++++++++++++++++-
 .../alfred.gotmpl/system-user-assistant-user  |  1 +
 template/testdata/alfred.gotmpl/user          |  1 +
 .../alfred.gotmpl/user-assistant-user         |  1 +
 .../alpaca.gotmpl/system-user-assistant-user  | 10 +++
 template/testdata/alpaca.gotmpl/user          |  4 ++
 .../alpaca.gotmpl/user-assistant-user         | 10 +++
 .../chatml.gotmpl/system-user-assistant-user  |  9 +++
 template/testdata/chatml.gotmpl/user          |  3 +
 .../chatml.gotmpl/user-assistant-user         |  7 ++
 .../chatqa.gotmpl/system-user-assistant-user  |  9 +++
 template/testdata/chatqa.gotmpl/user          |  3 +
 .../chatqa.gotmpl/user-assistant-user         |  7 ++
 .../system-user-assistant-user                | 11 +++
 .../codellama-70b-instruct.gotmpl/user        |  5 ++
 .../user-assistant-user                       |  9 +++
 .../system-user-assistant-user                |  8 +++
 template/testdata/falcon-instruct.gotmpl/user |  3 +
 .../user-assistant-user                       |  7 ++
 .../system-user-assistant-user                |  8 +++
 template/testdata/gemma-instruct.gotmpl/user  |  3 +
 .../gemma-instruct.gotmpl/user-assistant-user |  7 ++
 .../system-user-assistant-user                | 13 ++++
 .../testdata/granite-instruct.gotmpl/user     |  4 ++
 .../user-assistant-user                       | 10 +++
 .../system-user-assistant-user                |  5 ++
 template/testdata/llama2-chat.gotmpl/user     |  3 +
 .../llama2-chat.gotmpl/user-assistant-user    |  3 +
 .../system-user-assistant-user                | 10 +++
 template/testdata/llama3-instruct.gotmpl/user |  4 ++
 .../user-assistant-user                       |  8 +++
 .../system-user-assistant-user                | 12 ++++
 template/testdata/magicoder.gotmpl/user       |  4 ++
 .../magicoder.gotmpl/user-assistant-user      | 10 +++
 .../system-user-assistant-user                |  2 +
 .../testdata/mistral-instruct.gotmpl/user     |  1 +
 .../user-assistant-user                       |  1 +
 .../system-user-assistant-user                |  1 +
 template/testdata/openchat.gotmpl/user        |  1 +
 .../openchat.gotmpl/user-assistant-user       |  1 +
 .../phi-3.gotmpl/system-user-assistant-user   |  9 +++
 template/testdata/phi-3.gotmpl/user           |  3 +
 .../testdata/phi-3.gotmpl/user-assistant-user |  7 ++
 .../system-user-assistant-user                | 13 ++++
 template/testdata/solar-instruct.gotmpl/user  |  4 ++
 .../solar-instruct.gotmpl/user-assistant-user | 10 +++
 .../system-user-assistant-user                | 12 ++++
 .../testdata/starcoder2-instruct.gotmpl/user  |  4 ++
 .../user-assistant-user                       | 10 +++
 .../vicuna.gotmpl/system-user-assistant-user  |  6 ++
 template/testdata/vicuna.gotmpl/user          |  2 +
 .../vicuna.gotmpl/user-assistant-user         |  4 ++
 .../zephyr.gotmpl/system-user-assistant-user  |  9 +++
 template/testdata/zephyr.gotmpl/user          |  3 +
 .../zephyr.gotmpl/user-assistant-user         |  7 ++
 template/vicuna.gotmpl                        | 13 +++-
 template/zephyr.gotmpl                        | 11 ++-
 75 files changed, 611 insertions(+), 27 deletions(-)
 create mode 100644 template/testdata/alfred.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/alfred.gotmpl/user
 create mode 100644 template/testdata/alfred.gotmpl/user-assistant-user
 create mode 100644 template/testdata/alpaca.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/alpaca.gotmpl/user
 create mode 100644 template/testdata/alpaca.gotmpl/user-assistant-user
 create mode 100644 template/testdata/chatml.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/chatml.gotmpl/user
 create mode 100644 template/testdata/chatml.gotmpl/user-assistant-user
 create mode 100644 template/testdata/chatqa.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/chatqa.gotmpl/user
 create mode 100644 template/testdata/chatqa.gotmpl/user-assistant-user
 create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/user
 create mode 100644 template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/falcon-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/falcon-instruct.gotmpl/user
 create mode 100644 template/testdata/falcon-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/gemma-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/gemma-instruct.gotmpl/user
 create mode 100644 template/testdata/gemma-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/granite-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/granite-instruct.gotmpl/user
 create mode 100644 template/testdata/granite-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/llama2-chat.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/llama2-chat.gotmpl/user
 create mode 100644 template/testdata/llama2-chat.gotmpl/user-assistant-user
 create mode 100644 template/testdata/llama3-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/llama3-instruct.gotmpl/user
 create mode 100644 template/testdata/llama3-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/magicoder.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/magicoder.gotmpl/user
 create mode 100644 template/testdata/magicoder.gotmpl/user-assistant-user
 create mode 100644 template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/mistral-instruct.gotmpl/user
 create mode 100644 template/testdata/mistral-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/openchat.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/openchat.gotmpl/user
 create mode 100644 template/testdata/openchat.gotmpl/user-assistant-user
 create mode 100644 template/testdata/phi-3.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/phi-3.gotmpl/user
 create mode 100644 template/testdata/phi-3.gotmpl/user-assistant-user
 create mode 100644 template/testdata/solar-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/solar-instruct.gotmpl/user
 create mode 100644 template/testdata/solar-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/starcoder2-instruct.gotmpl/user
 create mode 100644 template/testdata/starcoder2-instruct.gotmpl/user-assistant-user
 create mode 100644 template/testdata/vicuna.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/vicuna.gotmpl/user
 create mode 100644 template/testdata/vicuna.gotmpl/user-assistant-user
 create mode 100644 template/testdata/zephyr.gotmpl/system-user-assistant-user
 create mode 100644 template/testdata/zephyr.gotmpl/user
 create mode 100644 template/testdata/zephyr.gotmpl/user-assistant-user

diff --git a/go.mod b/go.mod
index 6807b9b48..2e0c6614c 100644
--- a/go.mod
+++ b/go.mod
@@ -18,6 +18,7 @@ require (
 require (
 	github.com/agnivade/levenshtein v1.1.1
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
+	github.com/google/go-cmp v0.6.0
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
@@ -71,7 +72,7 @@ require (
 	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.20.0
-	golang.org/x/text v0.15.0 // indirect
+	golang.org/x/text v0.15.0
 	google.golang.org/protobuf v1.34.1
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/server/routes_create_test.go b/server/routes_create_test.go
index 340612822..269a0ba12 100644
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -545,9 +545,9 @@ func TestCreateDetectTemplate(t *testing.T) {
 		}
 
 		checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
-			filepath.Join(p, "blobs", "sha256-2f8e594e6f34b1b4d36a246628eeb3365ce442303d656f1fcc69e821722acea0"),
-			filepath.Join(p, "blobs", "sha256-542b217f179c7825eeb5bca3c77d2b75ed05bafbd3451d9188891a60a85337c6"),
 			filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
+			filepath.Join(p, "blobs", "sha256-9512c372dfc7d84d6065b8dd2b601aeed8cc1a78e7a7aa784a42fff37f5524b7"),
+			filepath.Join(p, "blobs", "sha256-b8b78cb8c6eefd14c06f1af042e6161255bf87bbf2dd14fce57cdac893db8139"),
 		})
 	})
 
diff --git a/template/alfred.gotmpl b/template/alfred.gotmpl
index cecb9d2c8..44284f04c 100644
--- a/template/alfred.gotmpl
+++ b/template/alfred.gotmpl
@@ -1 +1,8 @@
-{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
\ No newline at end of file
+{{- if .Messages }}
+{{- if .System }}<start_system>{{ .System }}<end_message>
+{{- end }}
+{{- range .Messages }}<start_{{ .Role }}>{{ .Content }}<end_message>
+{{- end }}<start_assistant>
+{{- else }}
+{{ if .System }}<start_system>{{ .System }}<end_message>{{ end }}{{ if .Prompt }}<start_user>{{ .Prompt }}<end_message>{{ end }}<start_assistant>{{ .Response }}<end_message>
+{{- end }}
\ No newline at end of file
diff --git a/template/alpaca.gotmpl b/template/alpaca.gotmpl
index 440d06627..c1f69dc92 100644
--- a/template/alpaca.gotmpl
+++ b/template/alpaca.gotmpl
@@ -1,7 +1,19 @@
+{{- if .Messages }}
+{{- if .System }}{{ .System }}
+{{- end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}### Instruction:
+{{- else if eq .Role "assistant" }}### Response:
+{{- end }}
+{{ .Content }}
+
+{{ end }}### Response:
+{{ else }}
 {{ if .System }}{{ .System }}
 
 {{ end }}{{ if .Prompt }}### Instruction:
 {{ .Prompt }}
 
 {{ end }}### Response:
-{{ .Response }}
\ No newline at end of file
+{{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/chatml.gotmpl b/template/chatml.gotmpl
index dcf172853..d945547c7 100644
--- a/template/chatml.gotmpl
+++ b/template/chatml.gotmpl
@@ -1,6 +1,15 @@
+{{- if .Messages }}
+{{- if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}
+{{- range .Messages }}<|im_start|>{{ .Role }}
+{{ .Content }}<|im_end|>
+{{ end }}<|im_start|>assistant
+{{ else -}}
 {{ if .System }}<|im_start|>system
 {{ .System }}<|im_end|>
 {{ end }}{{ if .Prompt }}<|im_start|>user
 {{ .Prompt }}<|im_end|>
 {{ end }}<|im_start|>assistant
-{{ .Response }}<|im_end|>
\ No newline at end of file
+{{ .Response }}<|im_end|>
+{{- end }}
\ No newline at end of file
diff --git a/template/chatqa.gotmpl b/template/chatqa.gotmpl
index 1ede6227f..7022c4790 100644
--- a/template/chatqa.gotmpl
+++ b/template/chatqa.gotmpl
@@ -1,5 +1,17 @@
+{{- if .Messages }}
+{{- if .System }}System: {{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}User:
+{{- else if eq .Role "assistant" }}Assistant:
+{{- end }} {{ .Content }}
+
+{{ end }}Assistant:
+{{- else -}}
 {{ if .System }}System: {{ .System }}
 
 {{ end }}{{ if .Prompt }}User: {{ .Prompt }}
 
-{{ end }}Assistant: <|begin_of_text|>{{ .Response }}
\ No newline at end of file
+{{ end }}Assistant: <|begin_of_text|>{{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/codellama-70b-instruct.gotmpl b/template/codellama-70b-instruct.gotmpl
index 3196bd6fd..392d839eb 100644
--- a/template/codellama-70b-instruct.gotmpl
+++ b/template/codellama-70b-instruct.gotmpl
@@ -1,3 +1,13 @@
+{{- if .Messages }}
+{{- if .System }}Source: system
+
+ {{ .System }} <step> {{ end }}
+{{- range .Messages }}Source: {{ .Role }}
+
+ {{ .Content }} <step> {{ end }}Source: assistant
+Destination: user
+
+{{ else -}}
 {{ if .System }} Source: system
 
  {{ .System }} <step>{{ end }} Source: user
@@ -5,4 +15,5 @@
  {{ .Prompt }} <step> Source: assistant
 Destination: user
 
- {{ .Response }}<step>
\ No newline at end of file
+ {{ .Response }}<step>
+{{- end }}
\ No newline at end of file
diff --git a/template/falcon-instruct.gotmpl b/template/falcon-instruct.gotmpl
index 2309a1c5d..99d67f93c 100644
--- a/template/falcon-instruct.gotmpl
+++ b/template/falcon-instruct.gotmpl
@@ -1,3 +1,13 @@
+{{- if .Messages }}
+{{- if .System }}System: {{ .System }}
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}User:
+{{ else if eq .Role "assistant" }}Falcon:
+{{ end }}{{ .Content }}
+{{ end }}Falcon:
+{{ else -}}
 {{ if .System }}{{ .System }}
 {{ end }}{{ if .Prompt }}User: {{ .Prompt }}
-{{ end }}Assistant: {{ .Response }}
\ No newline at end of file
+{{ end }}Assistant: {{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/gemma-instruct.gotmpl b/template/gemma-instruct.gotmpl
index 91b9883a1..870a8f2e2 100644
--- a/template/gemma-instruct.gotmpl
+++ b/template/gemma-instruct.gotmpl
@@ -1,4 +1,16 @@
+{{- if .Messages }}
+{{- range $index, $_ := .Messages }}<start_of_turn>
+{{- if eq .Role "user" }}user
+{{- if and $.System (eq $index 0) }}
+{{ $.System }}
+{{- end }}
+{{- else if eq .Role "assistant" }}model
+{{- end }}
+{{ .Content }}<end_of_turn>
+{{ end }}<start_of_turn>model
+{{ else -}}
 <start_of_turn>user
 {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
 <start_of_turn>model
-{{ .Response }}<end_of_turn>
\ No newline at end of file
+{{ .Response }}<end_of_turn>
+{{- end }}
\ No newline at end of file
diff --git a/template/granite-instruct.gotmpl b/template/granite-instruct.gotmpl
index 2ede647f5..327ff3eef 100644
--- a/template/granite-instruct.gotmpl
+++ b/template/granite-instruct.gotmpl
@@ -1,3 +1,16 @@
+{{- if .Messages }}
+{{- if .System }}System:
+{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}Question:
+{{- else if eq .Role "assistant" }}Answer:
+{{- end }}
+{{ .Content }}
+
+{{ end }}Answer:
+{{ else -}}
 {{ if .System }}
 System:
 {{ .System }}
@@ -6,4 +19,5 @@ System:
 {{ .Prompt }}
 
 {{ end }}Answer:
-{{ .Response }}
\ No newline at end of file
+{{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/llama2-chat.gotmpl b/template/llama2-chat.gotmpl
index a739f6908..6327d5812 100644
--- a/template/llama2-chat.gotmpl
+++ b/template/llama2-chat.gotmpl
@@ -1,3 +1,16 @@
+{{- if .Messages }}
+{{- range $index, $_ := .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if eq $index 0 }}<<SYS>>
+{{- if $.System }}
+{{ $.System }}
+{{ end }}<</SYS>>
+
+{{ end }}{{ .Content }}
+{{- else }} [/INST] {{ .Content }}</s><s>
+{{- end }}
+{{- end }} [/INST]
+{{- else -}}
 [INST] <<SYS>>{{ .System }}<</SYS>>
 
-{{ .Prompt }} [/INST] {{ .Response }}
\ No newline at end of file
+{{ .Prompt }} [/INST] {{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/llama3-instruct.gotmpl b/template/llama3-instruct.gotmpl
index 36d0218b6..9c81a9535 100644
--- a/template/llama3-instruct.gotmpl
+++ b/template/llama3-instruct.gotmpl
@@ -1,7 +1,19 @@
+{{- if .Messages }}
+{{- if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>
+{{- end }}
+{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
+
+{{ .Content }}<|eot_id|>
+{{- end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ else -}}
 {{ if .System }}<|start_header_id|>system<|end_header_id|>
 
 {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
 
 {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
 
-{{ .Response }}<|eot_id|>
\ No newline at end of file
+{{ .Response }}<|eot_id|>
+{{- end }}
\ No newline at end of file
diff --git a/template/magicoder.gotmpl b/template/magicoder.gotmpl
index 306972ecc..73a58127c 100644
--- a/template/magicoder.gotmpl
+++ b/template/magicoder.gotmpl
@@ -1,7 +1,20 @@
+{{- if .Messages }}
+{{- if .System }}{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}@@ Instruction
+{{- else if eq .Role "assistant" }}@@ Response
+{{- end }}
+{{ .Content }}
+
+{{ end }}@@ Response
+{{ else -}}
 {{ if .System }}{{ .System }}
 
 {{ end }}{{ if .Prompt }}@@ Instruction
 {{ .Prompt }}
 
 {{ end }}@@ Response
-{{ .Response }}
\ No newline at end of file
+{{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/mistral-instruct.gotmpl b/template/mistral-instruct.gotmpl
index dcf172853..eb3d5ced2 100644
--- a/template/mistral-instruct.gotmpl
+++ b/template/mistral-instruct.gotmpl
@@ -1,6 +1,9 @@
-{{ if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}{{ if .Prompt }}<|im_start|>user
-{{ .Prompt }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ .Response }}<|im_end|>
\ No newline at end of file
+{{- if .Messages }}
+{{- range $index, $_ := .Messages }}
+{{- if eq .Role "user" }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }}
+{{ end }}{{ .Content }}
+{{- else if eq .Role "assistant" }}[/INST] {{ .Content }}</s>
+{{- end }}
+{{- end }}[/INST]
+{{- else }}[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST] {{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/openchat.gotmpl b/template/openchat.gotmpl
index d2ca38685..d5e1cbb0d 100644
--- a/template/openchat.gotmpl
+++ b/template/openchat.gotmpl
@@ -1 +1,11 @@
-{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
\ No newline at end of file
+{{- if .Messages }}
+{{- if .System }}GPT Correct System: {{ .System }}<|end_of_turn|>
+{{- end }}
+{{- range .Messages }}GPT Correct
+{{- if eq .Role "user" }} User:
+{{- else if eq .Role "assistant" }} Assistant:
+{{- end }} {{ .Content }}<|end_of_turn|>
+{{- end }}GPT Correct Assistant:
+{{- else -}}
+{{ .System }}<|end_of_turn|>GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>
+{{- end }}
\ No newline at end of file
diff --git a/template/phi-3.gotmpl b/template/phi-3.gotmpl
index bf26dcee2..a3558d2b7 100644
--- a/template/phi-3.gotmpl
+++ b/template/phi-3.gotmpl
@@ -1,6 +1,15 @@
+{{- if .Messages }}
+{{- if .System }}<|system|>
+{{ .System }}<|end|>
+{{ end }}
+{{- range .Messages }}<|{{ .Role }}|>
+{{ .Content }}<|end|>
+{{ end }}<|assistant|>
+{{ else -}}
 {{ if .System }}<|system|>
 {{ .System }}<|end|>
 {{ end }}{{ if .Prompt }}<|user|>
 {{ .Prompt }}<|end|>
 {{ end }}<|assistant|>
-{{ .Response }}<|end|>
\ No newline at end of file
+{{ .Response }}<|end|>
+{{- end }}
\ No newline at end of file
diff --git a/template/solar-instruct.gotmpl b/template/solar-instruct.gotmpl
index c275a26a3..caa6e8e77 100644
--- a/template/solar-instruct.gotmpl
+++ b/template/solar-instruct.gotmpl
@@ -1,3 +1,16 @@
+{{- if .Messages }}
+{{- if .System }}### System:
+{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}### User:
+{{ .Content }}
+{{ else if eq .Role "assistant" }}### Assistant:
+{{ .Content }}</s>
+{{ end }}
+{{ end }}### Assistant:
+{{ else -}}
 {{ if .System }}### System:
 {{ .System }}
 
@@ -5,4 +18,5 @@
 {{ .Prompt }}
 
 {{ end }}### Assistant:
-{{ .Response }}
\ No newline at end of file
+{{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/starcoder2-instruct.gotmpl b/template/starcoder2-instruct.gotmpl
index 33357e54e..7d7ff9326 100644
--- a/template/starcoder2-instruct.gotmpl
+++ b/template/starcoder2-instruct.gotmpl
@@ -1,3 +1,17 @@
+{{- if .Messages }}
+{{- if .System }}{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}### Instruction
+{{ .Content }}
+
+{{ else if eq .Role "assistant" }}### Response
+{{ .Content }}<|endoftext|>
+
+{{ end }}
+{{- end }}### Response
+{{ else -}}
 {{ if .System }}{{ .System }}
 
 {{ end }}{{ if .Prompt }}### Instruction
@@ -7,3 +21,4 @@
 {{ end }}### Response
 {{ .Response }}<|endoftext|>
 
+{{- end }}
\ No newline at end of file
diff --git a/template/template_test.go b/template/template_test.go
index ac16bd606..428cdc77c 100644
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -8,9 +8,10 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
+	"strings"
 	"testing"
-	"text/template"
 
+	"github.com/google/go-cmp/cmp"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
@@ -47,7 +48,7 @@ func TestNamed(t *testing.T) {
 					t.Fatal(err)
 				}
 
-				tmpl, err := template.New(s).Parse(b.String())
+				tmpl, err := Parse(b.String())
 				if err != nil {
 					t.Fatal(err)
 				}
@@ -60,6 +61,70 @@ func TestNamed(t *testing.T) {
 	}
 }
 
+func TestTemplate(t *testing.T) { // golden-file test: renders each *.gotmpl file with fixed conversations
+	cases := make(map[string][]api.Message) // keyed by the message roles joined with "-", e.g. "user-assistant-user"
+	for _, mm := range [][]api.Message{
+		{
+			{Role: "user", Content: "Hello, how are you?"},
+		},
+		{
+			{Role: "user", Content: "Hello, how are you?"},
+			{Role: "assistant", Content: "I'm doing great. How can I help you today?"},
+			{Role: "user", Content: "I'd like to show off how chat templating works!"},
+		},
+		{
+			{Role: "system", Content: "You are a helpful assistant."},
+			{Role: "user", Content: "Hello, how are you?"},
+			{Role: "assistant", Content: "I'm doing great. How can I help you today?"},
+			{Role: "user", Content: "I'd like to show off how chat templating works!"},
+		},
+	} {
+		var roles []string
+		for _, m := range mm {
+			roles = append(roles, m.Role)
+		}
+
+		cases[strings.Join(roles, "-")] = mm // the key is reused below as the subtest name and golden file name
+	}
+
+	matches, err := filepath.Glob("*.gotmpl") // relative pattern, resolved against the current working directory
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, match := range matches {
+		t.Run(match, func(t *testing.T) { // one subtest per template file
+			bts, err := os.ReadFile(match)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			tmpl, err := Parse(string(bts))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			for n, tt := range cases {
+				t.Run(n, func(t *testing.T) { // nested subtest per conversation shape
+					var actual bytes.Buffer
+					if err := tmpl.Execute(&actual, Values{Messages: tt}); err != nil { // only Messages is populated here
+						t.Fatal(err)
+					}
+
+					expect, err := os.ReadFile(filepath.Join("testdata", match, n)) // golden file: testdata/<template file>/<roles key>
+					if err != nil {
+						t.Fatal(err)
+					}
+
+					if diff := cmp.Diff(actual.Bytes(), expect); diff != "" { // compares raw bytes, so whitespace differences fail too
+						t.Errorf("mismatch (-got +want):\n%s", diff)
+					}
+				})
+			}
+		})
+	}
+}
+
 func TestParse(t *testing.T) {
 	cases := []struct {
 		template string
diff --git a/template/testdata/alfred.gotmpl/system-user-assistant-user b/template/testdata/alfred.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..03e23ea9c
--- /dev/null
+++ b/template/testdata/alfred.gotmpl/system-user-assistant-user
@@ -0,0 +1 @@
+<start_system>You are a helpful assistant.<end_message><start_user>Hello, how are you?<end_message><start_assistant>I'm doing great. How can I help you today?<end_message><start_user>I'd like to show off how chat templating works!<end_message><start_assistant>
\ No newline at end of file
diff --git a/template/testdata/alfred.gotmpl/user b/template/testdata/alfred.gotmpl/user
new file mode 100644
index 000000000..7c884a6f0
--- /dev/null
+++ b/template/testdata/alfred.gotmpl/user
@@ -0,0 +1 @@
+<start_user>Hello, how are you?<end_message><start_assistant>
\ No newline at end of file
diff --git a/template/testdata/alfred.gotmpl/user-assistant-user b/template/testdata/alfred.gotmpl/user-assistant-user
new file mode 100644
index 000000000..a60701ed7
--- /dev/null
+++ b/template/testdata/alfred.gotmpl/user-assistant-user
@@ -0,0 +1 @@
+<start_user>Hello, how are you?<end_message><start_assistant>I'm doing great. How can I help you today?<end_message><start_user>I'd like to show off how chat templating works!<end_message><start_assistant>
\ No newline at end of file
diff --git a/template/testdata/alpaca.gotmpl/system-user-assistant-user b/template/testdata/alpaca.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..20182d829
--- /dev/null
+++ b/template/testdata/alpaca.gotmpl/system-user-assistant-user
@@ -0,0 +1,10 @@
+You are a helpful assistant.### Instruction:
+Hello, how are you?
+
+### Response:
+I'm doing great. How can I help you today?
+
+### Instruction:
+I'd like to show off how chat templating works!
+
+### Response:
diff --git a/template/testdata/alpaca.gotmpl/user b/template/testdata/alpaca.gotmpl/user
new file mode 100644
index 000000000..a0ce5dec7
--- /dev/null
+++ b/template/testdata/alpaca.gotmpl/user
@@ -0,0 +1,4 @@
+### Instruction:
+Hello, how are you?
+
+### Response:
diff --git a/template/testdata/alpaca.gotmpl/user-assistant-user b/template/testdata/alpaca.gotmpl/user-assistant-user
new file mode 100644
index 000000000..6c5e23ff5
--- /dev/null
+++ b/template/testdata/alpaca.gotmpl/user-assistant-user
@@ -0,0 +1,10 @@
+### Instruction:
+Hello, how are you?
+
+### Response:
+I'm doing great. How can I help you today?
+
+### Instruction:
+I'd like to show off how chat templating works!
+
+### Response:
diff --git a/template/testdata/chatml.gotmpl/system-user-assistant-user b/template/testdata/chatml.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..8b013fcfb
--- /dev/null
+++ b/template/testdata/chatml.gotmpl/system-user-assistant-user
@@ -0,0 +1,9 @@
+<|im_start|>system
+You are a helpful assistant.<|im_end|>
+<|im_start|>user
+Hello, how are you?<|im_end|>
+<|im_start|>assistant
+I'm doing great. How can I help you today?<|im_end|>
+<|im_start|>user
+I'd like to show off how chat templating works!<|im_end|>
+<|im_start|>assistant
diff --git a/template/testdata/chatml.gotmpl/user b/template/testdata/chatml.gotmpl/user
new file mode 100644
index 000000000..aa9e597a4
--- /dev/null
+++ b/template/testdata/chatml.gotmpl/user
@@ -0,0 +1,3 @@
+<|im_start|>user
+Hello, how are you?<|im_end|>
+<|im_start|>assistant
diff --git a/template/testdata/chatml.gotmpl/user-assistant-user b/template/testdata/chatml.gotmpl/user-assistant-user
new file mode 100644
index 000000000..a7cba4de0
--- /dev/null
+++ b/template/testdata/chatml.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+<|im_start|>user
+Hello, how are you?<|im_end|>
+<|im_start|>assistant
+I'm doing great. How can I help you today?<|im_end|>
+<|im_start|>user
+I'd like to show off how chat templating works!<|im_end|>
+<|im_start|>assistant
diff --git a/template/testdata/chatqa.gotmpl/system-user-assistant-user b/template/testdata/chatqa.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..98fd59bfa
--- /dev/null
+++ b/template/testdata/chatqa.gotmpl/system-user-assistant-user
@@ -0,0 +1,9 @@
+System: You are a helpful assistant.
+
+User: Hello, how are you?
+
+Assistant: I'm doing great. How can I help you today?
+
+User: I'd like to show off how chat templating works!
+
+Assistant:
\ No newline at end of file
diff --git a/template/testdata/chatqa.gotmpl/user b/template/testdata/chatqa.gotmpl/user
new file mode 100644
index 000000000..9e7cf702d
--- /dev/null
+++ b/template/testdata/chatqa.gotmpl/user
@@ -0,0 +1,3 @@
+User: Hello, how are you?
+
+Assistant:
\ No newline at end of file
diff --git a/template/testdata/chatqa.gotmpl/user-assistant-user b/template/testdata/chatqa.gotmpl/user-assistant-user
new file mode 100644
index 000000000..405bbe12c
--- /dev/null
+++ b/template/testdata/chatqa.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+User: Hello, how are you?
+
+Assistant: I'm doing great. How can I help you today?
+
+User: I'd like to show off how chat templating works!
+
+Assistant:
\ No newline at end of file
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..fdd0fc8b4
--- /dev/null
+++ b/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,11 @@
+Source: system
+
+ You are a helpful assistant. <step> Source: user
+
+ Hello, how are you? <step> Source: assistant
+
+ I'm doing great. How can I help you today? <step> Source: user
+
+ I'd like to show off how chat templating works! <step> Source: assistant
+Destination: user
+
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user b/template/testdata/codellama-70b-instruct.gotmpl/user
new file mode 100644
index 000000000..9e7174a84
--- /dev/null
+++ b/template/testdata/codellama-70b-instruct.gotmpl/user
@@ -0,0 +1,5 @@
+Source: user
+
+ Hello, how are you? <step> Source: assistant
+Destination: user
+
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..b4ba1736b
--- /dev/null
+++ b/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,9 @@
+Source: user
+
+ Hello, how are you? <step> Source: assistant
+
+ I'm doing great. How can I help you today? <step> Source: user
+
+ I'd like to show off how chat templating works! <step> Source: assistant
+Destination: user
+
diff --git a/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user b/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..16e45e5b6
--- /dev/null
+++ b/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,8 @@
+System: You are a helpful assistant.
+User:
+Hello, how are you?
+Falcon:
+I'm doing great. How can I help you today?
+User:
+I'd like to show off how chat templating works!
+Falcon:
diff --git a/template/testdata/falcon-instruct.gotmpl/user b/template/testdata/falcon-instruct.gotmpl/user
new file mode 100644
index 000000000..110831a2c
--- /dev/null
+++ b/template/testdata/falcon-instruct.gotmpl/user
@@ -0,0 +1,3 @@
+User:
+Hello, how are you?
+Falcon:
diff --git a/template/testdata/falcon-instruct.gotmpl/user-assistant-user b/template/testdata/falcon-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..b49639ea5
--- /dev/null
+++ b/template/testdata/falcon-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+User:
+Hello, how are you?
+Falcon:
+I'm doing great. How can I help you today?
+User:
+I'd like to show off how chat templating works!
+Falcon:
diff --git a/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user b/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..5f6c37324
--- /dev/null
+++ b/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,8 @@
+<start_of_turn>user
+You are a helpful assistant.
+Hello, how are you?<end_of_turn>
+<start_of_turn>model
+I'm doing great. How can I help you today?<end_of_turn>
+<start_of_turn>user
+I'd like to show off how chat templating works!<end_of_turn>
+<start_of_turn>model
diff --git a/template/testdata/gemma-instruct.gotmpl/user b/template/testdata/gemma-instruct.gotmpl/user
new file mode 100644
index 000000000..dc8b30b68
--- /dev/null
+++ b/template/testdata/gemma-instruct.gotmpl/user
@@ -0,0 +1,3 @@
+<start_of_turn>user
+Hello, how are you?<end_of_turn>
+<start_of_turn>model
diff --git a/template/testdata/gemma-instruct.gotmpl/user-assistant-user b/template/testdata/gemma-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..1185924b0
--- /dev/null
+++ b/template/testdata/gemma-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+<start_of_turn>user
+Hello, how are you?<end_of_turn>
+<start_of_turn>model
+I'm doing great. How can I help you today?<end_of_turn>
+<start_of_turn>user
+I'd like to show off how chat templating works!<end_of_turn>
+<start_of_turn>model
diff --git a/template/testdata/granite-instruct.gotmpl/system-user-assistant-user b/template/testdata/granite-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..a732a77fb
--- /dev/null
+++ b/template/testdata/granite-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,13 @@
+System:
+You are a helpful assistant.
+
+Question:
+Hello, how are you?
+
+Answer:
+I'm doing great. How can I help you today?
+
+Question:
+I'd like to show off how chat templating works!
+
+Answer:
diff --git a/template/testdata/granite-instruct.gotmpl/user b/template/testdata/granite-instruct.gotmpl/user
new file mode 100644
index 000000000..7abd2ea35
--- /dev/null
+++ b/template/testdata/granite-instruct.gotmpl/user
@@ -0,0 +1,4 @@
+Question:
+Hello, how are you?
+
+Answer:
diff --git a/template/testdata/granite-instruct.gotmpl/user-assistant-user b/template/testdata/granite-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..da5e43eae
--- /dev/null
+++ b/template/testdata/granite-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,10 @@
+Question:
+Hello, how are you?
+
+Answer:
+I'm doing great. How can I help you today?
+
+Question:
+I'd like to show off how chat templating works!
+
+Answer:
diff --git a/template/testdata/llama2-chat.gotmpl/system-user-assistant-user b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..fc2679bf0
--- /dev/null
+++ b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user
@@ -0,0 +1,5 @@
+[INST] <<SYS>>
+You are a helpful assistant.
+<</SYS>>
+
+Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
\ No newline at end of file
diff --git a/template/testdata/llama2-chat.gotmpl/user b/template/testdata/llama2-chat.gotmpl/user
new file mode 100644
index 000000000..ceef9bdbb
--- /dev/null
+++ b/template/testdata/llama2-chat.gotmpl/user
@@ -0,0 +1,3 @@
+[INST] <<SYS>><</SYS>>
+
+Hello, how are you? [/INST]
\ No newline at end of file
diff --git a/template/testdata/llama2-chat.gotmpl/user-assistant-user b/template/testdata/llama2-chat.gotmpl/user-assistant-user
new file mode 100644
index 000000000..42b4c5294
--- /dev/null
+++ b/template/testdata/llama2-chat.gotmpl/user-assistant-user
@@ -0,0 +1,3 @@
+[INST] <<SYS>><</SYS>>
+
+Hello, how are you? [/INST] I'm doing great. How can I help you today?</s><s>[INST] I'd like to show off how chat templating works! [/INST]
\ No newline at end of file
diff --git a/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user b/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..6740bcb4d
--- /dev/null
+++ b/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,10 @@
+<|start_header_id|>system<|end_header_id|>
+
+You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+I'm doing great. How can I help you today?<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+I'd like to show off how chat templating works!<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
diff --git a/template/testdata/llama3-instruct.gotmpl/user b/template/testdata/llama3-instruct.gotmpl/user
new file mode 100644
index 000000000..470aa028f
--- /dev/null
+++ b/template/testdata/llama3-instruct.gotmpl/user
@@ -0,0 +1,4 @@
+<|start_header_id|>user<|end_header_id|>
+
+Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
diff --git a/template/testdata/llama3-instruct.gotmpl/user-assistant-user b/template/testdata/llama3-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..6dd768af5
--- /dev/null
+++ b/template/testdata/llama3-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,8 @@
+<|start_header_id|>user<|end_header_id|>
+
+Hello, how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+I'm doing great. How can I help you today?<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+I'd like to show off how chat templating works!<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
diff --git a/template/testdata/magicoder.gotmpl/system-user-assistant-user b/template/testdata/magicoder.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..c966a861d
--- /dev/null
+++ b/template/testdata/magicoder.gotmpl/system-user-assistant-user
@@ -0,0 +1,12 @@
+You are a helpful assistant.
+
+@@ Instruction
+Hello, how are you?
+
+@@ Response
+I'm doing great. How can I help you today?
+
+@@ Instruction
+I'd like to show off how chat templating works!
+
+@@ Response
diff --git a/template/testdata/magicoder.gotmpl/user b/template/testdata/magicoder.gotmpl/user
new file mode 100644
index 000000000..ccfb02bd2
--- /dev/null
+++ b/template/testdata/magicoder.gotmpl/user
@@ -0,0 +1,4 @@
+@@ Instruction
+Hello, how are you?
+
+@@ Response
diff --git a/template/testdata/magicoder.gotmpl/user-assistant-user b/template/testdata/magicoder.gotmpl/user-assistant-user
new file mode 100644
index 000000000..3aea6dab9
--- /dev/null
+++ b/template/testdata/magicoder.gotmpl/user-assistant-user
@@ -0,0 +1,10 @@
+@@ Instruction
+Hello, how are you?
+
+@@ Response
+I'm doing great. How can I help you today?
+
+@@ Instruction
+I'd like to show off how chat templating works!
+
+@@ Response
diff --git a/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..b6b4bf93e
--- /dev/null
+++ b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,2 @@
+[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] You are a helpful assistant.
+I'd like to show off how chat templating works![/INST]
\ No newline at end of file
diff --git a/template/testdata/mistral-instruct.gotmpl/user b/template/testdata/mistral-instruct.gotmpl/user
new file mode 100644
index 000000000..b04871e5d
--- /dev/null
+++ b/template/testdata/mistral-instruct.gotmpl/user
@@ -0,0 +1 @@
+[INST] Hello, how are you?[/INST]
\ No newline at end of file
diff --git a/template/testdata/mistral-instruct.gotmpl/user-assistant-user b/template/testdata/mistral-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..b473e0df0
--- /dev/null
+++ b/template/testdata/mistral-instruct.gotmpl/user-assistant-user
@@ -0,0 +1 @@
+[INST] Hello, how are you?[/INST] I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works![/INST]
\ No newline at end of file
diff --git a/template/testdata/openchat.gotmpl/system-user-assistant-user b/template/testdata/openchat.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..1214c1264
--- /dev/null
+++ b/template/testdata/openchat.gotmpl/system-user-assistant-user
@@ -0,0 +1 @@
+GPT Correct System: You are a helpful assistant.<|end_of_turn|>GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
\ No newline at end of file
diff --git a/template/testdata/openchat.gotmpl/user b/template/testdata/openchat.gotmpl/user
new file mode 100644
index 000000000..611daa83e
--- /dev/null
+++ b/template/testdata/openchat.gotmpl/user
@@ -0,0 +1 @@
+GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant:
\ No newline at end of file
diff --git a/template/testdata/openchat.gotmpl/user-assistant-user b/template/testdata/openchat.gotmpl/user-assistant-user
new file mode 100644
index 000000000..f97b02b9c
--- /dev/null
+++ b/template/testdata/openchat.gotmpl/user-assistant-user
@@ -0,0 +1 @@
+GPT Correct User: Hello, how are you?<|end_of_turn|>GPT Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT Correct User: I'd like to show off how chat templating works!<|end_of_turn|>GPT Correct Assistant:
\ No newline at end of file
diff --git a/template/testdata/phi-3.gotmpl/system-user-assistant-user b/template/testdata/phi-3.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..6109a9a24
--- /dev/null
+++ b/template/testdata/phi-3.gotmpl/system-user-assistant-user
@@ -0,0 +1,9 @@
+<|system|>
+You are a helpful assistant.<|end|>
+<|user|>
+Hello, how are you?<|end|>
+<|assistant|>
+I'm doing great. How can I help you today?<|end|>
+<|user|>
+I'd like to show off how chat templating works!<|end|>
+<|assistant|>
diff --git a/template/testdata/phi-3.gotmpl/user b/template/testdata/phi-3.gotmpl/user
new file mode 100644
index 000000000..feb96e7c9
--- /dev/null
+++ b/template/testdata/phi-3.gotmpl/user
@@ -0,0 +1,3 @@
+<|user|>
+Hello, how are you?<|end|>
+<|assistant|>
diff --git a/template/testdata/phi-3.gotmpl/user-assistant-user b/template/testdata/phi-3.gotmpl/user-assistant-user
new file mode 100644
index 000000000..db79d01c1
--- /dev/null
+++ b/template/testdata/phi-3.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+<|user|>
+Hello, how are you?<|end|>
+<|assistant|>
+I'm doing great. How can I help you today?<|end|>
+<|user|>
+I'd like to show off how chat templating works!<|end|>
+<|assistant|>
diff --git a/template/testdata/solar-instruct.gotmpl/system-user-assistant-user b/template/testdata/solar-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..28c1730ab
--- /dev/null
+++ b/template/testdata/solar-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,13 @@
+### System:
+You are a helpful assistant.
+
+### User:
+Hello, how are you?
+
+### Assistant:
+I'm doing great. How can I help you today?</s>
+
+### User:
+I'd like to show off how chat templating works!
+
+### Assistant:
diff --git a/template/testdata/solar-instruct.gotmpl/user b/template/testdata/solar-instruct.gotmpl/user
new file mode 100644
index 000000000..3a43382af
--- /dev/null
+++ b/template/testdata/solar-instruct.gotmpl/user
@@ -0,0 +1,4 @@
+### User:
+Hello, how are you?
+
+### Assistant:
diff --git a/template/testdata/solar-instruct.gotmpl/user-assistant-user b/template/testdata/solar-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..8553e73ba
--- /dev/null
+++ b/template/testdata/solar-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,10 @@
+### User:
+Hello, how are you?
+
+### Assistant:
+I'm doing great. How can I help you today?</s>
+
+### User:
+I'd like to show off how chat templating works!
+
+### Assistant:
diff --git a/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..5b718b3ec
--- /dev/null
+++ b/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user
@@ -0,0 +1,12 @@
+You are a helpful assistant.
+
+### Instruction
+Hello, how are you?
+
+### Response
+I'm doing great. How can I help you today?<|endoftext|>
+
+### Instruction
+I'd like to show off how chat templating works!
+
+### Response
diff --git a/template/testdata/starcoder2-instruct.gotmpl/user b/template/testdata/starcoder2-instruct.gotmpl/user
new file mode 100644
index 000000000..11b0be1fe
--- /dev/null
+++ b/template/testdata/starcoder2-instruct.gotmpl/user
@@ -0,0 +1,4 @@
+### Instruction
+Hello, how are you?
+
+### Response
diff --git a/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user
new file mode 100644
index 000000000..d99feabb0
--- /dev/null
+++ b/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user
@@ -0,0 +1,10 @@
+### Instruction
+Hello, how are you?
+
+### Response
+I'm doing great. How can I help you today?<|endoftext|>
+
+### Instruction
+I'd like to show off how chat templating works!
+
+### Response
diff --git a/template/testdata/vicuna.gotmpl/system-user-assistant-user b/template/testdata/vicuna.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..50d2f92c1
--- /dev/null
+++ b/template/testdata/vicuna.gotmpl/system-user-assistant-user
@@ -0,0 +1,6 @@
+You are a helpful assistant.
+
+USER: Hello, how are you?
+ASSISTANT: I'm doing great. How can I help you today?</s>
+USER: I'd like to show off how chat templating works!
+ASSISTANT:
\ No newline at end of file
diff --git a/template/testdata/vicuna.gotmpl/user b/template/testdata/vicuna.gotmpl/user
new file mode 100644
index 000000000..cbe5ef709
--- /dev/null
+++ b/template/testdata/vicuna.gotmpl/user
@@ -0,0 +1,2 @@
+USER: Hello, how are you?
+ASSISTANT:
\ No newline at end of file
diff --git a/template/testdata/vicuna.gotmpl/user-assistant-user b/template/testdata/vicuna.gotmpl/user-assistant-user
new file mode 100644
index 000000000..9172547e3
--- /dev/null
+++ b/template/testdata/vicuna.gotmpl/user-assistant-user
@@ -0,0 +1,4 @@
+USER: Hello, how are you?
+ASSISTANT: I'm doing great. How can I help you today?</s>
+USER: I'd like to show off how chat templating works!
+ASSISTANT:
\ No newline at end of file
diff --git a/template/testdata/zephyr.gotmpl/system-user-assistant-user b/template/testdata/zephyr.gotmpl/system-user-assistant-user
new file mode 100644
index 000000000..03d43fc34
--- /dev/null
+++ b/template/testdata/zephyr.gotmpl/system-user-assistant-user
@@ -0,0 +1,9 @@
+<|system|>
+You are a helpful assistant.</s>
+<|user|>
+Hello, how are you?</s>
+<|assistant|>
+I'm doing great. How can I help you today?</s>
+<|user|>
+I'd like to show off how chat templating works!</s>
+<|assistant|>
diff --git a/template/testdata/zephyr.gotmpl/user b/template/testdata/zephyr.gotmpl/user
new file mode 100644
index 000000000..6cefdaa0f
--- /dev/null
+++ b/template/testdata/zephyr.gotmpl/user
@@ -0,0 +1,3 @@
+<|user|>
+Hello, how are you?</s>
+<|assistant|>
diff --git a/template/testdata/zephyr.gotmpl/user-assistant-user b/template/testdata/zephyr.gotmpl/user-assistant-user
new file mode 100644
index 000000000..3937b006a
--- /dev/null
+++ b/template/testdata/zephyr.gotmpl/user-assistant-user
@@ -0,0 +1,7 @@
+<|user|>
+Hello, how are you?</s>
+<|assistant|>
+I'm doing great. How can I help you today?</s>
+<|user|>
+I'd like to show off how chat templating works!</s>
+<|assistant|>
diff --git a/template/vicuna.gotmpl b/template/vicuna.gotmpl
index 174c1a353..2e13e990d 100644
--- a/template/vicuna.gotmpl
+++ b/template/vicuna.gotmpl
@@ -1,3 +1,14 @@
+{{- if .Messages }}
+{{- if .System }}{{ .System }}
+
+{{ end }}
+{{- range .Messages }}
+{{- if eq .Role "user" }}USER: {{ .Content }}
+{{ else if eq .Role "assistant" }}ASSISTANT: {{ .Content }}</s>
+{{ end }}
+{{- end }}ASSISTANT:
+{{- else -}}
 {{ if .System }}{{ .System }}
 {{ end }}{{ if .Prompt }}USER: {{ .Prompt }}
-{{ end }}ASSISTANT: {{ .Response }}
\ No newline at end of file
+{{ end }}ASSISTANT: {{ .Response }}
+{{- end }}
\ No newline at end of file
diff --git a/template/zephyr.gotmpl b/template/zephyr.gotmpl
index aac0c7a1f..e66688480 100644
--- a/template/zephyr.gotmpl
+++ b/template/zephyr.gotmpl
@@ -1,6 +1,15 @@
+{{- if .Messages }}
+{{- if .System }}<|system|>
+{{ .System }}</s>
+{{ end }}
+{{- range .Messages }}<|{{ .Role }}|>
+{{ .Content }}</s>
+{{ end }}<|assistant|>
+{{ else -}}
 {{ if .System }}<|system|>
 {{ .System }}</s>
 {{ end }}{{ if .Prompt }}<|user|>
 {{ .Prompt }}</s>
 {{ end }}<|assistant|>
-{{ .Response }}</s>
\ No newline at end of file
+{{ .Response }}</s>
+{{- end }}
\ No newline at end of file

From e4ff73297db2f53f1ea4b603df5670c5bde6a944 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan <jmorganca@gmail.com>
Date: Mon, 8 Jul 2024 22:32:15 -0700
Subject: [PATCH 6/7] server: fix model reloads when setting
 `OLLAMA_NUM_PARALLEL` (#5560)

* server: fix unneeded model reloads when setting `OLLAMA_NUM_PARALLEL`

* remove whitespace change

* undo some changes
---
 server/sched.go | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 9dff2ae07..48047bfec 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -133,10 +133,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 				numParallel = 1
 				slog.Warn("multimodal models don't support parallel requests yet")
 			}
-			// Keep NumCtx and numParallel in sync
-			if numParallel > 1 {
-				pending.opts.NumCtx = pending.origNumCtx * numParallel
-			}
 
 			for {
 				cpus := s.getCpuFn()
@@ -234,9 +230,10 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						// simplifying assumption of defaultParallel when in CPU mode
 						if numParallel <= 0 {
 							numParallel = defaultParallel
-							pending.opts.NumCtx = pending.origNumCtx * numParallel
 						}
 
+						pending.opts.NumCtx = pending.origNumCtx * numParallel
+
 						if loadedCount == 0 {
 							slog.Debug("cpu mode with first model, loading")
 							s.loadFn(pending, ggml, gpus, numParallel)

From b51e3b63ac7bc995e99f3a8f7c1b507a1f8fb5d9 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Tue, 9 Jul 2024 11:17:44 -0700
Subject: [PATCH 7/7] Statically link c++ and thread lib

This makes sure we statically link the C++ and thread libraries on Windows
to avoid unnecessary runtime dependencies on non-standard DLLs.
---
 .github/workflows/release.yaml | 5 -----
 llm/llm.go                     | 4 ++--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 0005c69d3..61ca3c433 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -304,11 +304,6 @@ jobs:
           write-host "Installing plugin"
           & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
           write-host "plugin installed"
-      - name: remove unwanted mingw dll.a files
-        run: |
-          Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libpthread.dll.a" -File | Remove-Item -Force
-          Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libwinpthread.dll.a" -File | Remove-Item -Force
-          Get-ChildItem -Path "C:\mingw64" -Recurse -Filter "libstdc++.dll.a" -File | Remove-Item -Force
       - uses: actions/setup-go@v5
         with:
           go-version-file: go.mod
diff --git a/llm/llm.go b/llm/llm.go
index 88c0258d6..f2a5e557a 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -4,8 +4,8 @@ package llm
 // #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
 // #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
 // #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
-// #cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
-// #cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src
+// #cgo windows,amd64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
+// #cgo windows,arm64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src
 // #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
 // #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
 // #include <stdlib.h>