OpenAI Delete Endpoint

2024-06-14 16:28:22 -07:00
304 changed files with 5106 additions and 13922 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1 @@
 llm/ext_server/* linguist-vendored
 * text eol=lf
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -31,7 +31,7 @@ jobs:
          security set-keychain-settings -lut 3600 build.keychain
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: Build Darwin
        env:
@@ -87,7 +87,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -141,13 +141,13 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -218,7 +218,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -306,7 +306,7 @@ jobs:
          write-host "plugin installed"
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get
      - uses: actions/download-artifact@v4
@@ -437,7 +437,6 @@ jobs:
    env:
      OLLAMA_SKIP_IMAGE_BUILD: '1'
      PUSH: '1'
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
      - name: Set Version
@@ -461,20 +460,15 @@ jobs:
          ls -lh dist/
          (cd dist; sha256sum * > sha256sum.txt)
          cat dist/sha256sum.txt
-      - name: Create or update Release
+      - uses: ncipollo/release-action@v1
-        run: |
+        with:
-          echo "Looking for existing release for ${{ env.RELEASE_VERSION }}"
+          name: ${{ env.RELEASE_VERSION }}
-          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${{ env.RELEASE_VERSION }}\") | .tagName")
+          allowUpdates: true
-          if [ -n "$OLD_TAG" ]; then
+          artifacts: 'dist/*'
-            echo "Updating release ${{ env.RELEASE_VERSION }} to point to new tag ${GITHUB_REF_NAME}"
+          draft: true
-            gh release edit ${OLD_TAG} --tag ${GITHUB_REF_NAME}
+          prerelease: true
-          else
+          omitBodyDuringUpdate: true
-            echo "Creating new release ${{ env.RELEASE_VERSION }} pointing to tag ${GITHUB_REF_NAME}"
+          generateReleaseNotes: true
-            gh release create ${GITHUB_REF_NAME} \
+          omitDraftDuringUpdate: true
-              --title ${{ env.RELEASE_VERSION }} \
+          omitPrereleaseDuringUpdate: true
-              --draft \
+          replacesArtifacts: true
              --generate-notes \
              --prerelease
          fi
          echo "Uploading artifacts for tag ${GITHUB_REF_NAME}"
          gh release upload ${GITHUB_REF_NAME} dist/* --clobber
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -58,12 +58,11 @@ jobs:
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: go get ./...
      - run: |
@@ -80,7 +79,6 @@ jobs:
      - run: go generate -x ./...
        if: ${{ ! startsWith(matrix.os, 'windows-') }}
        name: 'Unix Go Generate'
      - run: go build .
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -126,7 +124,7 @@ jobs:
    strategy:
      matrix:
        rocm-version:
-          - '6.1.2'
+          - '6.0.2'
    runs-on: linux
    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
    steps:
@@ -163,13 +161,13 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install ROCm'
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -200,7 +198,7 @@ jobs:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - name: 'Install CUDA'
        run: |
@@ -255,7 +253,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: false
      - run: |
          case ${{ matrix.arch }} in
@@ -273,7 +271,7 @@ jobs:
        if: ${{ startsWith(matrix.os, 'macos-') }}
      - uses: golangci/golangci-lint-action@v6
        with:
-          args: --timeout 8m0s -v
+          args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
  test:
    strategy:
      matrix:
@@ -297,7 +295,7 @@ jobs:
          submodules: recursive
      - uses: actions/setup-go@v5
        with:
-          go-version: "stable"
+          go-version-file: go.mod
          cache: true
      - run: |
          case ${{ matrix.arch }} in
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -7,32 +7,22 @@ linters:
    - bodyclose
    - containedctx
    - contextcheck
    - errcheck
    - exportloopref
    - gci
    - gocheckcompilerdirectives
-    - gofmt
+    # conditionally enable this on linux/macos
-    - gofumpt
+    # - gofmt
-    - gosimple
+    # - goimports
    - govet
    - ineffassign
    - intrange
    - makezero
    - misspell
    - nilerr
    - nolintlint
    - nosprintfhostport
    - staticcheck
    - tenv
    - testifylint
    - unconvert
    - unused
    - usestdlibvars
    - wastedassign
    - whitespace
-linters-settings:
+    - usestdlibvars
  gci:
    sections: [standard, default, localmodule]
 severity:
  default-severity: error
  rules:
--- a/8
+++ b/8
@@ -1,8 +1,8 @@
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
 # this CUDA_VERSION corresponds with the one specified in docs/gpu.md
 ARG CUDA_VERSION=11.3.1
-ARG ROCM_VERSION=6.1.2
+ARG ROCM_VERSION=6.0.2
 # Copy the minimal context we need to run the generate scripts
 FROM scratch AS llm-code
@@ -70,12 +70,12 @@ RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
-FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
+FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
--- a/README.md
+++ b/README.md
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
 ## Quickstart
-To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
+To run and chat with [Llama 3](https://ollama.com/library/llama3):
 ```
-ollama run llama3.1
+ollama run llama3
 ```
 ## Model library
@@ -49,14 +49,12 @@ Here are some example models that can be downloaded:
 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
+| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
-| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
+| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
 | Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
 | Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
@@ -66,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
-> [!NOTE]
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 ## Customize a model
@@ -99,16 +96,16 @@ See the [guide](docs/import.md) on importing models for more information.
 ### Customize a prompt
-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 Create a `Modelfile`:
 ```
-FROM llama3.1
+FROM llama3
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -143,7 +140,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 > This command can also be used to update a local model. Only the diff will be pulled.
@@ -151,13 +148,13 @@ ollama pull llama3.1
 ### Remove a model
 ```
-ollama rm llama3.1
+ollama rm llama3
 ```
 ### Copy a model
 ```
-ollama cp llama3.1 my-model
+ollama cp llama3 my-model
 ```
 ### Multiline input
@@ -174,23 +171,17 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 ### Multimodal models
 ```
-ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 ### Pass the prompt as an argument
 ```
-$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
+$ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 ### Show model information
 ```
 ollama show llama3.1
 ```
 ### List models on your computer
 ```
@@ -216,7 +207,7 @@ Next, start the server:
 Finally, in a separate shell, run a model:
 ```
-./ollama run llama3.1
+./ollama run llama3
 ```
 ## REST API
@@ -227,7 +218,7 @@ Ollama has a REST API for running and managing models.
 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "prompt":"Why is the sky blue?"
 }'
 ```
@@ -236,7 +227,7 @@ curl http://localhost:11434/api/generate -d '{
 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "messages": [
    { "role": "user", "content": "why is the sky blue?" }
  ]
@@ -295,13 +286,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
 - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
 ### Terminal
@@ -340,7 +324,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Libraries
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
@@ -394,7 +377,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,25 +0,0 @@
 # Security
 The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
 ## Reporting a vulnerability
 If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
 Please include the following details in your report:
 - A description of the vulnerability
 - Steps to reproduce the issue
 - Your assessment of the potential impact
 - Any possible mitigations
 ## Security best practices
 While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
 - Regularly updating to the latest version of Ollama
 - Securing access to hosted instances of Ollama
 - Monitoring systems for unusual activity
 ## Contact
 For any other questions or concerns related to security, please contact us at hello@ollama.com
--- a/api/client.go
+++ b/api/client.go
@@ -18,9 +18,9 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
 	"net/url"
 	"runtime"
@@ -63,8 +63,13 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
 	ollamaHost := envconfig.Host
 	return &Client{
-		base: envconfig.Host(),
+		base: &url.URL{
 			Scheme: ollamaHost.Scheme,
 			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
 		},
 		http: http.DefaultClient,
 	}, nil
 }
@@ -173,7 +178,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 		if errorResponse.Error != "" {
-			return errors.New(errorResponse.Error)
+			return fmt.Errorf(errorResponse.Error)
 		}
 		if response.StatusCode >= http.StatusBadRequest {
@@ -342,16 +347,7 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	return nil
 }
-// Embed generates embeddings from a model.
+// Embeddings generates embeddings from a model.
 func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
 	var resp EmbedResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
 		return nil, err
 	}
 	return &resp, nil
 }
 // Embeddings generates an embedding from a model.
 func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
 	var resp EmbeddingResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -2,6 +2,8 @@ package api
 import (
 	"testing"
 	"github.com/ollama/ollama/envconfig"
 )
 func TestClientFromEnvironment(t *testing.T) {
@@ -31,6 +33,7 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
 			envconfig.LoadConfig()
 			client, err := ClientFromEnvironment()
 			if err != v.err {
--- a/api/types.go
+++ b/api/types.go
@@ -47,9 +47,6 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`
 	// Suffix is the text that comes after the inserted text.
 	Suffix string `json:"suffix"`
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`
@@ -100,85 +97,17 @@ type ChatRequest struct {
 	// followin the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Tools is an optional list of tools the model has access to.
 	Tools `json:"tools,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 type Tools []Tool
 func (t Tools) String() string {
 	bts, _ := json.Marshal(t)
 	return string(bts)
 }
 func (t Tool) String() string {
 	bts, _ := json.Marshal(t)
 	return string(bts)
 }
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
-	Role      string      `json:"role"`
+	Role    string      `json:"role"`
-	Content   string      `json:"content"`
+	Content string      `json:"content"`
-	Images    []ImageData `json:"images,omitempty"`
+	Images  []ImageData `json:"images,omitempty"`
 	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
 }
 func (m *Message) UnmarshalJSON(b []byte) error {
 	type Alias Message
 	var a Alias
 	if err := json.Unmarshal(b, &a); err != nil {
 		return err
 	}
 	*m = Message(a)
 	m.Role = strings.ToLower(m.Role)
 	return nil
 }
 type ToolCall struct {
 	Function ToolCallFunction `json:"function"`
 }
 type ToolCallFunction struct {
 	Name      string                    `json:"name"`
 	Arguments ToolCallFunctionArguments `json:"arguments"`
 }
 type ToolCallFunctionArguments map[string]any
 func (t *ToolCallFunctionArguments) String() string {
 	bts, _ := json.Marshal(t)
 	return string(bts)
 }
 type Tool struct {
 	Type     string       `json:"type"`
 	Function ToolFunction `json:"function"`
 }
 type ToolFunction struct {
 	Name        string `json:"name"`
 	Description string `json:"description"`
 	Parameters  struct {
 		Type       string   `json:"type"`
 		Required   []string `json:"required"`
 		Properties map[string]struct {
 			Type        string   `json:"type"`
 			Description string   `json:"description"`
 			Enum        []string `json:"enum,omitempty"`
 		} `json:"properties"`
 	} `json:"parameters"`
 }
 func (t *ToolFunction) String() string {
 	bts, _ := json.Marshal(t)
 	return string(bts)
 }
 // ChatResponse is the response returned by [Client.Chat]. Its fields are
@@ -214,7 +143,6 @@ type Options struct {
 	NumPredict       int      `json:"num_predict,omitempty"`
 	TopK             int      `json:"top_k,omitempty"`
 	TopP             float32  `json:"top_p,omitempty"`
 	MinP             float32  `json:"min_p,omitempty"`
 	TFSZ             float32  `json:"tfs_z,omitempty"`
 	TypicalP         float32  `json:"typical_p,omitempty"`
 	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
@@ -231,45 +159,18 @@ type Options struct {
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	NumCtx    int   `json:"num_ctx,omitempty"`
+	UseNUMA   bool `json:"numa,omitempty"`
-	NumBatch  int   `json:"num_batch,omitempty"`
+	NumCtx    int  `json:"num_ctx,omitempty"`
-	NumGPU    int   `json:"num_gpu,omitempty"`
+	NumBatch  int  `json:"num_batch,omitempty"`
-	MainGPU   int   `json:"main_gpu,omitempty"`
+	NumGPU    int  `json:"num_gpu,omitempty"`
-	LowVRAM   bool  `json:"low_vram,omitempty"`
+	MainGPU   int  `json:"main_gpu,omitempty"`
-	F16KV     bool  `json:"f16_kv,omitempty"`
+	LowVRAM   bool `json:"low_vram,omitempty"`
-	LogitsAll bool  `json:"logits_all,omitempty"`
+	F16KV     bool `json:"f16_kv,omitempty"`
-	VocabOnly bool  `json:"vocab_only,omitempty"`
+	LogitsAll bool `json:"logits_all,omitempty"`
-	UseMMap   *bool `json:"use_mmap,omitempty"`
+	VocabOnly bool `json:"vocab_only,omitempty"`
-	UseMLock  bool  `json:"use_mlock,omitempty"`
+	UseMMap   bool `json:"use_mmap,omitempty"`
-	NumThread int   `json:"num_thread,omitempty"`
+	UseMLock  bool `json:"use_mlock,omitempty"`
-}
+	NumThread int  `json:"num_thread,omitempty"`
 // EmbedRequest is the request passed to [Client.Embed].
 type EmbedRequest struct {
 	// Model is the model name.
 	Model string `json:"model"`
 	// Input is the input to embed.
 	Input any `json:"input"`
 	// KeepAlive controls how long the model will stay loaded in memory following
 	// this request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	Truncate *bool `json:"truncate,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 // EmbedResponse is the response from [Client.Embed].
 type EmbedResponse struct {
 	Model      string      `json:"model"`
 	Embeddings [][]float32 `json:"embeddings"`
 	TotalDuration   time.Duration `json:"total_duration,omitempty"`
 	LoadDuration    time.Duration `json:"load_duration,omitempty"`
 	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
 }
 // EmbeddingRequest is the request passed to [Client.Embeddings].
@@ -318,12 +219,9 @@ type DeleteRequest struct {
 // ShowRequest is the request passed to [Client.Show].
 type ShowRequest struct {
-	Model  string `json:"model"`
+	Model    string `json:"model"`
-	System string `json:"system"`
+	System   string `json:"system"`
 	// Template is deprecated
 	Template string `json:"template"`
 	Verbose  bool   `json:"verbose"`
 	Options map[string]interface{} `json:"options"`
@@ -333,16 +231,13 @@ type ShowRequest struct {
 // ShowResponse is the response returned from [Client.Show].
 type ShowResponse struct {
-	License       string         `json:"license,omitempty"`
+	License    string       `json:"license,omitempty"`
-	Modelfile     string         `json:"modelfile,omitempty"`
+	Modelfile  string       `json:"modelfile,omitempty"`
-	Parameters    string         `json:"parameters,omitempty"`
+	Parameters string       `json:"parameters,omitempty"`
-	Template      string         `json:"template,omitempty"`
+	Template   string       `json:"template,omitempty"`
-	System        string         `json:"system,omitempty"`
+	System     string       `json:"system,omitempty"`
-	Details       ModelDetails   `json:"details,omitempty"`
+	Details    ModelDetails `json:"details,omitempty"`
-	Messages      []Message      `json:"messages,omitempty"`
+	Messages   []Message    `json:"messages,omitempty"`
 	ModelInfo     map[string]any `json:"model_info,omitempty"`
 	ProjectorInfo map[string]any `json:"projector_info,omitempty"`
 	ModifiedAt    time.Time      `json:"modified_at,omitempty"`
 }
 // CopyRequest is the request passed to [Client.Copy].
@@ -415,13 +310,6 @@ type ProcessModelResponse struct {
 	SizeVRAM  int64        `json:"size_vram"`
 }
 type RetrieveModelResponse struct {
 	Id      string `json:"id"`
 	Object  string `json:"object"`
 	Created int64  `json:"created"`
 	OwnedBy string `json:"owned_by"`
 }
 type TokenResponse struct {
 	Token string `json:"token"`
 }
@@ -504,7 +392,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", key)
+			slog.Warn("invalid option provided", "option", opt.Name)
 			continue
 		}
@@ -560,17 +448,6 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 					slice[i] = str
 				}
 				field.Set(reflect.ValueOf(slice))
 			case reflect.Pointer:
 				var b bool
 				if field.Type() == reflect.TypeOf(&b) {
 					val, ok := val.(bool)
 					if !ok {
 						return fmt.Errorf("option %q must be of type boolean", key)
 					}
 					field.Set(reflect.ValueOf(&val))
 				} else {
 					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
 				}
 			default:
 				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
 			}
@@ -613,7 +490,8 @@ func DefaultOptions() Options {
 			LowVRAM:   false,
 			F16KV:     true,
 			UseMLock:  false,
-			UseMMap:   nil,
+			UseMMap:   true,
 			UseNUMA:   false,
 		},
 	}
 }
@@ -709,17 +587,6 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
 				case reflect.Slice:
 					// TODO: only string slices are supported right now
 					out[key] = vals
 				case reflect.Pointer:
 					var b bool
 					if field.Type() == reflect.TypeOf(&b) {
 						boolVal, err := strconv.ParseBool(vals[0])
 						if err != nil {
 							return nil, fmt.Errorf("invalid bool value %s", vals)
 						}
 						out[key] = &boolVal
 					} else {
 						return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
 					}
 				default:
 					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
 				}
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -2,7 +2,6 @@ package api
 import (
 	"encoding/json"
 	"errors"
 	"math"
 	"testing"
 	"time"
@@ -106,128 +105,3 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		})
 	}
 }
 func TestUseMmapParsingFromJSON(t *testing.T) {
 	tr := true
 	fa := false
 	tests := []struct {
 		name string
 		req  string
 		exp  *bool
 	}{
 		{
 			name: "Undefined",
 			req:  `{ }`,
 			exp:  nil,
 		},
 		{
 			name: "True",
 			req:  `{ "use_mmap": true }`,
 			exp:  &tr,
 		},
 		{
 			name: "False",
 			req:  `{ "use_mmap": false }`,
 			exp:  &fa,
 		},
 	}
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			var oMap map[string]interface{}
 			err := json.Unmarshal([]byte(test.req), &oMap)
 			require.NoError(t, err)
 			opts := DefaultOptions()
 			err = opts.FromMap(oMap)
 			require.NoError(t, err)
 			assert.Equal(t, test.exp, opts.UseMMap)
 		})
 	}
 }
 func TestUseMmapFormatParams(t *testing.T) {
 	tr := true
 	fa := false
 	tests := []struct {
 		name string
 		req  map[string][]string
 		exp  *bool
 		err  error
 	}{
 		{
 			name: "True",
 			req: map[string][]string{
 				"use_mmap": {"true"},
 			},
 			exp: &tr,
 			err: nil,
 		},
 		{
 			name: "False",
 			req: map[string][]string{
 				"use_mmap": {"false"},
 			},
 			exp: &fa,
 			err: nil,
 		},
 		{
 			name: "Numeric True",
 			req: map[string][]string{
 				"use_mmap": {"1"},
 			},
 			exp: &tr,
 			err: nil,
 		},
 		{
 			name: "Numeric False",
 			req: map[string][]string{
 				"use_mmap": {"0"},
 			},
 			exp: &fa,
 			err: nil,
 		},
 		{
 			name: "invalid string",
 			req: map[string][]string{
 				"use_mmap": {"foo"},
 			},
 			exp: nil,
 			err: errors.New("invalid bool value [foo]"),
 		},
 	}
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			resp, err := FormatParams(test.req)
 			require.Equal(t, test.err, err)
 			respVal, ok := resp["use_mmap"]
 			if test.exp != nil {
 				assert.True(t, ok, "resp: %v", resp)
 				assert.Equal(t, *test.exp, *respVal.(*bool))
 			}
 		})
 	}
 }
 func TestMessage_UnmarshalJSON(t *testing.T) {
 	tests := []struct {
 		input    string
 		expected string
 	}{
 		{`{"role": "USER", "content": "Hello!"}`, "user"},
 		{`{"role": "System", "content": "Initialization complete."}`, "system"},
 		{`{"role": "assistant", "content": "How can I help you?"}`, "assistant"},
 		{`{"role": "TOOl", "content": "Access granted."}`, "tool"},
 	}
 	for _, test := range tests {
 		var msg Message
 		if err := json.Unmarshal([]byte(test.input), &msg); err != nil {
 			t.Errorf("Unexpected error: %v", err)
 		}
 		if msg.Role != test.expected {
 			t.Errorf("role not lowercased: got %v, expected %v", msg.Role, test.expected)
 		}
 	}
 }
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -2,8 +2,8 @@
 package lifecycle
-import "errors"
+import "fmt"
 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -5,8 +5,6 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 )
@@ -14,7 +12,7 @@ import (
 func InitLogging() {
 	level := slog.LevelInfo
-	if envconfig.Debug() {
+	if envconfig.Debug {
 		level = slog.LevelDebug
 	}
@@ -26,8 +24,7 @@ func InitLogging() {
 		logFile = os.Stderr
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
-		rotateLogs(AppLogFile)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
@@ -49,32 +46,3 @@ func InitLogging() {
 	slog.Info("ollama app started")
 }
 // rotateLogs shifts the existing log at logFile, and any previously rotated
 // copies, up by one slot so that a fresh log can be started at logFile.
 //
 // Rotated copies are named by inserting "-N" before the file extension
 // (app.log -> app-1.log -> app-2.log ...), with N running up to
 // LogRotationCount; a copy already sitting in the highest slot is deleted to
 // make room. Nothing happens when logFile does not exist. Failures to remove
 // or rename a file are logged as warnings and do not stop the remaining
 // rotations.
 func rotateLogs(logFile string) {
 	if _, err := os.Stat(logFile); os.IsNotExist(err) {
 		return
 	}
 	// Split on the extension of the final path element only. Searching the
 	// whole path for the last "." panics on a name without any dot and splits
 	// inside a directory name such as "logs.d/app".
 	post := filepath.Ext(logFile)
 	pre := strings.TrimSuffix(logFile, post)
 	// Walk from the oldest slot down so each rename lands in a freed slot.
 	for i := LogRotationCount; i > 0; i-- {
 		older := pre + "-" + strconv.Itoa(i) + post
 		newer := pre + "-" + strconv.Itoa(i-1) + post
 		if i == 1 {
 			// Slot 0 is the live log itself, which carries no "-N" suffix.
 			newer = pre + post
 		}
 		if _, err := os.Stat(newer); err == nil {
 			if _, err := os.Stat(older); err == nil {
 				err := os.Remove(older)
 				if err != nil {
 					slog.Warn("Failed to remove older log", "older", older, "error", err)
 					continue
 				}
 			}
 			err := os.Rename(newer, older)
 			if err != nil {
 				slog.Warn("Failed to rotate log", "older", older, "newer", newer, "error", err)
 			}
 		}
 	}
 }
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"
 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
--- a/app/lifecycle/logging_test.go
+++ b/app/lifecycle/logging_test.go
@@ -1,44 +0,0 @@
 package lifecycle
 import (
 	"os"
 	"path/filepath"
 	"strconv"
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // TestRotateLogs drives rotateLogs against a temporary directory: once with
 // no log present, once for the first rotation, once as a repeat with no new
 // log, and then through LogRotationCount+1 further rounds, checking after each
 // step which rotated files exist.
 func TestRotateLogs(t *testing.T) {
 	dir := t.TempDir()
 	current := filepath.Join(dir, "testlog.log")
 	// rotated returns the path of the rotated copy in slot n.
 	rotated := func(n int) string {
 		return filepath.Join(dir, "testlog-"+strconv.Itoa(n)+".log")
 	}
 	// Rotating before any log has been written.
 	rotateLogs(current)
 	require.NoError(t, os.WriteFile(current, []byte("1"), 0o644))
 	assert.FileExists(t, current)
 	// The first rotation moves the live log into slot 1.
 	rotateLogs(current)
 	assert.FileExists(t, rotated(1))
 	assert.NoFileExists(t, rotated(2))
 	assert.NoFileExists(t, current)
 	// With no new live log, a second call must leave everything as it was.
 	rotateLogs(current)
 	assert.FileExists(t, rotated(1))
 	assert.NoFileExists(t, rotated(2))
 	assert.NoFileExists(t, current)
 	for round := 2; round <= LogRotationCount+1; round++ {
 		require.NoError(t, os.WriteFile(current, []byte(strconv.Itoa(round)), 0o644))
 		assert.FileExists(t, current)
 		rotateLogs(current)
 		assert.NoFileExists(t, current)
 		for slot := 1; slot < round; slot++ {
 			assert.FileExists(t, rotated(slot))
 		}
 		assert.NoFileExists(t, rotated(round+1))
 	}
 }
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@@ -16,12 +16,11 @@ var (
 	AppDir     = "/opt/Ollama"
 	AppDataDir = "/opt/Ollama"
 	// TODO - should there be a distinct log dir?
-	UpdateStageDir   = "/tmp"
+	UpdateStageDir = "/tmp"
-	AppLogFile       = "/tmp/ollama_app.log"
+	AppLogFile     = "/tmp/ollama_app.log"
-	ServerLogFile    = "/tmp/ollama.log"
+	ServerLogFile  = "/tmp/ollama.log"
-	UpgradeLogFile   = "/tmp/ollama_update.log"
+	UpgradeLogFile = "/tmp/ollama_update.log"
-	Installer        = "OllamaSetup.exe"
+	Installer      = "OllamaSetup.exe"
 	LogRotationCount = 5
 )
 func init() {
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -54,8 +54,8 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 		return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
 	}
-	rotateLogs(ServerLogFile)
+	// TODO - rotation
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
@@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -4,9 +4,9 @@ package lifecycle
 import (
 	"context"
-	"errors"
+	"fmt"
 )
 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -2,7 +2,6 @@ package lifecycle
 import (
 	"context"
 	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -65,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}
 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -88,15 +88,10 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
 Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
 #if DirExists("..\dist\windows-amd64\cuda")
  Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
 #endif
 #if DirExists("..\dist\windows-amd64\oneapi")
  Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
 #endif
 #if DirExists("..\dist\windows-amd64\rocm")
  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
 #endif
@@ -127,10 +122,6 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
 Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
 ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
 [InstallDelete]
 Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 [Messages]
 WizardReady=Ollama Windows Preview
 ReadyLabel1=%nLet's get you up and running with your own large language models.
@@ -138,7 +129,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.1
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
 ;ClickFinish=%n
 [Registry]
--- a/app/ollama_welcome.ps1
+++ b/app/ollama_welcome.ps1
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama3.1"
+write-host "`tollama run llama3"
 write-host ""
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -3,11 +3,11 @@
 package tray
 import (
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/app/tray/commontray"
 )
 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
 }
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -11,7 +11,9 @@ import (
 	"golang.org/x/sys/windows"
 )
-var quitOnce sync.Once
+var (
 	quitOnce sync.Once
 )
 func (t *winTray) Run() {
 	nativeLoop()
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -13,9 +13,8 @@ import (
 	"sync"
 	"unsafe"
 	"golang.org/x/sys/windows"
 	"github.com/ollama/ollama/app/tray/commontray"
 	"golang.org/x/sys/windows"
 )
 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -415,7 +414,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
 			return "", err
 		}
 	}
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -5,7 +5,6 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -79,7 +78,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
 	parts := bytes.Split(publicKey, []byte(" "))
 	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
 	}
 	signedData, err := privateKey.Sign(rand.Reader, bts)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -162,6 +162,9 @@ func tempZipFiles(path string) (string, error) {
 	}
 	defer tempfile.Close()
 	zipfile := zip.NewWriter(tempfile)
 	defer zipfile.Close()
 	detectContentType := func(path string) (string, error) {
 		f, err := os.Open(path)
 		if err != nil {
@@ -230,9 +233,6 @@ func tempZipFiles(path string) (string, error) {
 		files = append(files, tks...)
 	}
 	zipfile := zip.NewWriter(tempfile)
 	defer zipfile.Close()
 	for _, file := range files {
 		f, err := os.Open(file)
 		if err != nil {
@@ -287,12 +287,38 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
 }
 func RunHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	name := args[0]
 	// check if the model exists on the server
 	show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 	var statusError api.StatusError
 	switch {
 	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
 		if err := PullHandler(cmd, []string{name}); err != nil {
 			return err
 		}
 		show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 		if err != nil {
 			return err
 		}
 	case err != nil:
 		return err
 	}
 	interactive := true
 	opts := runOptions{
-		Model:    args[0],
+		Model:       args[0],
-		WordWrap: os.Getenv("TERM") == "xterm-256color",
+		WordWrap:    os.Getenv("TERM") == "xterm-256color",
-		Options:  map[string]interface{}{},
+		Options:     map[string]interface{}{},
 		MultiModal:  slices.Contains(show.Details.Families, "clip"),
 		ParentModel: show.Details.ParentModel,
 	}
 	format, err := cmd.Flags().GetString("format")
@@ -336,53 +362,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.WordWrap = !nowrap
-	// Fill out the rest of the options based on information about the
+	if !interactive {
-	// model.
+		return generate(cmd, opts)
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
-	name := args[0]
+	return generateInteractive(cmd, opts)
 	info, err := func() (*api.ShowResponse, error) {
 		showReq := &api.ShowRequest{Name: name}
 		info, err := client.Show(cmd.Context(), showReq)
 		var se api.StatusError
 		if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
 			if err := PullHandler(cmd, []string{name}); err != nil {
 				return nil, err
 			}
 			return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 		}
 		return info, err
 	}()
 	if err != nil {
 		return err
 	}
 	opts.MultiModal = slices.Contains(info.Details.Families, "clip")
 	opts.ParentModel = info.Details.ParentModel
 	if interactive {
 		if err := loadModel(cmd, &opts); err != nil {
 			return err
 		}
 		for _, msg := range info.Messages {
 			switch msg.Role {
 			case "user":
 				fmt.Printf(">>> %s\n", msg.Content)
 			case "assistant":
 				state := &displayResponseState{}
 				displayResponse(msg.Content, opts.WordWrap, state)
 				fmt.Println()
 				fmt.Println()
 			}
 		}
 		return generateInteractive(cmd, opts)
 	}
 	return generate(cmd, opts)
 }
 func errFromUnknownKey(unknownKeyErr error) error {
@@ -595,6 +579,10 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	if len(args) != 1 {
 		return errors.New("missing model name")
 	}
 	license, errLicense := cmd.Flags().GetBool("license")
 	modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
 	parameters, errParams := cmd.Flags().GetBool("parameters")
@@ -637,6 +625,8 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 	if flagsSet > 1 {
 		return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
 	} else if flagsSet == 0 {
 		return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
 	}
 	req := api.ShowRequest{Name: args[0]}
@@ -645,141 +635,22 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
-	if flagsSet == 1 {
+	switch showType {
-		switch showType {
+	case "license":
-		case "license":
+		fmt.Println(resp.License)
-			fmt.Println(resp.License)
+	case "modelfile":
-		case "modelfile":
+		fmt.Println(resp.Modelfile)
-			fmt.Println(resp.Modelfile)
+	case "parameters":
-		case "parameters":
+		fmt.Println(resp.Parameters)
-			fmt.Println(resp.Parameters)
+	case "system":
-		case "system":
+		fmt.Println(resp.System)
-			fmt.Println(resp.System)
+	case "template":
-		case "template":
+		fmt.Println(resp.Template)
 			fmt.Println(resp.Template)
 		}
 		return nil
 	}
 	showInfo(resp)
 	return nil
 }
 // showInfo prints a summary of resp to stdout as a borderless table with a
 // "Model" section, followed by "Projector", "Parameters", "System" and
 // "License" sections when the response carries that data.
 //
 // Metadata values are read with checked type assertions: a missing key or a
 // value of an unexpected type is shown as "unknown" (or, for the optional
 // projector type, left out) instead of panicking.
 func showInfo(resp *api.ShowResponse) {
 	// number formats a metadata value that is expected to hold a float64.
 	number := func(v interface{}) string {
 		if f, ok := v.(float64); ok {
 			return fmt.Sprintf("%v", f)
 		}
 		return "unknown"
 	}
 	arch, ok := resp.ModelInfo["general.architecture"].(string)
 	if !ok {
 		arch = "unknown"
 	}
 	modelData := [][]string{
 		{"arch", arch},
 		{"parameters", resp.Details.ParameterSize},
 		{"quantization", resp.Details.QuantizationLevel},
 		// The length keys are namespaced by the architecture name.
 		{"context length", number(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)])},
 		{"embedding length", number(resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)])},
 	}
 	mainTableData := [][]string{
 		{"Model"},
 		{renderSubTable(modelData, false)},
 	}
 	if resp.ProjectorInfo != nil {
 		paramCount := "unknown"
 		if n, ok := resp.ProjectorInfo["general.parameter_count"].(float64); ok {
 			paramCount = format.HumanNumber(uint64(n))
 		}
 		projectorData := [][]string{
 			{"arch", "clip"},
 			{"parameters", paramCount},
 		}
 		if projectorType, ok := resp.ProjectorInfo["clip.projector_type"].(string); ok {
 			projectorData = append(projectorData, []string{"projector type", projectorType})
 		}
 		projectorData = append(projectorData,
 			[]string{"embedding length", number(resp.ProjectorInfo["clip.vision.embedding_length"])},
 			[]string{"projection dimensionality", number(resp.ProjectorInfo["clip.vision.projection_dim"])},
 		)
 		mainTableData = append(mainTableData,
 			[]string{"Projector"},
 			[]string{renderSubTable(projectorData, false)},
 		)
 	}
 	if resp.Parameters != "" {
 		mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
 	}
 	// System and License are shown through twoLines, not in full.
 	if resp.System != "" {
 		mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
 	}
 	if resp.License != "" {
 		mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
 	}
 	table := tablewriter.NewWriter(os.Stdout)
 	table.SetAutoWrapText(false)
 	table.SetBorder(false)
 	table.SetAlignment(tablewriter.ALIGN_LEFT)
 	for _, v := range mainTableData {
 		table.Append(v)
 	}
 	table.Render()
 }
 // renderSubTable renders data as a borderless, left-aligned table padded with
 // tabs and returns the text with every line prefixed by one tab. Automatic
 // text wrapping is switched on unless file is true.
 func renderSubTable(data [][]string, file bool) string {
 	var out bytes.Buffer
 	sub := tablewriter.NewWriter(&out)
 	sub.SetAutoWrapText(!file)
 	sub.SetBorder(false)
 	sub.SetNoWhiteSpace(true)
 	sub.SetTablePadding("\t")
 	sub.SetAlignment(tablewriter.ALIGN_LEFT)
 	for i := range data {
 		sub.Append(data[i])
 	}
 	sub.Render()
 	// A tab at the start plus one after every newline indents each line,
 	// including the empty one that follows a trailing newline.
 	return "\t" + strings.ReplaceAll(out.String(), "\n", "\n\t")
 }
 // twoLines returns at most the first two non-blank lines of s, each trimmed of
 // surrounding whitespace and wrapped in its own single-element row. The result
 // is empty, but never nil, when s holds no non-blank line.
 func twoLines(s string) [][]string {
 	const limit = 2
 	picked := make([][]string, 0, limit)
 	for _, raw := range strings.Split(s, "\n") {
 		trimmed := strings.TrimSpace(raw)
 		if trimmed == "" {
 			continue
 		}
 		picked = append(picked, []string{trimmed})
 		if len(picked) == limit {
 			break
 		}
 	}
 	return picked
 }
 // formatParams splits s into lines, breaks each line into whitespace-separated
 // fields, and renders the resulting rows with renderSubTable. A blank line
 // contributes a row with no cells.
 func formatParams(s string) string {
 	entries := strings.Split(s, "\n")
 	rows := make([][]string, 0, len(entries))
 	for i := 0; i < len(entries); i++ {
 		rows = append(rows, strings.Fields(entries[i]))
 	}
 	return renderSubTable(rows, false)
 }
 func CopyHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@@ -858,6 +729,7 @@ type runOptions struct {
 	WordWrap    bool
 	Format      string
 	System      string
 	Template    string
 	Images      []api.ImageData
 	Options     map[string]interface{}
 	MultiModal  bool
@@ -1051,6 +923,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		Images:    opts.Images,
 		Format:    opts.Format,
 		System:    opts.System,
 		Template:  opts.Template,
 		Options:   opts.Options,
 		KeepAlive: opts.KeepAlive,
 	}
@@ -1091,7 +964,7 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 		return err
 	}
-	ln, err := net.Listen("tcp", envconfig.Host().Host)
+	ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
 	if err != nil {
 		return err
 	}
@@ -1160,7 +1033,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
@@ -1356,10 +1229,10 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_NUM_PARALLEL"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
 				envVars["OLLAMA_SCHED_SPREAD"],
 				envVars["OLLAMA_TMPDIR"],
 				envVars["OLLAMA_FLASH_ATTENTION"],
 				envVars["OLLAMA_LLM_LIBRARY"],
 				envVars["OLLAMA_MAX_VRAM"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -1,7 +1,6 @@
 package cmd
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
@@ -10,14 +9,13 @@ import (
 	"path/filepath"
 	"regexp"
 	"slices"
 	"sort"
 	"strings"
 	"github.com/spf13/cobra"
 	"golang.org/x/exp/maps"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
@@ -29,29 +27,74 @@ const (
 	MultilineNone MultilineState = iota
 	MultilinePrompt
 	MultilineSystem
 	MultilineTemplate
 )
 func loadModel(cmd *cobra.Command, opts *runOptions) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	p := progress.NewProgress(os.Stderr)
 	defer p.StopAndClear()
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)
-	client, err := api.ClientFromEnvironment()
+	showReq := api.ShowRequest{Name: opts.Model}
 	showResp, err := client.Show(cmd.Context(), &showReq)
 	if err != nil {
 		return err
 	}
 	opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
 	opts.ParentModel = showResp.Details.ParentModel
 	if len(showResp.Messages) > 0 {
 		opts.Messages = append(opts.Messages, showResp.Messages...)
 	}
 	chatReq := &api.ChatRequest{
 		Model:    opts.Model,
 		Messages: []api.Message{},
 	}
 	if opts.KeepAlive != nil {
 		chatReq.KeepAlive = opts.KeepAlive
 	}
 	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
 		p.StopAndClear()
 		if len(opts.Messages) > 0 {
 			for _, msg := range opts.Messages {
 				switch msg.Role {
 				case "user":
 					fmt.Printf(">>> %s\n", msg.Content)
 				case "assistant":
 					state := &displayResponseState{}
 					displayResponse(msg.Content, opts.WordWrap, state)
 					fmt.Println()
 					fmt.Println()
 				}
 			}
 		}
 		return nil
 	})
 	if err != nil {
 		return err
 	}
-	chatReq := &api.ChatRequest{
+	return nil
 		Model:     opts.Model,
 		KeepAlive: opts.KeepAlive,
 	}
 	return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
 }
 func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 	opts.Messages = make([]api.Message, 0)
 	err := loadModel(cmd, &opts)
 	if err != nil {
 		return err
 	}
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
@@ -76,6 +119,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set parameter ...     Set a parameter")
 		fmt.Fprintln(os.Stderr, "  /set system <string>   Set system message")
 		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
 		fmt.Fprintln(os.Stderr, "  /set history           Enable history")
 		fmt.Fprintln(os.Stderr, "  /set nohistory         Disable history")
 		fmt.Fprintln(os.Stderr, "  /set wordwrap          Enable wordwrap")
@@ -121,7 +165,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
 		fmt.Fprintln(os.Stderr, "  /set parameter min_p <float>          Pick token based on top token probability * min_p")
 		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
 		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
 		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
@@ -141,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		return err
 	}
-	if envconfig.NoHistory() {
+	if envconfig.NoHistory {
 		scanner.HistoryDisable()
 	}
@@ -186,6 +229,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
 				fmt.Println("Set system message.")
 				sb.Reset()
 			case MultilineTemplate:
 				opts.Template = sb.String()
 				fmt.Println("Set prompt template.")
 				sb.Reset()
 			}
 			multiline = MultilineNone
@@ -304,13 +351,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					}
 					fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
 					opts.Options[args[2]] = fp[args[2]]
-				case "system":
+				case "system", "template":
 					if len(args) < 3 {
 						usageSet()
 						continue
 					}
-					multiline = MultilineSystem
+					if args[1] == "system" {
 						multiline = MultilineSystem
 					} else if args[1] == "template" {
 						multiline = MultilineTemplate
 					}
 					line := strings.Join(args[2:], " ")
 					line, ok := strings.CutPrefix(line, `"""`)
@@ -330,17 +381,23 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						continue
 					}
-					opts.System = sb.String() // for display in modelfile
+					if args[1] == "system" {
-					newMessage := api.Message{Role: "system", Content: sb.String()}
+						opts.System = sb.String() // for display in modelfile
-					// Check if the slice is not empty and the last message is from 'system'
+						newMessage := api.Message{Role: "system", Content: sb.String()}
-					if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
+						// Check if the slice is not empty and the last message is from 'system'
-						// Replace the last message
+						if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
-						opts.Messages[len(opts.Messages)-1] = newMessage
+							// Replace the last message
-					} else {
+							opts.Messages[len(opts.Messages)-1] = newMessage
-						opts.Messages = append(opts.Messages, newMessage)
+						} else {
 							opts.Messages = append(opts.Messages, newMessage)
 						}
 						fmt.Println("Set system message.")
 						sb.Reset()
 					} else if args[1] == "template" {
 						opts.Template = sb.String()
 						fmt.Println("Set prompt template.")
 						sb.Reset()
 					}
 					fmt.Println("Set system message.")
 					sb.Reset()
 					sb.Reset()
 					continue
@@ -359,9 +416,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					return err
 				}
 				req := &api.ShowRequest{
-					Name:    opts.Model,
+					Name:     opts.Model,
-					System:  opts.System,
+					System:   opts.System,
-					Options: opts.Options,
+					Template: opts.Template,
 					Options:  opts.Options,
 				}
 				resp, err := client.Show(cmd.Context(), req)
 				if err != nil {
@@ -371,7 +429,15 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				switch args[1] {
 				case "info":
-					showInfo(resp)
+					fmt.Println("Model details:")
 					if len(resp.Details.Families) > 0 {
 						fmt.Printf("Family              %s\n", strings.Join(resp.Details.Families, ", "))
 					} else if resp.Details.Family != "" {
 						fmt.Printf("Family              %s\n", resp.Details.Family)
 					}
 					fmt.Printf("Parameter Size      %s\n", resp.Details.ParameterSize)
 					fmt.Printf("Quantization Level  %s\n", resp.Details.QuantizationLevel)
 					fmt.Println("")
 				case "license":
 					if resp.License == "" {
 						fmt.Println("No license was specified for this model.")
@@ -404,9 +470,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						fmt.Println("No system message was specified for this model.")
 					}
 				case "template":
-					if resp.Template != "" {
+					switch {
 					case opts.Template != "":
 						fmt.Println(opts.Template + "\n")
 					case resp.Template != "":
 						fmt.Println(resp.Template)
-					} else {
+					default:
 						fmt.Println("No prompt template was specified for this model.")
 					}
 				default:
@@ -490,35 +559,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 }
 func buildModelfile(opts runOptions) string {
-	var f parser.File
+	var mf strings.Builder
-	f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
+	model := opts.ParentModel
-
+	if model == "" {
 		model = opts.Model
 	}
 	fmt.Fprintf(&mf, "FROM %s\n", model)
 	if opts.System != "" {
-		f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
 	}
-	keys := maps.Keys(opts.Options)
+	if opts.Template != "" {
-	slices.Sort(keys)
+		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
 	}
 	keys := make([]string, 0)
 	for k := range opts.Options {
 		keys = append(keys, k)
 	}
 	sort.Strings(keys)
 	for _, k := range keys {
-		v := opts.Options[k]
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
 		var cmds []parser.Command
 		switch t := v.(type) {
 		case []string:
 			for _, s := range t {
 				cmds = append(cmds, parser.Command{Name: k, Args: s})
 			}
 		default:
 			cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
 		}
 		f.Commands = append(f.Commands, cmds...)
 	}
 	fmt.Fprintln(&mf)
 	for _, msg := range opts.Messages {
-		f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
 	}
-	return f.String()
+	return mf.String()
 }
 func normalizeFilePath(fp string) string {
@@ -604,7 +673,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, errors.New("file size exceeds maximum limit (100MB)")
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
 	}
 	buf = make([]byte, info.Size())
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -1,10 +1,12 @@
 package cmd
 import (
 	"bytes"
 	"testing"
 	"text/template"
 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/ollama/ollama/api"
 )
@@ -55,53 +57,61 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
 func TestModelfileBuilder(t *testing.T) {
 	opts := runOptions{
-		Model:  "hork",
+		Model:    "hork",
-		System: "You are part horse and part shark, but all hork. Do horklike things",
+		System:   "You are part horse and part shark, but all hork. Do horklike things",
 		Template: "This is a template.",
 		Messages: []api.Message{
 			{Role: "user", Content: "Hey there hork!"},
 			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
 		},
-		Options: map[string]any{
+		Options: map[string]interface{}{},
 			"temperature":      0.9,
 			"seed":             42,
 			"penalize_newline": false,
 			"stop":             []string{"hi", "there"},
 		},
 	}
-	t.Run("model", func(t *testing.T) {
+	opts.Options["temperature"] = 0.9
-		expect := `FROM hork
+	opts.Options["seed"] = 42
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	opts.Options["penalize_newline"] = false
 	opts.Options["stop"] = []string{"hi", "there"}
 	mf := buildModelfile(opts)
 	expectedModelfile := `FROM {{.Model}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+	tmpl, err := template.New("").Parse(expectedModelfile)
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	require.NoError(t, err)
 			t.Errorf("mismatch (-want +got):\n%s", diff)
 		}
 	})
-	t.Run("parent model", func(t *testing.T) {
+	var buf bytes.Buffer
-		opts.ParentModel = "horseshark"
+	err = tmpl.Execute(&buf, opts)
-		expect := `FROM horseshark
+	require.NoError(t, err)
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	assert.Equal(t, buf.String(), mf)
 	opts.ParentModel = "horseshark"
 	mf = buildModelfile(opts)
 	expectedModelfile = `FROM {{.ParentModel}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	tmpl, err = template.New("").Parse(expectedModelfile)
-			t.Errorf("mismatch (-want +got):\n%s", diff)
+	require.NoError(t, err)
-		}
+
-	})
+	var parentBuf bytes.Buffer
 	err = tmpl.Execute(&parentBuf, opts)
 	require.NoError(t, err)
 	assert.Equal(t, parentBuf.String(), mf)
 }
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -2,7 +2,7 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -4,11 +4,11 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/api"
 )
 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 			// Finally look in the path
 			appExe, err = exec.LookPath(AppName)
 			if err != nil {
-				return errors.New("could not locate ollama app")
+				return fmt.Errorf("could not locate ollama app")
 			}
 		}
 	}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -1,122 +1,200 @@
 package convert
 import (
 	"cmp"
 	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"slices"
 	"strings"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 	"github.com/ollama/ollama/llm"
 )
-type Parameters struct {
+const (
-	Architectures []string `json:"architectures"`
+	_ int32 = iota
-	VocabSize     uint32   `json:"vocab_size"`
+	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 // Params holds the model hyperparameters used during conversion. The json
 // tags name the configuration keys each field is decoded from.
 type Params struct {
 	Architectures     []string `json:"architectures"`
 	VocabSize         int      `json:"vocab_size"`
 	HiddenSize        int      `json:"hidden_size"`       // n_embd
 	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
 	ContextSize       int      `json:"max_position_embeddings"`
 	IntermediateSize  int      `json:"intermediate_size"`
 	AttentionHeads    int      `json:"num_attention_heads"` // n_head
 	KeyValHeads       int      `json:"num_key_value_heads"`
 	NormEPS           float64  `json:"rms_norm_eps"`
 	BoSTokenID        int      `json:"bos_token_id"`
 	EoSTokenID        int      `json:"eos_token_id"`
 	HeadDimension     int      `json:"head_dim"`
 	PaddingTokenID    int      `json:"pad_token_id"`
 	RopeFrequencyBase float64  `json:"rope_theta"`
 	// Mixture-of-experts settings.
 	Experts     int `json:"num_local_experts"`
 	ExpertsUsed int `json:"num_experts_per_tok"`
 	// PreTokenizer has no json tag; LlamaModel.LoadVocab assigns it from tokenizer.json.
 	PreTokenizer string
 	// ByteOrder is embedded and passed to llm.NewGGUFV3 when the GGUF file is written.
 	ByteOrder
 }
-func (Parameters) KV(t *Tokenizer) llm.KV {
+type ByteOrder interface {
-	kv := llm.KV{
+	binary.ByteOrder
-		"general.file_type":            uint32(1),
+	binary.AppendByteOrder
 		"general.quantization_version": uint32(2),
 		"tokenizer.ggml.pre":           t.Pre,
 		"tokenizer.ggml.model":         t.Vocabulary.Model,
 		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
 		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
 		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
 	}
 	if t.Template != "" {
 		kv["tokenizer.chat_template"] = t.Template
 	}
 	for _, sv := range t.SpecialVocabulary {
 		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
 		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
 	}
 	return kv
 }
-func (Parameters) specialTokenTypes() []string {
+type ModelArch interface {
-	return []string{
+	GetTensors() error
-		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
+	LoadVocab() error
-	}
+	WriteGGUF(io.WriteSeeker) error
 }
-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+type ModelFormat interface {
-	return llm.WriteGGUF(ws, kv, ts)
+	GetLayerName(string) (string, error)
 	GetTensors(string, *Params) ([]llm.Tensor, error)
 	GetParams(string) (*Params, error)
 	GetModelArch(string, string, *Params) (ModelArch, error)
 }
-type Converter interface {
+type ModelData struct {
-	// KV maps parameters to LLM key-values
+	Path    string
-	KV(*Tokenizer) llm.KV
+	Name    string
-	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
+	Params  *Params
-	Tensors([]Tensor) []llm.Tensor
+	Vocab   *Vocab
-
+	Tensors []llm.Tensor
-	// tensorName returns the LLM tensor name for a specific input name
+	Format  ModelFormat
 	tensorName(string) string
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
-// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
+func GetModelFormat(dirname string) (ModelFormat, error) {
-// and files it finds in the input path.
+	files, err := filepath.Glob(filepath.Join(dirname, "*"))
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
 func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
-		return err
+		return nil, err
 	}
-	var p Parameters
+	for _, fn := range files {
-	if err := json.Unmarshal(bts, &p); err != nil {
+		if strings.HasSuffix(fn, ".safetensors") {
-		return err
+			return &SafetensorFormat{}, nil
-	}
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
-
+			slog.Debug("model is torch")
-	if len(p.Architectures) < 1 {
+			return &TorchFormat{}, nil
 		return errors.New("unknown architecture")
 	}
 	var conv Converter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
 		conv = &llama{}
 	case "MixtralForCausalLM":
 		conv = &mixtral{}
 	case "GemmaForCausalLM":
 		conv = &gemma{}
 	default:
 		return errors.New("unsupported architecture")
 	}
 	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
 	if err != nil {
 		return err
 	}
 	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
 		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
 		for i := range vocabSize - len(t.Vocabulary.Tokens) {
 			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
 			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
 			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
 		}
 	} else {
 		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
-	ts, err := parseTensors(fsys)
+	return nil, fmt.Errorf("couldn't determine model format")
-	if err != nil {
+}
-		return err
+
-	}
+// Details on gguf's tokenizer can be found at:
-
+// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
-	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
+type Vocab struct {
 	Tokens []string
 	Scores []float32
 	Types  []int32
 	Merges []string
 }
 // LoadSentencePieceTokens builds a Vocab from <dirpath>/tokenizer.model.
 //
 // Every SentencePiece piece contributes a token, score and type; piece types
 // other than UNKNOWN, CONTROL, UNUSED and BYTE are recorded as NORMAL. If
 // <dirpath>/added_tokens.json exists, its entries are appended in ID order
 // (each ID must equal the index it lands on) and the vocabulary is then padded
 // with "<dummyNNNNN>" tokens up to params.VocabSize. When added_tokens.json
 // does not exist the vocabulary is returned as read, without padding.
 func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 	modelPath := filepath.Join(dirpath, "tokenizer.model")
 	slog.Info(fmt.Sprintf("reading vocab from %s", modelPath))
 	raw, err := os.ReadFile(modelPath)
 	if err != nil {
 		return nil, err
 	}
 	// To regenerate sentencepiece from the protobufs use:
 	// protoc -I=./ --go_out=./ sentencepiece_model.proto
 	spm := &sentencepiece.ModelProto{}
 	if err := proto.Unmarshal(raw, spm); err != nil {
 		return nil, err
 	}
 	vocab := &Vocab{
 		Tokens: make([]string, 0),
 		Scores: make([]float32, 0),
 		Types:  make([]int32, 0),
 	}
 	for _, piece := range spm.GetPieces() {
 		vocab.Tokens = append(vocab.Tokens, piece.GetPiece())
 		vocab.Scores = append(vocab.Scores, piece.GetScore())
 		kind := piece.GetType()
 		switch kind {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
 			sentencepiece.ModelProto_SentencePiece_CONTROL,
 			sentencepiece.ModelProto_SentencePiece_UNUSED,
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			// these types are kept as-is
 		default:
 			kind = sentencepiece.ModelProto_SentencePiece_NORMAL
 		}
 		vocab.Types = append(vocab.Types, int32(kind))
 	}
 	slog.Info(fmt.Sprintf("vocab size: %d", len(vocab.Tokens)))
 	// add any additional tokens
 	addedRaw, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
 	switch {
 	case os.IsNotExist(err):
 		return vocab, nil
 	case err != nil:
 		return nil, err
 	}
 	slog.Info("reading user defined tokens")
 	var added map[string]int
 	if err := json.Unmarshal(addedRaw, &added); err != nil {
 		return nil, err
 	}
 	type addedToken struct {
 		text string
 		id   int
 	}
 	extras := make([]addedToken, 0)
 	for text, id := range added {
 		extras = append(extras, addedToken{text: text, id: id})
 	}
 	slices.SortFunc(extras, func(x, y addedToken) int {
 		return cmp.Compare(x.id, y.id)
 	})
 	base := len(vocab.Tokens)
 	for i, tok := range extras {
 		// the token id should match the specific index for the total number of tokens
 		if tok.id != i+base {
 			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", tok.id, tok.text)
 		}
 		vocab.Tokens = append(vocab.Tokens, tok.text)
 		vocab.Scores = append(vocab.Scores, -1000.0)
 		vocab.Types = append(vocab.Types, tokenTypeUserDefined)
 	}
 	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(vocab.Tokens)))
 	if params.VocabSize > len(vocab.Tokens) {
 		missing := params.VocabSize - len(vocab.Tokens)
 		slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missing))
 		for i := range missing {
 			vocab.Tokens = append(vocab.Tokens, fmt.Sprintf("<dummy%05d>", i+1))
 			vocab.Scores = append(vocab.Scores, -1)
 			vocab.Types = append(vocab.Types, tokenTypeUserDefined)
 		}
 	}
 	return vocab, nil
 }
--- a/convert/convert_gemma.go
+++ b/convert/convert_gemma.go
@@ -1,103 +0,0 @@
 package convert
 import (
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // gemma holds the gemma-specific configuration fields, decoded from the JSON
 // keys named in the tags, on top of the shared Parameters.
 type gemma struct {
 	Parameters
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	HiddenSize            uint32  `json:"hidden_size"`
 	HiddenLayers          uint32  `json:"num_hidden_layers"`
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RMSNormEPS            float32 `json:"rms_norm_eps"`
 	HeadDim               uint32  `json:"head_dim"`
 }
 // Compile-time check that *gemma implements Converter.
 var _ Converter = (*gemma)(nil)
 // KV returns the shared key-values produced by Parameters.KV, extended with
 // the gemma architecture metadata and a fixed set of special token IDs.
 func (p *gemma) KV(t *Tokenizer) llm.KV {
 	out := p.Parameters.KV(t)
 	// model identity
 	out["general.name"] = "gemma"
 	out["general.architecture"] = "gemma"
 	// model dimensions
 	out["gemma.block_count"] = p.HiddenLayers
 	out["gemma.context_length"] = p.MaxPositionEmbeddings
 	out["gemma.embedding_length"] = p.HiddenSize
 	out["gemma.feed_forward_length"] = p.IntermediateSize
 	// attention; key and value lengths both come from head_dim
 	headDim := p.HeadDim
 	out["gemma.attention.head_count"] = p.NumAttentionHeads
 	out["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
 	out["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	out["gemma.attention.key_length"] = headDim
 	out["gemma.attention.value_length"] = headDim
 	// hard-coded special token IDs
 	out["tokenizer.ggml.prefix_token_id"] = uint32(67)
 	out["tokenizer.ggml.middle_token_id"] = uint32(68)
 	out["tokenizer.ggml.suffix_token_id"] = uint32(69)
 	out["tokenizer.ggml.eot_token_id"] = uint32(107)
 	return out
 }
 // Tensors maps every input tensor to an llm.Tensor under its converted name.
 // Tensors whose converted name ends in "_norm.weight" get the addOne repacker
 // installed before they are emitted.
 func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for i := range ts {
 		src := ts[i]
 		ggufName := p.tensorName(src.Name())
 		if strings.HasSuffix(ggufName, "_norm.weight") {
 			src.SetRepacker(p.addOne)
 		}
 		entry := llm.Tensor{
 			Name:     ggufName,
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		}
 		converted = append(converted, entry)
 	}
 	return converted
 }
 // tensorName rewrites an input tensor name by substituting each known
 // substring with its replacement from the table below.
 func (p *gemma) tensorName(n string) string {
 	// old/new pairs; order is preserved because strings.NewReplacer gives
 	// earlier pairs priority when several match at the same position.
 	pairs := []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 		"block_sparse_moe.gate", "ffn_inp",
 	}
 	replacer := strings.NewReplacer(pairs...)
 	return replacer.Replace(n)
 }
 // addOne is the repacker for gemma norm weights: it treats data as a vector of
 // shape[0] elements, adds a vector of ones to it, and returns the flattened
 // result. The tensor name argument is ignored.
 func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	size := int(shape[0])
 	weights := tensor.New(tensor.WithShape(size), tensor.WithBacking(data))
 	sum, err := weights.Add(tensor.Ones(tensor.Float32, size))
 	if err != nil {
 		return nil, err
 	}
 	rows, err := native.SelectF32(sum, 0)
 	if err != nil {
 		return nil, err
 	}
 	var flat []float32
 	for _, row := range rows {
 		flat = append(flat, row...)
 	}
 	return flat, nil
 }
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@@ -1,183 +0,0 @@
 package convert
 import (
 	"cmp"
 	"fmt"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // llama holds the llama-family configuration fields, decoded from the JSON
 // keys named in the tags, on top of the shared Parameters. Several settings
 // appear under more than one key; KV resolves those groups with cmp.Or,
 // taking the first non-zero value.
 type llama struct {
 	Parameters
 	NLayers               uint32  `json:"n_layers"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
 	NLayer                uint32  `json:"n_layer"`
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	NCtx                  uint32  `json:"n_ctx"`
 	HiddenSize            uint32  `json:"hidden_size"`
 	NEmbd                 uint32  `json:"n_embd"`
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NInner                uint32  `json:"n_inner"`
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NHead                 uint32  `json:"n_head"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RopeTheta             float32 `json:"rope_theta"`
 	// RopeScaling is only written to the KV when Type is "linear".
 	RopeScaling           struct {
 		Type   string  `json:"type"`
 		Factor float32 `json:"factor"`
 	} `json:"rope_scaling"`
 	RMSNormEPS       float32 `json:"rms_norm_eps"`
 	LayerNormEPS     float32 `json:"layer_norm_eps"`
 	LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
 	NormEpsilon      float32 `json:"norm_epsilon"`
 	HeadDim          uint32  `json:"head_dim"`
 }
 // Compile-time check that *llama implements Converter.
 var _ Converter = (*llama)(nil)
 // KV returns the shared key-values produced by Parameters.KV, extended with
 // the llama architecture metadata. Settings that can come from more than one
 // config key are resolved with cmp.Or (first non-zero value wins) and are only
 // written when the resolved value is non-zero.
 func (p *llama) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["general.name"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
 	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
 	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
 		kv["llama.context_length"] = contextLength
 	}
 	// resolved once so the rope dimension below uses the same value
 	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
 	if embeddingLength > 0 {
 		kv["llama.embedding_length"] = embeddingLength
 	}
 	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
 		kv["llama.feed_forward_length"] = feedForwardLength
 	}
 	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
 		kv["llama.attention.head_count"] = headCount
 		// use the resolved embedding length: dividing p.HiddenSize alone
 		// yields 0 for configs that only provide n_embd
 		kv["llama.rope.dimension_count"] = embeddingLength / headCount
 	}
 	if p.RopeTheta > 0 {
 		kv["llama.rope.freq_base"] = p.RopeTheta
 	}
 	if p.RopeScaling.Type == "linear" {
 		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
 		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
 	}
 	if p.NumKeyValueHeads > 0 {
 		kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
 	}
 	if p.RMSNormEPS > 0 {
 		kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	}
 	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
 		kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
 	}
 	if p.HeadDim > 0 {
 		kv["llama.attention.key_length"] = p.HeadDim
 		kv["llama.attention.value_length"] = p.HeadDim
 	}
 	if len(t.Merges) > 0 {
 		kv["tokenizer.ggml.merges"] = t.Merges
 	}
 	return kv
 }
 // Tensors maps every input tensor to an llm.Tensor under its converted name.
 // Tensors whose converted name ends in "attn_q.weight" or "attn_k.weight" get
 // the repack repacker installed before they are emitted.
 func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for i := range ts {
 		src := ts[i]
 		ggufName := p.tensorName(src.Name())
 		isQuery := strings.HasSuffix(ggufName, "attn_q.weight")
 		if isQuery || strings.HasSuffix(ggufName, "attn_k.weight") {
 			src.SetRepacker(p.repack)
 		}
 		entry := llm.Tensor{
 			Name:     ggufName,
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		}
 		converted = append(converted, entry)
 	}
 	return converted
 }
 // tensorName rewrites an input tensor name by substituting each known
 // substring with its replacement from the table below.
 func (p *llama) tensorName(n string) string {
 	// old/new pairs; order is preserved because strings.NewReplacer gives
 	// earlier pairs priority when several match at the same position.
 	pairs := []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 		// mixtral
 		"block_sparse_moe.gate", "ffn_gate_inp",
 	}
 	replacer := strings.NewReplacer(pairs...)
 	return replacer.Replace(n)
 }
 // repack reorders the rows of a q_proj/k_proj weight tensor: the data is
 // reshaped to (heads, 2, rows/heads/2, ...), axes 1 and 2 are swapped, the
 // original shape is restored, and the transposed result is returned flattened.
 //
 // name selects the head count ("q_proj.weight" uses the attention head count,
 // "k_proj.weight" the key/value head count, falling back to the attention head
 // count). An error is returned for any other name, when no head count is
 // configured, or when shape is empty.
 func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
 	}
 	// honour the n_head alias the same way KV does, so a config that only
 	// sets n_head does not end up with zero heads here
 	attentionHeads := cmp.Or(p.NumAttentionHeads, p.NHead)
 	var heads uint32
 	if strings.HasSuffix(name, "q_proj.weight") {
 		heads = attentionHeads
 	} else if strings.HasSuffix(name, "k_proj.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, attentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
 	}
 	// guard the integer division and the dims[0] access below, both of which
 	// would otherwise panic
 	if heads == 0 {
 		return nil, fmt.Errorf("no attention head count configured for repack: %s", name)
 	}
 	if len(dims) == 0 {
 		return nil, fmt.Errorf("empty shape for repack: %s", name)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_mixtral.go
+++ b/convert/convert_mixtral.go
@@ -1,89 +0,0 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"slices"
 	"strings"
 	"github.com/ollama/ollama/llm"
 )
 // mixtral extends llama with the expert counts decoded from the JSON keys
 // named in the tags.
 type mixtral struct {
 	llama
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 // Compile-time check that *mixtral implements Converter.
 var _ Converter = (*mixtral)(nil)
 // KV returns the llama key-values plus the expert counts; each count is only
 // written when it is non-zero.
 func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	out := p.llama.KV(t)
 	if total := p.NumLocalExperts; total > 0 {
 		out["llama.expert_count"] = total
 	}
 	if used := p.NumExpertsPerToken; used > 0 {
 		out["llama.expert_used_count"] = used
 	}
 	return out
 }
 // Tensors merges the per-expert tensors of each layer and projection into a
 // single stacked tensor and converts the remaining tensors with llama.Tensors.
 //
 // Merged tensors are emitted first, sorted by name so the output order is the
 // same on every run (ranging over the map directly would not be), followed by
 // the llama tensors.
 func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
 		"w2", "ffn_down_exps",
 		"w3", "ffn_up_exps",
 	}
 	for i := range p.NumLocalExperts {
 		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
 	}
 	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
 	namer := strings.NewReplacer(oldnew...)
 	// named "merged" rather than "experts" so the experts type is not shadowed
 	merged := make(map[string]experts)
 	// merge experts into a single tensor while removing them from ts
 	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
 		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
 			return false
 		}
 		name := namer.Replace(t.Name())
 		merged[name] = append(merged[name], t)
 		return true
 	})
 	names := make([]string, 0, len(merged))
 	for n := range merged {
 		names = append(names, n)
 	}
 	slices.Sort(names)
 	var out []llm.Tensor
 	for _, n := range names {
 		e := merged[n]
 		// TODO(mxyng): sanity check experts
 		out = append(out, llm.Tensor{
 			Name:     n,
 			Kind:     e[0].Kind(),
 			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
 			WriterTo: e,
 		})
 	}
 	return append(out, p.llama.Tensors(ts)...)
 }
 // experts is the group of expert tensors that are written out as one merged tensor.
 type experts []Tensor
 // WriteTo writes every expert tensor to w in slice order and returns the total
 // number of bytes written, as the io.WriterTo contract requires. On error it
 // stops and returns the bytes written so far together with that error.
 func (e experts) WriteTo(w io.Writer) (int64, error) {
 	var written int64
 	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
 	for _, t := range e {
 		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
 		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
 		// this accomplishes the same thing by writing each expert tensor in sequence
 		n, err := t.WriteTo(w)
 		written += n
 		if err != nil {
 			return written, err
 		}
 	}
 	return written, nil
 }
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@@ -1,35 +1,48 @@
 //go:build slow
 package convert
 import (
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"math"
 	"os"
 	"path/filepath"
 	"slices"
 	"testing"
 	"golang.org/x/exp/maps"
 	"github.com/ollama/ollama/llm"
 )
-func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
 	t.Helper()
 	mf, err := GetModelFormat(p)
 	if err != nil {
 		t.Fatal(err)
 	}
 	params, err := mf.GetParams(p)
 	if err != nil {
 		t.Fatal(err)
 	}
 	arch, err := mf.GetModelArch("", p, params)
 	if err != nil {
 		t.Fatal(err)
 	}
 	if err := arch.LoadVocab(); err != nil {
 		t.Fatal(err)
 	}
 	if err := arch.GetTensors(); err != nil {
 		t.Fatal(err)
 	}
 	f, err := os.CreateTemp(t.TempDir(), "f16")
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
-	if err := Convert(fsys, f); err != nil {
+	if err := arch.WriteGGUF(f); err != nil {
 		t.Fatal(err)
 	}
@@ -37,91 +50,53 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	t.Cleanup(func() { r.Close() })
+	defer r.Close()
-	m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if _, err := r.Seek(0, io.SeekStart); err != nil {
+	return m.KV(), m.Tensors()
 		t.Fatal(err)
 	}
 	return r, m.KV(), m.Tensors()
 }
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")
 	flag.Parse()
 	slog.SetLogLoggerLevel(level)
 	os.Exit(m.Run())
 }
 func TestConvertFull(t *testing.T) {
-	cases := []string{
+	cases := []struct {
-		"Meta-Llama-3-8B-Instruct",
+		path    string
-		"Mistral-7B-Instruct-v0.2",
+		arch    string
-		"Mixtral-8x7B-Instruct-v0.1",
+		tensors int
-		"gemma-2b-it",
+		layers  int
 	}{
 		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
 		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
 		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
 		{"gemma-2b-it", "gemma", 164, 20},
 	}
-	for i := range cases {
+	for _, tt := range cases {
-		tt := cases[i]
+		t.Run(tt.path, func(t *testing.T) {
-		t.Run(tt, func(t *testing.T) {
+			p := filepath.Join("testdata", tt.path)
-			t.Parallel()
+			if _, err := os.Stat(p); err != nil {
 			p := filepath.Join("testdata", tt)
 			if testing.Short() {
 				t.Skip("skipping in short mode")
 			} else if _, err := os.Stat(p); err != nil {
 				t.Skipf("%s not found", p)
 			}
-			f, kv, tensors := convertFull(t, os.DirFS(p))
+			kv, tensors := convertFull(t, p)
 			actual := make(map[string]string)
 			for k, v := range kv {
 				if s, ok := v.(json.Marshaler); !ok {
 					actual[k] = fmt.Sprintf("%v", v)
 				} else {
 					bts, err := json.Marshal(s)
 					if err != nil {
 						t.Fatal(err)
 					}
-					actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
+			if kv.Architecture() != tt.arch {
-				}
+				t.Fatalf("expected llama, got %s", kv.Architecture())
 			}
-			for _, tensor := range tensors.Items {
+			if kv.FileType().String() != "F16" {
-				sha256sum := sha256.New()
+				t.Fatalf("expected F16, got %s", kv.FileType())
 				sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
 				if _, err := io.Copy(sha256sum, sr); err != nil {
 					t.Fatal(err)
 				}
 				actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
 			}
-			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
+			if len(tensors) != tt.tensors {
-			if err != nil {
+				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
 				t.Fatal(err)
 			}
-			var expect map[string]string
+			layers := tensors.Layers()
-			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
+			if len(layers) != tt.layers {
-				t.Fatal(err)
+				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
 			}
 			keys := maps.Keys(expect)
 			slices.Sort(keys)
 			for _, k := range keys {
 				if v, ok := actual[k]; !ok {
 					t.Errorf("missing %s", k)
 				} else if v != expect[k] {
 					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
 				}
 			}
 		})
 	}
--- a/convert/fs.go
+++ b/convert/fs.go
@@ -1,58 +0,0 @@
 package convert
 import (
 	"archive/zip"
 	"errors"
 	"io"
 	"io/fs"
 	"os"
 	"path/filepath"
 )
 type ZipReader struct {
 	r *zip.Reader
 	p string
 	// limit is the maximum size of a file that can be read directly
 	// from the zip archive. Files larger than this size will be extracted
 	limit int64
 }
 func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
 	return &ZipReader{r, p, limit}
 }
 func (z *ZipReader) Open(name string) (fs.File, error) {
 	r, err := z.r.Open(name)
 	if err != nil {
 		return nil, err
 	}
 	defer r.Close()
 	if fi, err := r.Stat(); err != nil {
 		return nil, err
 	} else if fi.Size() < z.limit {
 		return r, nil
 	}
 	if !filepath.IsLocal(name) {
 		return nil, zip.ErrInsecurePath
 	}
 	n := filepath.Join(z.p, name)
 	if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
 		w, err := os.Create(n)
 		if err != nil {
 			return nil, err
 		}
 		defer w.Close()
 		if _, err := io.Copy(w, r); err != nil {
 			return nil, err
 		}
 	} else if err != nil {
 		return nil, err
 	}
 	return os.Open(n)
 }
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -0,0 +1,102 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"log/slog"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // GemmaModel converts gemma models; all of its state lives in the embedded ModelData.
 type GemmaModel struct {
 	ModelData
 }
 // addOnes treats data as a vector of vectorSize elements, adds a vector of
 // ones to it, and returns the flattened result.
 func addOnes(data []float32, vectorSize int) ([]float32, error) {
 	weights := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
 	sum, err := weights.Add(tensor.Ones(tensor.Float32, vectorSize))
 	if err != nil {
 		return nil, err
 	}
 	rows, err := native.SelectF32(sum, 0)
 	if err != nil {
 		return nil, err
 	}
 	var flat []float32
 	for _, row := range rows {
 		flat = append(flat, row...)
 	}
 	return flat, nil
 }
 // GetTensors loads the tensors through m.Format and appends them to
 // m.Tensors. Tensors whose name ends in "norm.weight" have m.Repack installed
 // as their repacker first.
 //
 // Both safetensors and torch writers are handled, matching
 // LlamaModel.GetTensors; any other writer type yields an error instead of a
 // failed type-assertion panic.
 func (m *GemmaModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			// the writers are values, so the modified copy is stored back
 			switch wt := l.WriterTo.(type) {
 			case safetensorWriterTo:
 				wt.repacker = m.Repack
 				l.WriterTo = wt
 			case torchWriterTo:
 				wt.repacker = m.Repack
 				l.WriterTo = wt
 			default:
 				return fmt.Errorf("unsupported tensor writer %T for %s", l.WriterTo, l.Name)
 			}
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary found under m.Path and stores
 // it in m.Vocab; on failure m.Vocab is left untouched and the error returned.
 func (m *GemmaModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // Repack adds one to every element of data, using shape[0] as the vector
 // length. The tensor name argument is ignored.
 func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
 	vectorSize := int(shape[0])
 	return addOnes(data, vectorSize)
 }
 // WriteGGUF encodes the gemma metadata, vocabulary and m.Tensors to ws as a
 // GGUF v3 file using the byte order from m.Params.
 func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 	params := m.Params
 	vocab := m.Vocab
 	kv := llm.KV{
 		"general.architecture": "gemma",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),
 	}
 	// model dimensions
 	kv["gemma.context_length"] = uint32(params.ContextSize)
 	kv["gemma.embedding_length"] = uint32(params.HiddenSize)
 	kv["gemma.block_count"] = uint32(params.HiddenLayers)
 	kv["gemma.feed_forward_length"] = uint32(params.IntermediateSize)
 	// attention; key and value lengths both come from the head dimension
 	headDim := uint32(params.HeadDimension)
 	kv["gemma.attention.head_count"] = uint32(params.AttentionHeads)
 	kv["gemma.attention.head_count_kv"] = uint32(params.KeyValHeads)
 	kv["gemma.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
 	kv["gemma.attention.key_length"] = headDim
 	kv["gemma.attention.value_length"] = headDim
 	// tokenizer
 	kv["tokenizer.ggml.model"] = "llama"
 	kv["tokenizer.ggml.tokens"] = vocab.Tokens
 	kv["tokenizer.ggml.scores"] = vocab.Scores
 	kv["tokenizer.ggml.token_type"] = vocab.Types
 	kv["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
 	kv["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
 	kv["tokenizer.ggml.padding_token_id"] = uint32(params.PaddingTokenID)
 	kv["tokenizer.ggml.unknown_token_id"] = uint32(3)
 	kv["tokenizer.ggml.add_bos_token"] = true
 	kv["tokenizer.ggml.add_eos_token"] = false
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -0,0 +1,159 @@
 package convert
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // LlamaModel converts llama models; all of its state lives in the embedded ModelData.
 type LlamaModel struct {
 	ModelData
 }
 // GetTensors loads the tensors through m.Format and appends them to
 // m.Tensors. Attention query/key weights (blk.N.attn_q.weight and
 // blk.N.attn_k.weight) have m.Repack installed as their repacker first, for
 // both the torch and the safetensors formats.
 func (m *LlamaModel) GetTensors() error {
 	loaded, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	qkWeight, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for i := range loaded {
 		l := loaded[i]
 		if !qkWeight.MatchString(l.Name) {
 			m.Tensors = append(m.Tensors, l)
 			continue
 		}
 		// the writers are values, so the modified copy is stored back
 		switch m.Format.(type) {
 		case *TorchFormat:
 			writer := l.WriterTo.(torchWriterTo)
 			writer.repacker = m.Repack
 			l.WriterTo = writer
 		case *SafetensorFormat:
 			writer := l.WriterTo.(safetensorWriterTo)
 			writer.repacker = m.Repack
 			l.WriterTo = writer
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab populates m.Vocab.
 //
 // The vocabulary is read from <m.Path>/tokenizer.json, which also supplies
 // the merges and sets m.Params.PreTokenizer. When that file does not exist
 // the SentencePiece vocabulary is loaded instead, so that m.Vocab is never
 // left nil after a successful return (WriteGGUF dereferences it).
 func (m *LlamaModel) LoadVocab() (err error) {
 	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
 	if errors.Is(err, os.ErrNotExist) {
 		// no tokenizer.json: fall back to tokenizer.model
 		m.Vocab, err = LoadSentencePieceTokens(m.Path, m.Params)
 		return err
 	} else if err != nil {
 		return err
 	}
 	m.Vocab = &Vocab{}
 	for _, t := range ts {
 		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
 		m.Vocab.Types = append(m.Vocab.Types, t.Type())
 	}
 	m.Vocab.Merges = merges
 	m.Params.PreTokenizer = pre
 	return nil
 }
 // WriteGGUF encodes the llama metadata, vocabulary and m.Tensors to ws as a
 // GGUF v3 file using the byte order from m.Params. The tokenizer merges are
 // written when the vocabulary has any; otherwise the scores are written.
 func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 	params := m.Params
 	vocab := m.Vocab
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),
 	}
 	// model dimensions
 	kv["llama.vocab_size"] = uint32(len(vocab.Tokens))
 	kv["llama.context_length"] = uint32(params.ContextSize)
 	kv["llama.embedding_length"] = uint32(params.HiddenSize)
 	kv["llama.block_count"] = uint32(params.HiddenLayers)
 	kv["llama.feed_forward_length"] = uint32(params.IntermediateSize)
 	// rope and attention
 	kv["llama.rope.freq_base"] = float32(params.RopeFrequencyBase)
 	kv["llama.rope.dimension_count"] = uint32(params.HiddenSize / params.AttentionHeads)
 	kv["llama.attention.head_count"] = uint32(params.AttentionHeads)
 	kv["llama.attention.head_count_kv"] = uint32(params.KeyValHeads)
 	kv["llama.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
 	// tokenizer
 	kv["tokenizer.ggml.model"] = "gpt2"
 	kv["tokenizer.ggml.pre"] = params.PreTokenizer
 	kv["tokenizer.ggml.tokens"] = vocab.Tokens
 	kv["tokenizer.ggml.token_type"] = vocab.Types
 	kv["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
 	kv["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
 	kv["tokenizer.ggml.unknown_token_id"] = uint32(0)
 	if len(vocab.Merges) == 0 {
 		kv["tokenizer.ggml.scores"] = vocab.Scores
 	} else {
 		kv["tokenizer.ggml.merges"] = vocab.Merges
 	}
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
 // Repack repacks an attention weight tensor by delegating to llamaRepack with
 // this model's parameters.
 func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	params := m.Params
 	return llamaRepack(name, params, data, shape)
 }
 // llamaRepack reorders the rows of an attention query/key weight tensor: the
 // data is reshaped to (heads, 2, rows/heads/2, ...), axes 1 and 2 are
 // swapped, the original shape is restored, and the transposed result is
 // returned flattened. Zero-valued entries of shape are ignored.
 //
 // name selects the head count ("attn_q.weight" uses params.AttentionHeads,
 // "attn_k.weight" uses params.KeyValHeads, falling back to AttentionHeads).
 // An error is returned for any other name, for a non-positive head count, or
 // when shape has no non-zero dimension.
 func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		if dim != 0 {
 			dims = append(dims, int(dim))
 		}
 	}
 	var heads int
 	switch {
 	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
 	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
 	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
 	// guard the integer division and the dims[0] access below, both of which
 	// would otherwise panic
 	if heads <= 0 {
 		return nil, fmt.Errorf("invalid attention head count %d for tensor: %s", heads, name)
 	}
 	if len(dims) == 0 {
 		return nil, fmt.Errorf("tensor has no non-zero dimensions: %s", name)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -0,0 +1,79 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MistralModel is the converter for Mistral checkpoints. It embeds ModelData,
 // which supplies the name, path, parameters, format, vocabulary and tensors
 // used by its methods.
 type MistralModel struct {
 	ModelData
 }
 // GetTensors reads the tensors from m.Path via m.Format and appends them to
 // m.Tensors. Tensors named blk.<N>.attn_q.weight or blk.<N>.attn_k.weight get
 // m.Repack installed as their repacker; all others are appended unchanged.
 // It returns any error reported by the format's GetTensors.
 func (m *MistralModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	// The pattern is a compile-time constant, so MustCompile cannot fail here.
 	re := regexp.MustCompile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	for _, l := range t {
 		if re.MatchString(l.Name) {
 			// NOTE(review): this unchecked assertion panics if the format ever
 			// yields a writer that is not a safetensorWriterTo; confirm that
 			// only the safetensors format is used with this model.
 			wt := l.WriterTo.(safetensorWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary found under m.Path and stores
 // it in m.Vocab. On failure m.Vocab is left untouched and the error returned.
 func (m *MistralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF builds the GGUF key/value metadata from the model parameters and
 // vocabulary, then encodes it together with m.Tensors to ws as GGUF v3 using
 // the byte order recorded in the parameters.
 func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 	p := m.Params
 	metadata := llm.KV{
 		// Architecture hyperparameters.
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
 		"llama.context_length":                   uint32(p.ContextSize),
 		"llama.embedding_length":                 uint32(p.HiddenSize),
 		"llama.block_count":                      uint32(p.HiddenLayers),
 		"llama.feed_forward_length":              uint32(p.IntermediateSize),
 		"llama.rope.dimension_count":             uint32(p.HiddenSize / p.AttentionHeads),
 		"llama.attention.head_count":             uint32(p.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(p.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(p.NormEPS),
 		"general.file_type":                      uint32(1),
 		// Tokenizer data.
 		"tokenizer.ggml.model":            "llama",
 		"tokenizer.ggml.tokens":           m.Vocab.Tokens,
 		"tokenizer.ggml.scores":           m.Vocab.Scores,
 		"tokenizer.ggml.token_type":       m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(p.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(p.EoSTokenID),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}
 	encoder := llm.NewGGUFV3(p.ByteOrder)
 	return encoder.Encode(ws, metadata, m.Tensors)
 }
 // Repack repacks the named tensor's data by delegating to llamaRepack with
 // this model's parameters.
 func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	return llamaRepack(name, m.Params, data, shape)
 }
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -0,0 +1,87 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MixtralModel is the converter for Mixtral checkpoints. It embeds ModelData,
 // which supplies the name, path, parameters, format, vocabulary and tensors
 // used by its methods.
 type MixtralModel struct {
 	ModelData
 }
 // GetTensors reads the tensors from m.Path via m.Format and appends them to
 // m.Tensors. Tensors named blk.<N>.attn_q.weight or blk.<N>.attn_k.weight get
 // m.Repack installed as their repacker; all others are appended unchanged.
 // It returns any error reported by the format's GetTensors.
 func (m *MixtralModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	// The pattern is a compile-time constant, so MustCompile cannot fail here.
 	re := regexp.MustCompile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	for _, l := range t {
 		if re.MatchString(l.Name) {
 			// NOTE(review): this unchecked assertion panics if the format ever
 			// yields a writer that is not a safetensorWriterTo; confirm that
 			// only the safetensors format is used with this model.
 			wt := l.WriterTo.(safetensorWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary found under m.Path and stores
 // it in m.Vocab. On failure m.Vocab is left untouched and the error returned.
 func (m *MixtralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF builds the GGUF key/value metadata from the model parameters and
 // vocabulary (including the expert counts), then encodes it together with
 // m.Tensors to ws as GGUF v3 using the byte order recorded in the parameters.
 func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
 	p := m.Params
 	metadata := llm.KV{
 		// Architecture hyperparameters.
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
 		"llama.block_count":                      uint32(p.HiddenLayers),
 		"llama.context_length":                   uint32(p.ContextSize),
 		"llama.embedding_length":                 uint32(p.HiddenSize),
 		"llama.feed_forward_length":              uint32(p.IntermediateSize),
 		"llama.attention.head_count":             uint32(p.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(p.KeyValHeads),
 		"llama.rope.freq_base":                   float32(p.RopeFrequencyBase),
 		"llama.attention.layer_norm_rms_epsilon": float32(p.NormEPS),
 		"llama.expert_count":                     uint32(p.Experts),
 		"llama.expert_used_count":                uint32(p.ExpertsUsed),
 		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
 		"llama.rope.dimension_count":             uint32(p.HiddenSize / p.AttentionHeads),
 		"general.file_type":                      uint32(1),
 		// Tokenizer data.
 		"tokenizer.ggml.model":            "llama",
 		"tokenizer.ggml.tokens":           m.Vocab.Tokens,
 		"tokenizer.ggml.scores":           m.Vocab.Scores,
 		"tokenizer.ggml.token_type":       m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(p.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(p.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 	}
 	encoder := llm.NewGGUFV3(p.ByteOrder)
 	return encoder.Encode(ws, metadata, m.Tensors)
 }
 // Repack repacks the named tensor's data by delegating to llamaRepack with
 // this model's parameters.
 func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	return llamaRepack(name, m.Params, data, shape)
 }
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -1,82 +0,0 @@
 package convert
 import (
 	"errors"
 	"io"
 	"io/fs"
 	"strings"
 )
 // Tensor is the interface implemented by the tensor readers in this package
 // (the safetensor and torch types). It exposes the tensor's name, shape and
 // storage kind, lets a repacker be attached, and writes the tensor data to an
 // io.Writer.
 type Tensor interface {
 	Name() string
 	Shape() []uint64
 	// Kind reports the storage kind (tensorKindF32 or tensorKindF16).
 	Kind() uint32
 	SetRepacker(repacker)
 	WriteTo(io.Writer) (int64, error)
 }
 // tensorBase holds the state shared by the concrete tensor types: the tensor
 // name, its shape, and an optional embedded repacker function.
 type tensorBase struct {
 	name  string
 	shape []uint64
 	// repacker, when non-nil, is applied to the decoded data before writing.
 	repacker
 }
 // Name returns the tensor's name.
 func (t tensorBase) Name() string {
 	return t.name
 }
 // Shape returns the tensor's shape slice (not a copy).
 func (t tensorBase) Shape() []uint64 {
 	return t.shape
 }
 // Storage kinds returned by tensorBase.Kind: tensorKindF32 is 0 and
 // tensorKindF16 is 1.
 const (
 	tensorKindF32 uint32 = iota
 	tensorKindF16
 )
 // Kind reports the storage kind for the tensor. Tensors whose name ends in
 // ".block_sparse_moe.gate.weight" and one-dimensional tensors are stored as
 // F32; every other tensor is stored as F16. It panics when the shape is empty
 // (unless the name matches the gate suffix, which is checked first).
 func (t tensorBase) Kind() uint32 {
 	switch rank := len(t.shape); {
 	case strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight"):
 		return tensorKindF32
 	case rank == 0:
 		panic("invalid tensor shape")
 	case rank == 1:
 		return tensorKindF32
 	}
 	return tensorKindF16
 }
 // SetRepacker installs fn as the tensor's repacker, replacing any previous one.
 func (t *tensorBase) SetRepacker(fn repacker) {
 	t.repacker = fn
 }
 // repacker transforms a tensor's decoded float32 data. It receives the tensor
 // name, the data and the shape, and returns the repacked data or an error.
 type repacker func(string, []float32, []uint64) ([]float32, error)
 // parseTensors looks in fsys for tensor files in a known layout and parses
 // them. The glob patterns are tried in order; the first one with at least one
 // match decides which parser runs, and only those matches are parsed. An
 // error is returned when no pattern matches.
 func parseTensors(fsys fs.FS) ([]Tensor, error) {
 	type parseFunc func(fs.FS, ...string) ([]Tensor, error)
 	candidates := []struct {
 		glob  string
 		parse parseFunc
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
 		{"pytorch_model-*-of-*.bin", parseTorch},
 		{"pytorch_model.bin", parseTorch},
 		{"consolidated.*.pth", parseTorch},
 	}
 	for i := range candidates {
 		candidate := candidates[i]
 		found, err := fs.Glob(fsys, candidate.glob)
 		switch {
 		case err != nil:
 			return nil, err
 		case len(found) > 0:
 			return candidate.parse(fsys, found...)
 		}
 	}
 	return nil, errors.New("unknown tensor format")
 }
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -1,150 +0,0 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"io/fs"
 	"slices"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"golang.org/x/exp/maps"
 )
 // safetensorMetadata is one entry of the JSON header of a safetensors file,
 // decoded from the "dtype", "shape" and "data_offsets" keys.
 type safetensorMetadata struct {
 	Type  string   `json:"dtype"`
 	Shape []uint64 `json:"shape"`
 	// Offsets is read as [begin, end], relative to the end of the header.
 	Offsets []int64 `json:"data_offsets"`
 }
 // parseSafetensors reads the header of every safetensors file in ps (paths
 // inside fsys) and returns one Tensor per header entry that has a dtype,
 // ordered by key within each file. Tensor data is not read here; each
 // returned tensor records the file, absolute offset and byte size needed to
 // read it later.
 //
 // Each file is closed as soon as its header has been read, so the number of
 // simultaneously open files does not grow with len(ps). Entries whose
 // data_offsets are not a valid [begin, end] pair produce an error instead of
 // an index panic.
 func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		n, headers, err := readSafetensorsHeader(fsys, p)
 		if err != nil {
 			return nil, err
 		}
 		keys := maps.Keys(headers)
 		slices.Sort(keys)
 		for _, key := range keys {
 			value := headers[key]
 			// Entries without a dtype are not tensors and are skipped.
 			if value.Type == "" {
 				continue
 			}
 			if len(value.Offsets) != 2 || value.Offsets[0] < 0 || value.Offsets[1] < value.Offsets[0] {
 				return nil, fmt.Errorf("%s: invalid data offsets %v for tensor %q", p, value.Offsets, key)
 			}
 			begin := safetensorsPad(n, value.Offsets[0])
 			end := safetensorsPad(n, value.Offsets[1])
 			ts = append(ts, safetensor{
 				fs:     fsys,
 				path:   p,
 				dtype:  value.Type,
 				offset: begin,
 				size:   end - begin,
 				tensorBase: &tensorBase{
 					name:  key,
 					shape: value.Shape,
 				},
 			})
 		}
 	}
 	return ts, nil
 }
 // readSafetensorsHeader opens p in fsys, reads the little-endian int64 header
 // length followed by that many bytes of JSON, and returns the length together
 // with the decoded header entries. The file is closed before returning.
 func readSafetensorsHeader(fsys fs.FS, p string) (int64, map[string]safetensorMetadata, error) {
 	f, err := fsys.Open(p)
 	if err != nil {
 		return 0, nil, err
 	}
 	defer f.Close()
 	var n int64
 	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 		return 0, nil, err
 	}
 	// A negative length would make the buffer allocation below panic.
 	if n < 0 {
 		return 0, nil, fmt.Errorf("%s: invalid header length %d", p, n)
 	}
 	b := bytes.NewBuffer(make([]byte, 0, n))
 	if _, err = io.CopyN(b, f, n); err != nil {
 		return 0, nil, err
 	}
 	var headers map[string]safetensorMetadata
 	if err := json.NewDecoder(b).Decode(&headers); err != nil {
 		return 0, nil, err
 	}
 	return n, headers, nil
 }
 // safetensorsPad converts offset, which is relative to the end of the header,
 // into an absolute file offset by adding the 8-byte length prefix and the
 // n-byte header that precede the tensor data.
 func safetensorsPad(n, offset int64) int64 {
 	const lengthPrefix = 8
 	return lengthPrefix + n + offset
 }
 // safetensor describes one tensor stored in a safetensors file: where to find
 // its bytes and how they are encoded. The embedded tensorBase supplies the
 // name, shape and optional repacker.
 type safetensor struct {
 	fs    fs.FS  // filesystem the file is opened from
 	path  string // path of the file within fs
 	dtype string // source element type: "F32", "F16" or "BF16"
 	// offset is the absolute position of the tensor data in the file and size
 	// is its length in bytes.
 	offset int64
 	size   int64
 	*tensorBase
 }
 // WriteTo reads the tensor's bytes from its file, decodes them to float32
 // according to the dtype (F32, F16 or BF16), applies the repacker when one is
 // set, and writes the result to w in little-endian order as F32 or F16
 // depending on Kind.
 //
 // It returns the number of bytes written to w, as the io.WriterTo contract
 // requires, or 0 and an error when the file cannot be read, the dtype or kind
 // is unknown, the recorded size is negative, the repacker fails, or the write
 // fails.
 func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 	// A negative size would make the slice allocations below panic.
 	if st.size < 0 {
 		return 0, fmt.Errorf("invalid tensor size: %d", st.size)
 	}
 	f, err := st.fs.Open(st.path)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	// fs.File is not required to be seekable; fall back to discarding bytes.
 	if seeker, ok := f.(io.Seeker); ok {
 		if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
 			return 0, err
 		}
 	} else {
 		if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
 			return 0, err
 		}
 	}
 	var f32s []float32
 	switch st.dtype {
 	case "F32":
 		f32s = make([]float32, st.size/4)
 		if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, st.size/2)
 		if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
 			return 0, err
 		}
 		f32s = make([]float32, len(u16s))
 		for i := range u16s {
 			f32s[i] = float16.Frombits(u16s[i]).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, st.size)
 		if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", st.dtype)
 	}
 	if st.repacker != nil {
 		f32s, err = st.repacker(st.Name(), f32s, st.Shape())
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch kind := st.Kind(); kind {
 	case tensorKindF32:
 		if err := binary.Write(w, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 		return int64(len(f32s)) * 4, nil
 	case tensorKindF16:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err := binary.Write(w, binary.LittleEndian, f16s); err != nil {
 			return 0, err
 		}
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", kind)
 	}
 }
--- a/convert/reader_torch.go
+++ b/convert/reader_torch.go
@@ -1,47 +0,0 @@
 package convert
 import (
 	"io"
 	"io/fs"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 )
 // parseTorch loads each PyTorch checkpoint in ps and returns one Tensor per
 // entry of the checkpoint's top-level dictionary, carrying the entry's key as
 // the name, the tensor's dimensions as the shape, and its storage.
 //
 // The shape is built from the values of the tensor's Size slice; ranging with
 // a single variable would yield the indices 0, 1, 2, ... instead of the
 // dimension sizes.
 //
 // NOTE(review): pytorch.Load is given p directly, so fsys is not consulted;
 // confirm the paths resolve correctly when fsys is not rooted at the working
 // directory. The type assertions panic if the checkpoint is not a dictionary
 // of string keys to tensors.
 func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		pt, err := pytorch.Load(p)
 		if err != nil {
 			return nil, err
 		}
 		dict := pt.(*types.Dict)
 		for _, k := range dict.Keys() {
 			t := dict.MustGet(k).(*pytorch.Tensor)
 			var shape []uint64
 			for _, dim := range t.Size {
 				shape = append(shape, uint64(dim))
 			}
 			ts = append(ts, torch{
 				storage: t.Source,
 				tensorBase: &tensorBase{
 					name:  k.(string),
 					shape: shape,
 				},
 			})
 		}
 	}
 	return ts, nil
 }
 // torch describes one tensor loaded from a PyTorch checkpoint. storage is the
 // tensor's source storage; the embedded tensorBase supplies the name, shape
 // and optional repacker.
 type torch struct {
 	storage pytorch.StorageInterface
 	*tensorBase
 }
 // WriteTo is a stub: it writes nothing to w and always returns 0 and a nil
 // error, so the storage contents are never emitted.
 func (pt torch) WriteTo(w io.Writer) (int64, error) {
 	return 0, nil
 }
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -0,0 +1,309 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // safetensorWriterTo writes the data of a single tensor from a safetensors
 // file. It records where the tensor's bytes live and how they are encoded.
 type safetensorWriterTo struct {
 	// t is the tensor being written; its Name, Shape and Kind are read.
 	t *llm.Tensor
 	params *Params
 	// bo is the byte order used for both reading the file and writing output.
 	bo     ByteOrder
 	filename string
 	// dtype is the source element type: "F32", "F16" or "BF16".
 	dtype    string
 	// offset is the absolute position of the data in the file; size is its
 	// length in bytes.
 	offset, size int64
 	// repacker, when non-nil, is applied to the decoded data before writing.
 	repacker     func(string, []float32, []uint64) ([]float32, error)
 }
 // safetensorMetadata is one entry of the JSON header of a safetensors file,
 // decoded from the "dtype", "shape" and "data_offsets" keys.
 type safetensorMetadata struct {
 	Type  string   `json:"dtype"`
 	Shape []uint64 `json:"shape"`
 	// Offsets is read as [begin, end], relative to the end of the header.
 	Offsets []int64 `json:"data_offsets"`
 }
 // SafetensorFormat reads model parameters and tensors from a directory of
 // safetensors files. It carries no state.
 type SafetensorFormat struct{}
 // GetTensors collects the tensors from every *.safetensors file directly
 // inside dirpath, in the order filepath.Glob returns them. The running output
 // offset is threaded through the files so each tensor's Offset follows the
 // previous file's tensors.
 func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	paths, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
 	if err != nil {
 		return nil, err
 	}
 	var (
 		all    []llm.Tensor
 		cursor uint64
 	)
 	for i := 0; i < len(paths); i++ {
 		batch, next, err := m.readTensors(paths[i], cursor, params)
 		if err != nil {
 			return nil, err
 		}
 		all = append(all, batch...)
 		cursor = next
 	}
 	return all, nil
 }
 // readTensors parses the header of the safetensors file fn and returns one
 // llm.Tensor per usable header entry, sorted by original key, together with
 // the output offset that follows the last tensor. offset is the output offset
 // assigned to the first tensor. On error the returned offset is 0.
 //
 // Entries with an empty shape are skipped, as are rotary-embedding inv_freq
 // buffers. Two-dimensional tensors get Kind 1; all others keep Kind 0. Tensor
 // data is not read here; each tensor's WriterTo records where to find it.
 func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
 	f, err := os.Open(fn)
 	if err != nil {
 		return nil, 0, err
 	}
 	defer f.Close()
 	// The file begins with a little-endian int64 giving the JSON header length.
 	var n int64
 	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 		return nil, 0, err
 	}
 	// A negative length would make the buffer allocation below panic.
 	if n < 0 {
 		return nil, 0, fmt.Errorf("%s: invalid header length %d", fn, n)
 	}
 	b := bytes.NewBuffer(make([]byte, 0, n))
 	if _, err = io.CopyN(b, f, n); err != nil {
 		return nil, 0, err
 	}
 	var headers map[string]safetensorMetadata
 	if err := json.NewDecoder(b).Decode(&headers); err != nil {
 		return nil, 0, err
 	}
 	var keys []string
 	for key := range headers {
 		// NOTE(review): the original filter only matched "rotary_embd", which
 		// looks like a typo for the "rotary_emb" buffer name; both spellings
 		// are skipped so such keys do not fail the layer-name lookup below.
 		if strings.HasSuffix(key, "self_attn.rotary_emb.inv_freq") ||
 			strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
 			continue
 		}
 		keys = append(keys, key)
 	}
 	slices.Sort(keys)
 	// pad converts a data-relative offset into an absolute file offset by
 	// skipping the 8-byte length prefix and the n-byte header.
 	pad := func(s int64) int64 {
 		return 8 + n + s
 	}
 	var tensors []llm.Tensor
 	for _, key := range keys {
 		value := headers[key]
 		var kind uint32
 		switch len(value.Shape) {
 		case 0:
 			// entries without a shape carry no tensor data
 			continue
 		case 2:
 			kind = 1
 		}
 		name, err := m.GetLayerName(key)
 		if err != nil {
 			return nil, 0, err
 		}
 		if len(value.Offsets) != 2 || value.Offsets[0] < 0 || value.Offsets[1] < value.Offsets[0] {
 			return nil, 0, fmt.Errorf("%s: invalid data offsets %v for tensor '%s'", fn, value.Offsets, key)
 		}
 		shape := make([]uint64, len(value.Shape))
 		copy(shape, value.Shape)
 		t := llm.Tensor{
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
 			Shape:  shape,
 		}
 		t.WriterTo = safetensorWriterTo{
 			t:        &t,
 			params:   params,
 			bo:       params.ByteOrder,
 			filename: fn,
 			dtype:    value.Type,
 			offset:   pad(value.Offsets[0]),
 			size:     pad(value.Offsets[1]) - pad(value.Offsets[0]),
 		}
 		offset += t.Size()
 		tensors = append(tensors, t)
 	}
 	return tensors, offset, nil
 }
 // GetParams decodes dirpath/config.json into a Params value and sets its byte
 // order to little-endian. It returns an error when the file cannot be opened
 // or is not valid JSON for Params.
 func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
 	cfg, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		return nil, err
 	}
 	defer cfg.Close()
 	params := new(Params)
 	if err := json.NewDecoder(cfg).Decode(params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return params, nil
 }
 // safetensorDirectNames maps safetensors tensor names that translate to a
 // fixed GGUF name.
 var safetensorDirectNames = map[string]string{
 	"model.embed_tokens.weight": "token_embd.weight",
 	"lm_head.weight":            "output.weight",
 	"model.norm.weight":         "output_norm.weight",
 }
 // safetensorLayerPatterns lists the per-layer rename rules. The expressions
 // are compiled once at package initialization and tried in this fixed order.
 var safetensorLayerPatterns = []struct {
 	re   *regexp.Regexp
 	repl string
 }{
 	{regexp.MustCompile("model.layers.(\\d+).input_layernorm.weight"), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.down_proj.weight"), "blk.$1.ffn_down.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.gate_proj.weight"), "blk.$1.ffn_gate.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.up_proj.weight"), "blk.$1.ffn_up.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).post_attention_layernorm.weight"), "blk.$1.ffn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.k_proj.weight"), "blk.$1.attn_k.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.o_proj.weight"), "blk.$1.attn_output.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.q_proj.weight"), "blk.$1.attn_q.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.v_proj.weight"), "blk.$1.attn_v.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.gate.weight"), "blk.$1.ffn_gate_inp.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight"), "blk.$1.ffn_gate.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight"), "blk.$1.ffn_down.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight"), "blk.$1.ffn_up.$2.weight"},
 }
 // GetLayerName translates the safetensors tensor name n into its GGUF name.
 // Fixed names are looked up directly; per-layer names are rewritten with the
 // first pattern whose replacement changes n. An error is returned when no
 // rule applies.
 func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
 	if v, ok := safetensorDirectNames[n]; ok {
 		return v, nil
 	}
 	for _, p := range safetensorLayerPatterns {
 		// A replacement that leaves n unchanged means the pattern did not match.
 		if newName := p.re.ReplaceAllString(n, p.repl); newName != n {
 			return newName, nil
 		}
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo reads the tensor's bytes from r.filename at r.offset, decodes them
 // to float32 according to r.dtype (F32, F16 or BF16), applies the repacker
 // when one is set, and writes the result to w using r.bo: as float32 when the
 // tensor's Kind is 0, or as float16 when it is 1.
 //
 // It returns the number of bytes written to w, as the io.WriterTo contract
 // requires, or 0 and an error when the file cannot be read, the dtype or kind
 // is unknown, the recorded size is negative, the repacker fails, or the write
 // fails.
 func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	// A negative size would make the slice allocations below panic.
 	if r.size < 0 {
 		return 0, fmt.Errorf("invalid tensor size: %d", r.size)
 	}
 	f, err := os.Open(r.filename)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
 		return 0, err
 	}
 	var f32s []float32
 	switch r.dtype {
 	case "F32":
 		f32s = make([]float32, r.size/4)
 		if err = binary.Read(f, r.bo, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, r.size/2)
 		if err = binary.Read(f, r.bo, u16s); err != nil {
 			return 0, err
 		}
 		// The element count is known, so size the output once.
 		f32s = make([]float32, len(u16s))
 		for i, b := range u16s {
 			f32s[i] = float16.Frombits(b).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, r.size)
 		if err = binary.Read(f, r.bo, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch r.t.Kind {
 	case 0:
 		if err = binary.Write(w, r.bo, f32s); err != nil {
 			return 0, err
 		}
 		return int64(len(f32s)) * 4, nil
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err = binary.Write(w, r.bo, f16s); err != nil {
 			return 0, err
 		}
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 }
 // GetModelArch returns the model converter that matches the single entry in
 // params.Architectures, initialised with name, dirPath, params and this
 // format. It returns an error when no architecture is listed, when more than
 // one is listed, or when the architecture is not supported.
 func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	archs := params.Architectures
 	if len(archs) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(archs) != 1 {
 		return nil, fmt.Errorf("Unknown error")
 	}
 	// Every supported architecture is constructed from the same model data.
 	data := ModelData{
 		Name:   name,
 		Path:   dirPath,
 		Params: params,
 		Format: m,
 	}
 	switch arch := archs[0]; arch {
 	case "LlamaForCausalLM":
 		return &LlamaModel{data}, nil
 	case "MistralForCausalLM":
 		return &MistralModel{data}, nil
 	case "MixtralForCausalLM":
 		return &MixtralModel{data}, nil
 	case "GemmaForCausalLM":
 		return &GemmaModel{data}, nil
 	default:
 		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
 	}
 }
--- a/convert/testdata/Meta-Llama-3-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "8192",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "500000",
  "llama.vocab_size": "128256",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.pre": "llama-bpe",
  "tokenizer.ggml.bos_token_id": "128000",
  "tokenizer.ggml.eos_token_id": "128009",
  "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
  "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
  "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
  "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
  "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
  "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
  "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
  "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
  "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
  "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
  "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
  "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
  "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
  "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
  "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
  "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
  "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
  "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
  "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
  "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
  "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
  "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
  "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
  "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
  "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
  "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
  "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
  "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
  "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
  "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
  "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
  "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
  "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
  "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
  "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
  "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
  "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
  "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
  "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
  "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
  "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
  "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
  "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
  "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
  "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
  "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
  "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
  "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
  "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
  "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
  "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
  "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
  "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
  "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
  "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
  "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
  "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
  "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
  "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
  "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
  "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
  "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
  "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
  "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
  "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
  "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
  "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
  "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
  "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
  "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
  "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
  "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
  "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
  "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
  "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
  "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
  "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
  "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
  "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
  "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
  "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
  "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
  "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
  "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
  "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
  "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
  "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
  "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
  "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
  "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
  "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
  "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
  "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
  "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
  "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
  "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
  "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
  "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
  "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
  "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
  "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
  "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
  "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
  "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
  "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
  "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
  "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
  "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
  "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
  "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
  "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
  "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
  "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
  "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
  "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
  "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
  "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
  "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
  "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
  "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
  "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
  "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
  "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
  "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
  "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
  "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
  "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
  "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
  "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
  "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
  "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
  "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
  "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
  "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
  "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
  "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
  "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
  "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
  "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
  "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
  "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
  "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
  "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
  "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
  "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
  "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
  "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
  "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
  "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
  "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
  "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
  "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
  "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
  "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
  "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
  "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
  "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
  "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
  "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
  "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
  "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
  "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
  "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
  "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
  "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
  "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
  "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
  "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
  "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
  "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
  "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
  "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
  "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
  "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
  "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
  "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
  "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
  "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
  "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
  "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
  "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
  "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
  "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
  "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
  "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
  "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
  "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
  "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
  "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
  "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
  "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
  "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
  "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
  "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
  "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
  "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
  "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
  "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
  "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
  "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
  "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
  "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
  "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
  "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
  "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
  "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
  "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
  "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
  "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
  "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
  "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
  "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
  "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
  "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
  "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
  "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
  "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
  "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
  "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
  "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
  "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
  "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
  "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
  "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
  "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
  "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
  "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
  "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
  "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
  "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
  "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
  "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
  "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
  "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
  "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
  "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
  "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
  "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
  "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
  "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
  "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
  "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
  "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
  "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
  "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
  "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
  "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
  "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
  "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
  "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
  "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
  "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
  "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
  "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
  "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
  "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
  "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
  "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
  "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
  "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
  "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
  "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
  "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
  "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
  "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
  "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
  "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
  "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
  "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
  "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
  "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
  "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
  "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
  "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
  "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
  "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
  "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
  "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
  "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
  "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
  "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
  "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
  "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
  "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
  "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
  "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
  "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
  "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
  "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
  "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
  "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
  "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
  "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
  "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
 }
--- a/convert/testdata/Mistral-7B-Instruct-v0.2.json
+++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.rope.dimension_count": "128",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
  "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
  "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
  "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
  "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
  "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
  "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
  "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
  "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
  "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
  "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
  "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
  "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
  "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
  "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
  "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
  "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
  "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
  "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
  "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
  "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
  "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
  "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
  "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
  "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
  "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
  "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
  "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
  "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
  "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
  "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
  "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
  "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
  "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
  "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
  "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
  "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
  "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
  "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
  "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
  "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
  "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
  "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
  "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
  "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
  "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
  "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
  "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
  "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
  "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
  "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
  "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
  "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
  "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
  "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
  "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
  "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
  "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
  "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
  "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
  "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
  "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
  "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
  "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
  "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
  "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
  "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
  "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
  "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
  "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
  "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
  "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
  "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
  "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
  "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
  "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
  "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
  "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
  "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
  "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
  "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
  "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
  "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
  "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
  "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
  "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
  "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
  "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
  "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
  "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
  "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
  "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
  "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
  "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
  "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
  "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
  "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
  "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
  "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
  "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
  "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
  "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
  "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
  "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
  "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
  "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
  "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
  "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
  "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
  "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
  "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
  "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
  "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
  "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
  "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
  "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
  "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
  "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
  "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
  "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
  "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
  "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
  "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
  "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
  "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
  "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
  "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
  "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
  "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
  "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
  "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
  "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
  "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
  "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
  "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
  "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
  "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
  "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
  "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
  "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
  "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
  "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
  "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
  "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
  "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
  "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
  "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
  "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
  "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
  "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
  "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
  "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
  "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
  "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
  "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
  "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
  "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
  "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
  "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
  "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
  "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
  "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
  "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
  "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
  "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
  "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
  "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
  "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
  "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
  "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
  "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
  "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
  "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
  "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
  "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
  "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
  "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
  "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
  "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
  "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
  "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
  "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
  "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
  "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
  "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
  "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
  "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
  "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
  "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
  "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
  "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
  "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
  "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
  "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
  "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
  "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
  "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
  "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
  "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
  "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
  "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
  "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
  "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
  "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
  "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
  "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
  "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
  "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
  "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
  "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
  "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
  "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
  "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
  "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
  "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
  "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
  "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
  "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
  "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
  "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
  "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
  "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
  "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
  "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
  "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
  "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
  "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
  "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
  "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
  "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
  "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
  "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
  "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
  "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
  "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
  "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
  "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
  "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
  "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
  "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
  "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
  "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
  "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
  "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
  "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
  "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
  "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
  "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
  "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
  "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
  "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
  "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
  "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
  "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
  "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
  "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
  "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
  "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
  "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
  "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
  "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
  "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
  "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
  "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
  "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
  "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
  "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
  "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
  "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
  "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
  "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
  "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
  "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
  "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
  "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
  "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
  "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
  "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
  "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
  "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
  "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
  "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
  "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
  "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
  "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
  "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
  "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
  "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
  "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
  "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
  "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
 }
--- a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
+++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
@@ -1,348 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "1e+06",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.expert_count": "8",
  "llama.expert_used_count": "2",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
  "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
  "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
  "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
  "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
  "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
  "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
  "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
  "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
  "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
  "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
  "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
  "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
  "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
  "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
  "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
  "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
  "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
  "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
  "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
  "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
  "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
  "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
  "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
  "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
  "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
  "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
  "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
  "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
  "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
  "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
  "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
  "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
  "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
  "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
  "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
  "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
  "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
  "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
  "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
  "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
  "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
  "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
  "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
  "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
  "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
  "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
  "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
  "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
  "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
  "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
  "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
  "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
  "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
  "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
  "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
  "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
  "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
  "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
  "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
  "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
  "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
  "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
  "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
  "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
  "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
  "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
  "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
  "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
  "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
  "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
  "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
  "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
  "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
  "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
  "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
  "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
  "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
  "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
  "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
  "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
  "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
  "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
  "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
  "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
  "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
  "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
  "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
  "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
  "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
  "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
  "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
  "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
  "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
  "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
  "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
  "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
  "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
  "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
  "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
  "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
  "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
  "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
  "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
  "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
  "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
  "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
  "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
  "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
  "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
  "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
  "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
  "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
  "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
  "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
  "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
  "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
  "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
  "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
  "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
  "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
  "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
  "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
  "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
  "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
  "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
  "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
  "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
  "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
  "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
  "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
  "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
  "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
  "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
  "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
  "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
  "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
  "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
  "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
  "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
  "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
  "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
  "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
  "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
  "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
  "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
  "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
  "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
  "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
  "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
  "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
  "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
  "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
  "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
  "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
  "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
  "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
  "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
  "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
  "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
  "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
  "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
  "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
  "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
  "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
  "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
  "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
  "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
  "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
  "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
  "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
  "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
  "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
  "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
  "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
  "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
  "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
  "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
  "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
  "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
  "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
  "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
  "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
  "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
  "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
  "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
  "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
  "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
  "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
  "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
  "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
  "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
  "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
  "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
  "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
  "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
  "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
  "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
  "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
  "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
  "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
  "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
  "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
  "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
  "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
  "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
  "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
  "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
  "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
  "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
  "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
  "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
  "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
  "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
  "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
  "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
  "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
  "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
  "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
  "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
  "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
  "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
  "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
  "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
  "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
  "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
  "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
  "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
  "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
  "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
  "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
  "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
  "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
  "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
  "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
  "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
  "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
  "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
  "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
  "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
  "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
  "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
  "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
  "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
  "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
  "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
  "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
  "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
  "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
  "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
  "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
  "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
  "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
  "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
  "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
  "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
  "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
  "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
  "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
  "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
  "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
  "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
  "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
  "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
  "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
  "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
  "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
  "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
  "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
  "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
  "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
  "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
  "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
  "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
  "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
  "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
  "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
  "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
  "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
  "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
  "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
  "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
  "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
  "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
  "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
  "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
  "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
  "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
  "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
  "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
  "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
  "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
  "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
  "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
  "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
  "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
  "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
  "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
  "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
  "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
  "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
  "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
  "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
  "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
  "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
  "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
  "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
  "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
  "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
  "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
  "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
  "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
  "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
  "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
  "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
  "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
  "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
  "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
  "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
  "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
  "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
  "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
  "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
 }
--- a/convert/testdata/gemma-2b-it.json
+++ b/convert/testdata/gemma-2b-it.json
@@ -1,188 +0,0 @@
 {
  "general.architecture": "gemma",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "gemma.block_count": "18",
  "gemma.context_length": "8192",
  "gemma.embedding_length": "2048",
  "gemma.feed_forward_length": "16384",
  "gemma.attention.head_count": "8",
  "gemma.attention.head_count_kv": "1",
  "gemma.attention.key_length": "256",
  "gemma.attention.value_length": "256",
  "gemma.attention.layer_norm_rms_epsilon": "1e-06",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "2",
  "tokenizer.ggml.eos_token_id": "1",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.unknown_token_id": "3",
  "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
  "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
  "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
  "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
  "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
  "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
  "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
  "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
  "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
  "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
  "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
  "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
  "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
  "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
  "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
  "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
  "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
  "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
  "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
  "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
  "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
  "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
  "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
  "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
  "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
  "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
  "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
  "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
  "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
  "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
  "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
  "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
  "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
  "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
  "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
  "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
  "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
  "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
  "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
  "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
  "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
  "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
  "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
  "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
  "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
  "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
  "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
  "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
  "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
  "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
  "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
  "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
  "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
  "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
  "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
  "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
  "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
  "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
  "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
  "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
  "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
  "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
  "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
  "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
  "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
  "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
  "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
  "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
  "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
  "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
  "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
  "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
  "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
  "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
  "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
  "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
  "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
  "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
  "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
  "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
  "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
  "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
  "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
  "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
  "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
  "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
  "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
  "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
  "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
  "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
  "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
  "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
  "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
  "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
  "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
  "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
  "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
  "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
  "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
  "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
  "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
  "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
  "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
  "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
  "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
  "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
  "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
  "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
  "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
  "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
  "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
  "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
  "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
  "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
  "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
  "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
  "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
  "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
  "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
  "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
  "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
  "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
  "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
  "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
  "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
  "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
  "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
  "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
  "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
  "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
  "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
  "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
  "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
  "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
  "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
  "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
  "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
  "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
  "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
  "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
  "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
  "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
  "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
  "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
  "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
  "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
  "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
  "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
  "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
  "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
  "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
  "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
  "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
  "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
  "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
  "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
  "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
  "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
  "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
  "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
  "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
  "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
  "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
  "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
  "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
  "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
  "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
 }
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -3,150 +3,19 @@ package convert
 import (
 	"cmp"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"log/slog"
 	"os"
 	"slices"
 )
-const (
+	"golang.org/x/exp/maps"
 	_ int32 = iota
 	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 type Tokenizer struct {
-	*Vocabulary
+	Version     string         `json:"version"`
-	SpecialVocabulary []*SpecialVocabulary
+	AddedTokens []Token        `json:"added_tokens"`
-	Merges            []string
+	Model       TokenizerModel `json:"model"`
 	Pre      string
 	Template string
 }
 func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
 	v, err := parseVocabulary(fsys)
 	if err != nil {
 		return nil, err
 	}
 	t := &Tokenizer{
 		Vocabulary: v,
 		Pre:        "default",
 	}
 	addedTokens := make(map[string]token)
 	if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		var tt tokenizer
 		if err := json.NewDecoder(f).Decode(&tt); err != nil {
 			return nil, err
 		}
 		for _, t := range tt.AddedTokens {
 			addedTokens[t.Content] = t
 		}
 		t.Merges = tt.Model.Merges
 		sha256sum := sha256.New()
 		for _, pt := range tt.PreTokenizer.PreTokenizers {
 			switch pt.Type {
 			case "Split":
 				if pt.Pattern.Regex != "" {
 					// create a checksum of all Split pretokenizers which should be sufficient
 					// to identify the pretokenizer
 					sha256sum.Write([]byte(pt.Pattern.Regex))
 				}
 			}
 		}
 		switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
 		case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
 			t.Pre = "llama-bpe"
 		case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
 			t.Pre = "deepseek-llm"
 		case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
 			t.Pre = "deepseek-coder"
 		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
 			// noop, empty pretokenizer
 		default:
 			slog.Warn("unknown pretokenizer, using default", "digest", digest)
 		}
 	}
 	if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		var p map[string]json.RawMessage
 		if err := json.NewDecoder(f).Decode(&p); err != nil {
 			return nil, err
 		}
 		if template, ok := p["chat_template"]; ok {
 			if err := json.Unmarshal(template, &t.Template); err != nil {
 				return nil, err
 			}
 		}
 		for _, st := range specialTokenTypes {
 			sv := SpecialVocabulary{Type: st}
 			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
 				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
 					return nil, err
 				}
 			}
 			if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
 				var content string
 				if err := json.Unmarshal(bts, &content); err != nil {
 					var mm map[string]any
 					if err := json.Unmarshal(bts, &mm); err != nil {
 						continue
 					}
 					content, ok = mm["content"].(string)
 					if !ok {
 						continue
 					}
 				}
 				sv.Content = content
 			}
 			if id, ok := addedTokens[sv.Content]; ok {
 				sv.ID = id.ID
 				t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
 			}
 		}
 	}
 	return t, nil
 }
 type tokenizer struct {
 	Version     string  `json:"version"`
 	AddedTokens []token `json:"added_tokens"`
 	Model       struct {
 		Type   string         `json:"type"`
 		Vocab  map[string]int `json:"vocab"`
 		Merges []string       `json:"merges"`
 	} `json:"model"`
 	PreTokenizer struct {
 		PreTokenizers []struct {
@@ -158,108 +27,80 @@ type tokenizer struct {
 	} `json:"pre_tokenizer"`
 }
-type token struct {
+type TokenizerModel struct {
 	Type   string         `json:"type"`
 	Vocab  map[string]int `json:"vocab"`
 	Merges []string       `json:"merges"`
 	Tokens []Token
 }
 type Token struct {
 	ID          int    `json:"id"`
 	Content     string `json:"content"`
 	Special     bool   `json:"special"`
 	UserDefined bool
 }
-type Vocabulary struct {
+func (t *Token) Type() int32 {
-	Model  string
+	switch {
-	Tokens []string
+	case t.Special:
-	Scores []float32
+		return tokenTypeControl
-	Types  []int32
+	case t.UserDefined:
 		return tokenTypeUserDefined
 	default:
 		return tokenTypeNormal
 	}
 }
-func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
+func (t *Tokenizer) maxID() int {
-	f, err := fsys.Open("tokenizer.json")
+	return max(
 		slices.Max(maps.Values(t.Model.Vocab)),
 		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
 			return cmp.Compare(a.ID, b.ID)
 		}).ID,
 	)
 }
 func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
 	f, err := os.Open(dirpath)
 	if err != nil {
-		return nil, err
+		panic(err)
 	}
 	defer f.Close()
-	var t tokenizer
+	var t Tokenizer
 	if err := json.NewDecoder(f).Decode(&t); err != nil {
-		return nil, err
+		return "", nil, nil, err
 	}
-	var tokens []token
+	tokens = make([]Token, t.maxID()+1)
 	for k, v := range t.Model.Vocab {
-		tokens = append(tokens, token{
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
 			ID:      v,
 			Content: k,
 		})
 	}
-	for _, t := range t.AddedTokens {
+	for _, v := range t.AddedTokens {
-		t.UserDefined = true
+		v.UserDefined = true
-		tokens = append(tokens, t)
+		tokens[v.ID] = v
 	}
-	slices.SortFunc(tokens, func(i, j token) int {
+	sha256sum := sha256.New()
-		return cmp.Compare(i.ID, j.ID)
+	for _, pt := range t.PreTokenizer.PreTokenizers {
-	})
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
-
+			sha256sum.Write([]byte(pt.Pattern.Regex))
 	v := Vocabulary{Model: "gpt2"}
 	for _, t := range tokens {
 		v.Tokens = append(v.Tokens, t.Content)
 		v.Scores = append(v.Scores, float32(t.ID))
 		switch {
 		case t.Special:
 			v.Types = append(v.Types, tokenTypeControl)
 		case t.UserDefined:
 			v.Types = append(v.Types, tokenTypeUserDefined)
 		default:
 			v.Types = append(v.Types, tokenTypeNormal)
 		}
 	}
-	return &v, nil
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
-}
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
-
+		pre = "llama-bpe"
-func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
+	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
-	patterns := []struct {
+		pre = "deepseek-llm"
-		Pattern string
+	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
-		Func    func(fs.FS) (*Vocabulary, error)
+		pre = "deepseek-coder"
-	}{
+	default:
-		{"tokenizer.model", parseSentencePiece},
+		slog.Warn("unknown pretokenizer, using default", "digest", digest)
-		{"tokenizer.json", parseVocabularyFromTokenizer},
+		pre = "default"
 	}
-	for _, pattern := range patterns {
+	return pre, tokens, t.Model.Merges, nil
 		if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
 			continue
 		} else if err != nil {
 			return nil, err
 		}
 		return pattern.Func(fsys)
 	}
 	return nil, errors.New("unknown tensor format")
 }
 // SpecialVocabulary describes a single special token (for example bos, eos or
 // pad) together with the id it was assigned among the added tokens.
 type SpecialVocabulary struct {
 	// Type is the short token kind; Key recognises bos, eos, cls, mask, unk,
 	// sep and pad.
 	Type string
 	// ID is the numeric id of the token.
 	ID int
 	// Content is the literal token text.
 	Content string
 	// AddToken carries the decoded add_<type>_token setting.
 	AddToken bool
 }
 // Key returns the long-form name for the token type: bos, eos, cls and mask
 // are returned unchanged, unk becomes "unknown", sep becomes "seperator" and
 // pad becomes "padding". Any other type panics.
 func (sv SpecialVocabulary) Key() string {
 	switch sv.Type {
 	case "unk":
 		return "unknown"
 	case "sep":
 		//nolint:misspell // this is an upstream typo
 		return "seperator"
 	case "pad":
 		return "padding"
 	case "bos", "eos", "cls", "mask":
 		return sv.Type
 	default:
 		panic("unknown special vocabulary type")
 	}
 }
--- a/convert/tokenizer_spm.go
+++ b/convert/tokenizer_spm.go
@@ -1,83 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"slices"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 )
 // parseSentencePiece builds a Vocabulary from the SentencePiece model stored in
 // tokenizer.model and then appends any tokens listed in added_tokens.json.
 //
 // Pieces typed UNKNOWN, CONTROL, UNUSED or BYTE keep their SentencePiece type;
 // every other piece is recorded as NORMAL. A missing added_tokens.json is not
 // an error. Added tokens must continue the id sequence directly after the last
 // piece, otherwise an "invalid token id" error is returned.
 func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 	raw, err := fs.ReadFile(fsys, "tokenizer.model")
 	if err != nil {
 		return nil, err
 	}
 	var model sentencepiece.ModelProto
 	if err := proto.Unmarshal(raw, &model); err != nil {
 		return nil, err
 	}
 	vocab := &Vocabulary{Model: "llama"}
 	for _, piece := range model.GetPieces() {
 		vocab.Tokens = append(vocab.Tokens, piece.GetPiece())
 		vocab.Scores = append(vocab.Scores, piece.GetScore())
 		kind := piece.GetType()
 		switch kind {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
 			sentencepiece.ModelProto_SentencePiece_CONTROL,
 			sentencepiece.ModelProto_SentencePiece_UNUSED,
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			// these types are carried over unchanged
 		default:
 			kind = sentencepiece.ModelProto_SentencePiece_NORMAL
 		}
 		vocab.Types = append(vocab.Types, int32(kind))
 	}
 	added, err := fsys.Open("added_tokens.json")
 	switch {
 	case errors.Is(err, os.ErrNotExist):
 		return vocab, nil
 	case err != nil:
 		return nil, err
 	}
 	defer added.Close()
 	var idsByContent map[string]int
 	if err := json.NewDecoder(added).Decode(&idsByContent); err != nil {
 		return nil, err
 	}
 	type addedToken struct {
 		id      int
 		content string
 	}
 	var extras []addedToken
 	for content, id := range idsByContent {
 		extras = append(extras, addedToken{id: id, content: content})
 	}
 	slices.SortFunc(extras, func(a, b addedToken) int {
 		return cmp.Compare(a.id, b.id)
 	})
 	// next is the id the following added token is required to have
 	next := len(vocab.Tokens)
 	for _, extra := range extras {
 		if extra.id != next {
 			return nil, fmt.Errorf("invalid token id: %d", extra.id)
 		}
 		vocab.Tokens = append(vocab.Tokens, extra.content)
 		vocab.Scores = append(vocab.Scores, -1000.0)
 		vocab.Types = append(vocab.Types, tokenTypeUserDefined)
 		next++
 	}
 	return vocab, nil
 }
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -0,0 +1,287 @@
 package convert
 import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // torchWriterTo writes the values of one PyTorch tensor to an io.Writer
 // through its WriteTo method.
 type torchWriterTo struct {
 	// t is the tensor being written; WriteTo reads its Name, Shape and Kind.
 	t *llm.Tensor
 	// params are the model parameters supplied when the tensor was collected.
 	params *Params
 	// bo is the byte order used when encoding the values.
 	bo     ByteOrder
 	// storage holds the tensor values loaded from the checkpoint.
 	storage  pytorch.StorageInterface
 	// repacker, when non-nil, is applied to the values before they are written.
 	repacker func(string, []float32, []uint64) ([]float32, error)
 }
 // TorchFormat groups the conversion routines for PyTorch (.pth) checkpoints.
 type TorchFormat struct{}
 // GetTensors loads the PyTorch checkpoint files found in dirpath and describes
 // every tensor they contain as an llm.Tensor.
 //
 // Files matching consolidated*.pth are preferred; pytorch_model*.pth is used
 // only when none are found. Rotary inv_freq entries and zero-dimensional
 // tensors are skipped. One-dimensional tensors are emitted as kind 0 (float32)
 // and two-dimensional tensors as kind 1 (float16); offsets accumulate in the
 // order the tensors are visited.
 //
 // An error is returned when a file cannot be unpickled, when the checkpoint
 // does not have the expected dictionary-of-tensors structure, when a tensor
 // has more than two dimensions, or when a tensor name has no GGUF equivalent.
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")
 	var files []string
 	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
 		files = append(files, pt...)
 	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
 		files = append(files, pt...)
 	}
 	var offset uint64
 	var tensors []llm.Tensor
 	for _, fn := range files {
 		m, err := pytorch.Load(fn)
 		if err != nil {
 			slog.Error(fmt.Sprintf("error unpickling: %q", err))
 			return []llm.Tensor{}, err
 		}
 		// use checked assertions so a malformed checkpoint yields an error
 		// instead of a panic
 		dict, ok := m.(*types.Dict)
 		if !ok {
 			return nil, fmt.Errorf("unexpected checkpoint contents in '%s': %T", fn, m)
 		}
 		for _, k := range dict.Keys() {
 			name, ok := k.(string)
 			if !ok {
 				return nil, fmt.Errorf("unexpected tensor key in '%s': %T", fn, k)
 			}
 			if strings.HasSuffix(name, "self_attn.rotary_emb.inv_freq") {
 				continue
 			}
 			entry, _ := dict.Get(k)
 			pt, ok := entry.(*pytorch.Tensor)
 			if !ok {
 				return nil, fmt.Errorf("unexpected value for '%s' in '%s': %T", name, fn, entry)
 			}
 			tshape := pt.Size
 			var size uint64
 			var kind uint32
 			switch len(tshape) {
 			case 0:
 				continue
 			case 1:
 				// convert to float32
 				kind = 0
 				size = uint64(tshape[0] * 4)
 			case 2:
 				// convert to float16
 				kind = 1
 				size = uint64(tshape[0] * tshape[1] * 2)
 			default:
 				// higher ranks have no size/kind rule here; continuing would
 				// record a zero-sized tensor and corrupt later offsets
 				return nil, fmt.Errorf("unsupported shape for tensor '%s': %v", name, tshape)
 			}
 			ggufName, err := tf.GetLayerName(name)
 			if err != nil {
 				slog.Error(err.Error())
 				return nil, err
 			}
 			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", name, ggufName, size, tshape))
 			shape := []uint64{0, 0, 0, 0}
 			for i := range tshape {
 				shape[i] = uint64(tshape[i])
 			}
 			tensor := llm.Tensor{
 				Name:   ggufName,
 				Kind:   kind,
 				Offset: offset, // calculate the offset
 				Shape:  shape,
 			}
 			tensor.WriterTo = torchWriterTo{
 				t:       &tensor,
 				params:  params,
 				bo:      params.ByteOrder,
 				storage: pt.Source,
 			}
 			tensors = append(tensors, tensor)
 			offset += size
 		}
 	}
 	return tensors, nil
 }
 // getAltParams reads model hyperparameters from params.json in dirpath and
 // converts them into a Params value for a LlamaForCausalLM model.
 //
 // The context size is not read from the file; it is chosen from the other
 // values: 16384 when rope_theta is 1000000, 4096 when norm_eps is 1e-06, and
 // 2048 otherwise. The byte order is always little endian.
 func getAltParams(dirpath string) (*Params, error) {
 	file, err := os.Open(filepath.Join(dirpath, "params.json"))
 	if err != nil {
 		slog.Error("no params.json")
 		return nil, err
 	}
 	defer file.Close()
 	// raw mirrors the field names used by params.json
 	var raw struct {
 		HiddenSize     int     `json:"dim"`
 		AttentionHeads int     `json:"n_heads"`
 		KeyValHeads    int     `json:"n_kv_heads"`
 		HiddenLayers   int     `json:"n_layers"`
 		RopeTheta      float64 `json:"rope_theta"`
 		NormEPS        float64 `json:"norm_eps"`
 	}
 	if err := json.NewDecoder(file).Decode(&raw); err != nil {
 		return nil, err
 	}
 	result := &Params{
 		Architectures:  []string{"LlamaForCausalLM"},
 		HiddenSize:     raw.HiddenSize,
 		AttentionHeads: raw.AttentionHeads,
 		KeyValHeads:    raw.KeyValHeads,
 		HiddenLayers:   raw.HiddenLayers,
 		NormEPS:        raw.NormEPS,
 		ByteOrder:      binary.LittleEndian,
 	}
 	if raw.RopeTheta == 1000000 {
 		// Codellama
 		result.ContextSize = 16384
 	} else if raw.NormEPS == 1e-06 {
 		// llama2
 		slog.Debug("Found llama2 - setting context size to 4096")
 		result.ContextSize = 4096
 	} else {
 		result.ContextSize = 2048
 	}
 	return result, nil
 }
 // GetParams reads the model parameters from config.json in dirpath. When
 // config.json does not exist it falls back to params.json via getAltParams.
 // Any other open error, or a JSON decoding error, is returned to the caller.
 // The byte order of the returned parameters is always little endian.
 func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
 	f, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		if os.IsNotExist(err) {
 			// try params.json instead
 			return getAltParams(dirpath)
 		}
 		return nil, err
 	}
 	// release the file handle once decoding is finished
 	defer f.Close()
 	var params Params
 	if err := json.NewDecoder(f).Decode(&params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return &params, nil
 }
 // GetLayerName translates a PyTorch tensor name into its GGUF name.
 //
 // Names with a fixed translation are looked up directly. Otherwise each
 // per-layer pattern is applied in turn and the first one that changes the name
 // supplies the result, with the layer number carried over into "blk.<n>".
 // An error is returned when nothing matches.
 func (m *TorchFormat) GetLayerName(n string) (string, error) {
 	exact := map[string]string{
 		"tok_embeddings.weight":     "token_embd.weight",
 		"output.weight":             "output.weight",
 		"norm.weight":               "output_norm.weight",
 		"rope.freqs":                "rope_freqs.weight",
 		"model.embed_tokens.weight": "token_embd.weight",
 		"lm_head.weight":            "output.weight",
 		"model.norm.weight":         "output_norm.weight",
 	}
 	if name, found := exact[n]; found {
 		return name, nil
 	}
 	perLayer := map[string]string{
 		"layers.(\\d+).attention_norm.weight":                 "blk.$1.attn_norm.weight",
 		"layers.(\\d+).attention_output_norm.weight":          "blk.$1.attn_norm.weight",
 		"layers.(\\d+).feed_forward.w2.weight":                "blk.$1.ffn_down.weight",
 		"layers.(\\d+).feed_forward.w1.weight":                "blk.$1.ffn_gate.weight",
 		"layers.(\\d+).feed_forward.w3.weight":                "blk.$1.ffn_up.weight",
 		"layers.(\\d+).ffn_norm.weight":                       "blk.$1.ffn_norm.weight",
 		"layers.(\\d+).attention.wk.weight":                   "blk.$1.attn_k.weight",
 		"layers.(\\d+).attention.wo.weight":                   "blk.$1.attn_output.weight",
 		"layers.(\\d+).attention.wq.weight":                   "blk.$1.attn_q.weight",
 		"layers.(\\d+).attention.wv.weight":                   "blk.$1.attn_v.weight",
 		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
 		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
 		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
 		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
 		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
 		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
 		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
 		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
 		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
 	}
 	// a pattern counts as a match when substituting it alters the name
 	for pattern, replacement := range perLayer {
 		renamed := regexp.MustCompile(pattern).ReplaceAllString(n, replacement)
 		if renamed != n {
 			return renamed, nil
 		}
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo encodes the tensor values to w using the writer's byte order and
 // reports how many bytes were written, as the io.WriterTo contract requires.
 //
 // The values are taken from float, half or bfloat16 storage; any other storage
 // type is an error. When a repacker is set it is applied to the values first.
 // Kind 0 tensors are written as float32 and kind 1 tensors are converted to
 // float16 bit patterns; any other kind is an error. On failure the returned
 // count is 0.
 func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	var f32s []float32
 	switch s := r.storage.(type) {
 	case *pytorch.FloatStorage:
 		f32s = s.Data
 	case *pytorch.HalfStorage:
 		f32s = s.Data
 	case *pytorch.BFloat16Storage:
 		f32s = s.Data
 	default:
 		return 0, fmt.Errorf("unknown data type: %T", s)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	var data any
 	switch r.t.Kind {
 	case 0:
 		data = f32s
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		data = f16s
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 	if err := binary.Write(w, r.bo, data); err != nil {
 		return 0, err
 	}
 	// binary.Size gives the encoded length of the fixed-size slice just written
 	return int64(binary.Size(data)), nil
 }
 // GetModelArch returns the ModelArch for the single architecture named in
 // params.Architectures. Only "LlamaForCausalLM" is supported; it produces a
 // LlamaModel carrying name, dirPath, params and this format. An empty list, an
 // unsupported architecture, or more than one architecture yields an error.
 func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	archs := params.Architectures
 	if len(archs) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(archs) != 1 {
 		return nil, fmt.Errorf("Unknown error")
 	}
 	if archs[0] != "LlamaForCausalLM" {
 		return nil, fmt.Errorf("Models based on '%s' are not yet supported", archs[0])
 	}
 	return &LlamaModel{
 		ModelData{
 			Name:   name,
 			Path:   dirPath,
 			Params: params,
 			Format: m,
 		},
 	}, nil
 }
--- a/docs/api.md
+++ b/docs/api.md
@@ -26,7 +26,7 @@ All durations are returned in nanoseconds.
 ### Streaming responses
-Certain endpoints stream responses as JSON objects. Streaming can be disabled by providing `{"stream": false}` for these endpoints.
+Certain endpoints stream responses as JSON objects and can optionally return non-streamed responses.
 ## Generate a completion
@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 - `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
 Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):
 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
-> [!IMPORTANT]
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 > It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 ### Examples
@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 #### Request (with suffix)
 ##### Request
 ```shell
 curl http://localhost:11434/api/generate -d '{
  "model": "codellama:code",
  "prompt": "def compute_gcd(a, b):",
  "suffix": "    return result",
  "options": {
    "temperature": 0
  },
  "stream": false
 }'
 ```
 ##### Response
 ```json
 {
  "model": "codellama:code",
  "created_at": "2024-07-22T20:47:51.147561Z",
  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
  "done": true,
  "done_reason": "stop",
  "context": [...],
  "total_duration": 1162761250,
  "load_duration": 6683708,
  "prompt_eval_count": 17,
  "prompt_eval_duration": 201222000,
  "eval_count": 63,
  "eval_duration": 953997000
 }
 ```
 #### Request (JSON mode)
 > [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
 ##### Request
@@ -336,7 +298,6 @@ curl http://localhost:11434/api/generate -d '{
    "num_predict": 100,
    "top_k": 20,
    "top_p": 0.9,
    "min_p": 0.0,
    "tfs_z": 0.5,
    "typical_p": 0.7,
    "repeat_last_n": 33,
@@ -419,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
 - `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
 The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
 - `tool_calls` (optional): a list of tools the model wants to use
 Advanced parameters (optional):
@@ -587,7 +546,7 @@ Final response:
 ##### Request
-Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64.
+Send a chat message with a conversation history.
 ```shell
 curl http://localhost:11434/api/chat -d '{
@@ -663,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```
 #### Chat request (with tools)
 ##### Request
 ```
 curl http://localhost:11434/api/chat -d '{
  "model": "mistral",
  "messages": [
    {
      "role": "user",
      "content": "What is the weather today in Paris?"
    }
  ],
  "stream": false,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The location to get the weather for, e.g. San Francisco, CA"
            },
            "format": {
              "type": "string",
              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
              "enum": ["celsius", "fahrenheit"]
            }
          },
          "required": ["location", "format"]
        }
      }
    }
  ]
 }'
 ```
 ##### Response
 ```json
 {
  "model": "mistral:7b-instruct-v0.3-q4_K_M",
  "created_at": "2024-07-22T20:33:28.123648Z",
  "message": {
    "role": "assistant",
    "content": "",
    "tool_calls": [
      {
        "function": {
          "name": "get_current_weather",
          "arguments": {
            "format": "celsius",
            "location": "Paris, FR"
          }
        }
      }
    ]
  },
  "done_reason": "stop",
  "done": true,
  "total_duration": 885095291,
  "load_duration": 3753500,
  "prompt_eval_count": 122,
  "prompt_eval_duration": 328493000,
  "eval_count": 33,
  "eval_duration": 552222000
 }
 ```
 ## Create a Model
 ```shell
@@ -891,12 +777,11 @@ A single JSON object will be returned.
 POST /api/show
 ```
-Show information about a model including details, modelfile, template, parameters, license, system prompt.
+Show information about a model including details, modelfile, template, parameters, license, and system prompt.
 ### Parameters
 - `name`: name of the model to show
 - `verbose`: (optional) if set to `true`, returns full data for verbose response fields
 ### Examples
@@ -913,40 +798,14 @@ curl http://localhost:11434/api/show -d '{
 ```json
 {
  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
-  "parameters": "num_keep                       24\nstop                           \"<|start_header_id|>\"\nstop                           \"<|end_header_id|>\"\nstop                           \"<|eot_id|>\"",
+  "parameters": "num_ctx                        4096\nstop                           \u003c/s\u003e\nstop                           USER:\nstop                           ASSISTANT:",
-  "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
+  "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: ",
  "details": {
    "parent_model": "",
    "format": "gguf",
    "family": "llama",
-    "families": [
+    "families": ["llama", "clip"],
-      "llama"
+    "parameter_size": "7B",
    ],
    "parameter_size": "8.0B",
    "quantization_level": "Q4_0"
  },
  "model_info": {
    "general.architecture": "llama",
    "general.file_type": 2,
    "general.parameter_count": 8030261248,
    "general.quantization_version": 2,
    "llama.attention.head_count": 32,
    "llama.attention.head_count_kv": 8,
    "llama.attention.layer_norm_rms_epsilon": 0.00001,
    "llama.block_count": 32,
    "llama.context_length": 8192,
    "llama.embedding_length": 4096,
    "llama.feed_forward_length": 14336,
    "llama.rope.dimension_count": 128,
    "llama.rope.freq_base": 500000,
    "llama.vocab_size": 128256,
    "tokenizer.ggml.bos_token_id": 128000,
    "tokenizer.ggml.eos_token_id": 128009,
    "tokenizer.ggml.merges": [],            // populates if `verbose=true`
    "tokenizer.ggml.model": "gpt2",
    "tokenizer.ggml.pre": "llama-bpe",
    "tokenizer.ggml.token_type": [],        // populates if `verbose=true`
    "tokenizer.ggml.tokens": []             // populates if `verbose=true`
  }
 }
 ```
@@ -1140,7 +999,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings
 ```shell
-POST /api/embed
+POST /api/embeddings
 ```
 Generate embeddings from a model
@@ -1148,11 +1007,10 @@ Generate embeddings from a model
 ### Parameters
 - `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
+- `prompt`: text to generate embeddings for
 Advanced parameters:
 - `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -1161,9 +1019,9 @@ Advanced parameters:
 #### Request
 ```shell
-curl http://localhost:11434/api/embed -d '{
+curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
-  "input": "Why is the sky blue?"
+  "prompt": "Here is an article about llamas..."
 }'
 ```
@@ -1171,35 +1029,10 @@ curl http://localhost:11434/api/embed -d '{
 ```json
 {
-  "model": "all-minilm",
+  "embedding": [
-  "embeddings": [[
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
+  ]
  ]]
 }
 ```
 #### Request (Multiple input)
 ```shell
 curl http://localhost:11434/api/embed -d '{
  "model": "all-minilm",
  "input": ["Why is the sky blue?", "Why is the grass green?"]
 }'
 ```
 #### Response
 ```json
 {
  "model": "all-minilm",
  "embeddings": [[
    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
  ],[
    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
  ]]
 }
 ```
@@ -1246,45 +1079,3 @@ A single JSON object will be returned.
  ]
 }
 ```
 ## Generate Embedding
 > Note: this endpoint has been superseded by `/api/embed`
 ```shell
 POST /api/embeddings
 ```
 Generate embeddings from a model
 ### Parameters
 - `model`: name of model to generate embeddings from
 - `prompt`: text to generate embeddings for
 Advanced parameters:
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 ### Examples
 #### Request
 ```shell
 curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
  "prompt": "Here is an article about llamas..."
 }'
 ```
 #### Response
 ```json
 {
  "embedding": [
    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
  ]
 }
 ```
--- a/docs/development.md
+++ b/docs/development.md
@@ -104,7 +104,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./...
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
 go build .
 ```
@@ -114,18 +114,15 @@ If you have Docker available, you can build linux binaries with `./scripts/build
 ### Windows
-Note: The Windows build for Ollama is still under development.
+Note: The windows build for Ollama is still under development.
-First, install required tools:
+Install required tools:
 - MSVC toolchain - C/C++ and cmake as minimal requirements
 - Go version 1.22 or higher
 - MinGW (pick one variant) with GCC.
  - [MinGW-w64](https://www.mingw-w64.org/)
  - [MSYS2](https://www.msys2.org/)
 - The `ThreadJob` Powershell module: `Install-Module -Name ThreadJob -Scope CurrentUser`
 Then, build the `ollama` binary:
 ```powershell
 $env:CGO_ENABLED="1"
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -1,71 +1,71 @@
-# Ollama Docker image
+# Ollama Docker image
-
+
-### CPU only
+### CPU only
-
+
-```bash
+```bash
-docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
+```
-
+
-### Nvidia GPU
+### Nvidia GPU
-Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
+Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
-
+
-#### Install with Apt
+#### Install with Apt
-1.  Configure the repository
+1.  Configure the repository
-```bash
+```bash
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
-    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
-    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
-    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-sudo apt-get update
+sudo apt-get update
-```
+```
-2.  Install the NVIDIA Container Toolkit packages
+2.  Install the NVIDIA Container Toolkit packages
-```bash
+```bash
-sudo apt-get install -y nvidia-container-toolkit
+sudo apt-get install -y nvidia-container-toolkit
-```
+```
-
+
-#### Install with Yum or Dnf
+#### Install with Yum or Dnf
-1.  Configure the repository
+1.  Configure the repository
-
+    
-```bash
+```bash
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
-    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
+    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
-```
+```
-
+    
-2. Install the NVIDIA Container Toolkit packages
+2. Install the NVIDIA Container Toolkit packages
-
+    
-```bash
+```bash
-sudo yum install -y nvidia-container-toolkit
+sudo yum install -y nvidia-container-toolkit
-```
+```
-
+
-#### Configure Docker to use Nvidia driver
+#### Configure Docker to use Nvidia driver 
-```
+```
-sudo nvidia-ctk runtime configure --runtime=docker
+sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
+sudo systemctl restart docker
-```
+```
-
+
-#### Start the container
+#### Start the container
-
+
-```bash
+```bash
-docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
+```
-
+
-### AMD GPU
+### AMD GPU
-
+
-To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
+To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
-
+
-```
+```
-docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
-```
+```
-
+
-### Run model locally
+### Run model locally
-
+
-Now you can run a model:
+Now you can run a model:
-
+
-```
+```
-docker exec -it ollama ollama run llama3.1
+docker exec -it ollama ollama run llama3
-```
+```
-
+
-### Try different models
+### Try different models
-
+
-More models can be found on the [Ollama library](https://ollama.com/library).
+More models can be found on the [Ollama library](https://ollama.com/library).
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -227,7 +227,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
 To preload a model using the CLI, use the command:
 ```shell
-ollama run llama3.1 ""
+ollama run llama3 ""
 ```
 ## How do I keep a model loaded in memory or make it unload immediately?
@@ -257,23 +257,3 @@ If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` AP
 ## How do I manage the maximum number of requests the Ollama server can queue?
 If too many requests are sent to the server, it will respond with a 503 error indicating the server is overloaded.  You can adjust how many requests may be queued by setting `OLLAMA_MAX_QUEUE`.
 ## How does Ollama handle concurrent requests?
 Ollama supports two levels of concurrent processing.  If your system has sufficient available memory (system memory when using CPU inference, or VRAM for GPU inference) then multiple models can be loaded at the same time.  For a given model, if there is sufficient available memory when the model is loaded, it is configured to allow parallel request processing.
 If there is insufficient available memory to load a new model request while one or more models are already loaded, all new requests will be queued until the new model can be loaded.  As prior models become idle, one or more will be unloaded to make room for the new model.  Queued requests will be processed in order.  When using GPU inference, new models must be able to completely fit in VRAM to allow concurrent model loads.
 Parallel request processing for a given model results in increasing the context size by the number of parallel requests.  For example, a 2K context with 4 parallel requests will result in an 8K context and additional memory allocation.
 The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms:
 - `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory.  The default is 3 * the number of GPUs or 3 for CPU inference.
 - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time.  The default will auto-select either 4 or 1 based on available memory.
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512.
 Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting.  Once ROCm v6.2 is available, Windows Radeon will follow the defaults above.  You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPU's VRAM.
 ## How does Ollama load models on multiple GPUs?
 Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models.  When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available.  If the model will entirely fit on any single GPU, Ollama will load the model on that GPU.  This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference.  If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -8,7 +8,7 @@ Check your compute compatibility to see if your card is supported:
 | Compute Capability | Family              | Cards                                                                                                       |
 | ------------------ | ------------------- | ----------------------------------------------------------------------------------------------------------- |
 | 9.0                | NVIDIA              | `H100`                                                                                                      |
-| 8.9                | GeForce RTX 40xx    | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060`  |
+| 8.9                | GeForce RTX 40xx    | `RTX 4090` `RTX 4080` `RTX 4070 Ti` `RTX 4060 Ti`                                                           |
 |                    | NVIDIA Professional | `L4` `L40` `RTX 6000`                                                                                       |
 | 8.6                | GeForce RTX 30xx    | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060`         |
 |                    | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2`                          |
@@ -18,7 +18,7 @@ Check your compute compatibility to see if your card is supported:
 |                    | Quadro              | `RTX 8000` `RTX 6000` `RTX 5000` `RTX 4000`                                                                 |
 | 7.0                | NVIDIA              | `TITAN V` `V100` `Quadro GV100`                                                                             |
 | 6.1                | NVIDIA TITAN        | `TITAN Xp` `TITAN X`                                                                                        |
-|                    | GeForce GTX         | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050 Ti` `GTX 1050`                       |
+|                    | GeForce GTX         | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050`                                     |
 |                    | Quadro              | `P6000` `P5200` `P4200` `P3200` `P5000` `P4000` `P3000` `P2200` `P2000` `P1000` `P620` `P600` `P500` `P520` |
 |                    | Tesla               | `P40` `P4`                                                                                                  |
 | 6.0                | NVIDIA              | `Tesla P100` `Quadro GP100`                                                                                 |
@@ -46,24 +46,13 @@ sudo modprobe nvidia_uvm`
 ## AMD Radeon
 Ollama supports the following AMD GPUs:
 ### Linux Support
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
 | AMD Instinct   | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50`                                                               |
-### Windows Support
+### Overrides
 With ROCm v6.1, the following GPUs are supported on Windows.
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |
 ### Overrides on Linux
 Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
 some cases you can force the system to try to use a similar LLVM target that is
 close.  For example, the Radeon RX 5400 is `gfx1034` (also known as 10.3.4)
@@ -74,7 +63,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the
 server.  If you have an unsupported AMD GPU you can experiment using the list of
 supported types below.
-At this time, the known supported GPU types on linux are the following LLVM Targets.
+At this time, the known supported GPU types are the following LLVM Targets.
 This table shows some example GPUs that map to these LLVM targets:
 | **LLVM Target** | **An Example GPU** |
 |-----------------|---------------------|
--- a/docs/import.md
+++ b/docs/import.md
@@ -47,13 +47,19 @@ success
 ### Supported Quantizations
 <details>
 <summary>Legacy Quantization</summary>
 - `Q4_0`
 - `Q4_1`
 - `Q5_0`
 - `Q5_1`
 - `Q8_0`
-#### K-means Quantizations
+</details>
 <details>
 <summary>K-means Quantization</summary>
 - `Q3_K_S`
 - `Q3_K_M`
@@ -64,6 +70,11 @@ success
 - `Q5_K_M`
 - `Q6_K`
 </details>
 > [!NOTE]
 > Activation-aware Weight Quantization (i.e. IQ) is not currently supported for automatic quantization; however, you can still import the quantized model into Ollama. See [Import GGUF](#import-gguf).
 ## Template Detection
 > [!NOTE]
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -1,7 +1,6 @@
 # Ollama Model File
-> [!NOTE]
+> Note: `Modelfile` syntax is in development
 > `Modelfile` syntax is in development
 A model file is the blueprint to create and share models with Ollama.
@@ -141,7 +140,6 @@ PARAMETER <parameter> <parametervalue>
 | num_predict    | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)                                                                                                                                   | int        | num_predict 42       |
 | top_k          | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)                                                                        | int        | top_k 40             |
 | top_p          | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)                                                                 | float      | top_p 0.9            |
 | min_p          | An alternative to top_p that aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, tokens with a probability less than 0.045 are filtered out. (Default: 0.0) | float      | min_p 0.05            |
 ### TEMPLATE
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -27,37 +27,6 @@ chat_completion = client.chat.completions.create(
    ],
    model='llama3',
 )
 response = client.chat.completions.create(
    model="llava",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtW
QIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mz
dvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
                },
            ],
        }
    ],
    max_tokens=300,
 )
 completion = client.completions.create(
    model="llama3",
    prompt="Say this is a test",
 )
 list_completion = client.models.list()
 model = client.models.retrieve("llama3")
 embeddings = client.embeddings.create(
    model="all-minilm",
    input=["why is the sky blue?", "why is the grass green?"],
 )
 ```
 ### OpenAI JavaScript library
@@ -73,44 +42,14 @@ const openai = new OpenAI({
 })
 const chatCompletion = await openai.chat.completions.create({
-    messages: [{ role: 'user', content: 'Say this is a test' }],
+  messages: [{ role: 'user', content: 'Say this is a test' }],
-    model: 'llama3',
+  model: 'llama3',
 })
 const response = await openai.chat.completions.create({
    model: "llava",
    messages: [
        {
        role: "user",
        content: [
            { type: "text", text: "What's in this image?" },
            {
            type: "image_url",
            image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wN
tLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LO
NA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
            },
        ],
        },
    ],
 })
 const completion = await openai.completions.create({
    model: "llama3",
    prompt: "Say this is a test.",
 })
 const listCompletion = await openai.models.list()
 const model = await openai.models.retrieve("llama3")
 const embedding = await openai.embeddings.create({
  model: "all-minilm",
  input: ["why is the sky blue?", "why is the grass green?"],
 })
 ```
 ### `curl`
-``` shell
+```
 curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
@@ -126,48 +65,6 @@ curl http://localhost:11434/v1/chat/completions \
            }
        ]
    }'
 curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llava",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "What'\''s in this image?"
          },
          {
            "type": "image_url",
            "image_url": {
               "url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNt
LSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LON
A8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }'
 curl http://localhost:11434/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "llama3",
        "prompt": "Say this is a test"
    }'
 curl http://localhost:11434/v1/models
 curl http://localhost:11434/v1/models/llama3
 curl http://localhost:11434/v1/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "all-minilm",
        "input": ["why is the sky blue?", "why is the grass green?"]
    }'
 ```
 ## Endpoints
@@ -180,9 +77,8 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
 - [x] Vision
 - [x] Tools (streaming support coming soon)
 - [ ] Vision
 - [ ] Function calling
 - [ ] Logprobs
 #### Supported request fields
@@ -190,10 +86,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `model`
 - [x] `messages`
  - [x] Text `content`
-  - [x] Image `content`
+  - [ ] Array of `content` parts
    - [x] Base64 encoded image
    - [ ] Image URL
  - [x] Array of `content` parts
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `response_format`
@@ -203,72 +96,17 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
- [x] `tools`
+- [ ] `logit_bias`
 - [ ] `tools`
 - [ ] `tool_choice`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 ### `/v1/completions`
 #### Supported features
 - [x] Completions
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
 - [ ] Logprobs
 #### Supported request fields
 - [x] `model`
 - [x] `prompt`
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `seed`
 - [x] `stop`
 - [x] `stream`
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
 - [x] `suffix`
 - [ ] `best_of`
 - [ ] `echo`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 #### Notes
- `prompt` currently only accepts a string
+- Setting `seed` will always set `temperature` to `0`
-
+- `finish_reason` will always be `stop`
-### `/v1/models`
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/models/{model}`
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/embeddings`
 #### Supported request fields
 - [x] `model`
 - [x] `input`
  - [x] string
  - [x] array of strings
  - [ ] array of tokens
  - [ ] array of token arrays
 - [ ] `encoding_format`
 - [ ] `dimensions`
 - [ ] `user`
 ## Models
--- a/docs/template.md
+++ b/docs/template.md
@@ -1,173 +0,0 @@
 # Template
 Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
 ## Basic Template Structure
 A basic Go template consists of three main parts:
 * **Layout**: The overall structure of the template.
 * **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
 * **Functions**: Custom functions or logic that can be used to manipulate the template's content.
 Here's an example of a simple chat template:
 ```gotmpl
 {{- range .Messages }}
 {{ .Role }}: {{ .Content }}
 {{- end }}
 ```
 In this example, we have:
 * A basic messages structure (layout)
 * Three variables: `Messages`, `Role`, and `Content` (variables)
 * A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
 ## Adding templates to your model
 By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
 Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
 To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
 ```dockerfile
 FROM llama3
 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
 {{ .System }}<|eot_id|>
 {{- end }}
 {{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
 {{ .Content }}<|eot_id|>
 {{- end }}<|start_header_id|>assistant<|end_header_id|>
 """
 ```
 ## Variables
 `System` (string): system prompt
 `Prompt` (string): user prompt
 `Response` (string): assistant response
 `Suffix` (string): text inserted after the assistant's response
 `Messages` (list): list of messages
 `Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
 `Messages[].Content` (string):  message content
 `Messages[].ToolCalls` (list): list of tools the model wants to call
 `Messages[].ToolCalls[].Function` (object): function to call
 `Messages[].ToolCalls[].Function.Name` (string): function name
 `Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
 `Tools` (list): list of tools the model can access
 `Tools[].Type` (string): schema type. `type` is always `function`
 `Tools[].Function` (object): function definition
 `Tools[].Function.Name` (string): function name
 `Tools[].Function.Description` (string): function description
 `Tools[].Function.Parameters` (object): function parameters
 `Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
 `Tools[].Function.Parameters.Required` (list): list of required properties
 `Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
 `Tools[].Function.Parameters.Properties[].Type` (string): property type
 `Tools[].Function.Parameters.Properties[].Description` (string): property description
 `Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
 ## Tips and Best Practices
 Keep the following tips and best practices in mind when working with Go templates:
 * **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
 * **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
 * **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
 ## Examples
 ### Example Messages
 #### ChatML
 ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
 ```gotmpl
 {{- if .System }}<|im_start|>system
 {{ .System }}<|im_end|>
 {{ end }}
 {{- range .Messages }}<|im_start|>{{ .Role }}
 {{ .Content }}<|im_end|>
 {{ end }}<|im_start|>assistant
 {{ else }}
 {{ if .System }}<|im_start|>system
 {{ .System }}<|im_end|>
 ```
 ### Example Tools
 Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
 #### Mistral
 Mistral v0.3 and Mixtral 8x22B support tool calling.
 ```gotmpl
 {{- range $index, $_ := .Messages }}
 {{- if eq .Role "user" }}
 {{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
 {{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
 {{ end }}{{ .Content }}[/INST]
 {{- else if eq .Role "assistant" }}
 {{- if .Content }} {{ .Content }}</s>
 {{- else if .ToolCalls }}[TOOL_CALLS] [
 {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
 {{- end }}]</s>
 {{- end }}
 {{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
 {{- end }}
 {{- end }}
 ```
 ### Example Fill-in-Middle
 Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
 #### CodeLlama
 CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
 ```gotmpl
 <PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
 ```
 > [!NOTE]
 > CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
 #### Codestral
 Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
 ```gotmpl
 [SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
 ```
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:
 ```shell
-journalctl -u ollama --no-pager
+journalctl -u ollama
 ```
 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
@@ -22,7 +22,7 @@ docker logs <container-name>
 If manually running `ollama serve` in a terminal, the logs will be on that terminal.
 When you run Ollama on **Windows**, there are a few different locations. You can view them in the explorer window by hitting `<cmd>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` to view logs.  The most recent server logs will be in `server.log` and older logs will be in `server-#.log` 
+- `explorer %LOCALAPPDATA%\Ollama` to view logs
 - `explorer %LOCALAPPDATA%\Programs\Ollama` to browse the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` to browse where models and configuration are stored
 - `explorer %TEMP%` where temporary executable files are stored in one or more `ollama*` directories
@@ -70,18 +70,14 @@ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
 If your system is configured with the "noexec" flag where Ollama stores its temporary executable files, you can specify an alternate location by setting OLLAMA_TMPDIR to a location writable by the user ollama runs as. For example OLLAMA_TMPDIR=/usr/share/ollama/
-## NVIDIA GPU Discovery
+## Container fails to run on NVIDIA GPU
-When Ollama starts up, it takes inventory of the GPUs present in the system to determine compatibility and how much VRAM is available.  Sometimes this discovery can fail to find your GPUs.  In general, running the latest driver will yield the best results.
+Make sure you've set up the container runtime first as described in [docker.md](./docker.md)
-### Linux NVIDIA Troubleshooting
+Sometimes the container runtime can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem
-If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker.md](./docker.md)
+- Is the container runtime working?  Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama wont be able to see your NVIDIA GPU.
-
+- Is the uvm driver not loaded? `sudo nvidia-modprobe -u`
 Sometimes Ollama can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem:
 - If you are using a container, is the container runtime working?  Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama won't be able to see your NVIDIA GPU.
 - Is the uvm driver loaded? `sudo nvidia-modprobe -u`
 - Try reloading the nvidia_uvm driver - `sudo rmmod nvidia_uvm` then `sudo modprobe nvidia_uvm`
 - Try rebooting
 - Make sure you're running the latest nvidia drivers
@@ -89,8 +85,3 @@ Sometimes the Ollama can have difficulties initializing the GPU. When you check
 If none of those resolve the problem, gather additional information and file an issue:
 - Set `CUDA_ERROR_LEVEL=50` and try again to get more diagnostic logs
 - Check dmesg for any errors `sudo dmesg | grep -i nvrm` and `sudo dmesg | grep -i nvidia`
 ## Windows Terminal Errors
 Older versions of Windows 10 (e.g., 21H1) are known to have a bug where the standard terminal program does not display control characters correctly.  This can result in a long sequence of strings like `←[?25h←[?25l` being displayed, sometimes erroring with `The parameter is incorrect`.  To resolve this problem, please update to Windows 10 22H1 or newer.
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
 const ollama = new Ollama({
  baseUrl: "http://localhost:11434",
-  model: "llama3.1",
+  model: "llama3",
 });
 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```
-That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
 ```bash
 npm install cheerio
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -19,12 +19,10 @@ Logs will often be helpful in diagnosing the problem (see
 ## System Requirements
-* Windows 10 22H2 or newer, Home or Pro
+* Windows 10 or newer, Home or Pro
 * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
 * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
 Ollama uses unicode characters for progress indication, which may render as unknown squares in some older terminal fonts in Windows 10. If you see this, try changing your terminal font settings.
 ## API Access
 Here's a quick example showing API access from `powershell`
@@ -41,8 +39,8 @@ server.
 Ollama on Windows stores files in a few different locations.  You can view them in
 the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
-    - *app.log* contains most resent logs from the GUI application
+    - *app.log* contains logs from the GUI application
-    - *server.log* contains the most recent server logs
+    - *server.log* contains the server logs
    - *upgrade.log* contains log output for upgrades
 - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -1,29 +1,272 @@
 package envconfig
 import (
 	"errors"
 	"fmt"
 	"log/slog"
 	"math"
 	"net"
 	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
 )
-// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
+type OllamaHost struct {
-// Default is scheme "http" and host "127.0.0.1:11434"
+	Scheme string
-func Host() *url.URL {
+	Host   string
 	Port   string
 }
 // String renders the host as "<scheme>://<host>:<port>".
 func (o OllamaHost) String() string {
 	return o.Scheme + "://" + o.Host + ":" + o.Port
 }
 // ErrInvalidHostPort is returned by getOllamaHost, together with a host that
 // carries the default port, when the port in OLLAMA_HOST fails to parse or
 // lies outside 0-65535.
 var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
 // Package-level settings. LoadConfig assigns each of them from the
 // environment; init seeds the numeric defaults before the first load.
 var (
 	// Set via OLLAMA_ORIGINS in the environment
 	AllowOrigins []string
 	// Set via OLLAMA_DEBUG in the environment
 	Debug bool
 	// Experimental flash attention
 	// Set via OLLAMA_FLASH_ATTENTION in the environment
 	FlashAttention bool
 	// Set via OLLAMA_HOST in the environment
 	Host *OllamaHost
 	// Set via OLLAMA_KEEP_ALIVE in the environment
 	KeepAlive string
 	// Set via OLLAMA_LLM_LIBRARY in the environment
 	LLMLibrary string
 	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
 	MaxRunners int
 	// Set via OLLAMA_MAX_QUEUE in the environment
 	MaxQueuedRequests int
 	// Set via OLLAMA_MODELS in the environment
 	ModelsDir string
 	// Set via OLLAMA_MAX_VRAM in the environment
 	MaxVRAM uint64
 	// Set via OLLAMA_NOHISTORY in the environment
 	NoHistory bool
 	// Set via OLLAMA_NOPRUNE in the environment
 	NoPrune bool
 	// Set via OLLAMA_NUM_PARALLEL in the environment
 	NumParallel int
 	// Set via OLLAMA_RUNNERS_DIR in the environment
 	RunnersDir string
 	// Set via OLLAMA_TMPDIR in the environment
 	TmpDir string
 )
 // EnvVar describes one supported environment variable as reported by AsMap.
 type EnvVar struct {
 	// Name is the environment variable name, e.g. "OLLAMA_DEBUG".
 	Name        string
 	// Value is the setting's current value; its dynamic type varies per setting.
 	Value       any
 	// Description is a short human-readable explanation of the setting.
 	Description string
 }
 // AsMap returns a snapshot of every supported setting, keyed by the name of
 // its environment variable.
 func AsMap() map[string]EnvVar {
 	settings := []EnvVar{
 		{"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		{"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
 		{"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		{"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
 		{"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
 		{"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
 		{"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
 		{"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
 		{"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
 		{"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
 		{"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
 		{"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
 		{"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
 		{"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
 		{"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
 	}
 	// Each entry is indexed under its own Name.
 	byName := make(map[string]EnvVar, len(settings))
 	for _, setting := range settings {
 		byName[setting.Name] = setting
 	}
 	return byName
 }
 // Values returns every setting from AsMap with its value rendered in Go's
 // default (%v) text form.
 func Values() map[string]string {
 	settings := AsMap()
 	rendered := make(map[string]string, len(settings))
 	for name, setting := range settings {
 		rendered[name] = fmt.Sprint(setting.Value)
 	}
 	return rendered
 }
 // defaultAllowOrigins lists the hosts LoadConfig always adds to AllowOrigins,
 // each as http and https, with and without a "*" port wildcard.
 var defaultAllowOrigins = []string{
 	"localhost",
 	"127.0.0.1",
 	"0.0.0.0",
 }
 // Clean quotes and spaces from the value
 func clean(key string) string {
 	return strings.Trim(os.Getenv(key), "\"' ")
 }
 // init seeds the numeric defaults and then applies the environment on top.
 func init() {
 	// default values
 	NumParallel, MaxRunners, MaxQueuedRequests = 1, 1, 512
 	LoadConfig()
 }
 // LoadConfig reads the supported OLLAMA_* environment variables and stores
 // the results in the package-level settings. It may be called more than once
 // (init calls it, and callers can re-invoke it after changing the
 // environment). Invalid numeric values are logged and the previous value is
 // kept.
 func LoadConfig() {
 	// Any non-empty value enables debug unless it parses as an explicit false.
 	if debug := clean("OLLAMA_DEBUG"); debug != "" {
 		d, err := strconv.ParseBool(debug)
 		if err == nil {
 			Debug = d
 		} else {
 			Debug = true
 		}
 	}
 	// Unlike OLLAMA_DEBUG, an unparseable value leaves the setting untouched.
 	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
 		d, err := strconv.ParseBool(fa)
 		if err == nil {
 			FlashAttention = d
 		}
 	}
 	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
 	if runtime.GOOS == "windows" && RunnersDir == "" {
 		// On Windows we do not carry the payloads inside the main executable
 		appExe, err := os.Executable()
 		if err != nil {
 			slog.Error("failed to lookup executable path", "error", err)
 		}
 		cwd, err := os.Getwd()
 		if err != nil {
 			slog.Error("failed to lookup working directory", "error", err)
 		}
 		var paths []string
 		for _, root := range []string{filepath.Dir(appExe), cwd} {
 			paths = append(paths,
 				root,
 				filepath.Join(root, "windows-"+runtime.GOARCH),
 				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 			)
 		}
 		// Try a few variations to improve developer experience when building from source in the local tree
 		for _, p := range paths {
 			candidate := filepath.Join(p, "ollama_runners")
 			_, err := os.Stat(candidate)
 			if err == nil {
 				RunnersDir = candidate
 				break
 			}
 		}
 		if RunnersDir == "" {
 			slog.Error("unable to locate llm runner directory.  Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
 		}
 	}
 	TmpDir = clean("OLLAMA_TMPDIR")
 	userLimit := clean("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseUint(userLimit, 10, 64)
 		if err != nil {
 			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
 		} else {
 			MaxVRAM = avail
 		}
 	}
 	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
 	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
 		val, err := strconv.Atoi(onp)
 		if err != nil || val <= 0 {
 			slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
 		} else {
 			NumParallel = val
 		}
 	}
 	// For these two flags any non-empty value, even "false" or "0", enables them.
 	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
 		NoHistory = true
 	}
 	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
 		NoPrune = true
 	}
 	// Rebuild the origin list from scratch: without the reset, every repeated
 	// LoadConfig call would append another copy of the defaults below.
 	AllowOrigins = nil
 	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
 		AllowOrigins = strings.Split(origins, ",")
 	}
 	for _, allowOrigin := range defaultAllowOrigins {
 		AllowOrigins = append(AllowOrigins,
 			fmt.Sprintf("http://%s", allowOrigin),
 			fmt.Sprintf("https://%s", allowOrigin),
 			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
 			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
 		)
 	}
 	AllowOrigins = append(AllowOrigins,
 		"app://*",
 		"file://*",
 		"tauri://*",
 	)
 	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
 	if maxRunners != "" {
 		m, err := strconv.Atoi(maxRunners)
 		if err != nil {
 			slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
 		} else {
 			MaxRunners = m
 		}
 	}
 	// Read through clean like every other setting so quoted or padded values
 	// are accepted.
 	if onp := clean("OLLAMA_MAX_QUEUE"); onp != "" {
 		p, err := strconv.Atoi(onp)
 		if err != nil || p <= 0 {
 			slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
 		} else {
 			MaxQueuedRequests = p
 		}
 	}
 	KeepAlive = clean("OLLAMA_KEEP_ALIVE")
 	var err error
 	ModelsDir, err = getModelsDir()
 	if err != nil {
 		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
 	}
 	// On ErrInvalidHostPort the returned host carries the default port, so
 	// Host.Port is safe to log here.
 	Host, err = getOllamaHost()
 	if err != nil {
 		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
 	}
 }
 func getModelsDir() (string, error) {
 	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
 		return models, nil
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	return filepath.Join(home, ".ollama", "models"), nil
 }
 func getOllamaHost() (*OllamaHost, error) {
 	defaultPort := "11434"
-	s := strings.TrimSpace(Var("OLLAMA_HOST"))
+	hostVar := os.Getenv("OLLAMA_HOST")
-	scheme, hostport, ok := strings.Cut(s, "://")
+	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
 	scheme, hostport, ok := strings.Cut(hostVar, "://")
 	switch {
 	case !ok:
-		scheme, hostport = "http", s
+		scheme, hostport = "http", hostVar
 	case scheme == "http":
 		defaultPort = "80"
 	case scheme == "https":
@@ -43,242 +286,17 @@ func Host() *url.URL {
 		}
 	}
-	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
+	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
-		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
+		return &OllamaHost{
 		return &url.URL{
 			Scheme: scheme,
-			Host:   net.JoinHostPort(host, defaultPort),
+			Host:   host,
-		}
+			Port:   defaultPort,
 		}, ErrInvalidHostPort
 	}
-	return &url.URL{
+	return &OllamaHost{
 		Scheme: scheme,
-		Host:   net.JoinHostPort(host, port),
+		Host:   host,
-	}
+		Port:   port,
-}
+	}, nil
 // Origins returns the allowed origins: any comma-separated entries from the
 // OLLAMA_ORIGINS environment variable, followed by the built-in local hosts
 // (http and https, with and without a "*" port) and the app://, file:// and
 // tauri:// wildcards.
 func Origins() (origins []string) {
 	if configured := Var("OLLAMA_ORIGINS"); configured != "" {
 		origins = strings.Split(configured, ",")
 	}
 	for _, host := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
 		anyPort := net.JoinHostPort(host, "*")
 		origins = append(origins,
 			"http://"+host,
 			"https://"+host,
 			"http://"+anyPort,
 			"https://"+anyPort,
 		)
 	}
 	return append(origins, "app://*", "file://*", "tauri://*")
 }
 // Models returns the models directory. A non-empty OLLAMA_MODELS environment
 // variable wins; otherwise the result is $HOME/.ollama/models.
 // It panics when the home directory cannot be determined.
 func Models() string {
 	dir := Var("OLLAMA_MODELS")
 	if dir == "" {
 		home, err := os.UserHomeDir()
 		if err != nil {
 			panic(err)
 		}
 		dir = filepath.Join(home, ".ollama", "models")
 	}
 	return dir
 }
 // KeepAlive returns how long models stay loaded in memory, as configured by
 // the OLLAMA_KEEP_ALIVE environment variable. The value may be a Go duration
 // string or a bare integer number of seconds; anything else is ignored.
 // Negative values are treated as infinite. Zero is treated as no keep alive.
 // Default is 5 minutes.
 func KeepAlive() (keepAlive time.Duration) {
 	keepAlive = 5 * time.Minute
 	if raw := Var("OLLAMA_KEEP_ALIVE"); raw != "" {
 		parsed, durErr := time.ParseDuration(raw)
 		if durErr == nil {
 			keepAlive = parsed
 		} else if secs, intErr := strconv.ParseInt(raw, 10, 64); intErr == nil {
 			keepAlive = time.Duration(secs) * time.Second
 		}
 	}
 	if keepAlive >= 0 {
 		return keepAlive
 	}
 	// Negative means "never unload".
 	return time.Duration(math.MaxInt64)
 }
 // Bool returns a getter for the boolean environment variable k. The getter
 // reads the variable on every call: unset or empty yields false, a value
 // strconv.ParseBool understands yields that value, and any other non-empty
 // value yields true.
 func Bool(k string) func() bool {
 	return func() bool {
 		raw := Var(k)
 		if raw == "" {
 			return false
 		}
 		if parsed, err := strconv.ParseBool(raw); err == nil {
 			return parsed
 		}
 		return true
 	}
 }
 // Boolean settings. Each is a getter built by Bool, so the environment is
 // consulted every time the getter is called.
 var (
 	// Debug enables additional debug information (OLLAMA_DEBUG).
 	Debug = Bool("OLLAMA_DEBUG")
 	// FlashAttention enables the experimental flash attention feature (OLLAMA_FLASH_ATTENTION).
 	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
 	// NoHistory disables readline history (OLLAMA_NOHISTORY).
 	NoHistory = Bool("OLLAMA_NOHISTORY")
 	// NoPrune disables pruning of model blobs on startup (OLLAMA_NOPRUNE).
 	NoPrune = Bool("OLLAMA_NOPRUNE")
 	// SchedSpread allows scheduling models across all GPUs (OLLAMA_SCHED_SPREAD).
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
 	// IntelGPU enables experimental Intel GPU detection (OLLAMA_INTEL_GPU).
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 )
 // String returns a getter that reads environment variable s through Var on
 // every call.
 func String(s string) func() string {
 	getter := func() string { return Var(s) }
 	return getter
 }
 // String settings. Each is a getter built by String, so the environment is
 // consulted every time the getter is called.
 var (
 	// LLMLibrary reads OLLAMA_LLM_LIBRARY.
 	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
 	// TmpDir reads OLLAMA_TMPDIR.
 	TmpDir     = String("OLLAMA_TMPDIR")
 	// GPU selection/override variables, read verbatim from the environment.
 	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
 	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 // RunnersDir returns the directory holding the llm runners. A non-empty
 // OLLAMA_RUNNERS_DIR always wins. On non-Windows systems the result is
 // otherwise empty. On Windows it probes for an "ollama_runners" directory
 // next to the executable and the working directory (plus their
 // "windows-<arch>" and "dist/windows-<arch>" subdirectories) and logs an
 // error when nothing is found.
 func RunnersDir() (p string) {
 	if override := Var("OLLAMA_RUNNERS_DIR"); override != "" {
 		return override
 	}
 	if runtime.GOOS != "windows" {
 		return
 	}
 	// Runs on every return path below, including the early error returns.
 	defer func() {
 		if p == "" {
 			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
 		}
 	}()
 	// On Windows we do not carry the payloads inside the main executable
 	exe, err := os.Executable()
 	if err != nil {
 		return
 	}
 	cwd, err := os.Getwd()
 	if err != nil {
 		return
 	}
 	archDir := "windows-" + runtime.GOARCH
 	var roots []string
 	for _, base := range []string{filepath.Dir(exe), cwd} {
 		roots = append(roots,
 			base,
 			filepath.Join(base, archDir),
 			filepath.Join(base, "dist", archDir),
 		)
 	}
 	// Try a few variations to improve developer experience when building from source in the local tree
 	for _, root := range roots {
 		candidate := filepath.Join(root, "ollama_runners")
 		if _, statErr := os.Stat(candidate); statErr == nil {
 			return candidate
 		}
 	}
 	return
 }
 // Uint returns a getter for the unsigned integer environment variable key.
 // The getter reads the variable on every call and returns defaultValue when
 // the variable is unset, empty, not a base-10 unsigned integer, or too large
 // for the platform's uint (a warning is logged in the invalid cases).
 func Uint(key string, defaultValue uint) func() uint {
 	return func() uint {
 		if s := Var(key); s != "" {
 			// Parse at the width of uint so an out-of-range value is rejected
 			// here instead of being silently truncated by the conversion below.
 			if n, err := strconv.ParseUint(s, 10, strconv.IntSize); err != nil {
 				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
 			} else {
 				return uint(n)
 			}
 		}
 		return defaultValue
 	}
 }
 // Unsigned integer settings. Each is a getter built by Uint; the second
 // argument is the value returned when the variable is unset or invalid.
 var (
 	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
 	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
 	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
 	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
 	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
 	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
 )
 // EnvVar describes one supported environment variable as reported by AsMap.
 type EnvVar struct {
 	// Name is the environment variable name, e.g. "OLLAMA_DEBUG".
 	Name        string
 	// Value is the setting's value at the time AsMap was called.
 	Value       any
 	// Description is a short human-readable explanation of the setting.
 	Description string
 }
 // AsMap evaluates every supported setting and returns the results keyed by
 // environment variable name. The GPU-related variables are included on every
 // platform except darwin.
 func AsMap() map[string]EnvVar {
 	settings := []EnvVar{
 		{"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		{"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
 		{"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		{"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
 		{"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
 		{"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
 		{"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
 		{"OLLAMA_MODELS", Models(), "The path to the models directory"},
 		{"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		{"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		{"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
 		{"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
 		{"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
 		{"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		{"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 	}
 	if runtime.GOOS != "darwin" {
 		settings = append(settings,
 			EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"},
 			EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"},
 			EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"},
 			EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"},
 			EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"},
 			EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"},
 		)
 	}
 	// Each entry is indexed under its own Name.
 	ret := make(map[string]EnvVar, len(settings))
 	for _, setting := range settings {
 		ret[setting.Name] = setting
 	}
 	return ret
 }
 // Values returns every setting from AsMap with its value rendered in Go's
 // default (%v) text form.
 func Values() map[string]string {
 	all := AsMap()
 	out := make(map[string]string, len(all))
 	for name, entry := range all {
 		out[name] = fmt.Sprint(entry.Value)
 	}
 	return out
 }
 // Var returns an environment variable stripped of leading and trailing quotes or spaces
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -1,234 +1,70 @@
 package envconfig
 import (
-	"math"
+	"fmt"
 	"net"
 	"testing"
 	"time"
-	"github.com/google/go-cmp/cmp"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
-func TestHost(t *testing.T) {
+func TestConfig(t *testing.T) {
-	cases := map[string]struct {
+	Debug = false // Reset whatever was loaded in init()
 	t.Setenv("OLLAMA_DEBUG", "")
 	LoadConfig()
 	require.False(t, Debug)
 	t.Setenv("OLLAMA_DEBUG", "false")
 	LoadConfig()
 	require.False(t, Debug)
 	t.Setenv("OLLAMA_DEBUG", "1")
 	LoadConfig()
 	require.True(t, Debug)
 	t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
 	LoadConfig()
 	require.True(t, FlashAttention)
 }
 func TestClientFromEnvironment(t *testing.T) {
 	type testCase struct {
 		value  string
 		expect string
-	}{
+		err    error
 		"empty":               {"", "127.0.0.1:11434"},
 		"only address":        {"1.2.3.4", "1.2.3.4:11434"},
 		"only port":           {":1234", ":1234"},
 		"address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
 		"hostname":            {"example.com", "example.com:11434"},
 		"hostname and port":   {"example.com:1234", "example.com:1234"},
 		"zero port":           {":0", ":0"},
 		"too large port":      {":66000", ":11434"},
 		"too small port":      {":-1", ":11434"},
 		"ipv6 localhost":      {"[::1]", "[::1]:11434"},
 		"ipv6 world open":     {"[::]", "[::]:11434"},
 		"ipv6 no brackets":    {"::1", "[::1]:11434"},
 		"ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
 		"extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
 		"extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
 		"extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
 		"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
 		"http":                {"http://1.2.3.4", "1.2.3.4:80"},
 		"http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
 		"https":               {"https://1.2.3.4", "1.2.3.4:443"},
 		"https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
 	}
-	for name, tt := range cases {
+	hostTestCases := map[string]*testCase{
-		t.Run(name, func(t *testing.T) {
+		"empty":               {value: "", expect: "127.0.0.1:11434"},
-			t.Setenv("OLLAMA_HOST", tt.value)
+		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
-			if host := Host(); host.Host != tt.expect {
+		"only port":           {value: ":1234", expect: ":1234"},
-				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
+		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
-			}
+		"hostname":            {value: "example.com", expect: "example.com:11434"},
-		})
+		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
-	}
+		"zero port":           {value: ":0", expect: ":0"},
-}
+		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
-
+		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
-func TestOrigins(t *testing.T) {
+		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
-	cases := []struct {
+		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
-		value  string
+		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
-		expect []string
+		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
-	}{
+		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
-		{"", []string{
+		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
-			"http://localhost",
+		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
-			"https://localhost",
+		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://10.0.0.1", []string{
 			"http://10.0.0.1",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://172.16.0.1,https://192.168.0.1", []string{
 			"http://172.16.0.1",
 			"https://192.168.0.1",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://totally.safe,http://definitely.legit", []string{
 			"http://totally.safe",
 			"http://definitely.legit",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 	}
 	for _, tt := range cases {
 		t.Run(tt.value, func(t *testing.T) {
 			t.Setenv("OLLAMA_ORIGINS", tt.value)
 			if diff := cmp.Diff(Origins(), tt.expect); diff != "" {
 				t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
 			}
 		})
 	}
 }
 func TestBool(t *testing.T) {
 	cases := map[string]bool{
 		"":      false,
 		"true":  true,
 		"false": false,
 		"1":     true,
 		"0":     false,
 		// invalid values
 		"random":    true,
 		"something": true,
 	}
-	for k, v := range cases {
+	for k, v := range hostTestCases {
 		t.Run(k, func(t *testing.T) {
-			t.Setenv("OLLAMA_BOOL", k)
+			t.Setenv("OLLAMA_HOST", v.value)
-			if b := Bool("OLLAMA_BOOL")(); b != v {
+			LoadConfig()
-				t.Errorf("%s: expected %t, got %t", k, v, b)
+
-			}
+			oh, err := getOllamaHost()
-		})
+			if err != v.err {
-	}
+				t.Fatalf("expected %s, got %s", v.err, err)
-}
+			}
-
+
-func TestUint(t *testing.T) {
+			if err == nil {
-	cases := map[string]uint{
+				host := net.JoinHostPort(oh.Host, oh.Port)
-		"0":    0,
+				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
 		"1":    1,
 		"1337": 1337,
 		// default values
 		"":       11434,
 		"-1":     11434,
 		"0o10":   11434,
 		"0x10":   11434,
 		"string": 11434,
 	}
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_UINT", k)
 			if i := Uint("OLLAMA_UINT", 11434)(); i != v {
 				t.Errorf("%s: expected %d, got %d", k, v, i)
 			}
 		})
 	}
 }
 // TestKeepAlive sets OLLAMA_KEEP_ALIVE to each listed value in its own subtest
 // and checks the duration that KeepAlive reports for it.
 func TestKeepAlive(t *testing.T) {
 	const fiveMinutes = 5 * time.Minute
 	maxDuration := time.Duration(math.MaxInt64)
 
 	table := []struct {
 		env  string
 		want time.Duration
 	}{
 		{"", fiveMinutes},
 		{"1s", time.Second},
 		{"1m", time.Minute},
 		{"1h", time.Hour},
 		{"5m0s", fiveMinutes},
 		{"1h2m3s", 1*time.Hour + 2*time.Minute + 3*time.Second},
 		{"0", time.Duration(0)},
 		{"60", 60 * time.Second},
 		{"120", 2 * time.Minute},
 		{"3600", time.Hour},
 		{"-0", time.Duration(0)},
 		{"-1", maxDuration},
 		{"-1m", maxDuration},
 		// invalid values
 		{" ", fiveMinutes},
 		{"???", fiveMinutes},
 		{"1d", fiveMinutes},
 		{"1y", fiveMinutes},
 		{"1w", fiveMinutes},
 	}
 
 	for _, row := range table {
 		env, want := row.env, row.want
 		t.Run(env, func(t *testing.T) {
 			t.Setenv("OLLAMA_KEEP_ALIVE", env)
 			got := KeepAlive()
 			if got == want {
 				return
 			}
 			t.Errorf("%s: expected %s, got %s", env, want, got)
 		})
 	}
 }
 func TestVar(t *testing.T) {
 	cases := map[string]string{
 		"value":       "value",
 		" value ":     "value",
 		" 'value' ":   "value",
 		` "value" `:   "value",
 		" ' value ' ": " value ",
 		` " value " `: " value ",
 	}
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_VAR", k)
 			if s := Var("OLLAMA_VAR"); s != v {
 				t.Errorf("%s: expected %q, got %q", k, v, s)
 			}
 		})
 	}
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {
 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama3.1",
+		Model:    "llama3",
 		Messages: messages,
 	}
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -16,7 +16,7 @@ func main() {
 	// By default, GenerateRequest is streaming.
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 	}
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -15,7 +15,7 @@ func main() {
 	}
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 		// set streaming to false
--- a/examples/go-http-generate/README.md
+++ b/examples/go-http-generate/README.md
--- a/examples/langchain-python-rag-document/README.md
+++ b/examples/langchain-python-rag-document/README.md
@@ -4,14 +4,6 @@ This example provides an interface for asking questions to a PDF document.
 ## Setup
 1. Ensure you have the `llama3.1` model installed:
 ```
 ollama pull llama3.1
 ```
 2. Install the Python Requirements.
 ```
 pip install -r requirements.txt
 ```
--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -51,7 +51,7 @@ while True:
        template=template,
    )
-    llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
--- a/examples/langchain-python-rag-websummary/README.md
+++ b/examples/langchain-python-rag-websummary/README.md
@@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama2` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama2
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-rag-websummary/main.py
+++ b/examples/langchain-python-rag-websummary/main.py
@@ -5,8 +5,8 @@ from langchain.chains.summarize import load_summarize_chain
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 chain = load_summarize_chain(llm, chain_type="stuff")
-result = chain.invoke(docs)
+result = chain.invoke(docs) 
 print(result)
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama
 input = input("What is your question?")
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama3.1
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
--- a/examples/modelfile-mario/readme.md
+++ b/examples/modelfile-mario/readme.md
@@ -2,12 +2,12 @@
 # Example character: Mario
-This example shows how to create a basic character using Llama3.1 as the base model.
+This example shows how to create a basic character using Llama3 as the base model.
 To run this example:
 1. Download the Modelfile
-2. `ollama pull llama3.1` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:
 ```
-FROM llama3.1
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
--- a/examples/python-dockerit/dockerit.py
+++ b/examples/python-dockerit/dockerit.py
@@ -4,7 +4,7 @@ imageName = input("Enter the name of the image: ")
 client = docker.from_env()
 s = requests.Session()
 output=""
-with s.post('http://localhost:11434/api/generate', json={'model': 'mattw/dockerit', 'prompt': inputDescription}, stream=True) as r:
+with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r:
  for line in r.iter_lines():
    if line:
      j = json.loads(line)
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
@@ -2,7 +2,7 @@ import requests
 import json
 import random
-model = "llama3.1"
+model = "llama3"
 template = {
  "firstName": "",
  "lastName": "",
--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
@@ -12,7 +12,7 @@ countries = [
    "France",
 ]
 country = random.choice(countries)
-model = "llama3.1"
+model = "llama3"
 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/examples/python-simplechat/client.py
+++ b/examples/python-simplechat/client.py
@@ -2,7 +2,7 @@ import json
 import requests
 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama3.1"  # TODO: update this for whatever model you wish to use
+model = "llama3"  # TODO: update this for whatever model you wish to use
 def chat(messages):
--- a/examples/python-simplechat/readme.md
+++ b/examples/python-simplechat/readme.md
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/examples/typescript-simplechat/client.ts
+++ b/examples/typescript-simplechat/client.ts
@@ -1,6 +1,6 @@
 import * as readline from "readline";
-const model = "llama3.1";
+const model = "llama3";
 type Message = {
  role: "assistant" | "user" | "system";
  content: string;
--- a/format/format.go
+++ b/format/format.go
@@ -3,7 +3,6 @@ package format
 import (
 	"fmt"
 	"math"
 	"strconv"
 )
 const (
@@ -29,6 +28,6 @@ func HumanNumber(b uint64) string {
 	case b >= Thousand:
 		return fmt.Sprintf("%.0fK", float64(b)/Thousand)
 	default:
-		return strconv.FormatUint(b, 10)
+		return fmt.Sprintf("%d", b)
 	}
 }
--- a/go.mod
+++ b/go.mod
@@ -18,7 +18,6 @@ require (
 require (
 	github.com/agnivade/levenshtein v1.1.1
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/google/go-cmp v0.6.0
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
@@ -72,7 +71,7 @@ require (
 	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/sys v0.20.0
 	golang.org/x/term v0.20.0
-	golang.org/x/text v0.15.0
+	golang.org/x/text v0.15.0 // indirect
 	google.golang.org/protobuf v1.34.1
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/gpu/amd_common.go
+++ b/gpu/amd_common.go
@@ -3,7 +3,7 @@
 package gpu
 import (
-	"errors"
+	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -49,17 +49,9 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 }
 func commonAMDValidateLibDir() (string, error) {
-	// Favor our bundled version
+	// We try to favor system paths first, so that we can wire up the subprocess to use
-
+	// the system version.  Only use our bundled version if the system version doesn't work
-	// Installer payload location if we're running the installed binary
+	// This gives users more recovery options if versions have subtle problems at runtime
 	exe, err := os.Executable()
 	if err == nil {
 		rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
 		if rocmLibUsable(rocmTargetDir) {
 			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
 			return rocmTargetDir, nil
 		}
 	}
 	// Prefer explicit HIP env var
 	hipPath := os.Getenv("HIP_PATH")
@@ -95,5 +87,14 @@ func commonAMDValidateLibDir() (string, error) {
 		}
 	}
-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	// Installer payload location if we're running the installed binary
 	exe, err := os.Executable()
 	if err == nil {
 		rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
 		if rocmLibUsable(rocmTargetDir) {
 			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
 			return rocmTargetDir, nil
 		}
 	}
 	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
--- a/gpu/amd_hip_windows.go
+++ b/gpu/amd_hip_windows.go
@@ -1,7 +1,6 @@
 package gpu
 import (
 	"errors"
 	"fmt"
 	"log/slog"
 	"syscall"
@@ -34,10 +33,9 @@ type HipLib struct {
 }
 func NewHipLib() (*HipLib, error) {
-	// At runtime we depend on v6, so discover GPUs with the same library for a consistent set of GPUs
+	h, err := windows.LoadLibrary("amdhip64.dll")
 	h, err := windows.LoadLibrary("amdhip64_6.dll")
 	if err != nil {
-		return nil, fmt.Errorf("unable to load amdhip64_6.dll, please make sure to upgrade to the latest amd driver: %w", err)
+		return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
 	}
 	hl := &HipLib{}
 	hl.dll = h
@@ -77,7 +75,7 @@ func (hl *HipLib) Release() {
 func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	if hl.dll == 0 {
-		return 0, 0, errors.New("dll has been unloaded")
+		return 0, 0, fmt.Errorf("dll has been unloaded")
 	}
 	var version int
 	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@@ -86,8 +84,9 @@ func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	}
 	slog.Debug("hipDriverGetVersion", "version", version)
-	driverMajor = version / 10000000
+	// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
-	driverMinor = (version - (driverMajor * 10000000)) / 100000
+	driverMajor = version / 1000
 	driverMinor = (version - (driverMajor * 1000)) / 10
 	return driverMajor, driverMinor, nil
 }
@@ -111,7 +110,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
 func (hl *HipLib) HipSetDevice(device int) error {
 	if hl.dll == 0 {
-		return errors.New("dll has been unloaded")
+		return fmt.Errorf("dll has been unloaded")
 	}
 	status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
 	if status != hipSuccess {
@@ -122,7 +121,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
 func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
 	if hl.dll == 0 {
-		return nil, errors.New("dll has been unloaded")
+		return nil, fmt.Errorf("dll has been unloaded")
 	}
 	var props hipDevicePropMinimal
 	status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@@ -135,7 +134,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
 // free, total, err
 func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
 	if hl.dll == 0 {
-		return 0, 0, errors.New("dll has been unloaded")
+		return 0, 0, fmt.Errorf("dll has been unloaded")
 	}
 	var totalMemory uint64
 	var freeMemory uint64
--- a/gpu/amd_linux.go
+++ b/gpu/amd_linux.go
@@ -10,11 +10,9 @@ import (
 	"path/filepath"
 	"regexp"
 	"slices"
 	"sort"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )
@@ -27,16 +25,7 @@ const (
 	// Prefix with the node dir
 	GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
-
+	GPUUsedMemoryFileGlob  = "mem_banks/*/used_memory"
 	// Direct Rendering Manager sysfs location
 	DRMDeviceDirGlob   = "/sys/class/drm/card*/device"
 	DRMTotalMemoryFile = "mem_info_vram_total"
 	DRMUsedMemoryFile  = "mem_info_vram_used"
 	// In hex; properties file is in decimal
 	DRMUniqueIDFile = "unique_id"
 	DRMVendorFile   = "vendor"
 	DRMDeviceFile   = "device"
 )
 var (
@@ -46,8 +35,8 @@ var (
 )
 // Gather GPU information from the amdgpu driver if any supported GPUs are detected
-func AMDGetGPUInfo() []RocmGPUInfo {
+func AMDGetGPUInfo() []GpuInfo {
-	resp := []RocmGPUInfo{}
+	resp := []GpuInfo{}
 	if !AMDDetected() {
 		return resp
 	}
@@ -61,9 +50,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
 	var visibleDevices []string
-	hipVD := envconfig.HipVisibleDevices()   // zero based index only
+	hipVD := os.Getenv("HIP_VISIBLE_DEVICES")   // zero based index only
-	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
+	rocrVD := os.Getenv("ROCR_VISIBLE_DEVICES") // zero based index or UUID, but consumer cards seem to not support UUID
-	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
+	gpuDO := os.Getenv("GPU_DEVICE_ORDINAL")    // zero based index
 	switch {
 	// TODO is this priority order right?
 	case hipVD != "":
@@ -76,27 +65,13 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		visibleDevices = strings.Split(gpuDO, ",")
 	}
-	gfxOverride := envconfig.HsaOverrideGfxVersion()
+	gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
 	var supported []string
 	libDir := ""
 	// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
 	// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
 	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
 	sort.Slice(matches, func(i, j int) bool {
 		// /sys/class/kfd/kfd/topology/nodes/<number>/properties
 		a, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[i])), 10, 64)
 		if err != nil {
 			slog.Debug("parse err", "error", err, "match", matches[i])
 			return false
 		}
 		b, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[j])), 10, 64)
 		if err != nil {
 			slog.Debug("parse err", "error", err, "match", matches[i])
 			return false
 		}
 		return a < b
 	})
 	cpuCount := 0
 	for _, match := range matches {
 		slog.Debug("evaluating amdgpu node " + match)
@@ -115,7 +90,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		scanner := bufio.NewScanner(fp)
 		isCPU := false
 		var major, minor, patch uint64
-		var vendor, device, uniqueID uint64
+		var vendor, device uint64
 		for scanner.Scan() {
 			line := strings.TrimSpace(scanner.Text())
 			// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
@@ -146,43 +121,30 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 			} else if strings.HasPrefix(line, "vendor_id") {
 				ver := strings.Fields(line)
 				if len(ver) != 2 {
-					slog.Debug("malformed", "vendor_id", line)
+					slog.Debug("malformed vendor_id", "vendor_id", line)
 					continue
 				}
-				vendor, err = strconv.ParseUint(ver[1], 10, 64)
+				vendor, err = strconv.ParseUint(ver[1], 10, 32)
 				if err != nil {
-					slog.Debug("malformed", "vendor_id", line, "error", err)
+					slog.Debug("malformed vendor_id" + line)
 				}
 			} else if strings.HasPrefix(line, "device_id") {
 				ver := strings.Fields(line)
 				if len(ver) != 2 {
-					slog.Debug("malformed", "device_id", line)
+					slog.Debug("malformed device_id", "device_id", line)
 					continue
 				}
-				device, err = strconv.ParseUint(ver[1], 10, 64)
+				device, err = strconv.ParseUint(ver[1], 10, 32)
 				if err != nil {
-					slog.Debug("malformed", "device_id", line, "error", err)
+					slog.Debug("malformed device_id" + line)
 				}
 			} else if strings.HasPrefix(line, "unique_id") {
 				ver := strings.Fields(line)
 				if len(ver) != 2 {
 					slog.Debug("malformed", "unique_id", line)
 					continue
 				}
 				uniqueID, err = strconv.ParseUint(ver[1], 10, 64)
 				if err != nil {
 					slog.Debug("malformed", "unique_id", line, "error", err)
 				}
 			}
 			// TODO - any other properties we want to extract and record?
 			// vendor_id + device_id -> pci lookup for "Name"
 			// Other metrics that may help us understand relative performance between multiple GPUs
 		}
 		// Note: while ./mem_banks/*/used_memory exists, it doesn't appear to take other VRAM consumers
 		// into consideration, so we instead map the device over to the DRM driver sysfs nodes which
 		// do reliably report VRAM usage.
 		if isCPU {
 			cpuCount++
 			continue
@@ -194,7 +156,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		// Shouldn't happen, but just in case...
 		if gpuID < 0 {
 			slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
-			return nil
+			return []GpuInfo{}
 		}
 		if int(major) < RocmComputeMin {
@@ -205,68 +167,65 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		// Look up the memory for the current node
 		totalMemory := uint64(0)
 		usedMemory := uint64(0)
-		var usedFile string
+		propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUTotalMemoryFileGlob)
-		mapping := []struct {
+		propFiles, err := filepath.Glob(propGlob)
-			id       uint64
+		if err != nil {
-			filename string
+			slog.Warn("error looking up total GPU memory", "glob", propGlob, "error", err)
 		}{
 			{vendor, DRMVendorFile},
 			{device, DRMDeviceFile},
 			{uniqueID, DRMUniqueIDFile}, // Not all devices will report this
 		}
-		slog.Debug("mapping amdgpu to drm sysfs nodes", "amdgpu", match, "vendor", vendor, "device", device, "unique_id", uniqueID)
+		// 1 or more memory banks - sum the values of all of them
-		// Map over to DRM location to find the total/free memory
+		for _, propFile := range propFiles {
-		drmMatches, _ := filepath.Glob(DRMDeviceDirGlob)
+			fp, err := os.Open(propFile)
-		for _, devDir := range drmMatches {
+			if err != nil {
-			matched := true
+				slog.Warn("failed to open sysfs node", "file", propFile, "erroir", err)
 			for _, m := range mapping {
 				if m.id == 0 {
 					// Null ID means it didn't populate, so we can't use it to match
 					continue
 				}
 				filename := filepath.Join(devDir, m.filename)
 				buf, err := os.ReadFile(filename)
 				if err != nil {
 					slog.Debug("failed to read sysfs node", "file", filename, "error", err)
 					matched = false
 					break
 				}
 				// values here are in hex, strip off the lead 0x and parse so we can compare the numeric (decimal) values in amdgpu
 				cmp, err := strconv.ParseUint(strings.TrimPrefix(strings.TrimSpace(string(buf)), "0x"), 16, 64)
 				if err != nil {
 					slog.Debug("failed to parse sysfs node", "file", filename, "error", err)
 					matched = false
 					break
 				}
 				if cmp != m.id {
 					matched = false
 					break
 				}
 			}
 			if !matched {
 				continue
 			}
-
+			defer fp.Close()
-			// Found the matching DRM directory
+			scanner := bufio.NewScanner(fp)
-			slog.Debug("matched", "amdgpu", match, "drm", devDir)
+			for scanner.Scan() {
-			totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
+				line := strings.TrimSpace(scanner.Text())
-			buf, err := os.ReadFile(totalFile)
+				if strings.HasPrefix(line, "size_in_bytes") {
-			if err != nil {
+					ver := strings.Fields(line)
-				slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
+					if len(ver) != 2 {
-				break
+						slog.Warn("malformed " + line)
 						continue
 					}
 					bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64)
 					if err != nil {
 						slog.Warn("malformed int " + line)
 						continue
 					}
 					totalMemory += bankSizeInBytes
 				}
 			}
-			totalMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
+		}
 		if totalMemory == 0 {
 			slog.Warn("amdgpu reports zero total memory", "gpu", gpuID)
 			continue
 		}
 		usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUUsedMemoryFileGlob)
 		usedFiles, err := filepath.Glob(usedGlob)
 		if err != nil {
 			slog.Warn("error looking up used GPU memory", "glob", usedGlob, "error", err)
 			continue
 		}
 		for _, usedFile := range usedFiles {
 			fp, err := os.Open(usedFile)
 			if err != nil {
-				slog.Debug("failed to parse sysfs node", "file", totalFile, "error", err)
+				slog.Warn("failed to open sysfs node", "file", usedFile, "error", err)
-				break
+				continue
 			}
-
+			defer fp.Close()
-			usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
+			data, err := io.ReadAll(fp)
 			usedMemory, err = getFreeMemory(usedFile)
 			if err != nil {
-				slog.Debug("failed to update used memory", "error", err)
+				slog.Warn("failed to read sysfs node", "file", usedFile, "error", err)
 				continue
 			}
-			break
+			used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
 			if err != nil {
 				slog.Warn("malformed used memory", "data", string(data), "error", err)
 				continue
 			}
 			usedMemory += used
 		}
 		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
@@ -282,21 +241,18 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
 		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
-		gpuInfo := RocmGPUInfo{
+		gpuInfo := GpuInfo{
-			GpuInfo: GpuInfo{
+			Library: "rocm",
-				Library: "rocm",
+			memInfo: memInfo{
-				memInfo: memInfo{
+				TotalMemory: totalMemory,
-					TotalMemory: totalMemory,
+				FreeMemory:  (totalMemory - usedMemory),
 					FreeMemory:  (totalMemory - usedMemory),
 				},
 				ID:            strconv.Itoa(gpuID),
 				Name:          name,
 				Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
 				MinimumMemory: rocmMinimumMemory,
 				DriverMajor:   driverMajor,
 				DriverMinor:   driverMinor,
 			},
-			usedFilepath: usedFile,
+			ID:            fmt.Sprintf("%d", gpuID),
 			Name:          name,
 			Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
 			MinimumMemory: rocmMinimumMemory,
 			DriverMajor:   driverMajor,
 			DriverMinor:   driverMinor,
 		}
 		// If the user wants to filter to a subset of devices, filter out if we aren't a match
@@ -320,7 +276,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 			libDir, err = AMDValidateLibDir()
 			if err != nil {
 				slog.Warn("unable to verify rocm library, will use cpu", "error", err)
-				return nil
+				return []GpuInfo{}
 			}
 		}
 		gpuInfo.DependencyPath = libDir
@@ -331,7 +287,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 				supported, err = GetSupportedGFX(libDir)
 				if err != nil {
 					slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
-					return nil
+					return []GpuInfo{}
 				}
 				slog.Debug("rocm supported GPUs", "types", supported)
 			}
@@ -348,11 +304,6 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 			slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
 		}
 		// Check for env var workarounds
 		if name == "1002:687f" { // Vega RX 56
 			gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
 		}
 		// The GPU has passed all the verification steps and is supported
 		resp = append(resp, gpuInfo)
 	}
@@ -393,7 +344,7 @@ func AMDValidateLibDir() (string, error) {
 	// If we still haven't found a usable rocm, the user will have to install it on their own
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
 func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
@@ -427,31 +378,3 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
 	}
 	return driverMajor, driverMinor, nil
 }
 // RefreshFreeMemory re-reads the used-memory sysfs node recorded for each GPU
 // (its usedFilepath) and updates FreeMemory in place to TotalMemory minus the
 // freshly read usage.
 //
 // An empty list is a no-op. The first read or parse failure aborts the refresh
 // and is returned; entries processed before the failure keep their new values.
 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
 	if len(gpus) == 0 {
 		return nil
 	}
 	for i := range gpus {
 		// Despite its name, getFreeMemory returns the value stored in the
 		// "used" node, i.e. the number of bytes currently in use.
 		usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
 		if err != nil {
 			return err
 		}
 		// Both operands are unsigned: if the reported usage ever exceeds the
 		// recorded total, a plain subtraction would wrap around to an
 		// enormous bogus free value, so clamp the result at zero instead.
 		var freeMemory uint64
 		if usedMemory < gpus[i].TotalMemory {
 			freeMemory = gpus[i].TotalMemory - usedMemory
 		}
 		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
 		gpus[i].FreeMemory = freeMemory
 	}
 	return nil
 }
 func getFreeMemory(usedFile string) (uint64, error) {
 	buf, err := os.ReadFile(usedFile)
 	if err != nil {
 		return 0, fmt.Errorf("failed to read sysfs node %s %w", usedFile, err)
 	}
 	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
 	if err != nil {
 		slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
 		return 0, fmt.Errorf("failed to parse sysfs node %s %w", usedFile, err)
 	}
 	return usedMemory, nil
 }
--- a/gpu/amd_windows.go
+++ b/gpu/amd_windows.go
@@ -2,15 +2,13 @@ package gpu
 import (
 	"bytes"
-	"errors"
+	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"slices"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )
@@ -22,12 +20,12 @@ const (
 var (
 	// Used to validate if the given ROCm lib is usable
-	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // This is not sufficient to discern v5 vs v6
+	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // TODO - probably include more coverage of files here...
-	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
+	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
 )
-func AMDGetGPUInfo() []RocmGPUInfo {
+func AMDGetGPUInfo() []GpuInfo {
-	resp := []RocmGPUInfo{}
+	resp := []GpuInfo{}
 	hl, err := NewHipLib()
 	if err != nil {
 		slog.Debug(err.Error())
@@ -35,11 +33,12 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 	defer hl.Release()
-	driverMajor, driverMinor, err := hl.AMDDriverVersion()
+	// TODO - this reports incorrect version information, so omitting for now
-	if err != nil {
+	// driverMajor, driverMinor, err := hl.AMDDriverVersion()
-		// For now this is benign, but we may eventually need to fail compatibility checks
+	// if err != nil {
-		slog.Debug("error looking up amd driver version", "error", err)
+	// 	// For now this is benign, but we may eventually need to fail compatibility checks
-	}
+	// 	slog.Debug("error looking up amd driver version", "error", err)
 	// }
 	// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
 	count := hl.HipGetDeviceCount()
@@ -53,7 +52,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 	}
 	var supported []string
-	gfxOverride := envconfig.HsaOverrideGfxVersion()
+	gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
 	if gfxOverride == "" {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {
@@ -85,15 +84,14 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 		n = bytes.IndexByte(props.GcnArchName[:], 0)
 		gfx := string(props.GcnArchName[:n])
 		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
+		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
 		// TODO  Why isn't props.iGPU accurate!?
 		if strings.EqualFold(name, iGPUName) {
 			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
 			continue
 		}
 		if gfxOverride == "" {
-			// Strip off Target Features when comparing
+			if !slices.Contains[[]string, string](supported, gfx) {
 			if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
 				slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
 				// TODO - consider discrete markdown just for ROCM troubleshooting?
 				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
@@ -115,27 +113,25 @@ func AMDGetGPUInfo() []RocmGPUInfo {
 			continue
 		}
 		// TODO revisit this once ROCm v6 is available on windows.
 		// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
 		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
 		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
-		gpuInfo := RocmGPUInfo{
+		gpuInfo := GpuInfo{
-			GpuInfo: GpuInfo{
+			Library: "rocm",
-				Library: "rocm",
+			memInfo: memInfo{
-				memInfo: memInfo{
+				TotalMemory: totalMemory,
-					TotalMemory: totalMemory,
+				FreeMemory:  freeMemory,
 					FreeMemory:  freeMemory,
 				},
 				// Free memory reporting on Windows is not reliable until we bump to ROCm v6.2
 				UnreliableFreeMemory: true,
 				ID:             strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
 				DependencyPath: libDir,
 				MinimumMemory:  rocmMinimumMemory,
 				Name:           name,
 				Compute:        gfx,
 				DriverMajor:    driverMajor,
 				DriverMinor:    driverMinor,
 			},
-			index: i,
+			ID:             fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
 			DependencyPath: libDir,
 			MinimumMemory:  rocmMinimumMemory,
 			Name:           name,
 			Compute:        gfx,
 			// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
 			// DriverMajor:    driverMajor,
 			// DriverMinor:    driverMinor,
 		}
 		resp = append(resp, gpuInfo)
@@ -161,32 +157,5 @@ func AMDValidateLibDir() (string, error) {
 	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
 	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
 // RefreshFreeMemory updates FreeMemory on every entry of gpus by selecting the
 // device through the HIP library and asking it for current memory info.
 //
 // An empty list is a no-op. If the HIP library cannot be loaded the failure is
 // only logged at debug level and nil is returned. A HipSetDevice failure aborts
 // the refresh and is returned, whereas a HipMemGetInfo failure is logged as a
 // warning and that device is skipped.
 func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
 	if len(gpus) == 0 {
 		return nil
 	}

 	hl, loadErr := NewHipLib()
 	if loadErr != nil {
 		slog.Debug(loadErr.Error())
 		return nil
 	}
 	defer hl.Release()

 	for i := range gpus {
 		// Work through a pointer so the update lands in the caller's slice.
 		gpu := &gpus[i]

 		if err := hl.HipSetDevice(gpu.index); err != nil {
 			return err
 		}

 		freeMemory, _, err := hl.HipMemGetInfo()
 		if err != nil {
 			slog.Warn("get mem info", "id", i, "error", err)
 			continue
 		}

 		slog.Debug("updating rocm free memory", "gpu", gpu.ID, "name", gpu.Name, "before", format.HumanBytes2(gpu.FreeMemory), "now", format.HumanBytes2(freeMemory))
 		gpu.FreeMemory = freeMemory
 	}
 	return nil
 }
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -26,7 +26,7 @@ func PayloadsDir() (string, error) {
 	defer lock.Unlock()
 	var err error
 	if payloadsDir == "" {
-		runnersDir := envconfig.RunnersDir()
+		runnersDir := envconfig.RunnersDir
 		if runnersDir != "" {
 			payloadsDir = runnersDir
@@ -35,14 +35,14 @@ func PayloadsDir() (string, error) {
 		// The remainder only applies on non-windows where we still carry payloads in the main executable
 		cleanupTmpDirs()
-		tmpDir := envconfig.TmpDir()
+		tmpDir := envconfig.TmpDir
 		if tmpDir == "" {
 			tmpDir, err = os.MkdirTemp("", "ollama")
 			if err != nil {
 				return "", fmt.Errorf("failed to generate tmp dir: %w", err)
 			}
 		} else {
-			err = os.MkdirAll(tmpDir, 0o755)
+			err = os.MkdirAll(tmpDir, 0755)
 			if err != nil {
 				return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
 			}
@@ -54,7 +54,7 @@ func PayloadsDir() (string, error) {
 		if err != nil {
 			return "", err
 		}
-		if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
+		if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
 			return "", err
 		}
@@ -77,27 +77,20 @@ func cleanupTmpDirs() {
 			continue
 		}
 		raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
 		if err == nil {
 			pid, err := strconv.Atoi(string(raw))
 			if err == nil {
 				if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
 					// Another running ollama, ignore this tmpdir
 					continue
 				}
 			}
 		} else {
 			slog.Debug("failed to open ollama.pid", "path", d, "error", err)
 		}
 		err = os.RemoveAll(d)
 		if err != nil {
-			slog.Warn("failed to read ollama.pid", "path", d, "error", err)
+			slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
 			// No pid, ignore this tmpdir
 			continue
 		}
 		pid, err := strconv.Atoi(string(raw))
 		if err != nil {
 			slog.Warn("failed to parse pid", "path", d, "error", err)
 			continue
 		}
 		proc, err := os.FindProcess(pid)
 		if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
 			slog.Warn("found running ollama", "pid", pid, "path", d)
 			// Another running ollama, ignore this tmpdir
 			continue
 		}
 		if err := os.Remove(d); err != nil {
 			slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
 		}
 	}
 }
@@ -105,7 +98,7 @@ func cleanupTmpDirs() {
 func Cleanup() {
 	lock.Lock()
 	defer lock.Unlock()
-	runnersDir := envconfig.RunnersDir()
+	runnersDir := envconfig.RunnersDir
 	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
 		// We want to fully clean up the tmpdir parent of the payloads dir
 		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
@@ -1,37 +1,21 @@
 package gpu
 import (
-	"os"
+	"log/slog"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"golang.org/x/sys/cpu"
 )
-func GetCPUCapability() CPUCapability {
+func GetCPUVariant() string {
 	if cpu.X86.HasAVX2 {
-		return CPUCapabilityAVX2
+		slog.Debug("CPU has AVX2")
 		return "avx2"
 	}
 	if cpu.X86.HasAVX {
-		return CPUCapabilityAVX
+		slog.Debug("CPU has AVX")
 		return "avx"
 	}
 	slog.Debug("CPU does not have vector extensions")
 	// else LCD
-	return CPUCapabilityNone
+	return ""
 }
 func IsNUMA() bool {
 	if runtime.GOOS != "linux" {
 		// numa support in llama.cpp is linux only
 		return false
 	}
 	ids := map[string]interface{}{}
 	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
 	for _, packageId := range packageIds {
 		id, err := os.ReadFile(packageId)
 		if err == nil {
 			ids[strings.TrimSpace(string(id))] = struct{}{}
 		}
 	}
 	return len(ids) > 1
 }
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -7,9 +7,9 @@ package gpu
 #cgo windows LDFLAGS: -lpthread
 #include "gpu_info.h"
 */
 import "C"
 import (
 	"fmt"
 	"log/slog"
@@ -24,37 +24,19 @@ import (
 	"github.com/ollama/ollama/format"
 )
-type cudaHandles struct {
+type handles struct {
 	deviceCount int
 	cudart      *C.cudart_handle_t
 	nvcuda      *C.nvcuda_handle_t
 	nvml        *C.nvml_handle_t
 }
 type oneapiHandles struct {
 	oneapi      *C.oneapi_handle_t
 	deviceCount int
 }
 const (
 	cudaMinimumMemory = 457 * format.MebiByte
 	rocmMinimumMemory = 457 * format.MebiByte
 	// TODO OneAPI minimum memory
 )
-var (
+var gpuMutex sync.Mutex
 	gpuMutex      sync.Mutex
 	bootstrapped  bool
 	cpuCapability CPUCapability
 	cpus          []CPUInfo
 	cudaGPUs      []CudaGPUInfo
 	nvcudaLibPath string
 	cudartLibPath string
 	oneapiLibPath string
 	nvmlLibPath   string
 	rocmGPUs      []RocmGPUInfo
 	oneapiGPUs    []OneapiGPUInfo
 )
 // With our current CUDA compile flags, older than 5.0 will not work properly
 var CudaComputeMin = [2]C.int{5, 0}
@@ -64,112 +46,113 @@ var RocmComputeMin = 9
 // TODO find a better way to detect iGPU instead of minimum memory
 // IGPUMemLimit is a memory-size threshold of 1 GiB used as an iGPU heuristic.
 // NOTE(review): the original note read "512G is what they typically report",
 // which contradicts its own conclusion; presumably 512M was meant - confirm.
 const IGPUMemLimit = 1 * format.GibiByte // anything less than 1G must be iGPU
 // CudartLinuxGlobs lists the default glob patterns used to locate the CUDA
 // runtime library (libcudart.so*) on linux.
 var CudartLinuxGlobs = []string{
 	"/usr/local/cuda/lib64/libcudart.so*",
 	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
 	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
 	"/usr/lib/wsl/lib/libcudart.so*",
 	"/usr/lib/wsl/drivers/*/libcudart.so*",
 	"/opt/cuda/lib64/libcudart.so*",
 	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
 	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
 	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
 	"/usr/local/cuda/lib*/libcudart.so*",
 	"/usr/lib*/libcudart.so*",
 	"/usr/local/lib*/libcudart.so*",
 }
 // CudartWindowsGlobs lists the default glob patterns used to locate the CUDA
 // runtime library (cudart64_*.dll) on windows.
 var CudartWindowsGlobs = []string{
 	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
 }
 // NvcudaLinuxGlobs lists the default glob patterns used to locate the CUDA
 // driver library (libcuda.so*) on linux.
 var NvcudaLinuxGlobs = []string{
 	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
 	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
 	"/usr/lib/*-linux-gnu/libcuda.so*",
 	"/usr/lib/wsl/lib/libcuda.so*",
 	"/usr/lib/wsl/drivers/*/libcuda.so*",
 	"/opt/cuda/lib*/libcuda.so*",
 	"/usr/local/cuda/lib*/libcuda.so*",
 	"/usr/lib*/libcuda.so*",
 	"/usr/local/lib*/libcuda.so*",
 }
 // NvcudaWindowsGlobs lists the default glob patterns used to locate the CUDA
 // driver library (nvcuda.dll) on windows.
 var NvcudaWindowsGlobs = []string{
 	"c:\\windows\\system*\\nvcuda.dll",
 }
 // OneapiWindowsGlobs lists the glob patterns for the Intel GPU library
 // (ze_intel_gpu64.dll) on windows.
 var OneapiWindowsGlobs = []string{
 	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
 }
 // OneapiLinuxGlobs lists the glob patterns for the Intel GPU library
 // (libze_intel_gpu.so*) on linux.
 var OneapiLinuxGlobs = []string{
 	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
 	"/usr/lib*/libze_intel_gpu.so*",
 }
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
 // Note: gpuMutex must already be held
-func initCudaHandles() *cudaHandles {
+func initGPUHandles() *handles {
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
-	cHandles := &cudaHandles{}
+	gpuHandles := &handles{}
-	// Short Circuit if we already know which library to use
+	var cudartMgmtName string
 	if nvmlLibPath != "" {
 		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
 		return cHandles
 	}
 	if nvcudaLibPath != "" {
 		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
 		return cHandles
 	}
 	if cudartLibPath != "" {
 		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
 		return cHandles
 	}
 	slog.Debug("searching for GPU discovery libraries for NVIDIA")
 	var cudartMgmtPatterns []string
 	var nvcudaMgmtName string
 	var nvcudaMgmtPatterns []string
 	// Aligned with driver, we can't carry as payloads
 	nvcudaMgmtPatterns := NvcudaGlobs
 	if runtime.GOOS == "windows" {
 		localAppData := os.Getenv("LOCALAPPDATA")
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
 	}
 	tmpDir, _ := PayloadsDir()
-	if tmpDir != "" {
+	switch runtime.GOOS {
-		// TODO - add "payloads" for subprocess
+	case "windows":
-		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
+		cudartMgmtName = "cudart64_*.dll"
-	}
+		localAppData := os.Getenv("LOCALAPPDATA")
-	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
+		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
-
+		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
-	if len(NvmlGlobs) > 0 {
+		// Aligned with driver, we can't carry as payloads
-		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
+		nvcudaMgmtName = "nvcuda.dll"
-		if len(nvmlLibPaths) > 0 {
+		nvcudaMgmtPatterns = NvcudaWindowsGlobs
-			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
+	case "linux":
-			if nvml != nil {
+		cudartMgmtName = "libcudart.so*"
-				slog.Debug("nvidia-ml loaded", "library", libPath)
+		if tmpDir != "" {
-				cHandles.nvml = nvml
+			// TODO - add "payloads" for subprocess
-				nvmlLibPath = libPath
+			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
 			}
 		}
 		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "libcuda.so*"
 		nvcudaMgmtPatterns = NvcudaLinuxGlobs
 	default:
 		return gpuHandles
 	}
-	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
+	slog.Debug("Detecting GPUs")
 	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
 	if len(nvcudaLibPaths) > 0 {
 		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
 		if nvcuda != nil {
 			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
-			cHandles.nvcuda = nvcuda
+			gpuHandles.nvcuda = nvcuda
-			cHandles.deviceCount = deviceCount
+			gpuHandles.deviceCount = deviceCount
-			nvcudaLibPath = libPath
+			return gpuHandles
 			return cHandles
 		}
 	}
-	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
+	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
 	if len(cudartLibPaths) > 0 {
 		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
 		if cudart != nil {
 			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
-			cHandles.cudart = cudart
+			gpuHandles.cudart = cudart
-			cHandles.deviceCount = deviceCount
+			gpuHandles.deviceCount = deviceCount
-			cudartLibPath = libPath
+			return gpuHandles
 			return cHandles
 		}
 	}
-	return cHandles
+	return gpuHandles
 }
 // initOneAPIHandles loads the oneAPI management library and returns the
 // resulting handle together with the device count it reported.
 //
 // When oneapiLibPath is already set, only that library is loaded. Otherwise
 // candidate libraries are discovered with FindGPULibs and, if any are found,
 // oneapiLibPath is set from the load result for subsequent calls.
 //
 // Note: gpuMutex must already be held
 func initOneAPIHandles() *oneapiHandles {
 	h := new(oneapiHandles)

 	if oneapiLibPath == "" {
 		// No known-good library yet: search, then remember what loaded.
 		if candidates := FindGPULibs(OneapiMgmtName, OneapiGlobs); len(candidates) > 0 {
 			h.deviceCount, h.oneapi, oneapiLibPath = LoadOneapiMgmt(candidates)
 		}
 		return h
 	}

 	// Short circuit: reuse the library path recorded by an earlier call.
 	h.deviceCount, h.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
 	return h
 }
 // GetCPUInfo returns a single-element list holding the GpuInfo of the first
 // entry in cpus. If discovery has not been bootstrapped yet it first runs
 // GetGPUInfo; gpuMutex is released before that call because GetGPUInfo takes
 // the mutex itself.
 func GetCPUInfo() GpuInfoList {
 	gpuMutex.Lock()
 	ready := bootstrapped
 	gpuMutex.Unlock()

 	if !ready {
 		GetGPUInfo()
 	}
 	return GpuInfoList{cpus[0].GpuInfo}
 }
 func GetGPUInfo() GpuInfoList {
@@ -177,290 +160,112 @@ func GetGPUInfo() GpuInfoList {
 	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
 	gpuMutex.Lock()
 	defer gpuMutex.Unlock()
-	needRefresh := true
+
-	var cHandles *cudaHandles
+	gpuHandles := initGPUHandles()
 	var oHandles *oneapiHandles
 	defer func() {
-		if cHandles != nil {
+		if gpuHandles.cudart != nil {
-			if cHandles.cudart != nil {
+			C.cudart_release(*gpuHandles.cudart)
 				C.cudart_release(*cHandles.cudart)
 			}
 			if cHandles.nvcuda != nil {
 				C.nvcuda_release(*cHandles.nvcuda)
 			}
 			if cHandles.nvml != nil {
 				C.nvml_release(*cHandles.nvml)
 			}
 		}
-		if oHandles != nil {
+		if gpuHandles.nvcuda != nil {
-			if oHandles.oneapi != nil {
+			C.nvcuda_release(*gpuHandles.nvcuda)
 				// TODO - is this needed?
 				C.oneapi_release(*oHandles.oneapi)
 			}
 		}
 	}()
-	if !bootstrapped {
+	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
-		slog.Info("looking for compatible GPUs")
+	cpuVariant := GetCPUVariant()
-		needRefresh = false
+	if cpuVariant == "" && runtime.GOARCH == "amd64" {
-		cpuCapability = GetCPUCapability()
+		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 		var memInfo C.mem_info_t
 		mem, err := GetCPUMem()
 		if err != nil {
 			slog.Warn("error looking up system memory", "error", err)
 		}
 		cpus = []CPUInfo{
 			{
 				GpuInfo: GpuInfo{
 					memInfo: mem,
 					Library: "cpu",
 					Variant: cpuCapability,
 					ID:      "0",
 				},
 			},
 		}
 		// Fallback to CPU mode if we're lacking required vector extensions on x86
 		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
 			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
 			bootstrapped = true
 			// No need to do any GPU discovery, since we can't run on them
 			return GpuInfoList{cpus[0].GpuInfo}
 		}
 		// On windows we bundle the nvidia library one level above the runner dir
 		depPath := ""
 		if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
 			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
 		}
 		// Load ALL libraries
 		cHandles = initCudaHandles()
 		// NVIDIA
 		for i := range cHandles.deviceCount {
 			if cHandles.cudart != nil || cHandles.nvcuda != nil {
 				gpuInfo := CudaGPUInfo{
 					GpuInfo: GpuInfo{
 						Library: "cuda",
 					},
 					index: i,
 				}
 				var driverMajor int
 				var driverMinor int
 				if cHandles.cudart != nil {
 					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
 				} else {
 					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
 					driverMajor = int(cHandles.nvcuda.driver_major)
 					driverMinor = int(cHandles.nvcuda.driver_minor)
 				}
 				if memInfo.err != nil {
 					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 					C.free(unsafe.Pointer(memInfo.err))
 					continue
 				}
 				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
 					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
 					continue
 				}
 				gpuInfo.TotalMemory = uint64(memInfo.total)
 				gpuInfo.FreeMemory = uint64(memInfo.free)
 				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
 				gpuInfo.MinimumMemory = cudaMinimumMemory
 				gpuInfo.DependencyPath = depPath
 				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 				gpuInfo.DriverMajor = driverMajor
 				gpuInfo.DriverMinor = driverMinor
 				// query the management library as well so we can record any skew between the two
 				// which represents overhead on the GPU we must set aside on subsequent updates
 				if cHandles.nvml != nil {
 					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
 					if memInfo.err != nil {
 						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 						C.free(unsafe.Pointer(memInfo.err))
 					} else {
 						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
 							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
 							slog.Info("detected OS VRAM overhead",
 								"id", gpuInfo.ID,
 								"library", gpuInfo.Library,
 								"compute", gpuInfo.Compute,
 								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
 								"name", gpuInfo.Name,
 								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
 							)
 						}
 					}
 				}
 				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 				cudaGPUs = append(cudaGPUs, gpuInfo)
 			}
 		}
 		// Intel
 		if envconfig.IntelGPU() {
 			oHandles = initOneAPIHandles()
 			// On windows we bundle the oneapi library one level above the runner dir
 			depPath = ""
 			if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
 				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
 			}
 			for d := range oHandles.oneapi.num_drivers {
 				if oHandles.oneapi == nil {
 					// shouldn't happen
 					slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
 					continue
 				}
 				devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
 				for i := range devCount {
 					gpuInfo := OneapiGPUInfo{
 						GpuInfo: GpuInfo{
 							Library: "oneapi",
 						},
 						driverIndex: int(d),
 						gpuIndex:    int(i),
 					}
 					// TODO - split bootstrapping from updating free memory
 					C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
 					// TODO - convert this to MinimumMemory based on testing...
 					var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
 					memInfo.free = C.uint64_t(totalFreeMem)
 					gpuInfo.TotalMemory = uint64(memInfo.total)
 					gpuInfo.FreeMemory = uint64(memInfo.free)
 					gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 					gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 					gpuInfo.DependencyPath = depPath
 					oneapiGPUs = append(oneapiGPUs, gpuInfo)
 				}
 			}
 		}
 		rocmGPUs = AMDGetGPUInfo()
 		bootstrapped = true
 		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
 			slog.Info("no compatible GPUs were discovered")
 		}
 	}
-	// For detected GPUs, load library if not loaded
+	// On windows we bundle the nvidia library one level above the runner dir
 	depPath := ""
 	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
 		depPath = filepath.Dir(envconfig.RunnersDir)
 	}
-	// Refresh free memory usage
+	var memInfo C.mem_info_t
-	if needRefresh {
+	resp := []GpuInfo{}
 		mem, err := GetCPUMem()
 		if err != nil {
 			slog.Warn("error looking up system memory", "error", err)
 		} else {
 			slog.Debug("updating system memory data",
 				slog.Group(
 					"before",
 					"total", format.HumanBytes2(cpus[0].TotalMemory),
 					"free", format.HumanBytes2(cpus[0].FreeMemory),
 					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
 				),
 				slog.Group(
 					"now",
 					"total", format.HumanBytes2(mem.TotalMemory),
 					"free", format.HumanBytes2(mem.FreeMemory),
 					"free_swap", format.HumanBytes2(mem.FreeSwap),
 				),
 			)
 			cpus[0].FreeMemory = mem.FreeMemory
 			cpus[0].FreeSwap = mem.FreeSwap
 		}
-		var memInfo C.mem_info_t
+	// NVIDIA first
-		if cHandles == nil && len(cudaGPUs) > 0 {
+	for i := range gpuHandles.deviceCount {
-			cHandles = initCudaHandles()
+		// TODO once we support CPU compilation variants of GPU libraries refine this...
 		if cpuVariant == "" && runtime.GOARCH == "amd64" {
 			continue
 		}
-		for i, gpu := range cudaGPUs {
+		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
-			if cHandles.nvml != nil {
+			gpuInfo := GpuInfo{
-				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
+				Library: "cuda",
-			} else if cHandles.cudart != nil {
+			}
-				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
+			var driverMajor int
-			} else if cHandles.nvcuda != nil {
+			var driverMinor int
-				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
+			if gpuHandles.cudart != nil {
-				memInfo.used = memInfo.total - memInfo.free
+				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
 			} else {
-				// shouldn't happen
+				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
-				slog.Warn("no valid cuda library loaded to refresh vram usage")
+				driverMajor = int(gpuHandles.nvcuda.driver_major)
-				break
+				driverMinor = int(gpuHandles.nvcuda.driver_minor)
 			}
 			if memInfo.err != nil {
-				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
+				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
 				C.free(unsafe.Pointer(memInfo.err))
 				continue
 			}
-			if memInfo.free == 0 {
+			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
-				slog.Warn("error looking up nvidia GPU memory")
+				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
 				continue
 			}
-			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
+			gpuInfo.TotalMemory = uint64(memInfo.total)
-				// When using the management library update based on recorded overhead
+			gpuInfo.FreeMemory = uint64(memInfo.free)
-				memInfo.free -= C.uint64_t(gpu.OSOverhead)
+			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-			}
+			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
-			slog.Debug("updating cuda memory data",
+			gpuInfo.MinimumMemory = cudaMinimumMemory
-				"gpu", gpu.ID,
+			gpuInfo.DependencyPath = depPath
-				"name", gpu.Name,
+			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-				"overhead", format.HumanBytes2(gpu.OSOverhead),
+			gpuInfo.DriverMajor = driverMajor
-				slog.Group(
+			gpuInfo.DriverMinor = driverMinor
 					"before",
 					"total", format.HumanBytes2(gpu.TotalMemory),
 					"free", format.HumanBytes2(gpu.FreeMemory),
 				),
 				slog.Group(
 					"now",
 					"total", format.HumanBytes2(uint64(memInfo.total)),
 					"free", format.HumanBytes2(uint64(memInfo.free)),
 					"used", format.HumanBytes2(uint64(memInfo.used)),
 				),
 			)
 			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
 		}
-		if oHandles == nil && len(oneapiGPUs) > 0 {
+			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
-			oHandles = initOneAPIHandles()
+			resp = append(resp, gpuInfo)
 		}
 		for i, gpu := range oneapiGPUs {
 			if oHandles.oneapi == nil {
 				// shouldn't happen
 				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
 				continue
 			}
 			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
 			// TODO - convert this to MinimumMemory based on testing...
 			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
 			memInfo.free = C.uint64_t(totalFreeMem)
 			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
 		}
 		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
 		if err != nil {
 			slog.Debug("problem refreshing ROCm free memory", "error", err)
 		}
 	}
-	resp := []GpuInfo{}
+	// Then AMD
-	for _, gpu := range cudaGPUs {
+	resp = append(resp, AMDGetGPUInfo()...)
-		resp = append(resp, gpu.GpuInfo)
+
 	}
 	for _, gpu := range rocmGPUs {
 		resp = append(resp, gpu.GpuInfo)
 	}
 	for _, gpu := range oneapiGPUs {
 		resp = append(resp, gpu.GpuInfo)
 	}
 	if len(resp) == 0 {
-		resp = append(resp, cpus[0].GpuInfo)
+		C.cpu_check_ram(&memInfo)
 		if memInfo.err != nil {
 			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
 			C.free(unsafe.Pointer(memInfo.err))
 			return resp
 		}
 		gpuInfo := GpuInfo{
 			Library: "cpu",
 			Variant: cpuVariant,
 		}
 		gpuInfo.TotalMemory = uint64(memInfo.total)
 		gpuInfo.FreeMemory = uint64(memInfo.free)
 		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
 		resp = append(resp, gpuInfo)
 	}
 	return resp
 }
 // GetCPUMem queries system memory through the C helper cpu_check_ram and
 // returns the reported total and free amounts.
 //
 // If the helper reports an error, its message is returned as a Go error
 // alongside a zero-valued memInfo, and the C-allocated message is freed.
 func GetCPUMem() (memInfo, error) {
 	var ret memInfo
 	var info C.mem_info_t
 	C.cpu_check_ram(&info)
 	if info.err != nil {
 		// The deferred free runs after the return value below has been built,
 		// so the message is copied into Go memory before it is released.
 		defer C.free(unsafe.Pointer(info.err))
 		// Pass the C message as an argument, never as the format string:
 		// any '%' in it would otherwise be interpreted as a formatting verb.
 		return ret, fmt.Errorf("%s", C.GoString(info.err))
 	}
 	ret.FreeMemory = uint64(info.free)
 	ret.TotalMemory = uint64(info.total)
 	return ret, nil
 }
 func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
 	var ldPaths []string
@@ -548,23 +353,7 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 		defer C.free(unsafe.Pointer(lib))
 		C.nvcuda_init(lib, &resp)
 		if resp.err != nil {
-			// Decide what log level based on the type of error message to help users understand why
+			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
 			msg := C.GoString(resp.err)
 			switch resp.cudaErr {
 			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
 				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
 			case C.CUDA_ERROR_NO_DEVICE:
 				slog.Info("no nvidia devices detected", "library", libPath)
 			case C.CUDA_ERROR_UNKNOWN:
 				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
 				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
 			default:
 				if strings.Contains(msg, "wrong ELF class") {
 					slog.Debug("skipping 32bit library", "library", libPath)
 				} else {
 					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
 				}
 			}
 			C.free(unsafe.Pointer(resp.err))
 		} else {
 			return int(resp.num_devices), &resp.ch, libPath
@@ -573,26 +362,8 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 	return 0, nil, ""
 }
 // LoadNVMLMgmt tries each candidate NVML library path in order by calling
 // the C helper nvml_init on it.
 //
 // It returns a pointer to the initialized handle and the path that loaded on
 // the first candidate for which nvml_init reports no error. For a candidate
 // that fails, the error is logged at Info level, the C error string is freed,
 // and the next candidate is tried. If no candidate loads, it returns nil and
 // an empty path.
 func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
 	var initResp C.nvml_init_resp_t
 	initResp.ch.verbose = getVerboseState()
 	for _, candidate := range nvmlLibPaths {
 		cPath := C.CString(candidate)
 		// Deferred inside the loop: each C path string is released when the
 		// function returns, not at the end of the iteration.
 		defer C.free(unsafe.Pointer(cPath))
 		C.nvml_init(cPath, &initResp)
 		if initResp.err == nil {
 			return &initResp.ch, candidate
 		}
 		slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", candidate, C.GoString(initResp.err)))
 		C.free(unsafe.Pointer(initResp.err))
 	}
 	return nil, ""
 }
 func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 	var resp C.oneapi_init_resp_t
 	num_devices := 0
 	resp.oh.verbose = getVerboseState()
 	for _, libPath := range oneapiLibPaths {
 		lib := C.CString(libPath)
@@ -602,17 +373,14 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
 			C.free(unsafe.Pointer(resp.err))
 		} else {
-			for i := range resp.oh.num_drivers {
+			return int(resp.num_devices), &resp.oh, libPath
 				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
 			}
 			return num_devices, &resp.oh, libPath
 		}
 	}
 	return 0, nil, ""
 }
 func getVerboseState() C.uint16_t {
-	if envconfig.Debug() {
+	if envconfig.Debug {
 		return C.uint16_t(1)
 	}
 	return C.uint16_t(0)
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -8,7 +8,6 @@ package gpu
 #include "gpu_info_darwin.h"
 */
 import "C"
 import (
 	"runtime"
@@ -25,7 +24,7 @@ func GetGPUInfo() GpuInfoList {
 		return []GpuInfo{
 			{
 				Library: "cpu",
-				Variant: GetCPUCapability(),
+				Variant: GetCPUVariant(),
 				memInfo: mem,
 			},
 		}
@@ -43,22 +42,10 @@ func GetGPUInfo() GpuInfoList {
 	return []GpuInfo{info}
 }
 // GetCPUInfo returns a single-entry list describing the CPU: library "cpu",
 // the variant reported by GetCPUCapability, and the memory figures from
 // GetCPUMem. The error result of GetCPUMem is discarded.
 func GetCPUInfo() GpuInfoList {
 	cpuMem, _ := GetCPUMem()
 	cpu := GpuInfo{
 		Library: "cpu",
 		Variant: GetCPUCapability(),
 		memInfo: cpuMem,
 	}
 	return []GpuInfo{cpu}
 }
 func GetCPUMem() (memInfo, error) {
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
-		FreeMemory:  uint64(C.getFreeMemory()),
+		FreeMemory:  0,
 		// FreeSwap omitted as Darwin uses dynamic paging
 	}, nil
 }
--- a/gpu/gpu_info.h
+++ b/gpu/gpu_info.h
@@ -47,7 +47,6 @@ typedef struct mem_info {
  char gpu_name[GPU_NAME_LEN];
  uint64_t total;
  uint64_t free;
  uint64_t used;
  // Compute Capability
  int major; 
@@ -63,8 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
 #include "gpu_info_cudart.h"
 #include "gpu_info_nvcuda.h"
 #include "gpu_info_nvml.h"
 #include "gpu_info_oneapi.h"
 #endif  // __GPU_INFO_H__
-#endif  // __APPLE__
+#endif  // __APPLE__
--- a/gpu/gpu_info_cpu.c
+++ b/gpu/gpu_info_cpu.c
@@ -0,0 +1,45 @@
 #include "gpu_info.h"
 // Fallbacks for CPU mode
 #ifdef _WIN32
 #include <sysinfoapi.h>
 // Windows implementation: fill resp with physical memory figures obtained
 // from GlobalMemoryStatusEx.
 //
 // On success, resp->total and resp->free receive ullTotalPhys and
 // ullAvailPhys, resp->gpu_id is set to "0", and resp->err is NULL.
 // On failure, resp->err is set to the value produced by LOAD_ERR() and the
 // other fields are left untouched.
 void cpu_check_ram(mem_info_t *resp) {
  MEMORYSTATUSEX status;
  status.dwLength = sizeof(status);
  resp->err = NULL;
  if (GlobalMemoryStatusEx(&status) == 0) {
    // NOTE(review): LOAD_ERR() is defined elsewhere; presumably it yields a
    // heap-allocated message the caller must free - confirm.
    resp->err = LOAD_ERR();
    return;
  }
  resp->total = status.ullTotalPhys;
  resp->free = status.ullAvailPhys;
  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
 }
 #elif __linux__
 #include <errno.h>
 #include <string.h>
 #include <sys/sysinfo.h>
 // Linux implementation: fill resp with system RAM figures from sysinfo(2).
 //
 // On success, resp->total and resp->free are set in bytes, resp->gpu_id is
 // set to "0", and resp->err is NULL.
 // On failure, resp->err is set to a strdup'd copy of strerror(errno) and the
 // other fields are left untouched.
 void cpu_check_ram(mem_info_t *resp) {
  struct sysinfo info;
  resp->err = NULL;
  if (sysinfo(&info) != 0) {
    resp->err = strdup(strerror(errno));
  } else {
    // totalram/freeram are counted in units of mem_unit bytes. Widen to 64
    // bits before multiplying so the product cannot wrap on targets where
    // unsigned long is 32 bits wide.
    resp->total = (uint64_t)info.totalram * info.mem_unit;
    resp->free = (uint64_t)info.freeram * info.mem_unit;
    snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
  }
  return;
 }
 #elif __APPLE__
 // TODO consider an Apple implementation that does something useful
 // mem_info_t cpu_check_ram() {
 //   mem_info_t resp = {0, 0, NULL};
 //   return resp;
 // }
 #else
 #error "Unsupported platform"
 #endif
--- a/gpu/gpu_info_cudart.c
+++ b/gpu/gpu_info_cudart.c
@@ -40,7 +40,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
  for (i = 0; l[i].s != NULL; i++) {
    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
-    if (!*(l[i].p)) {
+    if (!l[i].p) {
      char *msg = LOAD_ERR();
      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
      UNLOAD_LIBRARY(resp->ch.handle);
@@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
 }
-void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
+void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
  resp->err = NULL;
  cudartMemory_t memInfo = {0,0,0};
  cudartReturn_t ret;
@@ -166,11 +166,9 @@ void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
  resp->total = memInfo.total;
  resp->free = memInfo.free;
  resp->used = memInfo.used;
  LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
  LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
  LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
  LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
 }
--- a/Show More
+++ b/Show More
`@@ -1,2 +1 @@`
	`llm/ext_server/* linguist-vendored`	`llm/ext_server/* linguist-vendored`
	`* text eol=lf`