a8db2a9

patch again
patch llama.cpp
2024-07-10 13:10:58 -07:00 · 2024-07-10 13:06:40 -07:00 · 2024-07-10 13:02:37 -07:00 · 2024-07-10 12:46:45 -07:00 · 2024-07-10 12:46:45 -07:00 · 2024-07-10 12:46:44 -07:00
254 changed files with 4605 additions and 10984 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1 @@
 llm/ext_server/* linguist-vendored
 * text=auto
 *.go text eol=lf
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -147,7 +147,7 @@ jobs:
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -187,13 +187,6 @@ jobs:
  generate-windows-cuda:
    environment: release
    runs-on: windows
    strategy:
      matrix:
        cuda:
          - version: "11"
            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
          - version: "12"
            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
    env:
      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
    steps:
@@ -227,11 +220,11 @@ jobs:
        with:
          go-version-file: go.mod
          cache: true
-      - name: 'Install CUDA ${{ matrix.cuda.version }}'
+      - name: 'Install CUDA'
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
          write-host "Installing CUDA"
          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
          write-host "Completed CUDA"
@@ -263,16 +256,15 @@ jobs:
          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
      - uses: actions/upload-artifact@v4
        with:
-          name: generate-windows-cuda-${{ matrix.cuda.version }}
+          name: generate-windows-cuda
          path: |
            llm/build/**/bin/*
            dist/windows-amd64/**
      - uses: actions/upload-artifact@v4
        with:
-          name: windows-cuda-deps-${{ matrix.cuda.version }}
+          name: windows-cuda-deps
          path: dist/deps/*
  # Import the prior generation steps and build the final windows assets
  build-windows:
    environment: release
@@ -322,16 +314,10 @@ jobs:
          name: generate-windows-cpu
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-11
+          name: generate-windows-cuda
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-12
+          name: windows-cuda-deps
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps-11
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps-12
      - uses: actions/download-artifact@v4
        with:
          name: windows-rocm-deps
@@ -377,6 +363,7 @@ jobs:
      - run: |
          ./scripts/build_linux.sh
          ./scripts/build_docker.sh
          mv dist/deps/* dist/
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-amd64
@@ -472,10 +459,7 @@ jobs:
          merge-multiple: true
      - run: |
          ls -lh dist/
-          (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
+          (cd dist; sha256sum * > sha256sum.txt)
          mv sha256sum.txt dist/
          mv dist/linux-???64 .
          mv dist/linux-amd64-rocm .
          cat dist/sha256sum.txt
      - name: Create or update Release
        run: |
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -126,7 +126,7 @@ jobs:
    strategy:
      matrix:
        rocm-version:
-          - '6.1.2'
+          - '6.1.1'
    runs-on: linux
    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
    steps:
@@ -169,7 +169,7 @@ jobs:
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -273,7 +273,7 @@ jobs:
        if: ${{ startsWith(matrix.os, 'macos-') }}
      - uses: golangci/golangci-lint-action@v6
        with:
-          args: --timeout 8m0s -v
+          args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
  test:
    strategy:
      matrix:
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -7,31 +7,22 @@ linters:
    - bodyclose
    - containedctx
    - contextcheck
    - errcheck
    - exportloopref
    - gci
    - gocheckcompilerdirectives
-    - gofmt
+    # conditionally enable this on linux/macos
-    - gofumpt
+    # - gofmt
-    - gosimple
+    # - goimports
    - govet
    - ineffassign
    - intrange
    - makezero
    - misspell
    - nilerr
    - nolintlint
    - nosprintfhostport
-    - staticcheck
+    - testifylint
    - tenv
    - unconvert
    - unused
    - usestdlibvars
    - wastedassign
    - whitespace
-linters-settings:
+    - usestdlibvars
  gci:
    sections: [standard, default, localmodule]
 severity:
  default-severity: error
  rules:
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,37 +0,0 @@
 # Contributing to Ollama
 Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
 ## Set up
 See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
 ## Pull requests
 ### Ideal issues
 * [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
 * [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
 * [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
 ### Issues that are harder to review
 * New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
 * Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
 * Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
 ### Issues that may not be accepted
 * Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
 * Changes that add significant friction to the user experience
 * Changes that create a large future maintenance burden for maintainers and contributors
 ### Best practices
 * Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
 * Tests: please add test coverage to changes where possible.
 * Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
 ## Need help?
 If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
--- a/135
+++ b/135
@@ -1,10 +1,8 @@
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
-ARG CUDA_VERSION_11=11.3.1
+# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
-ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
+ARG CUDA_VERSION=11.3.1
-ARG CUDA_VERSION_12=12.4.0
+ARG ROCM_VERSION=6.1.1
 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 ARG ROCM_VERSION=6.1.2
 # Copy the minimal context we need to run the generate scripts
 FROM scratch AS llm-code
@@ -12,7 +10,7 @@ COPY .git .git
 COPY .gitmodules .gitmodules
 COPY llm llm
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -20,34 +18,9 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ARG CUDA_V11_ARCHITECTURES
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 ENV GOARCH amd64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
    CUDA_VARIANT="_v11" \
    bash gen_linux.sh
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG CUDA_V12_ARCHITECTURES
 ENV GOARCH amd64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
    CUDA_VARIANT="_v12" \
    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
    bash gen_linux.sh
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -55,32 +28,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ARG CUDA_V11_ARCHITECTURES
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 ENV GOARCH arm64 
 RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
    CUDA_VARIANT="_v11" \
    bash gen_linux.sh
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG CUDA_V12_ARCHITECTURES
 ENV GOARCH arm64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
    CUDA_VARIANT="_v12" \
    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
    bash gen_linux.sh
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
 ARG CMAKE_VERSION
@@ -92,11 +40,15 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG AMDGPU_TARGETS
-ENV GOARCH amd64 
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-RUN --mount=type=cache,target=/root/.ccache \
+RUN mkdir /tmp/scratch && \
-    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
+    for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
-RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
+        cp ${dep} /tmp/scratch/ || exit 1 ; \
-    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
+    done && \
    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
@@ -107,21 +59,16 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
 ENV GOARCH amd64 
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
 FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
 ARG CMAKE_VERSION
@@ -132,15 +79,12 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
 ENV GOARCH arm64
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 # Intermediate stage used for ./scripts/build_linux.sh
@@ -151,16 +95,12 @@ COPY . .
 COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
+RUN go build -trimpath .
    go build -trimpath -o dist/linux-amd64/bin/ollama .
 # Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
@@ -169,36 +109,23 @@ ARG GOLANG_VERSION
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
 COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
+RUN go build -trimpath .
    go build -trimpath -o dist/linux-arm64/bin/ollama .
 # Strip out ROCm dependencies to keep the primary image lean
 FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
 RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa* 
 # Runtime stages
 FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
 COPY --from=amd64-libs-without-rocm /scratch/ /lib/
 RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
 COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
 RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 # Radeon images are much larger so we keep it distinct from the CPU/CUDA image
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
 RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 RUN ln -s /opt/rocm/lib /lib/ollama
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
--- a/README.md
+++ b/README.md
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
 ## Quickstart
-To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
+To run and chat with [Llama 3](https://ollama.com/library/llama3):
 ```
-ollama run llama3.1
+ollama run llama3
 ```
 ## Model library
@@ -49,12 +49,10 @@ Here are some example models that can be downloaded:
 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
+| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
-| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
+| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
 | Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
 | Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
 | Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
@@ -66,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
-> [!NOTE]
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 ## Customize a model
@@ -99,16 +96,16 @@ See the [guide](docs/import.md) on importing models for more information.
 ### Customize a prompt
-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 Create a `Modelfile`:
 ```
-FROM llama3.1
+FROM llama3
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -143,7 +140,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 > This command can also be used to update a local model. Only the diff will be pulled.
@@ -151,13 +148,13 @@ ollama pull llama3.1
 ### Remove a model
 ```
-ollama rm llama3.1
+ollama rm llama3
 ```
 ### Copy a model
 ```
-ollama cp llama3.1 my-model
+ollama cp llama3 my-model
 ```
 ### Multiline input
@@ -174,21 +171,21 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 ### Multimodal models
 ```
-ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 ### Pass the prompt as an argument
 ```
-$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
+$ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 ### Show model information
 ```
-ollama show llama3.1
+ollama show llama3
 ```
 ### List models on your computer
@@ -216,7 +213,7 @@ Next, start the server:
 Finally, in a separate shell, run a model:
 ```
-./ollama run llama3.1
+./ollama run llama3
 ```
 ## REST API
@@ -227,7 +224,7 @@ Ollama has a REST API for running and managing models.
 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "prompt":"Why is the sky blue?"
 }'
 ```
@@ -236,7 +233,7 @@ curl http://localhost:11434/api/generate -d '{
 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "messages": [
    { "role": "user", "content": "why is the sky blue?" }
  ]
@@ -296,12 +293,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
 - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
 ### Terminal
@@ -325,7 +316,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
 - [gollama](https://github.com/sammcj/gollama)
 - [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
 ### Database
@@ -341,7 +331,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Libraries
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
@@ -395,7 +384,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,25 +0,0 @@
 # Security
 The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
 ## Reporting a vulnerability
 If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
 Please include the following details in your report:
 - A description of the vulnerability
 - Steps to reproduce the issue
 - Your assessment of the potential impact
 - Any possible mitigations
 ## Security best practices
 While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
 - Regularly updating to the latest version of Ollama
 - Securing access to hosted instances of Ollama
 - Monitoring systems for unusual activity
 ## Contact
 For any other questions or concerns related to security, please contact us at hello@ollama.com
--- a/api/client.go
+++ b/api/client.go
@@ -17,14 +17,20 @@ import (
 	"bufio"
 	"bytes"
 	"context"
 	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
 	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"time"
 	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
@@ -63,8 +69,13 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
 	ollamaHost := envconfig.Host
 	return &Client{
-		base: envconfig.Host(),
+		base: &url.URL{
 			Scheme: ollamaHost.Scheme,
 			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
 		},
 		http: http.DefaultClient,
 	}, nil
 }
@@ -173,7 +184,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 		if errorResponse.Error != "" {
-			return errors.New(errorResponse.Error)
+			return fmt.Errorf(errorResponse.Error)
 		}
 		if response.StatusCode >= http.StatusBadRequest {
@@ -298,7 +309,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }
-// ListRunning lists running models.
+// List running models.
 func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
 	var lr ProcessResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -333,7 +344,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
 	return &resp, nil
 }
-// Heartbeat checks if the server has started and is responsive; if yes, it
+// Heartbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
@@ -342,16 +353,7 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	return nil
 }
-// Embed generates embeddings from a model.
+// Embeddings generates embeddings from a model.
 func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
 	var resp EmbedResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
 		return nil, err
 	}
 	return &resp, nil
 }
 // Embeddings generates an embedding from a model.
 func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
 	var resp EmbeddingResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
@@ -378,3 +380,27 @@ func (c *Client) Version(ctx context.Context) (string, error) {
 	return version.Version, nil
 }
 // Authorization builds an authorization token string for request.
 //
 // The payload that gets signed is "<method>,<request URI>,<unix time>",
 // using the current time. The token returned by auth.Sign is split at its
 // first ":" into the parts named key and sig below, and the base64-encoded
 // payload is placed between them, giving "<key>:<payload>:<sig>".
 //
 // An empty string and the underlying error are returned if the home
 // directory cannot be resolved, the known_hosts file cannot be opened, or
 // signing fails.
 func Authorization(ctx context.Context, request *http.Request) (string, error) {
 	data := []byte(fmt.Sprintf("%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix()))
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	// NOTE(review): this file is opened (and created if missing) but is never
 	// read from or written to in this function -- confirm it is still needed.
 	knownHostsFile, err := os.OpenFile(filepath.Join(home, ".ollama", "known_hosts"), os.O_CREATE|os.O_RDWR|os.O_APPEND, 0600)
 	if err != nil {
 		return "", err
 	}
 	defer knownHostsFile.Close()
 	token, err := auth.Sign(ctx, data)
 	if err != nil {
 		return "", err
 	}
 	// interleave request data into the token
 	// The "found" result of strings.Cut is ignored; if token has no ":",
 	// key is the whole token and sig is empty.
 	key, sig, _ := strings.Cut(token, ":")
 	return fmt.Sprintf("%s:%s:%s", key, base64.StdEncoding.EncodeToString(data), sig), nil
 }
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -2,6 +2,8 @@ package api
 import (
 	"testing"
 	"github.com/ollama/ollama/envconfig"
 )
 func TestClientFromEnvironment(t *testing.T) {
@@ -31,6 +33,7 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
 			envconfig.LoadConfig()
 			client, err := ClientFromEnvironment()
 			if err != v.err {
--- a/api/types.go
+++ b/api/types.go
@@ -47,9 +47,6 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`
 	// Suffix is the text that comes after the inserted text.
 	Suffix string `json:"suffix"`
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`
@@ -100,85 +97,17 @@ type ChatRequest struct {
 	// following the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Tools is an optional list of tools the model has access to.
 	Tools `json:"tools,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 // Tools is a list of Tool values.
 type Tools []Tool
 // String returns the JSON encoding of the list; a marshalling error is
 // ignored and yields whatever bytes json.Marshal returned.
 func (t Tools) String() string {
 	encoded, _ := json.Marshal(t)
 	text := string(encoded)
 	return text
 }
 // String returns the JSON encoding of the tool; a marshalling error is ignored.
 func (t Tool) String() string {
 	encoded, _ := json.Marshal(t)
 	text := string(encoded)
 	return text
 }
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
-	Role      string      `json:"role"`
+	Role    string      `json:"role"`
-	Content   string      `json:"content"`
+	Content string      `json:"content"`
-	Images    []ImageData `json:"images,omitempty"`
+	Images  []ImageData `json:"images,omitempty"`
 	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
 }
 // UnmarshalJSON decodes b into m and lower-cases the role. On a decode error
 // m is left untouched.
 func (m *Message) UnmarshalJSON(b []byte) error {
 	// plain has Message's fields but none of its methods, so decoding into it
 	// does not re-enter this UnmarshalJSON.
 	type plain Message
 	var decoded plain
 	err := json.Unmarshal(b, &decoded)
 	if err != nil {
 		return err
 	}
 	decoded.Role = strings.ToLower(decoded.Role)
 	*m = Message(decoded)
 	return nil
 }
 // ToolCall is one entry of Message.ToolCalls; it wraps the function being
 // called under the "function" JSON key.
 type ToolCall struct {
 	Function ToolCallFunction `json:"function"`
 }
 // ToolCallFunction holds the name of the function in a ToolCall together with
 // its arguments.
 type ToolCallFunction struct {
 	Name      string                    `json:"name"`
 	Arguments ToolCallFunctionArguments `json:"arguments"`
 }
 // ToolCallFunctionArguments maps argument names to arbitrary values.
 type ToolCallFunctionArguments map[string]any
 // String returns the JSON encoding of the arguments; a marshalling error is
 // ignored.
 func (t *ToolCallFunctionArguments) String() string {
 	// Marshal the pointer itself (not *t) so a nil receiver still encodes.
 	encoded, _ := json.Marshal(t)
 	text := string(encoded)
 	return text
 }
 // Tool is a single element of Tools: a type tag plus the description of the
 // function it exposes.
 type Tool struct {
 	// NOTE(review): the set of accepted Type values is not visible here.
 	Type     string       `json:"type"`
 	Function ToolFunction `json:"function"`
 }
 // ToolFunction describes the function of a Tool: its name, a free-text
 // description and a description of its parameters.
 type ToolFunction struct {
 	Name        string `json:"name"`
 	Description string `json:"description"`
 	// Parameters lists the required parameter names and, per parameter name,
 	// its type, description and optional set of allowed values.
 	// NOTE(review): the layout looks like a JSON-Schema object description;
 	// confirm against the consumer before relying on that.
 	Parameters  struct {
 		Type       string   `json:"type"`
 		Required   []string `json:"required"`
 		Properties map[string]struct {
 			Type        string   `json:"type"`
 			Description string   `json:"description"`
 			Enum        []string `json:"enum,omitempty"`
 		} `json:"properties"`
 	} `json:"parameters"`
 }
 // String returns the JSON encoding of the function description; a
 // marshalling error is ignored.
 func (t *ToolFunction) String() string {
 	encoded, _ := json.Marshal(t)
 	text := string(encoded)
 	return text
 }
 // ChatResponse is the response returned by [Client.Chat]. Its fields are
@@ -214,7 +143,6 @@ type Options struct {
 	NumPredict       int      `json:"num_predict,omitempty"`
 	TopK             int      `json:"top_k,omitempty"`
 	TopP             float32  `json:"top_p,omitempty"`
 	MinP             float32  `json:"min_p,omitempty"`
 	TFSZ             float32  `json:"tfs_z,omitempty"`
 	TypicalP         float32  `json:"typical_p,omitempty"`
 	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
@@ -231,6 +159,7 @@ type Options struct {
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
 	UseNUMA   bool  `json:"numa,omitempty"`
 	NumCtx    int   `json:"num_ctx,omitempty"`
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
@@ -244,34 +173,6 @@ type Runner struct {
 	NumThread int   `json:"num_thread,omitempty"`
 }
 // EmbedRequest is the request passed to [Client.Embed].
 type EmbedRequest struct {
 	// Model is the model name.
 	Model string `json:"model"`
 	// Input is the input to embed.
 	Input any `json:"input"`
 	// KeepAlive controls how long the model will stay loaded in memory following
 	// this request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Truncate is optional; as a pointer with omitempty, a nil value is left
 	// out of the encoded request entirely.
 	// NOTE(review): presumably toggles truncation of over-long input; confirm
 	// against the server handler.
 	Truncate *bool `json:"truncate,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 // EmbedResponse is the response from [Client.Embed].
 type EmbedResponse struct {
 	// Model is the model name.
 	Model      string      `json:"model"`
 	// Embeddings holds the returned vectors.
 	// NOTE(review): presumably one vector per input item; confirm.
 	Embeddings [][]float32 `json:"embeddings"`
 	// The timing and count fields below are omitted from JSON when zero.
 	TotalDuration   time.Duration `json:"total_duration,omitempty"`
 	LoadDuration    time.Duration `json:"load_duration,omitempty"`
 	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
 }
 // EmbeddingRequest is the request passed to [Client.Embeddings].
 type EmbeddingRequest struct {
 	// Model is the model name.
@@ -318,10 +219,8 @@ type DeleteRequest struct {
 // ShowRequest is the request passed to [Client.Show].
 type ShowRequest struct {
-	Model  string `json:"model"`
+	Model    string `json:"model"`
-	System string `json:"system"`
+	System   string `json:"system"`
 	// Template is deprecated
 	Template string `json:"template"`
 	Verbose  bool   `json:"verbose"`
@@ -368,6 +267,7 @@ type PullRequest struct {
 type ProgressResponse struct {
 	Status    string `json:"status"`
 	Digest    string `json:"digest,omitempty"`
 	Quantize  string `json:"quantize,omitempty"`
 	Total     int64  `json:"total,omitempty"`
 	Completed int64  `json:"completed,omitempty"`
 }
@@ -504,7 +404,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", key)
+			slog.Warn("invalid option provided", "option", opt.Name)
 			continue
 		}
@@ -614,6 +514,7 @@ func DefaultOptions() Options {
 			F16KV:     true,
 			UseMLock:  false,
 			UseMMap:   nil,
 			UseNUMA:   false,
 		},
 	}
 }
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -2,7 +2,7 @@ package api
 import (
 	"encoding/json"
-	"errors"
+	"fmt"
 	"math"
 	"testing"
 	"time"
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
 				"use_mmap": {"foo"},
 			},
 			exp: nil,
-			err: errors.New("invalid bool value [foo]"),
+			err: fmt.Errorf("invalid bool value [foo]"),
 		},
 	}
@@ -208,26 +208,3 @@ func TestUseMmapFormatParams(t *testing.T) {
 		})
 	}
 }
 // TestMessage_UnmarshalJSON checks that decoding a Message lower-cases its
 // role regardless of the casing used in the JSON input.
 func TestMessage_UnmarshalJSON(t *testing.T) {
 	type roleCase struct {
 		input    string
 		expected string
 	}
 	cases := []roleCase{
 		{input: `{"role": "USER", "content": "Hello!"}`, expected: "user"},
 		{input: `{"role": "System", "content": "Initialization complete."}`, expected: "system"},
 		{input: `{"role": "assistant", "content": "How can I help you?"}`, expected: "assistant"},
 		{input: `{"role": "TOOl", "content": "Access granted."}`, expected: "tool"},
 	}
 	for i := range cases {
 		tc := cases[i]
 		var decoded Message
 		err := json.Unmarshal([]byte(tc.input), &decoded)
 		if err != nil {
 			t.Errorf("Unexpected error: %v", err)
 		}
 		if decoded.Role != tc.expected {
 			t.Errorf("role not lowercased: got %v, expected %v", decoded.Role, tc.expected)
 		}
 	}
 }
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -2,8 +2,8 @@
 package lifecycle
-import "errors"
+import "fmt"
 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -14,7 +14,7 @@ import (
 func InitLogging() {
 	level := slog.LevelInfo
-	if envconfig.Debug() {
+	if envconfig.Debug {
 		level = slog.LevelDebug
 	}
@@ -27,7 +27,7 @@ func InitLogging() {
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
 		rotateLogs(AppLogFile)
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"
 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
--- a/app/lifecycle/logging_test.go
+++ b/app/lifecycle/logging_test.go
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
 	// No log exists
 	rotateLogs(logFile)
-	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
+	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
 	assert.FileExists(t, logFile)
 	// First rotation
 	rotateLogs(logFile)
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
 	assert.NoFileExists(t, logFile)
 	for i := 2; i <= LogRotationCount+1; i++ {
-		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
+		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
 		assert.FileExists(t, logFile)
 		rotateLogs(logFile)
 		assert.NoFileExists(t, logFile)
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	}
 	rotateLogs(ServerLogFile)
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
@@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -4,9 +4,9 @@ package lifecycle
 import (
 	"context"
-	"errors"
+	"fmt"
 )
 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -2,7 +2,6 @@ package lifecycle
 import (
 	"context"
 	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -65,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}
 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -87,11 +87,20 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
+Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
+#if DirExists("..\dist\windows-amd64\cuda")
  Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
 #endif
 #if DirExists("..\dist\windows-amd64\oneapi")
  Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
 #endif
 #if DirExists("..\dist\windows-amd64\rocm")
  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
 #endif
 [Icons]
 Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -99,7 +108,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
 [Run]
-Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
+Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
 [UninstallRun]
 ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -118,10 +127,6 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
 Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
 ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
 [InstallDelete]
 Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 [Messages]
 WizardReady=Ollama Windows Preview
 ReadyLabel1=%nLet's get you up and running with your own large language models.
@@ -129,13 +134,13 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.1
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
 ;ClickFinish=%n
 [Registry]
 Root: HKCU; Subkey: "Environment"; \
-    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
+    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
-    Check: NeedsAddPath('{app}\bin')
+    Check: NeedsAddPath('{app}')
 [Code]
--- a/app/ollama_welcome.ps1
+++ b/app/ollama_welcome.ps1
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama3.1"
+write-host "`tollama run llama3"
 write-host ""
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -3,11 +3,11 @@
 package tray
 import (
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/app/tray/commontray"
 )
 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
 }
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -11,7 +11,9 @@ import (
 	"golang.org/x/sys/windows"
 )
-var quitOnce sync.Once
+var (
 	quitOnce sync.Once
 )
 func (t *winTray) Run() {
 	nativeLoop()
--- a/app/tray/wintray/menus.go
+++ b/app/tray/wintray/menus.go
@@ -11,12 +11,12 @@ import (
 )
 const (
-	updateAvailableMenuID = 1
+	updatAvailableMenuID = 1
-	updateMenuID          = updateAvailableMenuID + 1
+	updateMenuID         = updatAvailableMenuID + 1
-	separatorMenuID       = updateMenuID + 1
+	separatorMenuID      = updateMenuID + 1
-	diagLogsMenuID        = separatorMenuID + 1
+	diagLogsMenuID       = separatorMenuID + 1
-	diagSeparatorMenuID   = diagLogsMenuID + 1
+	diagSeparatorMenuID  = diagLogsMenuID + 1
-	quitMenuID            = diagSeparatorMenuID + 1
+	quitMenuID           = diagSeparatorMenuID + 1
 )
 func (t *winTray) initMenus() error {
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
 func (t *winTray) UpdateAvailable(ver string) error {
 	if !t.updateNotified {
 		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
 			return fmt.Errorf("unable to create menu entries %w", err)
 		}
 		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -11,12 +11,10 @@ import (
 	"path/filepath"
 	"sort"
 	"sync"
 	"syscall"
 	"unsafe"
 	"golang.org/x/sys/windows"
 	"github.com/ollama/ollama/app/tray/commontray"
 	"golang.org/x/sys/windows"
 )
 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -416,7 +414,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
 			return "", err
 		}
 	}
@@ -434,12 +432,7 @@ func (t *winTray) setIcon(src string) error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON | NIF_TIP
+	t.nid.Flags |= NIF_ICON
 	if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
 		copy(t.nid.Tip[:], toolTipUTF16)
 	} else {
 		return err
 	}
 	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
 	return t.nid.modify()
--- a/app/tray/wintray/w32api.go
+++ b/app/tray/wintray/w32api.go
@@ -61,7 +61,6 @@ const (
 	MIIM_SUBMENU        = 0x00000004
 	MIM_APPLYTOSUBMENUS = 0x80000000
 	NIF_ICON            = 0x00000002
 	NIF_TIP             = 0x00000004
 	NIF_INFO            = 0x00000010
 	NIF_MESSAGE         = 0x00000001
 	SW_HIDE             = 0
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -5,48 +5,42 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"strings"
 	"golang.org/x/crypto/ssh"
 )
 const defaultPrivateKey = "id_ed25519"
-func keyPath() (string, error) {
+func keyPath() (ssh.Signer, error) {
 	home, err := os.UserHomeDir()
 	if err != nil {
-		return "", err
+		return nil, err
 	}
 	return filepath.Join(home, ".ollama", defaultPrivateKey), nil
 }
 func GetPublicKey() (string, error) {
 	keyPath, err := keyPath()
 	if err != nil {
 		return "", err
 	}
 	keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
 	privateKeyFile, err := os.ReadFile(keyPath)
 	if err != nil {
 		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
+		return nil, err
 	}
-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
+	return ssh.ParsePrivateKey(privateKeyFile)
 }
 func GetPublicKey() (ssh.PublicKey, error) {
 	privateKey, err := keyPath()
 	// if privateKey, try public key directly
 	if err != nil {
-		return "", err
+		return nil, err
 	}
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
+	return privateKey.PublicKey(), nil
 	return strings.TrimSpace(string(publicKey)), nil
 }
 func NewNonce(r io.Reader, length int) (string, error) {
@@ -59,27 +53,22 @@ func NewNonce(r io.Reader, length int) (string, error) {
 }
 func Sign(ctx context.Context, bts []byte) (string, error) {
-	keyPath, err := keyPath()
+	privateKey, err := keyPath()
 	if err != nil {
 		return "", err
 	}
 	privateKeyFile, err := os.ReadFile(keyPath)
 	if err != nil {
 		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
 		return "", err
 	}
 	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
 	if err != nil {
 		return "", err
 	}
 	// get the pubkey, but remove the type
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
+	publicKey, err := GetPublicKey()
-	parts := bytes.Split(publicKey, []byte(" "))
+	if err != nil {
 		return "", err
 	}
 	publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
 	parts := bytes.Split(publicKeyBytes, []byte(" "))
 	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
 	}
 	signedData, err := privateKey.Sign(rand.Reader, bts)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -7,6 +7,7 @@ import (
 	"crypto/ed25519"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/json"
 	"encoding/pem"
 	"errors"
 	"fmt"
@@ -15,6 +16,7 @@ import (
 	"math"
 	"net"
 	"net/http"
 	"net/url"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -22,7 +24,6 @@ import (
 	"runtime"
 	"slices"
 	"strings"
 	"sync/atomic"
 	"syscall"
 	"time"
@@ -114,16 +115,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = tempfile
 			}
 			// spinner.Stop()
 			digest, err := createBlob(cmd, client, path, spinner)
 			if err != nil {
 				return err
 			}
 			modelfile.Commands[i].Args = "@" + digest
 		}
 	}
 	bars := make(map[string]*progress.Bar)
 	var quantizeSpin *progress.Spinner
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
@@ -136,11 +138,20 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			}
 			bar.Set(resp.Completed)
 		} else if resp.Quantize != "" {
 			spinner.Stop()
 			if quantizeSpin != nil {
 				quantizeSpin.SetMessage(resp.Status)
 			} else {
 				quantizeSpin = progress.NewSpinner(resp.Status)
 				p.Add("quantize", quantizeSpin)
 			}
 		} else if status != resp.Status {
 			spinner.Stop()
 			status = resp.Status
-			spinner = progress.NewSpinner(status)
+			spinner := progress.NewSpinner(status)
 			p.Add(status, spinner)
 		}
@@ -204,12 +215,6 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
 	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
 		// covers adapters.safetensors
 		files = append(files, st...)
 	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
 		// covers adapter_model.safetensors
 		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
@@ -229,14 +234,6 @@ func tempZipFiles(path string) (string, error) {
 	}
 	files = append(files, js...)
 	// bert models require a nested config.json
 	// TODO(mxyng): merge this with the glob above
 	js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
 	if err != nil {
 		return "", err
 	}
 	files = append(files, js...)
 	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
 		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
 		// tokenizer.model might be a unresolved git lfs reference; error if it is
@@ -266,11 +263,6 @@ func tempZipFiles(path string) (string, error) {
 			return "", err
 		}
 		zfi.Name, err = filepath.Rel(path, file)
 		if err != nil {
 			return "", err
 		}
 		zf, err := zipfile.CreateHeader(zfi)
 		if err != nil {
 			return "", err
@@ -284,6 +276,8 @@ func tempZipFiles(path string) (string, error) {
 	return tempfile.Name(), nil
 }
 var ErrBlobExists = errors.New("blob exists")
 func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
@@ -308,19 +302,25 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
 	}
 	var pw progressWriter
 	// Create a progress bar and start a goroutine to update it
 	// JK Let's use a percentage
 	//bar := progress.NewBar("transferring model data...", fileSize, 0)
 	//p.Add("transferring model data", bar)
 	status := "transferring model data 0%"
 	spinner.SetMessage(status)
 	ticker := time.NewTicker(60 * time.Millisecond)
 	done := make(chan struct{})
 	defer close(done)
 	go func() {
 		ticker := time.NewTicker(60 * time.Millisecond)
 		defer ticker.Stop()
 		for {
 			select {
 			case <-ticker.C:
-				spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
+				spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n/fileSize)))
 			case <-done:
 				spinner.SetMessage("transferring model data 100%")
 				return
@@ -329,6 +329,34 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
 	}()
 	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
 	// We check if we can find the models directory locally
 	// If we can, we return the path to the directory
 	// If we can't, we return an error
 	// If the blob exists already, we return the digest
 	dest, err := getLocalPath(cmd.Context(), digest)
 	if errors.Is(err, ErrBlobExists) {
 		return digest, nil
 	}
 	// Successfully found the model directory
 	if err == nil {
 		// Copy blob in via OS specific copy
 		// Linux errors out to use io.copy
 		err = localCopy(path, dest)
 		if err == nil {
 			return digest, nil
 		}
 		// Default copy using io.copy
 		err = defaultCopy(path, dest)
 		if err == nil {
 			return digest, nil
 		}
 	}
 	// If at any point copying the blob over locally fails, we default to the copy through the server
 	if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
 		return "", err
 	}
@@ -336,14 +364,94 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
 }
 type progressWriter struct {
-	n atomic.Int64
+	n int64
 }
 func (w *progressWriter) Write(p []byte) (n int, err error) {
-	w.n.Add(int64(len(p)))
+	w.n += int64(len(p))
 	return len(p), nil
 }
 // getLocalPath asks the server where the blob for digest can be written
 // locally.
 //
 // It POSTs to /api/blobs/<digest> on the host from envconfig.Host with the
 // "X-Redirect-Create" header set. Results:
 //   - 307 Temporary Redirect: the "LocalLocation" response header is returned
 //     as the destination path.
 //   - any status >= 400: an error is returned, so the caller can fall back to
 //     uploading through the server instead of assuming the blob is present.
 //   - anything else: ErrBlobExists.
 func getLocalPath(ctx context.Context, digest string) (string, error) {
 	ollamaHost := envconfig.Host
 	base := &url.URL{
 		Scheme: ollamaHost.Scheme,
 		Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
 	}
 	data, err := json.Marshal(digest)
 	if err != nil {
 		return "", err
 	}
 	requestURL := base.JoinPath(fmt.Sprintf("/api/blobs/%s", digest))
 	request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(data))
 	if err != nil {
 		return "", err
 	}
 	authz, err := api.Authorization(ctx, request)
 	if err != nil {
 		return "", err
 	}
 	request.Header.Set("Authorization", authz)
 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
 	request.Header.Set("X-Redirect-Create", "1")
 	resp, err := http.DefaultClient.Do(request)
 	if err != nil {
 		return "", err
 	}
 	defer resp.Body.Close()
 	switch {
 	case resp.StatusCode == http.StatusTemporaryRedirect:
 		dest := resp.Header.Get("LocalLocation")
 		if dest == "" {
 			// A redirect without a target cannot be used for a local copy.
 			return "", fmt.Errorf("server redirect for blob %s did not include a LocalLocation header", digest)
 		}
 		return dest, nil
 	case resp.StatusCode >= http.StatusBadRequest:
 		// A client or server error never means "blob already present".
 		return "", fmt.Errorf("unexpected status %s while locating blob %s", resp.Status, digest)
 	default:
 		return "", ErrBlobExists
 	}
 }
 func defaultCopy(path string, dest string) error {
 	// This function should be called if the server is local
 	// It should find the model directory, copy the blob over, and return the digest
 	dirPath := filepath.Dir(dest)
 	if err := os.MkdirAll(dirPath, 0o755); err != nil {
 		return err
 	}
 	// Copy blob over
 	sourceFile, err := os.Open(path)
 	if err != nil {
 		return fmt.Errorf("could not open source file: %v", err)
 	}
 	defer sourceFile.Close()
 	destFile, err := os.Create(dest)
 	if err != nil {
 		return fmt.Errorf("could not create destination file: %v", err)
 	}
 	defer destFile.Close()
 	_, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024))
 	if err != nil {
 		return fmt.Errorf("error copying file: %v", err)
 	}
 	err = destFile.Sync()
 	if err != nil {
 		return fmt.Errorf("error flushing file: %v", err)
 	}
 	return nil
 }
 func RunHandler(cmd *cobra.Command, args []string) error {
 	interactive := true
@@ -420,24 +528,9 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	opts.MultiModal = slices.Contains(info.Details.Families, "clip")
 	opts.ParentModel = info.Details.ParentModel
 	opts.Messages = append(opts.Messages, info.Messages...)
 	if interactive {
 		if err := loadModel(cmd, &opts); err != nil {
 			return err
 		}
 		for _, msg := range info.Messages {
 			switch msg.Role {
 			case "user":
 				fmt.Printf(">>> %s\n", msg.Content)
 			case "assistant":
 				state := &displayResponseState{}
 				displayResponse(msg.Content, opts.WordWrap, state)
 				fmt.Println()
 				fmt.Println()
 			}
 		}
 		return generateInteractive(cmd, opts)
 	}
 	return generate(cmd, opts)
@@ -452,11 +545,13 @@ func errFromUnknownKey(unknownKeyErr error) error {
 	if len(matches) > 0 {
 		serverPubKey := matches[0]
-		localPubKey, err := auth.GetPublicKey()
+		publicKey, err := auth.GetPublicKey()
 		if err != nil {
 			return unknownKeyErr
 		}
 		localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
 		if runtime.GOOS == "linux" && serverPubKey != localPubKey {
 			// try the ollama service public key
 			svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
@@ -916,6 +1011,7 @@ type runOptions struct {
 	WordWrap    bool
 	Format      string
 	System      string
 	Template    string
 	Images      []api.ImageData
 	Options     map[string]interface{}
 	MultiModal  bool
@@ -1109,6 +1205,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		Images:    opts.Images,
 		Format:    opts.Format,
 		System:    opts.System,
 		Template:  opts.Template,
 		Options:   opts.Options,
 		KeepAlive: opts.KeepAlive,
 	}
@@ -1144,12 +1241,12 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 	return nil
 }
-func RunServer(_ *cobra.Command, _ []string) error {
+func RunServer(cmd *cobra.Command, _ []string) error {
 	if err := initializeKeypair(); err != nil {
 		return err
 	}
-	ln, err := net.Listen("tcp", envconfig.Host().Host)
+	ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
 	if err != nil {
 		return err
 	}
@@ -1218,7 +1315,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
@@ -1414,10 +1511,10 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_NUM_PARALLEL"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
 				envVars["OLLAMA_SCHED_SPREAD"],
 				envVars["OLLAMA_TMPDIR"],
 				envVars["OLLAMA_FLASH_ATTENTION"],
 				envVars["OLLAMA_LLM_LIBRARY"],
 				envVars["OLLAMA_MAX_VRAM"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
--- a/cmd/copy_darwin.go
+++ b/cmd/copy_darwin.go
@@ -0,0 +1,23 @@
 package cmd
 import (
 	"os"
 	"path/filepath"
 	"golang.org/x/sys/unix"
 )
 // localCopy makes sure target's parent directory exists and then clones src
 // to target with unix.Clonefile, returning the first error encountered.
 func localCopy(src, target string) error {
 	parent := filepath.Dir(target)
 	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
 		return mkErr
 	}
 	return unix.Clonefile(src, target, 0)
 }
--- a/cmd/copy_linux.go
+++ b/cmd/copy_linux.go
@@ -0,0 +1,7 @@
 package cmd
 import "errors"
 // localCopy always fails on linux: there is no OS-specific copy here, so the
 // arguments are ignored and a fixed error is returned.
 func localCopy(src, target string) error {
 	unsupported := errors.New("no local copy implementation for linux")
 	return unsupported
 }
--- a/cmd/copy_windows.go
+++ b/cmd/copy_windows.go
@@ -0,0 +1,67 @@
 //go:build windows
 // +build windows
 package cmd
 import (
 	"os"
 	"path/filepath"
 	"syscall"
 	"unsafe"
 )
 // localCopy copies src to target on Windows via CopyFileExW, creating the
 // target's parent directory first.
 //
 // It returns the error from os.MkdirAll or from copyFileEx unchanged.
 func localCopy(src, target string) error {
 	// Create target directory if it doesn't exist
 	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
 		return err
 	}
 	// CopyFileExW works on paths and opens both files itself. Pre-opening src
 	// and pre-creating target (as this function used to do) kept two unused
 	// handles alive during the copy and left an empty target file behind
 	// whenever the copy failed.
 	// NOTE(review): an open writable handle on target may also provoke a
 	// sharing violation inside CopyFileExW - confirm against the Win32 docs.
 	return copyFileEx(src, target)
 }
 func copyFileEx(src, dst string) error {
 	kernel32 := syscall.NewLazyDLL("kernel32.dll")
 	copyFileEx := kernel32.NewProc("CopyFileExW")
 	srcPtr, err := syscall.UTF16PtrFromString(src)
 	if err != nil {
 		return err
 	}
 	dstPtr, err := syscall.UTF16PtrFromString(dst)
 	if err != nil {
 		return err
 	}
 	r1, _, err := copyFileEx.Call(
 		uintptr(unsafe.Pointer(srcPtr)),
 		uintptr(unsafe.Pointer(dstPtr)),
 		0, 0, 0, 0)
 	if r1 == 0 {
 		return err
 	}
 	return nil
 }
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -1,7 +1,6 @@
 package cmd
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
@@ -10,14 +9,13 @@ import (
 	"path/filepath"
 	"regexp"
 	"slices"
 	"sort"
 	"strings"
 	"github.com/spf13/cobra"
 	"golang.org/x/exp/maps"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
@@ -29,6 +27,7 @@ const (
 	MultilineNone MultilineState = iota
 	MultilinePrompt
 	MultilineSystem
 	MultilineTemplate
 )
 func loadModel(cmd *cobra.Command, opts *runOptions) error {
@@ -48,10 +47,29 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
 		KeepAlive: opts.KeepAlive,
 	}
-	return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
+	return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
 		p.StopAndClear()
 		for _, msg := range opts.Messages {
 			switch msg.Role {
 			case "user":
 				fmt.Printf(">>> %s\n", msg.Content)
 			case "assistant":
 				state := &displayResponseState{}
 				displayResponse(msg.Content, opts.WordWrap, state)
 				fmt.Println()
 				fmt.Println()
 			}
 		}
 		return nil
 	})
 }
 func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 	err := loadModel(cmd, &opts)
 	if err != nil {
 		return err
 	}
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
@@ -76,6 +94,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set parameter ...     Set a parameter")
 		fmt.Fprintln(os.Stderr, "  /set system <string>   Set system message")
 		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
 		fmt.Fprintln(os.Stderr, "  /set history           Enable history")
 		fmt.Fprintln(os.Stderr, "  /set nohistory         Disable history")
 		fmt.Fprintln(os.Stderr, "  /set wordwrap          Enable wordwrap")
@@ -121,7 +140,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
 		fmt.Fprintln(os.Stderr, "  /set parameter min_p <float>          Pick token based on top token probability * min_p")
 		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
 		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
 		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
@@ -141,7 +159,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		return err
 	}
-	if envconfig.NoHistory() {
+	if envconfig.NoHistory {
 		scanner.HistoryDisable()
 	}
@@ -186,6 +204,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
 				fmt.Println("Set system message.")
 				sb.Reset()
 			case MultilineTemplate:
 				opts.Template = sb.String()
 				fmt.Println("Set prompt template.")
 				sb.Reset()
 			}
 			multiline = MultilineNone
@@ -304,13 +326,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					}
 					fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
 					opts.Options[args[2]] = fp[args[2]]
-				case "system":
+				case "system", "template":
 					if len(args) < 3 {
 						usageSet()
 						continue
 					}
-					multiline = MultilineSystem
+					if args[1] == "system" {
 						multiline = MultilineSystem
 					} else if args[1] == "template" {
 						multiline = MultilineTemplate
 					}
 					line := strings.Join(args[2:], " ")
 					line, ok := strings.CutPrefix(line, `"""`)
@@ -330,17 +356,23 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						continue
 					}
-					opts.System = sb.String() // for display in modelfile
+					if args[1] == "system" {
-					newMessage := api.Message{Role: "system", Content: sb.String()}
+						opts.System = sb.String() // for display in modelfile
-					// Check if the slice is not empty and the last message is from 'system'
+						newMessage := api.Message{Role: "system", Content: sb.String()}
-					if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
+						// Check if the slice is not empty and the last message is from 'system'
-						// Replace the last message
+						if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
-						opts.Messages[len(opts.Messages)-1] = newMessage
+							// Replace the last message
-					} else {
+							opts.Messages[len(opts.Messages)-1] = newMessage
-						opts.Messages = append(opts.Messages, newMessage)
+						} else {
 							opts.Messages = append(opts.Messages, newMessage)
 						}
 						fmt.Println("Set system message.")
 						sb.Reset()
 					} else if args[1] == "template" {
 						opts.Template = sb.String()
 						fmt.Println("Set prompt template.")
 						sb.Reset()
 					}
 					fmt.Println("Set system message.")
 					sb.Reset()
 					sb.Reset()
 					continue
@@ -359,9 +391,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					return err
 				}
 				req := &api.ShowRequest{
-					Name:    opts.Model,
+					Name:     opts.Model,
-					System:  opts.System,
+					System:   opts.System,
-					Options: opts.Options,
+					Template: opts.Template,
 					Options:  opts.Options,
 				}
 				resp, err := client.Show(cmd.Context(), req)
 				if err != nil {
@@ -404,9 +437,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						fmt.Println("No system message was specified for this model.")
 					}
 				case "template":
-					if resp.Template != "" {
+					switch {
 					case opts.Template != "":
 						fmt.Println(opts.Template + "\n")
 					case resp.Template != "":
 						fmt.Println(resp.Template)
-					} else {
+					default:
 						fmt.Println("No prompt template was specified for this model.")
 					}
 				default:
@@ -490,35 +526,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 }
 func buildModelfile(opts runOptions) string {
-	var f parser.File
+	var mf strings.Builder
-	f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
+	model := opts.ParentModel
-
+	if model == "" {
 		model = opts.Model
 	}
 	fmt.Fprintf(&mf, "FROM %s\n", model)
 	if opts.System != "" {
-		f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
 	}
-	keys := maps.Keys(opts.Options)
+	if opts.Template != "" {
-	slices.Sort(keys)
+		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
 	}
 	keys := make([]string, 0)
 	for k := range opts.Options {
 		keys = append(keys, k)
 	}
 	sort.Strings(keys)
 	for _, k := range keys {
-		v := opts.Options[k]
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
 		var cmds []parser.Command
 		switch t := v.(type) {
 		case []string:
 			for _, s := range t {
 				cmds = append(cmds, parser.Command{Name: k, Args: s})
 			}
 		default:
 			cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
 		}
 		f.Commands = append(f.Commands, cmds...)
 	}
 	fmt.Fprintln(&mf)
 	for _, msg := range opts.Messages {
-		f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
 	}
-	return f.String()
+	return mf.String()
 }
 func normalizeFilePath(fp string) string {
@@ -604,7 +640,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, errors.New("file size exceeds maximum limit (100MB)")
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
 	}
 	buf = make([]byte, info.Size())
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -1,10 +1,12 @@
 package cmd
 import (
 	"bytes"
 	"testing"
 	"text/template"
 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/ollama/ollama/api"
 )
@@ -55,53 +57,61 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
 func TestModelfileBuilder(t *testing.T) {
 	opts := runOptions{
-		Model:  "hork",
+		Model:    "hork",
-		System: "You are part horse and part shark, but all hork. Do horklike things",
+		System:   "You are part horse and part shark, but all hork. Do horklike things",
 		Template: "This is a template.",
 		Messages: []api.Message{
 			{Role: "user", Content: "Hey there hork!"},
 			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
 		},
-		Options: map[string]any{
+		Options: map[string]interface{}{},
 			"temperature":      0.9,
 			"seed":             42,
 			"penalize_newline": false,
 			"stop":             []string{"hi", "there"},
 		},
 	}
-	t.Run("model", func(t *testing.T) {
+	opts.Options["temperature"] = 0.9
-		expect := `FROM hork
+	opts.Options["seed"] = 42
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	opts.Options["penalize_newline"] = false
 	opts.Options["stop"] = []string{"hi", "there"}
 	mf := buildModelfile(opts)
 	expectedModelfile := `FROM {{.Model}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+	tmpl, err := template.New("").Parse(expectedModelfile)
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	require.NoError(t, err)
 			t.Errorf("mismatch (-want +got):\n%s", diff)
 		}
 	})
-	t.Run("parent model", func(t *testing.T) {
+	var buf bytes.Buffer
-		opts.ParentModel = "horseshark"
+	err = tmpl.Execute(&buf, opts)
-		expect := `FROM horseshark
+	require.NoError(t, err)
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	assert.Equal(t, buf.String(), mf)
 	opts.ParentModel = "horseshark"
 	mf = buildModelfile(opts)
 	expectedModelfile = `FROM {{.ParentModel}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	tmpl, err = template.New("").Parse(expectedModelfile)
-			t.Errorf("mismatch (-want +got):\n%s", diff)
+	require.NoError(t, err)
-		}
+
-	})
+	var parentBuf bytes.Buffer
 	err = tmpl.Execute(&parentBuf, opts)
 	require.NoError(t, err)
 	assert.Equal(t, parentBuf.String(), mf)
 }
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -2,7 +2,7 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -4,11 +4,11 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/api"
 )
 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 			// Finally look in the path
 			appExe, err = exec.LookPath(AppName)
 			if err != nil {
-				return errors.New("could not locate ollama app")
+				return fmt.Errorf("could not locate ollama app")
 			}
 		}
 	}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -1,228 +1,200 @@
 package convert
 import (
 	"cmp"
 	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"slices"
 	"strings"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 	"github.com/ollama/ollama/llm"
 )
-type ModelParameters struct {
+const (
-	Architectures []string `json:"architectures"`
+	_ int32 = iota
-	VocabSize     uint32   `json:"vocab_size"`
+	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 type Params struct {
 	Architectures     []string `json:"architectures"`
 	VocabSize         int      `json:"vocab_size"`
 	HiddenSize        int      `json:"hidden_size"`       // n_embd
 	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
 	ContextSize       int      `json:"max_position_embeddings"`
 	IntermediateSize  int      `json:"intermediate_size"`
 	AttentionHeads    int      `json:"num_attention_heads"` // n_head
 	KeyValHeads       int      `json:"num_key_value_heads"`
 	NormEPS           float64  `json:"rms_norm_eps"`
 	BoSTokenID        int      `json:"bos_token_id"`
 	EoSTokenID        int      `json:"eos_token_id"`
 	HeadDimension     int      `json:"head_dim"`
 	PaddingTokenID    int      `json:"pad_token_id"`
 	RopeFrequencyBase float64  `json:"rope_theta"`
 	Experts     int `json:"num_local_experts"`
 	ExpertsUsed int `json:"num_experts_per_tok"`
 	PreTokenizer string
 	ByteOrder
 }
-type AdapterParameters struct {
+type ByteOrder interface {
-	Alpha          uint32 `json:"lora_alpha"`
+	binary.ByteOrder
-	LoraLayers     uint32 `json:"lora_layers"`
+	binary.AppendByteOrder
 	LoraParameters struct {
 		Rank  uint32  `json:"rank"`
 		Alpha float32 `json:"alpha"`
 		Scale float32 `json:"scale"`
 	} `json:"lora_parameters"`
 }
-func (ModelParameters) KV(t *Tokenizer) llm.KV {
+type ModelArch interface {
-	kv := llm.KV{
+	GetTensors() error
-		"general.file_type":            uint32(1),
+	LoadVocab() error
-		"general.quantization_version": uint32(2),
+	WriteGGUF(io.WriteSeeker) error
 		"tokenizer.ggml.pre":           t.Pre,
 		"tokenizer.ggml.model":         t.Vocabulary.Model,
 		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
 		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
 		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
 	}
 	if len(t.Merges) > 0 {
 		kv["tokenizer.ggml.merges"] = t.Merges
 	}
 	if t.Template != "" {
 		kv["tokenizer.chat_template"] = t.Template
 	}
 	for _, sv := range t.SpecialVocabulary {
 		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
 		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
 	}
 	return kv
 }
-func (p AdapterParameters) KV() llm.KV {
+type ModelFormat interface {
-	var alpha float32
+	GetLayerName(string) (string, error)
-	if p.LoraParameters.Alpha == 0 {
+	GetTensors(string, *Params) ([]llm.Tensor, error)
-		alpha = float32(p.Alpha)
+	GetParams(string) (*Params, error)
-	} else {
+	GetModelArch(string, string, *Params) (ModelArch, error)
 		alpha = p.LoraParameters.Alpha
 	}
 	kv := llm.KV{
 		"adapter.lora.alpha": alpha,
 		"adapter.type":       "lora",
 		"general.file_type":  uint32(1),
 		"general.type":       "adapter",
 		"general.version":    "v0.2",
 	}
 	return kv
 }
-func (ModelParameters) specialTokenTypes() []string {
+type ModelData struct {
-	return []string{
+	Path    string
-		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
+	Name    string
-	}
+	Params  *Params
 	Vocab   *Vocab
 	Tensors []llm.Tensor
 	Format  ModelFormat
 }
-func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+func GetModelFormat(dirname string) (ModelFormat, error) {
-	return llm.WriteGGUF(ws, kv, ts)
+	files, err := filepath.Glob(filepath.Join(dirname, "*"))
 }
 func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
 	// writeFile writes the model to the provided io.WriteSeeker
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 type moreParser interface {
 	parseMore(fs.FS) error
 }
 type AdapterConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(llm.KV) llm.KV
 	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
-		return err
+		return nil, err
 	}
-	var p AdapterParameters
+	for _, fn := range files {
-	if err := json.Unmarshal(bts, &p); err != nil {
+		if strings.HasSuffix(fn, ".safetensors") {
-		return err
+			return &SafetensorFormat{}, nil
-	}
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
-
+			slog.Debug("model is torch")
-	arch, ok := baseKV["general.architecture"]
+			return &TorchFormat{}, nil
 	if !ok {
 		return errors.New("architecture not set for the base model")
 	}
 	var conv AdapterConverter
 	switch arch {
 	case "llama":
 		conv = &llamaAdapter{}
 	case "gemma2":
 		conv = &gemma2Adapter{}
 	default:
 		return errors.New("unsupported architecture")
 	}
 	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
 	if err != nil {
 		return err
 	}
 	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
 }
 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
 func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
 	}
 	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}
 	if len(p.Architectures) < 1 {
 		return errors.New("unknown architecture")
 	}
 	var conv ModelConverter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
 		conv = &llamaModel{}
 	case "MixtralForCausalLM":
 		conv = &mixtralModel{}
 	case "GemmaForCausalLM":
 		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
 		conv = &gemma2Model{}
 	case "Phi3ForCausalLM":
 		conv = &phi3Model{}
 	case "BertModel":
 		conv = &bertModel{}
 	default:
 		return errors.New("unsupported architecture")
 	}
 	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 	if t, ok := conv.(moreParser); ok {
 		if err := t.parseMore(fsys); err != nil {
 			return err
 		}
 	}
-	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
+	return nil, fmt.Errorf("couldn't determine model format")
-	if err != nil {
+}
-		return err
+
-	}
+// Details on gguf's tokenizer can be found at:
-
+// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
-	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
+type Vocab struct {
-		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
+	Tokens []string
-		for i := range vocabSize - len(t.Vocabulary.Tokens) {
+	Scores []float32
-			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
+	Types  []int32
-			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
+	Merges []string
-			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
+}
-		}
+
-	} else {
+func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
-		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
+	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
-	}
+	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
-
+	if err != nil {
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
+		return nil, err
-	if err != nil {
+	}
-		return err
+
-	}
+	// To regenerate sentencepiece from the protobufs use:
-
+	// protoc -I=./ --go_out=./ sentencepiece_model.proto
-	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
+	modelProto := &sentencepiece.ModelProto{}
 	if err := proto.Unmarshal(in, modelProto); err != nil {
 		return nil, err
 	}
 	v := &Vocab{
 		Tokens: make([]string, 0),
 		Scores: make([]float32, 0),
 		Types:  make([]int32, 0),
 	}
 	pieces := modelProto.GetPieces()
 	for _, p := range pieces {
 		v.Tokens = append(v.Tokens, p.GetPiece())
 		v.Scores = append(v.Scores, p.GetScore())
 		t := p.GetType()
 		switch t {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
 		case sentencepiece.ModelProto_SentencePiece_CONTROL:
 		case sentencepiece.ModelProto_SentencePiece_UNUSED:
 		case sentencepiece.ModelProto_SentencePiece_BYTE:
 		default:
 			t = sentencepiece.ModelProto_SentencePiece_NORMAL
 		}
 		v.Types = append(v.Types, int32(t))
 	}
 	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
 	// add any additional tokens
 	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
 	if os.IsNotExist(err) {
 		return v, nil
 	} else if err != nil {
 		return nil, err
 	}
 	slog.Info("reading user defined tokens")
 	var extraTokenData map[string]int
 	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
 		return nil, err
 	}
 	type token struct {
 		key string
 		pos int
 	}
 	extraTokens := make([]token, 0)
 	for k, id := range extraTokenData {
 		extraTokens = append(extraTokens, token{k, id})
 	}
 	slices.SortFunc(extraTokens, func(a, b token) int {
 		return cmp.Compare(a.pos, b.pos)
 	})
 	numToks := len(v.Tokens)
 	for cnt, t := range extraTokens {
 		// the token id should match the specific index for the total number of tokens
 		if t.pos != cnt+numToks {
 			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
 		}
 		v.Tokens = append(v.Tokens, t.key)
 		v.Scores = append(v.Scores, -1000.0)
 		v.Types = append(v.Types, tokenTypeUserDefined)
 	}
 	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
 	if params.VocabSize > len(v.Tokens) {
 		missingTokens := params.VocabSize - len(v.Tokens)
 		slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
 		for cnt := range missingTokens {
 			v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
 			v.Scores = append(v.Scores, -1)
 			v.Types = append(v.Types, tokenTypeUserDefined)
 		}
 	}
 	return v, nil
 }
--- a/convert/convert_bert.go
+++ b/convert/convert_bert.go
@@ -1,174 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/json"
 	"io/fs"
 	"path/filepath"
 	"slices"
 	"strings"
 	"github.com/ollama/ollama/llm"
 )
 // bertModel holds the configuration values used to convert a BERT model.
 // Several settings are accepted under more than one JSON key; KV picks the
 // first non-zero alternative with cmp.Or.
 type bertModel struct {
 	ModelParameters
 	// Layer count alternatives (combined into "bert.block_count").
 	NLayers               uint32  `json:"n_layers"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
 	NLayer                uint32  `json:"n_layer"`
 	// Context length alternatives ("bert.context_length").
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	NCtx                  uint32  `json:"n_ctx"`
 	// Embedding length alternatives ("bert.embedding_length").
 	HiddenSize            uint32  `json:"hidden_size"`
 	NEmbd                 uint32  `json:"n_embd"`
 	// Feed-forward length alternatives ("bert.feed_forward_length").
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NInner                uint32  `json:"n_inner"`
 	// Attention head count alternatives ("bert.attention.head_count").
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NHead                 uint32  `json:"n_head"`
 	// Not referenced by the methods in this file.
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	// Layer-norm epsilon alternatives ("bert.attention.layer_norm_epsilon").
 	LayerNormEPS          float32 `json:"layer_norm_eps"`
 	LayerNormEpsilon      float32 `json:"layer_norm_epsilon"`
 	NormEpsilon           float32 `json:"norm_epsilon"`
 	// PoolingType has no JSON tag; parseMore sets it to 1 when mean-token
 	// pooling is enabled, otherwise to 2 when CLS-token pooling is enabled.
 	PoolingType uint32
 }
 // Compile-time checks that *bertModel satisfies both interfaces.
 var (
 	_ ModelConverter = (*bertModel)(nil)
 	_ moreParser     = (*bertModel)(nil)
 )
 // parseMore reads "modules.json" from fsys, looks for the first module of
 // type "sentence_transformers.models.Pooling" and, if one is present, reads
 // "<module path>/config.json" to decide p.PoolingType: 1 for mean-token
 // pooling, otherwise 2 for CLS-token pooling. PoolingType is left unchanged
 // when no pooling module exists or neither mode is enabled.
 //
 // It returns any error from reading or decoding either file.
 func (p *bertModel) parseMore(fsys fs.FS) error {
 	bts, err := fs.ReadFile(fsys, "modules.json")
 	if err != nil {
 		return err
 	}
 	var modules []struct {
 		Type string `json:"type"`
 		Path string `json:"path"`
 	}
 	if err := json.Unmarshal(bts, &modules); err != nil {
 		return err
 	}
 	var pooling string
 	for _, m := range modules {
 		if m.Type == "sentence_transformers.models.Pooling" {
 			pooling = m.Path
 			break
 		}
 	}
 	if pooling == "" {
 		return nil
 	}
 	// fs.FS names are always slash-separated, while filepath.Join uses the
 	// host separator (a backslash on Windows). Normalize before the lookup
 	// so the pooling config is found on every platform.
 	name := filepath.ToSlash(filepath.Join(pooling, "config.json"))
 	bts, err = fs.ReadFile(fsys, name)
 	if err != nil {
 		return err
 	}
 	var pc struct {
 		PoolingModeCLSToken   bool `json:"pooling_mode_cls_token"`
 		PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
 	}
 	if err := json.Unmarshal(bts, &pc); err != nil {
 		return err
 	}
 	// Mean-token pooling takes precedence when both modes are enabled.
 	if pc.PoolingModeMeanTokens {
 		p.PoolingType = 1
 	} else if pc.PoolingModeCLSToken {
 		p.PoolingType = 2
 	}
 	return nil
 }
 // KV starts from the shared ModelParameters key-values and adds the BERT
 // entries. Optional numeric settings are written only when the first non-zero
 // alternative is greater than zero. It also rewrites t.Tokens in place:
 // tokens wrapped in square brackets are kept, a leading "##" is stripped, and
 // every other token is prefixed with U+2581.
 func (p *bertModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType
 	kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
 	if n := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); n > 0 {
 		kv["bert.context_length"] = n
 	}
 	if n := cmp.Or(p.HiddenSize, p.NEmbd); n > 0 {
 		kv["bert.embedding_length"] = n
 	}
 	if n := cmp.Or(p.IntermediateSize, p.NInner); n > 0 {
 		kv["bert.feed_forward_length"] = n
 	}
 	if n := cmp.Or(p.NumAttentionHeads, p.NHead); n > 0 {
 		kv["bert.attention.head_count"] = n
 	}
 	if eps := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); eps > 0 {
 		kv["bert.attention.layer_norm_epsilon"] = eps
 	}
 	kv["tokenizer.ggml.model"] = "bert"
 	kv["tokenizer.ggml.token_type_count"] = uint32(2)
 	// convert to phantom space tokens
 	for idx, tok := range t.Tokens {
 		switch {
 		case strings.HasPrefix(tok, "[") && strings.HasSuffix(tok, "]"):
 			// bracketed tokens are left untouched
 		case strings.HasPrefix(tok, "##"):
 			t.Tokens[idx] = strings.TrimPrefix(tok, "##")
 		default:
 			t.Tokens[idx] = "\u2581" + tok
 		}
 	}
 	kv["tokenizer.ggml.tokens"] = t.Tokens
 	return kv
 }
 // Tensors wraps every input tensor in an llm.Tensor, dropping the three
 // tensors named "embeddings.position_ids", "pooler.dense.weight" and
 // "pooler.dense.bias".
 func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for _, in := range ts {
 		switch in.Name() {
 		case "embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias":
 			continue
 		}
 		converted = append(converted, llm.Tensor{
 			Name:     in.Name(),
 			Kind:     in.Kind(),
 			Shape:    in.Shape(),
 			WriterTo: in,
 		})
 	}
 	return converted
 }
 // Replacements returns old/new string pairs used to rename BERT tensor names.
 //
 // The pairs are handed to strings.NewReplacer, which compares old strings in
 // argument order: when two old strings match at the same position, the one
 // listed first wins. "encoder.layers" must therefore come before its prefix
 // "encoder.layer"; in the opposite order "encoder.layers" is never matched
 // and such names become "blks" instead of "blk".
 func (bertModel) Replacements() []string {
 	return []string{
 		"encoder.layers", "blk",
 		"encoder.layer", "blk",
 		"embeddings.word_embeddings", "token_embd",
 		"embeddings.token_type_embeddings", "token_types",
 		"embeddings.LayerNorm", "token_embd_norm",
 		"embeddings.position_embeddings", "position_embd",
 		"attention.self.query", "attn_q",
 		"attention.self.key", "attn_k",
 		"attention.self.value", "attn_v",
 		"attention.output.dense", "attn_output",
 		"attention.output.LayerNorm", "attn_output_norm",
 		"intermediate.dense", "ffn_up",
 		"output.dense", "ffn_down",
 		"output.LayerNorm", "layer_output_norm",
 	}
 }
--- a/convert/convert_gemma.go
+++ b/convert/convert_gemma.go
@@ -1,100 +0,0 @@
 package convert
 import (
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // gemmaModel holds the configuration values used to convert a Gemma model.
 // Each field is decoded from the JSON key in its tag and written by KV under
 // the corresponding "gemma.*" key.
 type gemmaModel struct {
 	ModelParameters
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"` // gemma.context_length
 	HiddenSize            uint32  `json:"hidden_size"` // gemma.embedding_length
 	HiddenLayers          uint32  `json:"num_hidden_layers"` // gemma.block_count
 	IntermediateSize      uint32  `json:"intermediate_size"` // gemma.feed_forward_length
 	NumAttentionHeads     uint32  `json:"num_attention_heads"` // gemma.attention.head_count
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"` // gemma.attention.head_count_kv
 	RMSNormEPS            float32 `json:"rms_norm_eps"` // gemma.attention.layer_norm_rms_epsilon
 	HeadDim               uint32  `json:"head_dim"` // used for both key_length and value_length
 }
 // Compile-time check that *gemmaModel satisfies ModelConverter.
 var _ ModelConverter = (*gemmaModel)(nil)
 // KV starts from the shared ModelParameters key-values and adds the Gemma
 // architecture settings plus four fixed tokenizer token IDs.
 func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	extra := map[string]any{
 		"general.architecture":                   "gemma",
 		"gemma.context_length":                   p.MaxPositionEmbeddings,
 		"gemma.embedding_length":                 p.HiddenSize,
 		"gemma.block_count":                      p.HiddenLayers,
 		"gemma.feed_forward_length":              p.IntermediateSize,
 		"gemma.attention.head_count":             p.NumAttentionHeads,
 		"gemma.attention.head_count_kv":          p.NumKeyValueHeads,
 		"gemma.attention.layer_norm_rms_epsilon": p.RMSNormEPS,
 		"gemma.attention.key_length":             p.HeadDim,
 		"gemma.attention.value_length":           p.HeadDim,
 		"tokenizer.ggml.eot_token_id":            uint32(107),
 		"tokenizer.ggml.middle_token_id":         uint32(68),
 		"tokenizer.ggml.prefix_token_id":         uint32(67),
 		"tokenizer.ggml.suffix_token_id":         uint32(69),
 	}
 	for key, val := range extra {
 		kv[key] = val
 	}
 	return kv
 }
 // Tensors wraps every input tensor in an llm.Tensor. Tensors whose name ends
 // in "_norm.weight" first get p.addOne installed as their repacker.
 func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for _, in := range ts {
 		isNorm := strings.HasSuffix(in.Name(), "_norm.weight")
 		if isNorm {
 			in.SetRepacker(p.addOne)
 		}
 		wrapped := llm.Tensor{
 			Name:     in.Name(),
 			Kind:     in.Kind(),
 			Shape:    in.Shape(),
 			WriterTo: in,
 		}
 		converted = append(converted, wrapped)
 	}
 	return converted
 }
 // Replacements returns old/new string pairs (old at even indexes, new at odd
 // indexes) used to rename Gemma tensor names.
 func (p *gemmaModel) Replacements() []string {
 	pairs := []string{
 		// top-level tensors
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		// attention
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		// feed-forward
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 	return pairs
 }
 // addOne builds a tensor of length shape[0] backed by data, adds a float32
 // tensor of ones to it, and returns the result flattened into one slice.
 // Errors from the tensor addition or from native.SelectF32 are returned.
 func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	size := int(shape[0])
 	base := tensor.New(tensor.WithShape(size), tensor.WithBacking(data))
 	sum, err := base.Add(tensor.Ones(tensor.Float32, size))
 	if err != nil {
 		return nil, err
 	}
 	rows, err := native.SelectF32(sum, 0)
 	if err != nil {
 		return nil, err
 	}
 	var flat []float32
 	for i := range rows {
 		flat = append(flat, rows[i]...)
 	}
 	return flat, nil
 }
--- a/convert/convert_gemma2.go
+++ b/convert/convert_gemma2.go
@@ -1,43 +0,0 @@
 package convert
 import (
 	"github.com/ollama/ollama/llm"
 )
 // gemma2Model extends gemmaModel with the additional configuration values
 // that KV writes under the "gemma2." prefix. Fields are populated from the
 // JSON keys named in their tags.
 type gemma2Model struct {
 	gemmaModel
 	SlidingWindow         uint32  `json:"sliding_window"`
 	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 }
 // KV starts from the shared ModelParameters metadata and adds the gemma2
 // architecture keys plus four fixed tokenizer token ids.
 func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
 	meta := p.ModelParameters.KV(t)
 	meta["general.architecture"] = "gemma2"
 	// model dimensions
 	meta["gemma2.context_length"] = p.MaxPositionEmbeddings
 	meta["gemma2.embedding_length"] = p.HiddenSize
 	meta["gemma2.block_count"] = p.HiddenLayers
 	meta["gemma2.feed_forward_length"] = p.IntermediateSize
 	// attention
 	meta["gemma2.attention.head_count"] = p.NumAttentionHeads
 	meta["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
 	meta["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	meta["gemma2.attention.key_length"] = p.HeadDim
 	meta["gemma2.attention.value_length"] = p.HeadDim
 	meta["gemma2.attention.sliding_window"] = p.SlidingWindow
 	// logit soft-capping
 	meta["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
 	meta["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
 	// fixed tokenizer ids
 	meta["tokenizer.ggml.prefix_token_id"] = uint32(67)
 	meta["tokenizer.ggml.middle_token_id"] = uint32(68)
 	meta["tokenizer.ggml.suffix_token_id"] = uint32(69)
 	meta["tokenizer.ggml.eot_token_id"] = uint32(107)
 	return meta
 }
 // Replacements returns the gemma replacement pairs with the gemma2-specific
 // layer-norm names applied.
 //
 // The inherited list already maps "post_attention_layernorm" to "ffn_norm".
 // Appending a second pair for the same old string would leave the outcome
 // dependent on how the consumer resolves duplicates, and could map both
 // "post_attention_layernorm" and "pre_feedforward_layernorm" to "ffn_norm".
 // The inherited pair is therefore dropped before the gemma2 pairs are added.
 // NOTE(review): assumes the list is consumed as old/new pairs, the same way
 // strings.NewReplacer takes its arguments - confirm against the caller.
 func (p *gemma2Model) Replacements() []string {
 	base := p.gemmaModel.Replacements()
 	pairs := make([]string, 0, len(base)+6)
 	for i := 0; i+1 < len(base); i += 2 {
 		if base[i] == "post_attention_layernorm" {
 			// superseded by the gemma2 mapping below
 			continue
 		}
 		pairs = append(pairs, base[i], base[i+1])
 	}
 	return append(
 		pairs,
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
 	)
 }
--- a/convert/convert_gemma2_adapter.go
+++ b/convert/convert_gemma2_adapter.go
@@ -1,91 +0,0 @@
 package convert
 import (
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // gemma2Adapter converts adapter tensors for the "gemma2" architecture. It
 // carries no configuration beyond the embedded AdapterParameters.
 type gemma2Adapter struct {
 	AdapterParameters
 }
 var _ AdapterConverter = (*gemma2Adapter)(nil)
 // KV returns the adapter metadata tagged with the "gemma2" architecture.
 // baseKV is not consulted.
 func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
 	meta := p.AdapterParameters.KV()
 	meta["general.architecture"] = "gemma2"
 	return meta
 }
 // Tensors wraps each adapter tensor in an llm.Tensor. A "weight.lora_a" tensor
 // with more rows than columns, or a "weight.lora_b" tensor with fewer rows
 // than columns, has its two dimensions swapped and is given the transposing
 // repacker.
 //
 // The swapped shape is the one reported in the output, matching
 // llamaAdapter.Tensors. The original re-read t.Shape(), which only reflects
 // the swap if Shape() hands back its internal slice.
 func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		// only rank-2 tensors can be transposed; guard the shape[1] access
 		if len(shape) >= 2 &&
 			((strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
 				(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1])) {
 			shape[0], shape[1] = shape[1], shape[0]
 			t.SetRepacker(p.repack)
 		}
 		out = append(out, llm.Tensor{
 			Name:     t.Name(),
 			Kind:     t.Kind(),
 			Shape:    shape,
 			WriterTo: t,
 		})
 	}
 	return out
 }
 // Replacements returns adapter tensor-name substitutions as a flat list of
 // old/new pairs: [old0, new0, old1, new1, ...].
 func (p *gemma2Adapter) Replacements() []string {
 	pairs := []string{
 		// prefixes
 		"base_model.model.", "",
 		"model.layers", "blk",
 		// attention projections
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		// feed-forward projections
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		// LoRA suffixes
 		"lora_A.weight", "weight.lora_a",
 		"lora_B.weight", "weight.lora_b",
 		"lora_a", "weight.lora_a",
 		"lora_b", "weight.lora_b",
 	}
 	return pairs
 }
 // repack is the repacker installed by Tensors on tensors whose dimensions
 // were swapped. It runs data through a transpose/reshape/materialize sequence
 // and returns the result as a flat float32 slice. The name argument is unused.
 func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	// dims is shape with its two entries reversed
 	// NOTE(review): presumably shape is the already-swapped shape, so dims is the stored layout - confirm against the caller of the repacker
 	dims := []int{int(shape[1]), int(shape[0])}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.T(1, 0); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// flatten the selected rows into a single slice
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@@ -1,213 +0,0 @@
 package convert
 import (
 	"cmp"
 	"fmt"
 	"math"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // llamaModel holds the configuration values read for llama-family models.
 // Several settings have more than one field because KV accepts alternative
 // JSON spellings and uses the first non-zero one (for example
 // "num_hidden_layers", "n_layers" and "n_layer").
 type llamaModel struct {
 	ModelParameters
 	NLayers               uint32  `json:"n_layers"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
 	NLayer                uint32  `json:"n_layer"`
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	NCtx                  uint32  `json:"n_ctx"`
 	HiddenSize            uint32  `json:"hidden_size"`
 	NEmbd                 uint32  `json:"n_embd"`
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NInner                uint32  `json:"n_inner"`
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NHead                 uint32  `json:"n_head"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RopeTheta             float32 `json:"rope_theta"`
 	RopeScaling           struct {
 		Type                string  `json:"type"`
 		RopeType            string  `json:"rope_type"`
 		Factor              float32 `json:"factor"`
 		LowFrequencyFactor  float32 `json:"low_freq_factor"`
 		HighFrequencyFactor float32 `json:"high_freq_factor"`
 		// The JSON key is "original_max_position_embeddings", the same
 		// spelling phi3Model uses. The previous tag spelled it "positional",
 		// so the value was never decoded and KV always used its default.
 		OriginalMaxPositionalEmbeddings uint32 `json:"original_max_position_embeddings"`
 		// factors is computed by KV for "llama3" rope scaling and written
 		// out by Tensors as "rope_freqs.weight"; it is not read from JSON.
 		factors ropeFactor
 	} `json:"rope_scaling"`
 	RMSNormEPS       float32 `json:"rms_norm_eps"`
 	LayerNormEPS     float32 `json:"layer_norm_eps"`
 	LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
 	NormEpsilon      float32 `json:"norm_epsilon"`
 	HeadDim          uint32  `json:"head_dim"`
 }
 var _ ModelConverter = (*llamaModel)(nil)
 // KV builds the metadata for a llama model. Optional settings are written
 // only when a non-zero value is available; where alternative spellings exist
 // the first non-zero field wins.
 //
 // For "llama3" rope scaling it also computes the per-dimension frequency
 // factors and stores them in p.RopeScaling.factors, which Tensors later emits
 // as "rope_freqs.weight". The factors are rebuilt from scratch on every call
 // so calling KV more than once does not accumulate duplicates.
 func (p *llamaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
 	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
 	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
 		kv["llama.context_length"] = contextLength
 	}
 	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
 	if embeddingLength > 0 {
 		kv["llama.embedding_length"] = embeddingLength
 	}
 	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
 		kv["llama.feed_forward_length"] = feedForwardLength
 	}
 	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)
 	if headCount > 0 {
 		kv["llama.attention.head_count"] = headCount
 		// use the resolved embedding length so configs that only set n_embd work
 		kv["llama.rope.dimension_count"] = embeddingLength / headCount
 	}
 	if p.RopeTheta > 0 {
 		kv["llama.rope.freq_base"] = p.RopeTheta
 	}
 	if p.RopeScaling.Type == "linear" {
 		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
 		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
 	} else if p.RopeScaling.RopeType == "llama3" && headCount > 0 {
 		// headCount > 0 guards the division below; previously this divided by
 		// p.NumAttentionHeads directly and panicked when only n_head was set
 		dim := embeddingLength / headCount
 		// none of these depend on the loop index
 		factor := cmp.Or(p.RopeScaling.Factor, 8.0)
 		factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
 		factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
 		original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
 		lambdaLow := float32(original) / factorLow
 		lambdaHigh := float32(original) / factorHigh
 		p.RopeScaling.factors = nil
 		for i := uint32(0); i < dim; i += 2 {
 			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
 			if lambda < float64(lambdaHigh) {
 				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
 			} else if lambda > float64(lambdaLow) {
 				p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
 			} else {
 				// interpolate between 1 and factor
 				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
 				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
 			}
 		}
 	}
 	if p.NumKeyValueHeads > 0 {
 		kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
 	}
 	if p.RMSNormEPS > 0 {
 		kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	}
 	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
 		kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
 	}
 	if p.HeadDim > 0 {
 		kv["llama.attention.key_length"] = p.HeadDim
 		kv["llama.attention.value_length"] = p.HeadDim
 	}
 	return kv
 }
 // Tensors wraps the source tensors in llm.Tensor values. When rope factors
 // have been computed they are emitted first as "rope_freqs.weight". Tensors
 // whose name ends in "attn_q.weight" or "attn_k.weight" get the repack
 // repacker.
 func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var result []llm.Tensor
 	if factors := p.RopeScaling.factors; factors != nil {
 		result = append(result, llm.Tensor{
 			Name:     "rope_freqs.weight",
 			Kind:     0,
 			Shape:    []uint64{uint64(len(factors))},
 			WriterTo: factors,
 		})
 	}
 	needsRepack := func(src Tensor) bool {
 		return strings.HasSuffix(src.Name(), "attn_q.weight") ||
 			strings.HasSuffix(src.Name(), "attn_k.weight")
 	}
 	for _, src := range ts {
 		if needsRepack(src) {
 			src.SetRepacker(p.repack)
 		}
 		entry := llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		}
 		result = append(result, entry)
 	}
 	return result
 }
 // Replacements returns tensor-name substitutions as a flat list of
 // old/new pairs: [old0, new0, old1, new1, ...].
 func (p *llamaModel) Replacements() []string {
 	pairs := []string{
 		// output head, embeddings, final norm and layer prefix
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		// attention
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		// feed forward
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 	return pairs
 }
 // repack is the repacker installed by Tensors on attention query and key
 // weights. It splits the first dimension into (heads, 2, rest), swaps the two
 // inner axes, restores the original shape and returns the data as a flat
 // float32 slice.
 //
 // The head count honours the same fallbacks as KV (num_attention_heads, then
 // n_head); key weights prefer num_key_value_heads. An error is returned for an
 // unknown tensor name, a missing head count, or a tensor with no dimensions,
 // where previously a zero head count caused a divide-by-zero panic.
 func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
 	}
 	attentionHeads := cmp.Or(p.NumAttentionHeads, p.NHead)
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight") {
 		heads = attentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, attentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
 	}
 	if heads == 0 {
 		return nil, fmt.Errorf("unknown head count for repack: %s", name)
 	}
 	if len(dims) == 0 {
 		return nil, fmt.Errorf("tensor has no dimensions for repack: %s", name)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// flatten the selected rows into a single slice
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_llama_adapter.go
+++ b/convert/convert_llama_adapter.go
@@ -1,169 +0,0 @@
 package convert
 import (
 	"cmp"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // llamaAdapter converts adapter tensors for the "llama" architecture. The head
 // counts are used by the repackers to rearrange attention query and key
 // tensors.
 type llamaAdapter struct {
 	AdapterParameters
 	// NumAttentionHeads is overwritten by KV from the base model metadata.
 	NumAttentionHeads uint32 `json:"num_attention_heads"`
 	// NumKeyValueHeads is only populated from JSON in the code shown here.
 	NumKeyValueHeads uint32 `json:"num_key_value_heads"`
 }
 var _ AdapterConverter = (*llamaAdapter)(nil)
 // KV returns the adapter metadata tagged with the "llama" architecture and
 // copies both head-count entries from baseKV. It also records the base model's
 // attention head count on p; that entry must hold a uint32 or the type
 // assertion panics.
 func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
 	const (
 		headCountKey   = "llama.attention.head_count"
 		headCountKVKey = "llama.attention.head_count_kv"
 	)
 	meta := p.AdapterParameters.KV()
 	meta["general.architecture"] = "llama"
 	meta[headCountKey] = baseKV[headCountKey]
 	meta[headCountKVKey] = baseKV[headCountKVKey]
 	p.NumAttentionHeads = baseKV[headCountKey].(uint32)
 	return meta
 }
 // Tensors wraps each adapter tensor in an llm.Tensor. A "weight.lora_a" tensor
 // with more rows than columns, or a "weight.lora_b" tensor with fewer rows
 // than columns, has its two dimensions swapped and uses repackAndTranspose;
 // every other tensor uses repack.
 func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
 	var result []llm.Tensor
 	for _, src := range ts {
 		dims := src.Shape()
 		repackFn := p.repack
 		switch {
 		case strings.HasSuffix(src.Name(), "weight.lora_a") && dims[0] > dims[1],
 			strings.HasSuffix(src.Name(), "weight.lora_b") && dims[0] < dims[1]:
 			dims[0], dims[1] = dims[1], dims[0]
 			repackFn = p.repackAndTranspose
 		}
 		src.SetRepacker(repackFn)
 		result = append(result, llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    dims,
 			WriterTo: src,
 		})
 	}
 	return result
 }
 // Replacements returns adapter tensor-name substitutions as a flat list of
 // old/new pairs: [old0, new0, old1, new1, ...].
 func (p *llamaAdapter) Replacements() []string {
 	pairs := []string{
 		// prefixes
 		"base_model.model.", "",
 		"model.layers", "blk",
 		// attention projections
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		// feed-forward projections
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		// LoRA suffixes
 		"lora_A.weight", "weight.lora_a",
 		"lora_B.weight", "weight.lora_b",
 		"lora_a", "weight.lora_a",
 		"lora_b", "weight.lora_b",
 	}
 	return pairs
 }
 // repack is the repacker used for adapter tensors whose dimensions were not
 // swapped by Tensors. Only tensors whose name ends in "attn_q.weight.lora_a"
 // or "attn_k.weight.lora_a" are rearranged; for every other name data is
 // returned unchanged.
 func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	// dims is shape with its two entries reversed
 	dims := []int{int(shape[1]), int(shape[0])}
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
 		heads = p.NumAttentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
 		// falls back to the attention head count when no key/value head count is set
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	} else {
 		return data, nil
 	}
 	// NOTE(review): heads == 0 would divide by zero below; NumAttentionHeads is set by KV, so this presumably relies on KV having run first - confirm
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	// split the first dimension into (heads, 2, rest) and swap the two inner axes
 	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// flatten the selected rows into a single slice
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
 // repackAndTranspose is the repacker used for adapter tensors whose two
 // dimensions were swapped by Tensors. Tensors whose name ends in
 // "attn_q.weight.lora_a" or "attn_k.weight.lora_a" first get the same
 // per-head rearrangement as repack; every tensor is then transposed. The
 // result is returned as a flat float32 slice.
 func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
 	// dims is shape with its two entries reversed
 	dims := []int{int(shape[1]), int(shape[0])}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
 		heads = p.NumAttentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	}
 	// heads stays 0 for any other tensor name, which skips the per-head step
 	if heads > 0 {
 		// split the first dimension into (heads, 2, rest) and swap the two inner axes
 		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 			return nil, err
 		}
 		if err := n.T(0, 2, 1, 3); err != nil {
 			return nil, err
 		}
 		if err := n.Reshape(dims...); err != nil {
 			return nil, err
 		}
 		if err := n.Transpose(); err != nil {
 			return nil, err
 		}
 	}
 	// transpose step applied to every tensor handled here
 	if err := n.T(1, 0); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// flatten the selected rows into a single slice
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_mixtral.go
+++ b/convert/convert_mixtral.go
@@ -1,94 +0,0 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"slices"
 	"strings"
 	"github.com/ollama/ollama/llm"
 )
 // mixtralModel extends llamaModel with the two expert settings that KV
 // writes as "llama.expert_count" and "llama.expert_used_count".
 type mixtralModel struct {
 	llamaModel
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 // KV returns the llama metadata plus the expert counts, each written only
 // when non-zero.
 func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
 	meta := p.llamaModel.KV(t)
 	if total := p.NumLocalExperts; total != 0 {
 		meta["llama.expert_count"] = total
 	}
 	if used := p.NumExpertsPerToken; used != 0 {
 		meta["llama.expert_used_count"] = used
 	}
 	return meta
 }
 // Tensors merges the per-expert tensors of each layer and projection type
 // into a single stacked tensor and passes every other tensor on to
 // llamaModel.Tensors.
 //
 // The merged tensors are emitted in sorted name order. They were previously
 // emitted in map iteration order, which made the output order differ from run
 // to run.
 func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
 		"w2", "ffn_down_exps",
 		"w3", "ffn_up_exps",
 	}
 	for i := range p.NumLocalExperts {
 		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
 	}
 	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
 	namer := strings.NewReplacer(oldnew...)
 	merged := make(map[string]experts)
 	// merge experts into a single tensor while removing them from ts
 	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
 		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
 			return false
 		}
 		name := namer.Replace(t.Name())
 		merged[name] = append(merged[name], t)
 		return true
 	})
 	names := make([]string, 0, len(merged))
 	for name := range merged {
 		names = append(names, name)
 	}
 	slices.Sort(names)
 	var out []llm.Tensor
 	for _, name := range names {
 		e := merged[name]
 		// TODO(mxyng): sanity check experts
 		out = append(out, llm.Tensor{
 			Name:     name,
 			Kind:     e[0].Kind(),
 			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
 			WriterTo: e,
 		})
 	}
 	return append(out, p.llamaModel.Tensors(ts)...)
 }
 // Replacements returns the llama replacement pairs followed by the pair for
 // the expert gate tensor.
 func (p *mixtralModel) Replacements() []string {
 	pairs := p.llamaModel.Replacements()
 	pairs = append(pairs, "block_sparse_moe.gate", "ffn_gate_inp")
 	return pairs
 }
 // experts is a group of expert tensors that are written out back to back as
 // one merged tensor.
 type experts []Tensor
 // WriteTo writes every expert tensor to w in slice order and returns the
 // total number of bytes reported by the individual writes, as io.WriterTo
 // requires. It stops at the first error and returns the count so far.
 func (e experts) WriteTo(w io.Writer) (int64, error) {
 	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
 	var total int64
 	for _, t := range e {
 		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
 		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
 		// this accomplishes the same thing by writing each expert tensor in sequence
 		n, err := t.WriteTo(w)
 		total += n
 		if err != nil {
 			return total, err
 		}
 	}
 	return total, nil
 }
--- a/convert/convert_phi3.go
+++ b/convert/convert_phi3.go
@@ -1,123 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/binary"
 	"io"
 	"math"
 	"strings"
 	"sync"
 	"github.com/ollama/ollama/llm"
 )
 // phi3Model holds the configuration values read for phi3 models. Some
 // settings have two fields because KV accepts alternative JSON spellings and
 // uses the first non-zero one (for example "hidden_size" and "n_embd").
 type phi3Model struct {
 	ModelParameters
 	NumHiddenLayers   uint32  `json:"num_hidden_layers"`
 	NLayers           uint32  `json:"n_layers"`
 	HiddenSize        uint32  `json:"hidden_size"`
 	NEmbd             uint32  `json:"n_embd"`
 	IntermediateSize  uint32  `json:"intermediate_size"`
 	NumAttentionHeads uint32  `json:"num_attention_heads"`
 	NHead             uint32  `json:"n_head"`
 	NumKeyValueHeads  uint32  `json:"num_key_value_heads"`
 	NHeadKV           uint32  `json:"n_head_kv"`
 	RopeTheta         float32 `json:"rope_theta"`
 	// RopeScaling.Type selects how KV computes the attention factor; the
 	// long and short factors are written out by Tensors.
 	RopeScaling       struct {
 		Type        string     `json:"type"`
 		LongFactor  ropeFactor `json:"long_factor"`
 		ShortFactor ropeFactor `json:"short_factor"`
 	} `json:"rope_scaling"`
 	RMSNormEPS                    float32 `json:"rms_norm_eps"`
 	// NPositions is not read by any code shown in this file.
 	NPositions                    uint32  `json:"n_positions"`
 	MaxPositionEmbeddings         uint32  `json:"max_position_embeddings"`
 	OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
 	SlidingWindow                 uint32  `json:"sliding_window"`
 }
 var _ ModelConverter = (*phi3Model)(nil)
 // KV builds the metadata for a phi3 model. Where alternative spellings exist
 // the first non-zero field wins.
 //
 // "phi3.rope.dimension_count" is derived from the resolved embedding length
 // and head count, so configs that only set n_embd or n_head produce the right
 // value. It is omitted when no head count is available, where previously the
 // division panicked.
 //
 // KV panics on a rope scaling type other than "", "su", "longrope" or "yarn".
 func (p *phi3Model) KV(t *Tokenizer) llm.KV {
 	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
 	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = embeddingLength
 	kv["phi3.feed_forward_length"] = p.IntermediateSize
 	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
 	kv["phi3.attention.head_count"] = headCount
 	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
 	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	if headCount > 0 {
 		kv["phi3.rope.dimension_count"] = embeddingLength / headCount
 	}
 	kv["phi3.rope.freq_base"] = p.RopeTheta
 	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
 	kv["phi3.attention.sliding_window"] = p.SlidingWindow
 	// ratio of the extended context length to the original one
 	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
 	switch p.RopeScaling.Type {
 	case "":
 		// no scaling
 	case "su", "longrope":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
 	case "yarn":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
 	default:
 		panic("unknown rope scaling type")
 	}
 	return kv
 }
 // Tensors wraps the source tensors in llm.Tensor values. Immediately before
 // the first tensor whose name starts with "blk.0." it inserts, once, the long
 // and short rope factor tensors.
 func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	result := make([]llm.Tensor, 0, len(ts)+2)
 	var addRopeFactors sync.Once
 	appendRopeFactors := func() {
 		long, short := p.RopeScaling.LongFactor, p.RopeScaling.ShortFactor
 		result = append(result,
 			llm.Tensor{
 				Name:     "rope_factors_long.weight",
 				Kind:     0,
 				Shape:    []uint64{uint64(len(long))},
 				WriterTo: long,
 			},
 			llm.Tensor{
 				Name:     "rope_factors_short.weight",
 				Kind:     0,
 				Shape:    []uint64{uint64(len(short))},
 				WriterTo: short,
 			},
 		)
 	}
 	for _, src := range ts {
 		if strings.HasPrefix(src.Name(), "blk.0.") {
 			addRopeFactors.Do(appendRopeFactors)
 		}
 		entry := llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		}
 		result = append(result, entry)
 	}
 	return result
 }
 // Replacements returns tensor-name substitutions as a flat list of
 // old/new pairs: [old0, new0, old1, new1, ...].
 func (p *phi3Model) Replacements() []string {
 	pairs := []string{
 		// output head, embeddings, final norm and layer prefix
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		// attention
 		"input_layernorm", "attn_norm",
 		"self_attn.qkv_proj", "attn_qkv",
 		"self_attn.o_proj", "attn_output",
 		// feed forward
 		"mlp.down_proj", "ffn_down",
 		"mlp.gate_up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 	return pairs
 }
 type ropeFactor []float32
 func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
 	err := binary.Write(w, binary.LittleEndian, r)
 	return 0, err
 }
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@@ -1,37 +1,48 @@
 //go:build slow
 package convert
 import (
 	"bytes"
 	"crypto/sha256"
 	"encoding/binary"
 	"encoding/hex"
 	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"math"
 	"os"
 	"path/filepath"
 	"slices"
 	"testing"
 	"golang.org/x/exp/maps"
 	"github.com/ollama/ollama/llm"
 )
-func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
 	t.Helper()
 	mf, err := GetModelFormat(p)
 	if err != nil {
 		t.Fatal(err)
 	}
 	params, err := mf.GetParams(p)
 	if err != nil {
 		t.Fatal(err)
 	}
 	arch, err := mf.GetModelArch("", p, params)
 	if err != nil {
 		t.Fatal(err)
 	}
 	if err := arch.LoadVocab(); err != nil {
 		t.Fatal(err)
 	}
 	if err := arch.GetTensors(); err != nil {
 		t.Fatal(err)
 	}
 	f, err := os.CreateTemp(t.TempDir(), "f16")
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
-	if err := ConvertModel(fsys, f); err != nil {
+	if err := arch.WriteGGUF(f); err != nil {
 		t.Fatal(err)
 	}
@@ -39,309 +50,54 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	t.Cleanup(func() { r.Close() })
+	defer r.Close()
-	m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if _, err := r.Seek(0, io.SeekStart); err != nil {
+	return m.KV(), m.Tensors()
 		t.Fatal(err)
 	}
 	return r, m.KV(), m.Tensors()
 }
 // generateResultsJSON flattens a converted model into a map of strings. A
 // metadata value that implements json.Marshaler is recorded as the hex SHA-256
 // of its JSON encoding; any other value is formatted with %v. Each tensor is
 // recorded as the hex SHA-256 of its bytes read from f.
 func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
 	results := make(map[string]string)
 	for key, value := range kv {
 		marshaler, isMarshaler := value.(json.Marshaler)
 		if !isMarshaler {
 			results[key] = fmt.Sprintf("%v", value)
 			continue
 		}
 		encoded, err := json.Marshal(marshaler)
 		if err != nil {
 			t.Fatal(err)
 		}
 		results[key] = fmt.Sprintf("%x", sha256.Sum256(encoded))
 	}
 	for _, item := range tensors.Items {
 		digest := sha256.New()
 		// tensor offsets are relative to the start of the tensor data
 		start := int64(tensors.Offset + item.Offset)
 		section := io.NewSectionReader(f, start, int64(item.Size()))
 		if _, err := io.Copy(digest, section); err != nil {
 			t.Fatal(err)
 		}
 		results[item.Name] = hex.EncodeToString(digest.Sum(nil))
 	}
 	return results
 }
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")
 	flag.Parse()
 	slog.SetLogLoggerLevel(level)
 	os.Exit(m.Run())
 }
 func TestConvertFull(t *testing.T) {
-	cases := []string{
+	cases := []struct {
-		"Meta-Llama-3-8B-Instruct",
+		path    string
-		"Meta-Llama-3.1-8B-Instruct",
+		arch    string
-		"Mistral-7B-Instruct-v0.2",
+		tensors int
-		"Mixtral-8x7B-Instruct-v0.1",
+		layers  int
-		"gemma-2b-it",
+	}{
-		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
+		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
-		"Phi-3-mini-128k-instruct",
+		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
-		"all-MiniLM-L6-v2",
+		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
-		"gemma-2-9b-it",
+		{"gemma-2b-it", "gemma", 164, 20},
 	}
-	for i := range cases {
+	for _, tt := range cases {
-		tt := cases[i]
+		t.Run(tt.path, func(t *testing.T) {
-		t.Run(tt, func(t *testing.T) {
+			p := filepath.Join("testdata", tt.path)
-			t.Parallel()
+			if _, err := os.Stat(p); err != nil {
 			p := filepath.Join("testdata", tt)
 			if testing.Short() {
 				t.Skip("skipping in short mode")
 			} else if _, err := os.Stat(p); err != nil {
 				t.Skipf("%s not found", p)
 			}
-			f, kv, tensors := convertFull(t, os.DirFS(p))
+			kv, tensors := convertFull(t, p)
 			actual := generateResultsJSON(t, f, kv, tensors)
-			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
+			if kv.Architecture() != tt.arch {
-			if err != nil {
+				t.Fatalf("expected llama, got %s", kv.Architecture())
 				t.Fatal(err)
 			}
-			var expect map[string]string
+			if kv.FileType().String() != "F16" {
-			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
+				t.Fatalf("expected F16, got %s", kv.FileType())
 				t.Fatal(err)
 			}
-			keys := maps.Keys(expect)
+			if len(tensors) != tt.tensors {
-			slices.Sort(keys)
+				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
-			for _, k := range keys {
+			}
-				if v, ok := actual[k]; !ok {
+
-					t.Errorf("missing %s", k)
+			layers := tensors.Layers()
-				} else if v != expect[k] {
+			if len(layers) != tt.layers {
-					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
+				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
 				}
 			}
 		})
 	}
 }
 // TestConvertAdapter converts a generated LoRA adapter and compares the
 // resulting metadata values and tensor checksums against the expected values
 // of each case.
 func TestConvertAdapter(t *testing.T) {
 	// AdapterCase pairs the base model metadata passed to ConvertAdapter with
 	// the expected flattened output (see generateResultsJSON).
 	type AdapterCase struct {
 		Name     string
 		BaseKV   map[string]any
 		Expected map[string]string
 	}
 	cases := []AdapterCase{
 		{
 			Name: "discollama",
 			BaseKV: map[string]any{
 				"general.architecture":          "llama",
 				"llama.attention.head_count":    uint32(32),
 				"llama.attention.head_count_kv": uint32(8),
 			},
 			Expected: map[string]string{
 				"general.architecture":          "llama",
 				"general.file_type":             "1",
 				"general.parameter_count":       "106496",
 				"general.type":                  "adapter",
 				"general.version":               "v0.2",
 				"adapter.lora.alpha":            "16",
 				"adapter.type":                  "lora",
 				"llama.attention.head_count":    "32",
 				"llama.attention.head_count_kv": "8",
 				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
 			},
 		},
 	}
 	for _, c := range cases {
 		t.Run(c.Name, func(t *testing.T) {
 			t.Parallel()
 			// destination file for the converted adapter
 			f, err := os.CreateTemp(t.TempDir(), "f16")
 			if err != nil {
 				t.Fatal(err)
 			}
 			defer f.Close()
 			// source directory holding the generated adapter files
 			tempDir := t.TempDir()
 			generateLoraTestData(t, tempDir)
 			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
 				t.Fatal(err)
 			}
 			// reopen the converted file for decoding and checksumming
 			r, err := os.Open(f.Name())
 			if err != nil {
 				t.Fatal(err)
 			}
 			defer r.Close()
 			m, _, err := llm.DecodeGGML(r, math.MaxInt)
 			if err != nil {
 				t.Fatal(err)
 			}
 			if _, err := r.Seek(0, io.SeekStart); err != nil {
 				t.Fatal(err)
 			}
 			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
 			// check expected keys in sorted order so failures are reported deterministically
 			keys := maps.Keys(c.Expected)
 			slices.Sort(keys)
 			for _, k := range keys {
 				if v, ok := actual[k]; !ok {
 					t.Errorf("missing %s", k)
 				} else if v != c.Expected[k] {
 					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
 				}
 			}
 		})
 	}
 }
 func generateLoraTestData(t *testing.T, tempDir string) {
 	type tensorData struct {
 		Offsets []int  `json:"data_offsets"`
 		Type    string `json:"dtype"`
 		Shape   []int  `json:"shape"`
 	}
 	offset := 4096 * 8 * 4
 	td := map[string]*tensorData{"__metadata__": nil}
 	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
 		Offsets: []int{0, offset},
 		Type:    "F32",
 		Shape:   []int{4096, 8},
 	}
 	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
 		Offsets: []int{offset, offset * 2},
 		Type:    "F32",
 		Shape:   []int{8, 4096},
 	}
 	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
 		Offsets: []int{offset * 2, offset * 3},
 		Type:    "F32",
 		Shape:   []int{4096, 8},
 	}
 	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
 		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
 		Type:    "F32",
 		Shape:   []int{8, 1024},
 	}
 	data, err := json.Marshal(td)
 	if err != nil {
 		t.Fatal(err)
 	}
 	var buf bytes.Buffer
 	l := int64(len(data))
 	err = binary.Write(&buf, binary.LittleEndian, l)
 	if err != nil {
 		t.Fatal(err)
 	}
 	_, err = buf.Write(data)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// write some data for the tensors
 	ones := make([]float32, 4096*8)
 	for i := range ones {
 		ones[i] = float32(1)
 	}
 	for range 3 {
 		err = binary.Write(&buf, binary.LittleEndian, ones)
 		if err != nil {
 			t.Fatal(err)
 		}
 	}
 	ones = make([]float32, 1024*8)
 	for i := range ones {
 		ones[i] = float32(1)
 	}
 	err = binary.Write(&buf, binary.LittleEndian, ones)
 	if err != nil {
 		t.Fatal(err)
 	}
 	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer fdata.Close()
 	_, err = fdata.Write(buf.Bytes())
 	if err != nil {
 		t.Fatal(err)
 	}
 	configData := `
 {
    "adapter_path": "adapters-test",
    "batch_size": 8,
    "config": "config-tiny.json",
    "data": "../discollama-completion",
    "grad_checkpoint": null,
    "iters": 1000,
    "learning_rate": 1e-05,
    "lora_layers": 1,
    "lora_parameters": {
        "rank": 8,
        "alpha": 16,
        "dropout": 0.0,
        "scale": 2.0
    },
    "lr_schedule": null,
    "max_seq_length": 2048,
    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
    "resume_adapter_file": null,
    "save_every": 100,
    "seed": 0,
    "steps_per_eval": 200,
    "steps_per_report": 10,
    "test": false,
    "test_batches": 500,
    "train": true,
    "use_dora": false,
    "val_batches": 25
 }
 `
 	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
 	_, err = f.WriteString(configData)
 	if err != nil {
 		t.Fatal(err)
 	}
 }
--- a/convert/fs.go
+++ b/convert/fs.go
@@ -1,58 +0,0 @@
 package convert
 import (
 	"archive/zip"
 	"errors"
 	"io"
 	"io/fs"
 	"os"
 	"path/filepath"
 )
 type ZipReader struct {
 	r *zip.Reader
 	p string
 	// limit is the maximum size of a file that can be read directly
 	// from the zip archive. Files larger than this size will be extracted
 	limit int64
 }
 func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
 	return &ZipReader{r, p, limit}
 }
 func (z *ZipReader) Open(name string) (fs.File, error) {
 	r, err := z.r.Open(name)
 	if err != nil {
 		return nil, err
 	}
 	defer r.Close()
 	if fi, err := r.Stat(); err != nil {
 		return nil, err
 	} else if fi.Size() < z.limit {
 		return r, nil
 	}
 	if !filepath.IsLocal(name) {
 		return nil, zip.ErrInsecurePath
 	}
 	n := filepath.Join(z.p, name)
 	if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
 		w, err := os.Create(n)
 		if err != nil {
 			return nil, err
 		}
 		defer w.Close()
 		if _, err := io.Copy(w, r); err != nil {
 			return nil, err
 		}
 	} else if err != nil {
 		return nil, err
 	}
 	return os.Open(n)
 }
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -0,0 +1,102 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"log/slog"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // GemmaModel is the converter for Gemma checkpoints. It embeds ModelData and
 // adds Gemma-specific tensor repacking, vocabulary loading and GGUF metadata.
 type GemmaModel struct {
 	ModelData
 }
 // addOnes treats data as a vector of vectorSize elements, adds a vector of
 // ones to it and returns the result flattened back into a plain slice.
 func addOnes(data []float32, vectorSize int) ([]float32, error) {
 	vec := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
 	sum, err := vec.Add(tensor.Ones(tensor.Float32, vectorSize))
 	if err != nil {
 		return nil, err
 	}
 	rows, err := native.SelectF32(sum, 0)
 	if err != nil {
 		return nil, err
 	}
 	// SelectF32 hands back a slice of rows; concatenate them in order.
 	var flat []float32
 	for i := range rows {
 		flat = append(flat, rows[i]...)
 	}
 	return flat, nil
 }
 // GetTensors loads the tensor list through m.Format and appends it to
 // m.Tensors. Every tensor whose name ends in "norm.weight" gets m.Repack
 // installed as its repacker so the weights are adjusted when written.
 //
 // An error is returned if the format fails to read the tensors, or if a norm
 // tensor is not backed by a safetensorWriterTo (previously this case
 // panicked on the unchecked type assertion).
 func (m *GemmaModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			wt, ok := l.WriterTo.(safetensorWriterTo)
 			if !ok {
 				return fmt.Errorf("unsupported tensor writer %T for %s", l.WriterTo, l.Name)
 			}
 			// wt is a copy; store it back so the repacker takes effect.
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary from m.Path and stores it in
 // m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *GemmaModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // Repack is the repacker installed on Gemma norm tensors: it adds one to
 // every element of data, using shape[0] as the vector length. The tensor
 // name is ignored.
 func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
 	vectorSize := int(shape[0])
 	return addOnes(data, vectorSize)
 }
 // WriteGGUF assembles the Gemma key/value metadata from m.Params and m.Vocab
 // and encodes it, together with m.Tensors, to ws as a GGUF v3 file.
 func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 	params, vocab := m.Params, m.Vocab
 	// Key and value lengths are both taken from the head dimension.
 	headDim := uint32(params.HeadDimension)
 	kv := llm.KV{
 		"general.architecture": "gemma",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),

 		"gemma.context_length":                   uint32(params.ContextSize),
 		"gemma.embedding_length":                 uint32(params.HiddenSize),
 		"gemma.block_count":                      uint32(params.HiddenLayers),
 		"gemma.feed_forward_length":              uint32(params.IntermediateSize),
 		"gemma.attention.head_count":             uint32(params.AttentionHeads),
 		"gemma.attention.head_count_kv":          uint32(params.KeyValHeads),
 		"gemma.attention.layer_norm_rms_epsilon": float32(params.NormEPS),
 		"gemma.attention.key_length":             headDim,
 		"gemma.attention.value_length":           headDim,

 		"tokenizer.ggml.model":            "llama",
 		"tokenizer.ggml.tokens":           vocab.Tokens,
 		"tokenizer.ggml.scores":           vocab.Scores,
 		"tokenizer.ggml.token_type":       vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(params.EoSTokenID),
 		"tokenizer.ggml.padding_token_id": uint32(params.PaddingTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(3),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 	}
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -0,0 +1,159 @@
 package convert
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // LlamaModel is the converter for Llama checkpoints. It embeds ModelData and
 // adds Llama-specific tensor repacking, vocabulary loading and GGUF metadata.
 type LlamaModel struct {
 	ModelData
 }
 // GetTensors loads the tensor list through m.Format and appends it to
 // m.Tensors. Attention Q and K weight tensors get m.Repack installed as their
 // repacker; which concrete writer type is patched depends on the format.
 func (m *LlamaModel) GetTensors() error {
 	tensors, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, l := range tensors {
 		if !attnQK.MatchString(l.Name) {
 			m.Tensors = append(m.Tensors, l)
 			continue
 		}
 		// The writers are values, so patch a copy and store it back.
 		switch m.Format.(type) {
 		case *TorchFormat:
 			wt := l.WriterTo.(torchWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		case *SafetensorFormat:
 			wt := l.WriterTo.(safetensorWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab parses tokenizer.json under m.Path and fills m.Vocab (tokens,
 // token types and merges) and m.Params.PreTokenizer from it. A missing
 // tokenizer.json is not an error: the method returns nil and changes nothing.
 func (m *LlamaModel) LoadVocab() (err error) {
 	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
 	switch {
 	case errors.Is(err, os.ErrNotExist):
 		return nil
 	case err != nil:
 		return err
 	}
 	m.Vocab = &Vocab{}
 	vocab := m.Vocab
 	for _, tok := range ts {
 		vocab.Tokens = append(vocab.Tokens, tok.Content)
 		vocab.Types = append(vocab.Types, tok.Type())
 	}
 	vocab.Merges = merges
 	m.Params.PreTokenizer = pre
 	return nil
 }
 // WriteGGUF assembles the Llama key/value metadata from m.Params and m.Vocab
 // and encodes it, together with m.Tensors, to ws as a GGUF v3 file. Merges
 // are written when the vocabulary has any; otherwise token scores are.
 func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 	params, vocab := m.Params, m.Vocab
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),

 		"llama.vocab_size":                       uint32(len(vocab.Tokens)),
 		"llama.context_length":                   uint32(params.ContextSize),
 		"llama.embedding_length":                 uint32(params.HiddenSize),
 		"llama.block_count":                      uint32(params.HiddenLayers),
 		"llama.feed_forward_length":              uint32(params.IntermediateSize),
 		"llama.rope.freq_base":                   float32(params.RopeFrequencyBase),
 		"llama.rope.dimension_count":             uint32(params.HiddenSize / params.AttentionHeads),
 		"llama.attention.head_count":             uint32(params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(params.NormEPS),

 		"tokenizer.ggml.model":            "gpt2",
 		"tokenizer.ggml.pre":              params.PreTokenizer,
 		"tokenizer.ggml.tokens":           vocab.Tokens,
 		"tokenizer.ggml.token_type":       vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}
 	if merges := vocab.Merges; len(merges) > 0 {
 		kv["tokenizer.ggml.merges"] = merges
 	} else {
 		kv["tokenizer.ggml.scores"] = vocab.Scores
 	}
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
 // Repack is the repacker installed on Llama attention Q/K tensors; it
 // forwards to llamaRepack using this model's parameters.
 func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	repacked, err := llamaRepack(name, m.Params, data, shape)
 	return repacked, err
 }
 // llamaRepack permutes the rows of an attention Q or K weight tensor and
 // returns the permuted data as a flat slice.
 //
 // name selects the head count: names ending in "attn_q.weight" use
 // params.AttentionHeads, names ending in "attn_k.weight" use
 // params.KeyValHeads and fall back to params.AttentionHeads when that is
 // zero. Any other name is an error. shape gives the tensor dimensions; zero
 // entries are dropped before use.
 //
 // An error (rather than a panic) is returned when no usable dimensions
 // remain or when the resolved head count is not positive, since both would
 // otherwise fault while computing the reshape.
 func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		if dim != 0 {
 			dims = append(dims, int(dim))
 		}
 	}
 	if len(dims) == 0 {
 		return nil, fmt.Errorf("tensor %s has no non-zero dimensions", name)
 	}
 	var heads int
 	switch {
 	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
 	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
 	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
 	if heads <= 0 {
 		return nil, fmt.Errorf("invalid head count %d for tensor %s", heads, name)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	// Split the leading dimension into (heads, 2, rows-per-half) ...
 	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	// ... swap the two inner axes ...
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	// ... and collapse back to the original dimensions.
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -0,0 +1,79 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MistralModel is the converter for Mistral checkpoints. It embeds ModelData
 // and writes its metadata under the "llama" architecture keys.
 type MistralModel struct {
 	ModelData
 }
 // GetTensors loads the tensor list through m.Format and appends it to
 // m.Tensors. Attention Q and K weight tensors get m.Repack installed as their
 // repacker; their writers are asserted to be safetensorWriterTo.
 func (m *MistralModel) GetTensors() error {
 	tensors, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, l := range tensors {
 		if attnQK.MatchString(l.Name) {
 			// The writer is a value, so patch a copy and store it back.
 			wt := l.WriterTo.(safetensorWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary from m.Path and stores it in
 // m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *MistralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF assembles the key/value metadata (under the "llama"
 // architecture) from m.Params and m.Vocab and encodes it, together with
 // m.Tensors, to ws as a GGUF v3 file.
 func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 	params, vocab := m.Params, m.Vocab
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),

 		"llama.context_length":                   uint32(params.ContextSize),
 		"llama.embedding_length":                 uint32(params.HiddenSize),
 		"llama.block_count":                      uint32(params.HiddenLayers),
 		"llama.feed_forward_length":              uint32(params.IntermediateSize),
 		"llama.rope.dimension_count":             uint32(params.HiddenSize / params.AttentionHeads),
 		"llama.attention.head_count":             uint32(params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(params.NormEPS),

 		"tokenizer.ggml.model":            "llama",
 		"tokenizer.ggml.tokens":           vocab.Tokens,
 		"tokenizer.ggml.scores":           vocab.Scores,
 		"tokenizer.ggml.token_type":       vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(params.EoSTokenID),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
 // Repack is the repacker installed on Mistral attention Q/K tensors; it
 // forwards to llamaRepack using this model's parameters.
 func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	repacked, err := llamaRepack(name, m.Params, data, shape)
 	return repacked, err
 }
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -0,0 +1,87 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MixtralModel is the converter for Mixtral checkpoints. It embeds ModelData
 // and writes its metadata, including expert counts, under the "llama"
 // architecture keys.
 type MixtralModel struct {
 	ModelData
 }
 // GetTensors loads the tensor list through m.Format and appends it to
 // m.Tensors. Attention Q and K weight tensors get m.Repack installed as their
 // repacker; their writers are asserted to be safetensorWriterTo.
 func (m *MixtralModel) GetTensors() error {
 	tensors, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, l := range tensors {
 		if attnQK.MatchString(l.Name) {
 			// The writer is a value, so patch a copy and store it back.
 			wt := l.WriterTo.(safetensorWriterTo)
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab reads the SentencePiece vocabulary from m.Path and stores it in
 // m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *MixtralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF assembles the key/value metadata (under the "llama"
 // architecture, plus expert counts) from m.Params and m.Vocab and encodes
 // it, together with m.Tensors, to ws as a GGUF v3 file.
 func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
 	params, vocab := m.Params, m.Vocab
 	kv := llm.KV{
 		"general.architecture": "llama",
 		"general.name":         m.Name,
 		"general.file_type":    uint32(1),

 		"llama.block_count":                      uint32(params.HiddenLayers),
 		"llama.context_length":                   uint32(params.ContextSize),
 		"llama.embedding_length":                 uint32(params.HiddenSize),
 		"llama.feed_forward_length":              uint32(params.IntermediateSize),
 		"llama.attention.head_count":             uint32(params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(params.NormEPS),
 		"llama.rope.freq_base":                   float32(params.RopeFrequencyBase),
 		"llama.rope.dimension_count":             uint32(params.HiddenSize / params.AttentionHeads),
 		"llama.expert_count":                     uint32(params.Experts),
 		"llama.expert_used_count":                uint32(params.ExpertsUsed),
 		"llama.vocab_size":                       uint32(len(vocab.Tokens)),

 		"tokenizer.ggml.model":            "llama",
 		"tokenizer.ggml.tokens":           vocab.Tokens,
 		"tokenizer.ggml.scores":           vocab.Scores,
 		"tokenizer.ggml.token_type":       vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 	}
 	encoder := llm.NewGGUFV3(params.ByteOrder)
 	return encoder.Encode(ws, kv, m.Tensors)
 }
 // Repack is the repacker installed on Mixtral attention Q/K tensors; it
 // forwards to llamaRepack using this model's parameters.
 func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	repacked, err := llamaRepack(name, m.Params, data, shape)
 	return repacked, err
 }
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -1,86 +0,0 @@
 package convert
 import (
 	"errors"
 	"io"
 	"io/fs"
 	"strings"
 )
 // Tensor is a single named tensor that can describe itself and serialize its
 // data to a writer.
 type Tensor interface {
 	// Name returns the tensor's name.
 	Name() string
 	// Shape returns the tensor's dimensions.
 	Shape() []uint64
 	// Kind returns the storage kind code the tensor is written as
 	// (see the tensorKind* constants).
 	Kind() uint32
 	// SetRepacker installs a function applied to the data before it is written.
 	SetRepacker(repacker)
 	// WriteTo serializes the tensor data to the writer.
 	WriteTo(io.Writer) (int64, error)
 }
 // repacker transforms a tensor's float32 data before it is written. It
 // receives the tensor name, the data and the shape, and returns the data to
 // write in its place.
 type repacker func(string, []float32, []uint64) ([]float32, error)
 // Storage kinds reported by Kind.
 const (
 	tensorKindF32 uint32 = iota
 	tensorKindF16
 )
 // tensorBase holds the name, shape and optional repacker shared by the
 // concrete tensor implementations.
 type tensorBase struct {
 	name  string
 	shape []uint64
 	repacker
 }
 // Name returns the tensor's name.
 func (t tensorBase) Name() string {
 	return t.name
 }
 // Shape returns the tensor's dimensions.
 func (t tensorBase) Shape() []uint64 {
 	return t.shape
 }
 // Kind picks the storage kind: a fixed set of tensors and all
 // one-dimensional tensors are F32, everything else is F16. It panics when
 // the shape is empty.
 func (t tensorBase) Kind() uint32 {
 	// these tensors are always F32
 	alwaysF32 := t.name == "token_types.weight" ||
 		strings.HasSuffix(t.name, ".ffn_gate_inp.weight")
 	if alwaysF32 {
 		return tensorKindF32
 	}
 	if len(t.shape) == 0 {
 		panic("invalid tensor shape")
 	}
 	if len(t.shape) == 1 {
 		return tensorKindF32
 	}
 	return tensorKindF16
 }
 // SetRepacker installs fn as the function applied to the data before writing.
 func (t *tensorBase) SetRepacker(fn repacker) {
 	t.repacker = fn
 }
 // parseTensors locates tensor files in fsys and parses them. The known file
 // name patterns are tried in order; the first pattern with at least one
 // match decides the parser, which receives every file matching that pattern.
 // An error is returned if globbing fails or no pattern matches.
 func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	type parseFunc func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
 	candidates := []struct {
 		glob  string
 		parse parseFunc
 	}{
 		{glob: "model-*-of-*.safetensors", parse: parseSafetensors},
 		{glob: "model.safetensors", parse: parseSafetensors},
 		{glob: "adapters.safetensors", parse: parseSafetensors},
 		{glob: "adapter_model.safetensors", parse: parseSafetensors},
 		{glob: "pytorch_model-*-of-*.bin", parse: parseTorch},
 		{glob: "pytorch_model.bin", parse: parseTorch},
 		{glob: "consolidated.*.pth", parse: parseTorch},
 	}
 	for i := range candidates {
 		found, err := fs.Glob(fsys, candidates[i].glob)
 		if err != nil {
 			return nil, err
 		}
 		if len(found) == 0 {
 			continue
 		}
 		return candidates[i].parse(fsys, replacer, found...)
 	}
 	return nil, errors.New("unknown tensor format")
 }
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -1,151 +0,0 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"io/fs"
 	"slices"
 	"strings"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"golang.org/x/exp/maps"
 )
 // safetensorMetadata is one entry of the JSON header at the start of a
 // safetensors file.
 type safetensorMetadata struct {
 	// Type is the element data type name, e.g. "F32", "F16" or "BF16".
 	Type    string   `json:"dtype"`
 	// Shape is the tensor's dimensions.
 	Shape   []uint64 `json:"shape"`
 	// Offsets holds the start and end offsets of the tensor's data; the
 	// parser adds the header size to both to get file positions.
 	Offsets []int64  `json:"data_offsets"`
 }
 // parseSafetensors reads the header of each safetensors file in ps (opened
 // through fsys) and returns one Tensor per header entry that declares a
 // dtype, in sorted key order per file. Tensor names are passed through
 // replacer. No tensor data is read here; each returned tensor remembers its
 // file, offset and size and reads the data when written.
 //
 // Each file is closed as soon as its header has been read. A negative
 // header length or an entry with fewer than two data offsets is reported as
 // an error instead of panicking.
 func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	// readHeaders opens p, decodes its JSON header and returns it together
 	// with the header length n. The file is closed before returning.
 	readHeaders := func(p string) (map[string]safetensorMetadata, int64, error) {
 		f, err := fsys.Open(p)
 		if err != nil {
 			return nil, 0, err
 		}
 		defer f.Close()
 		// The file starts with the little-endian int64 length of the header.
 		var n int64
 		if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 			return nil, 0, err
 		}
 		if n < 0 {
 			return nil, 0, fmt.Errorf("invalid safetensors header length %d in %s", n, p)
 		}
 		// Let the buffer grow as data arrives rather than trusting n for an
 		// up-front allocation.
 		var b bytes.Buffer
 		if _, err := io.CopyN(&b, f, n); err != nil {
 			return nil, 0, err
 		}
 		var headers map[string]safetensorMetadata
 		if err := json.NewDecoder(&b).Decode(&headers); err != nil {
 			return nil, 0, err
 		}
 		return headers, n, nil
 	}
 	var ts []Tensor
 	for _, p := range ps {
 		headers, n, err := readHeaders(p)
 		if err != nil {
 			return nil, err
 		}
 		keys := maps.Keys(headers)
 		slices.Sort(keys)
 		for _, key := range keys {
 			value := headers[key]
 			if value.Type == "" {
 				// Entries without a dtype are not tensors.
 				continue
 			}
 			if len(value.Offsets) < 2 {
 				return nil, fmt.Errorf("invalid data_offsets for tensor %q in %s", key, p)
 			}
 			start := safetensorsPad(n, value.Offsets[0])
 			end := safetensorsPad(n, value.Offsets[1])
 			ts = append(ts, safetensor{
 				fs:     fsys,
 				path:   p,
 				dtype:  value.Type,
 				offset: start,
 				size:   end - start,
 				tensorBase: &tensorBase{
 					name:  replacer.Replace(key),
 					shape: value.Shape,
 				},
 			})
 		}
 	}
 	return ts, nil
 }
 // safetensorsPad converts a data offset taken from a safetensors header into
 // a position in the file by adding the 8-byte length prefix and the header
 // length n.
 func safetensorsPad(n, offset int64) int64 {
 	const lengthPrefix = 8
 	return lengthPrefix + n + offset
 }
 // safetensor is a Tensor whose data lives in a safetensors file and is read
 // only when the tensor is written.
 type safetensor struct {
 	// fs and path identify the file holding the data.
 	fs     fs.FS
 	path   string
 	// dtype is the stored element type ("F32", "F16" or "BF16").
 	dtype  string
 	// offset is the file position of the data and size its length in bytes.
 	offset int64
 	size   int64
 	*tensorBase
 }
 // WriteTo reads the tensor's data from its file, converts it to float32,
 // applies the repacker if one is set, and writes the result to w in
 // little-endian order as F32 or F16 according to Kind.
 //
 // It returns the number of bytes written on success, as io.WriterTo requires
 // (previously it always reported 0). On error the count is reported as 0.
 // Unknown stored dtypes and unknown storage kinds are errors.
 func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 	f, err := st.fs.Open(st.path)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	// Position the reader at the tensor data; fs.File is not required to be
 	// seekable, so fall back to reading and discarding.
 	if seeker, ok := f.(io.Seeker); ok {
 		if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
 			return 0, err
 		}
 	} else {
 		if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
 			return 0, err
 		}
 	}
 	var f32s []float32
 	switch st.dtype {
 	case "F32":
 		f32s = make([]float32, st.size/4)
 		if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, st.size/2)
 		if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
 			return 0, err
 		}
 		f32s = make([]float32, len(u16s))
 		for i := range u16s {
 			f32s[i] = float16.Frombits(u16s[i]).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, st.size)
 		if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", st.dtype)
 	}
 	if st.repacker != nil {
 		f32s, err = st.repacker(st.Name(), f32s, st.Shape())
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch st.Kind() {
 	case tensorKindF32:
 		if err := binary.Write(w, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 		return int64(len(f32s)) * 4, nil
 	case tensorKindF16:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err := binary.Write(w, binary.LittleEndian, f16s); err != nil {
 			return 0, err
 		}
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
 	}
 }
--- a/convert/reader_torch.go
+++ b/convert/reader_torch.go
@@ -1,48 +0,0 @@
 package convert
 import (
 	"io"
 	"io/fs"
 	"strings"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 )
 // parseTorch loads each PyTorch checkpoint in ps and returns one Tensor per
 // entry of the checkpoint's top-level dictionary. Tensor names are passed
 // through replacer and the shape is copied from the tensor's Size.
 //
 // NOTE(review): files are loaded with pytorch.Load(p) directly, so fsys is
 // not consulted here — confirm that is intended. The checkpoint is assumed
 // to be a dictionary with string keys and tensor values; anything else
 // panics on the type assertions below.
 func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		pt, err := pytorch.Load(p)
 		if err != nil {
 			return nil, err
 		}
 		dict := pt.(*types.Dict)
 		for _, k := range dict.Keys() {
 			t := dict.MustGet(k).(*pytorch.Tensor)
 			var shape []uint64
 			// Range over the values: ranging with a single variable yields
 			// indices and would record [0, 1, ...] instead of the real dims.
 			for _, dim := range t.Size {
 				shape = append(shape, uint64(dim))
 			}
 			ts = append(ts, torch{
 				storage: t.Source,
 				tensorBase: &tensorBase{
 					name:  replacer.Replace(k.(string)),
 					shape: shape,
 				},
 			})
 		}
 	}
 	return ts, nil
 }
 // torch is a Tensor backed by the storage of a loaded PyTorch checkpoint.
 type torch struct {
 	// storage is the checkpoint storage the tensor's data comes from.
 	storage pytorch.StorageInterface
 	*tensorBase
 }
 // WriteTo is currently a stub: it writes nothing to w and reports zero bytes
 // and no error.
 func (pt torch) WriteTo(w io.Writer) (int64, error) {
 	return 0, nil
 }
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -0,0 +1,309 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // safetensorWriterTo lazily writes one tensor's data out of a safetensors
 // file: the data is read from disk only when WriteTo is called.
 type safetensorWriterTo struct {
 	// t is the tensor this writer belongs to; its Name, Shape and Kind are
 	// consulted when writing.
 	t *llm.Tensor
 	params *Params
 	// bo is the byte order used both to read the source and write the output.
 	bo     ByteOrder
 	// filename is the safetensors file holding the data; dtype is the stored
 	// element type ("F32", "F16" or "BF16").
 	filename string
 	dtype    string
 	// offset is the file position of the data and size its length in bytes.
 	offset, size int64
 	// repacker, when set, transforms the float32 data before it is written.
 	repacker     func(string, []float32, []uint64) ([]float32, error)
 }
 // safetensorMetadata is one entry of the JSON header at the start of a
 // safetensors file.
 type safetensorMetadata struct {
 	// Type is the element data type name, e.g. "F32", "F16" or "BF16".
 	Type    string   `json:"dtype"`
 	// Shape is the tensor's dimensions.
 	Shape   []uint64 `json:"shape"`
 	// Offsets holds the start and end offsets of the tensor's data; the
 	// reader adds the header size to both to get file positions.
 	Offsets []int64  `json:"data_offsets"`
 }
 // SafetensorFormat reads models stored as a directory of *.safetensors files
 // plus a config.json. It carries no state.
 type SafetensorFormat struct{}
 // GetTensors reads the headers of every *.safetensors file in dirpath and
 // returns the tensors they describe. The running output offset is threaded
 // from one file to the next, so each tensor's Offset follows on from the
 // tensors before it.
 func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	files, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
 	if err != nil {
 		return nil, err
 	}
 	var (
 		all    []llm.Tensor
 		offset uint64
 	)
 	for _, file := range files {
 		batch, next, err := m.readTensors(file, offset, params)
 		if err != nil {
 			return nil, err
 		}
 		offset = next
 		all = append(all, batch...)
 	}
 	return all, nil
 }
 // readTensors parses the header of the safetensors file fn and returns one
 // llm.Tensor per usable entry, in sorted key order, along with the output
 // offset following the last tensor. offset is the output offset at which the
 // first tensor of this file starts.
 //
 // Entries with an empty shape are skipped, as are rotary-embedding
 // "inv_freq" buffers. Two-dimensional tensors are given kind 1 (F16); all
 // others keep kind 0 (F32). Tensor data is not read here: each tensor gets a
 // safetensorWriterTo that reads it on demand.
 //
 // A negative header length, an entry with fewer than two data offsets, or a
 // key with no GGUF layer name is an error.
 func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
 	f, err := os.Open(fn)
 	if err != nil {
 		return nil, 0, err
 	}
 	defer f.Close()
 	// The file starts with the little-endian int64 length of the header.
 	var n int64
 	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 		return nil, 0, err
 	}
 	if n < 0 {
 		return nil, 0, fmt.Errorf("invalid safetensors header length %d in %s", n, fn)
 	}
 	// Let the buffer grow as data arrives rather than trusting n for an
 	// up-front allocation.
 	var b bytes.Buffer
 	if _, err = io.CopyN(&b, f, n); err != nil {
 		return nil, 0, err
 	}
 	var headers map[string]safetensorMetadata
 	if err := json.NewDecoder(&b).Decode(&headers); err != nil {
 		return nil, 0, err
 	}
 	var keys []string
 	for key := range headers {
 		// Skip rotary-embedding frequency buffers; they have no GGUF layer
 		// name. The "rotary_embd" spelling is what this filter originally
 		// matched and is kept alongside "rotary_emb".
 		if strings.HasSuffix(key, "self_attn.rotary_emb.inv_freq") ||
 			strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
 			continue
 		}
 		keys = append(keys, key)
 	}
 	slices.Sort(keys)
 	// pad turns a header data offset into a file position by adding the
 	// 8-byte length prefix and the header length.
 	pad := func(s int64) int64 {
 		return 8 + n + s
 	}
 	var tensors []llm.Tensor
 	for _, key := range keys {
 		value := headers[key]
 		var kind uint32
 		switch len(value.Shape) {
 		case 0:
 			// valuedata
 			continue
 		case 2:
 			kind = 1
 		}
 		name, err := m.GetLayerName(key)
 		if err != nil {
 			return nil, 0, err
 		}
 		if len(value.Offsets) < 2 {
 			return nil, 0, fmt.Errorf("invalid data_offsets for tensor %q in %s", key, fn)
 		}
 		shape := make([]uint64, len(value.Shape))
 		copy(shape, value.Shape)
 		t := llm.Tensor{
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
 			Shape:  shape,
 		}
 		t.WriterTo = safetensorWriterTo{
 			t:        &t,
 			params:   params,
 			bo:       params.ByteOrder,
 			filename: fn,
 			dtype:    value.Type,
 			offset:   pad(value.Offsets[0]),
 			size:     pad(value.Offsets[1]) - pad(value.Offsets[0]),
 		}
 		offset += t.Size()
 		tensors = append(tensors, t)
 	}
 	return tensors, offset, nil
 }
 // GetParams decodes config.json in dirpath into a Params value and sets its
 // byte order to little-endian.
 func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
 	cfg, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		return nil, err
 	}
 	defer cfg.Close()
 	params := new(Params)
 	if err := json.NewDecoder(cfg).Decode(params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return params, nil
 }
 // safetensorDirectNames maps checkpoint tensor names that translate
 // one-to-one to their GGUF names.
 var safetensorDirectNames = map[string]string{
 	"model.embed_tokens.weight": "token_embd.weight",
 	"lm_head.weight":            "output.weight",
 	"model.norm.weight":         "output_norm.weight",
 }
 // safetensorLayerPatterns rewrites per-layer checkpoint tensor names to their
 // GGUF names. The expressions are compiled once and tried in this fixed
 // order.
 var safetensorLayerPatterns = []struct {
 	re   *regexp.Regexp
 	repl string
 }{
 	{regexp.MustCompile("model.layers.(\\d+).input_layernorm.weight"), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.down_proj.weight"), "blk.$1.ffn_down.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.gate_proj.weight"), "blk.$1.ffn_gate.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.up_proj.weight"), "blk.$1.ffn_up.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).post_attention_layernorm.weight"), "blk.$1.ffn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.k_proj.weight"), "blk.$1.attn_k.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.o_proj.weight"), "blk.$1.attn_output.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.q_proj.weight"), "blk.$1.attn_q.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.v_proj.weight"), "blk.$1.attn_v.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.gate.weight"), "blk.$1.ffn_gate_inp.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight"), "blk.$1.ffn_gate.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight"), "blk.$1.ffn_down.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight"), "blk.$1.ffn_up.$2.weight"},
 }
 // GetLayerName translates a checkpoint tensor name into its GGUF name. Exact
 // matches in the direct table win; otherwise the first pattern whose
 // replacement changes the name is used. An error is returned when nothing
 // applies.
 func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
 	if v, ok := safetensorDirectNames[n]; ok {
 		return v, nil
 	}
 	// quick hack to rename the layers to gguf format
 	for _, p := range safetensorLayerPatterns {
 		if newName := p.re.ReplaceAllString(n, p.repl); newName != n {
 			return newName, nil
 		}
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo reads the tensor's data from r.filename, converts it to float32,
 // applies the repacker if one is set, and writes the result to w using byte
 // order r.bo as F32 (kind 0) or F16 (kind 1).
 //
 // It returns the number of bytes written on success, as io.WriterTo requires
 // (previously it always reported 0). On error the count is reported as 0.
 // Unknown stored dtypes and unknown storage kinds are errors.
 func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	f, err := os.Open(r.filename)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
 		return 0, err
 	}
 	var f32s []float32
 	switch r.dtype {
 	case "F32":
 		f32s = make([]float32, r.size/4)
 		if err = binary.Read(f, r.bo, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, r.size/2)
 		if err = binary.Read(f, r.bo, u16s); err != nil {
 			return 0, err
 		}
 		// Size the output once instead of growing it element by element.
 		f32s = make([]float32, len(u16s))
 		for i, b := range u16s {
 			f32s[i] = float16.Frombits(b).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, r.size)
 		if err = binary.Read(f, r.bo, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch r.t.Kind {
 	case 0:
 		if err = binary.Write(w, r.bo, f32s); err != nil {
 			return 0, err
 		}
 		return int64(len(f32s)) * 4, nil
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err = binary.Write(w, r.bo, f16s); err != nil {
 			return 0, err
 		}
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 }
 // GetModelArch picks the converter for the architecture named in params.
 // Exactly one architecture must be listed: none is reported as missing, more
 // than one as an unknown error, and an unrecognized name as unsupported.
 // The returned model carries name, dirPath, params and this format.
 func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	if len(params.Architectures) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(params.Architectures) != 1 {
 		return nil, fmt.Errorf("Unknown error")
 	}
 	// Every supported architecture embeds the same ModelData.
 	data := ModelData{
 		Name:   name,
 		Path:   dirPath,
 		Params: params,
 		Format: m,
 	}
 	switch arch := params.Architectures[0]; arch {
 	case "LlamaForCausalLM":
 		return &LlamaModel{data}, nil
 	case "MistralForCausalLM":
 		return &MistralModel{data}, nil
 	case "MixtralForCausalLM":
 		return &MixtralModel{data}, nil
 	case "GemmaForCausalLM":
 		return &GemmaModel{data}, nil
 	default:
 		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
 	}
 }
--- a/convert/testdata/Meta-Llama-3-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "8192",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "500000",
  "llama.vocab_size": "128256",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.pre": "llama-bpe",
  "tokenizer.ggml.bos_token_id": "128000",
  "tokenizer.ggml.eos_token_id": "128009",
  "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
  "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
  "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
  "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
  "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
  "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
  "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
  "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
  "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
  "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
  "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
  "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
  "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
  "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
  "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
  "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
  "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
  "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
  "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
  "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
  "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
  "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
  "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
  "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
  "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
  "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
  "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
  "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
  "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
  "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
  "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
  "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
  "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
  "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
  "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
  "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
  "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
  "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
  "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
  "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
  "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
  "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
  "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
  "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
  "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
  "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
  "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
  "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
  "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
  "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
  "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
  "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
  "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
  "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
  "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
  "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
  "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
  "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
  "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
  "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
  "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
  "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
  "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
  "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
  "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
  "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
  "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
  "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
  "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
  "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
  "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
  "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
  "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
  "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
  "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
  "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
  "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
  "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
  "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
  "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
  "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
  "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
  "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
  "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
  "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
  "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
  "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
  "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
  "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
  "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
  "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
  "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
  "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
  "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
  "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
  "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
  "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
  "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
  "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
  "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
  "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
  "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
  "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
  "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
  "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
  "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
  "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
  "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
  "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
  "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
  "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
  "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
  "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
  "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
  "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
  "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
  "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
  "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
  "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
  "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
  "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
  "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
  "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
  "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
  "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
  "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
  "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
  "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
  "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
  "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
  "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
  "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
  "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
  "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
  "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
  "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
  "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
  "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
  "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
  "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
  "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
  "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
  "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
  "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
  "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
  "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
  "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
  "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
  "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
  "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
  "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
  "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
  "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
  "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
  "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
  "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
  "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
  "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
  "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
  "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
  "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
  "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
  "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
  "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
  "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
  "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
  "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
  "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
  "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
  "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
  "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
  "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
  "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
  "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
  "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
  "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
  "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
  "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
  "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
  "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
  "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
  "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
  "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
  "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
  "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
  "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
  "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
  "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
  "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
  "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
  "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
  "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
  "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
  "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
  "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
  "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
  "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
  "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
  "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
  "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
  "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
  "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
  "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
  "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
  "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
  "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
  "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
  "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
  "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
  "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
  "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
  "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
  "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
  "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
  "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
  "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
  "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
  "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
  "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
  "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
  "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
  "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
  "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
  "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
  "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
  "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
  "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
  "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
  "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
  "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
  "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
  "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
  "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
  "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
  "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
  "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
  "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
  "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
  "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
  "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
  "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
  "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
  "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
  "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
  "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
  "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
  "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
  "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
  "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
  "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
  "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
  "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
  "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
  "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
  "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
  "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
  "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
  "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
  "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
  "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
  "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
  "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
  "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
  "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
  "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
  "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
  "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
  "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
  "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
  "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
  "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
  "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
  "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
  "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
  "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
  "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
  "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
  "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
  "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
  "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
  "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
  "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
  "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
  "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
  "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
  "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
  "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
  "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
  "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
  "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
  "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
  "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
  "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
  "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
 }
--- a/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
@@ -1,3 +0,0 @@
 {
  "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
 }
--- a/convert/testdata/Mistral-7B-Instruct-v0.2.json
+++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.rope.dimension_count": "128",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
  "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
  "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
  "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
  "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
  "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
  "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
  "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
  "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
  "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
  "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
  "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
  "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
  "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
  "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
  "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
  "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
  "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
  "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
  "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
  "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
  "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
  "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
  "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
  "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
  "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
  "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
  "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
  "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
  "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
  "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
  "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
  "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
  "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
  "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
  "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
  "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
  "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
  "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
  "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
  "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
  "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
  "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
  "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
  "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
  "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
  "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
  "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
  "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
  "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
  "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
  "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
  "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
  "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
  "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
  "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
  "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
  "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
  "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
  "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
  "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
  "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
  "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
  "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
  "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
  "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
  "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
  "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
  "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
  "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
  "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
  "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
  "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
  "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
  "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
  "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
  "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
  "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
  "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
  "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
  "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
  "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
  "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
  "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
  "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
  "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
  "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
  "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
  "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
  "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
  "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
  "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
  "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
  "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
  "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
  "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
  "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
  "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
  "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
  "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
  "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
  "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
  "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
  "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
  "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
  "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
  "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
  "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
  "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
  "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
  "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
  "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
  "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
  "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
  "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
  "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
  "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
  "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
  "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
  "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
  "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
  "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
  "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
  "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
  "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
  "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
  "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
  "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
  "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
  "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
  "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
  "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
  "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
  "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
  "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
  "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
  "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
  "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
  "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
  "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
  "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
  "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
  "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
  "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
  "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
  "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
  "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
  "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
  "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
  "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
  "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
  "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
  "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
  "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
  "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
  "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
  "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
  "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
  "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
  "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
  "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
  "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
  "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
  "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
  "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
  "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
  "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
  "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
  "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
  "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
  "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
  "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
  "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
  "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
  "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
  "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
  "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
  "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
  "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
  "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
  "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
  "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
  "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
  "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
  "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
  "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
  "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
  "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
  "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
  "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
  "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
  "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
  "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
  "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
  "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
  "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
  "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
  "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
  "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
  "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
  "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
  "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
  "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
  "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
  "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
  "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
  "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
  "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
  "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
  "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
  "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
  "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
  "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
  "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
  "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
  "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
  "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
  "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
  "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
  "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
  "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
  "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
  "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
  "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
  "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
  "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
  "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
  "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
  "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
  "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
  "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
  "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
  "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
  "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
  "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
  "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
  "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
  "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
  "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
  "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
  "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
  "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
  "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
  "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
  "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
  "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
  "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
  "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
  "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
  "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
  "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
  "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
  "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
  "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
  "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
  "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
  "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
  "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
  "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
  "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
  "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
  "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
  "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
  "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
  "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
  "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
  "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
  "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
  "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
  "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
  "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
  "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
  "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
  "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
  "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
  "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
  "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
  "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
  "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
  "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
  "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
  "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
  "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
  "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
  "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
  "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
  "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
  "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
  "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
  "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
  "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
 }
--- a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
+++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
@@ -1,348 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "1e+06",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.expert_count": "8",
  "llama.expert_used_count": "2",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
  "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
  "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
  "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
  "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
  "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
  "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
  "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
  "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
  "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
  "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
  "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
  "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
  "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
  "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
  "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
  "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
  "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
  "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
  "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
  "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
  "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
  "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
  "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
  "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
  "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
  "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
  "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
  "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
  "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
  "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
  "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
  "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
  "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
  "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
  "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
  "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
  "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
  "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
  "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
  "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
  "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
  "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
  "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
  "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
  "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
  "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
  "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
  "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
  "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
  "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
  "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
  "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
  "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
  "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
  "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
  "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
  "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
  "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
  "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
  "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
  "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
  "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
  "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
  "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
  "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
  "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
  "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
  "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
  "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
  "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
  "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
  "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
  "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
  "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
  "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
  "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
  "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
  "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
  "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
  "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
  "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
  "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
  "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
  "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
  "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
  "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
  "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
  "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
  "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
  "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
  "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
  "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
  "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
  "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
  "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
  "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
  "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
  "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
  "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
  "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
  "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
  "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
  "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
  "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
  "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
  "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
  "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
  "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
  "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
  "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
  "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
  "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
  "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
  "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
  "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
  "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
  "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
  "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
  "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
  "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
  "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
  "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
  "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
  "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
  "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
  "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
  "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
  "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
  "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
  "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
  "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
  "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
  "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
  "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
  "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
  "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
  "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
  "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
  "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
  "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
  "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
  "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
  "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
  "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
  "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
  "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
  "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
  "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
  "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
  "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
  "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
  "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
  "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
  "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
  "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
  "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
  "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
  "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
  "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
  "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
  "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
  "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
  "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
  "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
  "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
  "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
  "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
  "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
  "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
  "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
  "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
  "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
  "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
  "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
  "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
  "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
  "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
  "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
  "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
  "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
  "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
  "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
  "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
  "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
  "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
  "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
  "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
  "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
  "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
  "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
  "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
  "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
  "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
  "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
  "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
  "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
  "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
  "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
  "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
  "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
  "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
  "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
  "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
  "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
  "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
  "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
  "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
  "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
  "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
  "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
  "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
  "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
  "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
  "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
  "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
  "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
  "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
  "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
  "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
  "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
  "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
  "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
  "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
  "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
  "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
  "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
  "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
  "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
  "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
  "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
  "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
  "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
  "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
  "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
  "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
  "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
  "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
  "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
  "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
  "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
  "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
  "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
  "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
  "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
  "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
  "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
  "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
  "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
  "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
  "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
  "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
  "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
  "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
  "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
  "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
  "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
  "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
  "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
  "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
  "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
  "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
  "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
  "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
  "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
  "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
  "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
  "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
  "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
  "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
  "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
  "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
  "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
  "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
  "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
  "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
  "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
  "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
  "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
  "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
  "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
  "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
  "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
  "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
  "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
  "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
  "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
  "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
  "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
  "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
  "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
  "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
  "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
  "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
  "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
  "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
  "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
  "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
  "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
  "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
  "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
  "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
  "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
  "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
  "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
  "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
  "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
  "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
  "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
  "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
  "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
  "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
  "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
  "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
  "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
  "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
  "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
  "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
  "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
  "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
  "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
  "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
  "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
 }
--- a/convert/testdata/Phi-3-mini-128k-instruct.json
+++ b/convert/testdata/Phi-3-mini-128k-instruct.json
@@ -1,225 +0,0 @@
 {
  "general.architecture": "phi3",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "phi3.block_count": "32",
  "phi3.context_length": "131072",
  "phi3.embedding_length": "3072",
  "phi3.feed_forward_length": "8192",
  "phi3.rope.scaling.original_context_length": "4096",
  "phi3.rope.dimension_count": "96",
  "phi3.rope.freq_base": "10000",
  "phi3.rope.scaling.attn_factor": "1.1902381",
  "phi3.attention.head_count": "32",
  "phi3.attention.head_count_kv": "32",
  "phi3.attention.layer_norm_rms_epsilon": "1e-05",
  "phi3.attention.sliding_window": "262144",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.pre": "default",
  "tokenizer.ggml.add_bos_token": "false",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "32000",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.padding_token_id": "32000",
  "tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
  "tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
  "tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
  "blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
  "blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
  "blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
  "blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
  "blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
  "blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
  "blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
  "blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
  "blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
  "blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
  "blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
  "blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
  "blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
  "blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
  "blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
  "blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
  "blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
  "blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
  "blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
  "blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
  "blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
  "blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
  "blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
  "blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
  "blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
  "blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
  "blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
  "blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
  "blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
  "blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
  "blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
  "blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
  "blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
  "blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
  "blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
  "blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
  "blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
  "blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
  "blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
  "blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
  "blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
  "blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
  "blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
  "blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
  "blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
  "blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
  "blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
  "blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
  "blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
  "blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
  "blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
  "blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
  "blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
  "blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
  "blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
  "blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
  "blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
  "blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
  "blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
  "blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
  "blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
  "blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
  "blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
  "blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
  "blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
  "blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
  "blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
  "blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
  "blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
  "blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
  "blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
  "blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
  "blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
  "blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
  "blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
  "blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
  "blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
  "blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
  "blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
  "blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
  "blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
  "blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
  "blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
  "blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
  "blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
  "blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
  "blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
  "blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
  "blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
  "blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
  "blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
  "blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
  "blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
  "blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
  "blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
  "blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
  "blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
  "blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
  "blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
  "blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
  "blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
  "blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
  "blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
  "blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
  "blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
  "blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
  "blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
  "blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
  "blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
  "blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
  "blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
  "blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
  "blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
  "blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
  "blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
  "blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
  "blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
  "blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
  "blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
  "blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
  "blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
  "blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
  "blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
  "blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
  "blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
  "blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
  "blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
  "blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
  "blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
  "blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
  "blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
  "blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
  "blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
  "blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
  "blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
  "blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
  "blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
  "blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
  "blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
  "blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
  "blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
  "blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
  "blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
  "blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
  "blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
  "blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
  "blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
  "blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
  "blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
  "blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
  "blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
  "blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
  "blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
  "blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
  "blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
  "blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
  "blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
  "blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
  "blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
  "blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
  "blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
  "blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
  "blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
  "blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
  "blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
  "blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
  "blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
  "blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
  "blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
  "blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
  "blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
  "blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
  "blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
  "blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
  "blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
  "blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
  "blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
  "blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
  "blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
  "blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
  "blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
  "blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
  "blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
  "blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
  "blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
  "blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
  "blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
  "blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
  "blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
  "blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
  "blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
  "blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
  "output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
  "output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
  "rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
  "rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
  "token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
 }
--- a/convert/testdata/all-MiniLM-L6-v2.json
+++ b/convert/testdata/all-MiniLM-L6-v2.json
@@ -1,124 +0,0 @@
 {
  "general.architecture": "bert",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "bert.attention.causal": "false",
  "bert.attention.head_count": "12",
  "bert.attention.layer_norm_epsilon": "1e-12",
  "bert.block_count": "6",
  "bert.context_length": "512",
  "bert.embedding_length": "384",
  "bert.feed_forward_length": "1536",
  "bert.pooling_type": "1",
  "tokenizer.ggml.model": "bert",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.unknown_token_id": "100",
  "tokenizer.ggml.cls_token_id": "101",
  "tokenizer.ggml.seperator_token_id": "102",
  "tokenizer.ggml.mask_token_id": "103",
  "tokenizer.ggml.token_type_count": "2",
  "tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
  "tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
  "tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
  "token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
  "position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
  "token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
  "token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
  "token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
  "blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
  "blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
  "blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
  "blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
  "blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
  "blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
  "blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
  "blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
  "blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
  "blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
  "blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
  "blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
  "blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
  "blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
  "blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
  "blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
  "blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
  "blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
  "blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
  "blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
  "blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
  "blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
  "blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
  "blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
  "blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
  "blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
  "blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
  "blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
  "blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
  "blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
  "blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
  "blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
  "blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
  "blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
  "blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
  "blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
  "blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
  "blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
  "blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
  "blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
  "blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
  "blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
  "blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
  "blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
  "blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
  "blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
  "blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
  "blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
  "blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
  "blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
  "blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
  "blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
  "blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
  "blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
  "blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
  "blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
  "blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
  "blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
  "blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
  "blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
  "blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
  "blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
  "blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
  "blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
  "blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
  "blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
  "blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
  "blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
  "blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
  "blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
  "blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
  "blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
  "blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
  "blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
  "blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
  "blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
  "blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
  "blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
  "blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
  "blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
  "blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
  "blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
  "blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
  "blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
  "blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
  "blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
  "blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
  "blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
  "blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
  "blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
  "blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
  "blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
  "blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
  "blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
  "blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
  "blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
 }
--- a/convert/testdata/gemma-2-9b-it.json
+++ b/convert/testdata/gemma-2-9b-it.json
@@ -1,6 +0,0 @@
 {
  "general.architecture": "gemma2",
  "gemma2.attention.sliding_window": "4096",
  "gemma2.attn_logit_softcapping": "50",
  "gemma2.final_logit_softcapping": "30"
 }
--- a/convert/testdata/gemma-2b-it.json
+++ b/convert/testdata/gemma-2b-it.json
@@ -1,188 +0,0 @@
 {
  "general.architecture": "gemma",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "gemma.block_count": "18",
  "gemma.context_length": "8192",
  "gemma.embedding_length": "2048",
  "gemma.feed_forward_length": "16384",
  "gemma.attention.head_count": "8",
  "gemma.attention.head_count_kv": "1",
  "gemma.attention.key_length": "256",
  "gemma.attention.value_length": "256",
  "gemma.attention.layer_norm_rms_epsilon": "1e-06",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "2",
  "tokenizer.ggml.eos_token_id": "1",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.unknown_token_id": "3",
  "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
  "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
  "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
  "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
  "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
  "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
  "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
  "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
  "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
  "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
  "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
  "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
  "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
  "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
  "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
  "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
  "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
  "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
  "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
  "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
  "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
  "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
  "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
  "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
  "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
  "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
  "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
  "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
  "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
  "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
  "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
  "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
  "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
  "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
  "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
  "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
  "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
  "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
  "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
  "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
  "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
  "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
  "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
  "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
  "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
  "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
  "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
  "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
  "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
  "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
  "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
  "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
  "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
  "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
  "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
  "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
  "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
  "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
  "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
  "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
  "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
  "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
  "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
  "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
  "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
  "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
  "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
  "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
  "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
  "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
  "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
  "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
  "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
  "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
  "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
  "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
  "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
  "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
  "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
  "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
  "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
  "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
  "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
  "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
  "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
  "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
  "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
  "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
  "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
  "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
  "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
  "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
  "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
  "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
  "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
  "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
  "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
  "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
  "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
  "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
  "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
  "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
  "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
  "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
  "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
  "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
  "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
  "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
  "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
  "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
  "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
  "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
  "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
  "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
  "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
  "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
  "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
  "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
  "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
  "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
  "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
  "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
  "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
  "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
  "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
  "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
  "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
  "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
  "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
  "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
  "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
  "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
  "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
  "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
  "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
  "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
  "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
  "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
  "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
  "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
  "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
  "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
  "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
  "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
  "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
  "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
  "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
  "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
  "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
  "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
  "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
  "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
  "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
  "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
  "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
  "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
  "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
  "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
  "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
  "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
  "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
  "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
  "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
  "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
  "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
  "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
  "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
 }
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -1,12 +1,10 @@
 package convert
 import (
 	"cmp"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"log/slog"
 	"os"
 	"slices"
@@ -14,140 +12,10 @@ import (
 	"golang.org/x/exp/maps"
 )
 // Token type identifiers stored per token in a vocabulary's Types slice.
 // The blank identifier consumes iota 0, so tokenTypeNormal is 1 and
 // tokenTypeByte is 6.
 // NOTE(review): the numbering presumably mirrors the sentencepiece piece
 // type enum (parseSentencePiece stores those enum values next to these
 // constants) — confirm before reordering.
 const (
 	_ int32 = iota
 	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 type Tokenizer struct {
-	*Vocabulary
+	Version     string         `json:"version"`
-	SpecialVocabulary []*SpecialVocabulary
+	AddedTokens []Token        `json:"added_tokens"`
-	Merges            []string
+	Model       TokenizerModel `json:"model"`
 	Pre      string
 	Template string
 }
 // parseTokenizer builds a Tokenizer from the files found in fsys.
 //
 // The vocabulary comes from parseVocabulary. tokenizer.json, when present,
 // supplies the added tokens, the merges and the pretokenizer identification.
 // tokenizer_config.json, when present, supplies the chat template and the
 // special tokens named in specialTokenTypes (e.g. "bos", "eos"). A missing
 // tokenizer.json or tokenizer_config.json is not an error; any other open or
 // decode failure is returned to the caller.
 func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
 	v, err := parseVocabulary(fsys)
 	if err != nil {
 		return nil, err
 	}
 	t := &Tokenizer{
 		Vocabulary: v,
 		Pre:        "default",
 	}
 	// added tokens keyed by their content, used below to resolve special token IDs
 	addedTokens := make(map[string]token)
 	// an absent tokenizer.json is tolerated: the empty branch deliberately does nothing
 	if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		var tt tokenizer
 		if err := json.NewDecoder(f).Decode(&tt); err != nil {
 			return nil, err
 		}
 		// note: this loop variable shadows the outer Tokenizer t for the loop body only
 		for _, t := range tt.AddedTokens {
 			addedTokens[t.Content] = t
 		}
 		t.Merges = tt.Model.Merges
 		sha256sum := sha256.New()
 		for _, pt := range tt.PreTokenizer.PreTokenizers {
 			switch pt.Type {
 			case "Split":
 				if pt.Pattern.Regex != "" {
 					// create a checksum of all Split pretokenizers which should be sufficient
 					// to identify the pretokenizer
 					sha256sum.Write([]byte(pt.Pattern.Regex))
 				}
 			}
 		}
 		// map the digest of the concatenated Split regexes to a known pretokenizer name;
 		// unrecognized digests keep the "default" value set above
 		switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
 		case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
 			t.Pre = "llama-bpe"
 		case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
 			t.Pre = "deepseek-llm"
 		case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
 			t.Pre = "deepseek-coder"
 		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
 			// noop, empty pretokenizer
 		default:
 			slog.Warn("unknown pretokenizer, using default", "digest", digest)
 		}
 	}
 	// an absent tokenizer_config.json is tolerated in the same way
 	if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		// decode lazily so each key can be unmarshalled into the shape it needs
 		var p map[string]json.RawMessage
 		if err := json.NewDecoder(f).Decode(&p); err != nil {
 			return nil, err
 		}
 		if template, ok := p["chat_template"]; ok {
 			if err := json.Unmarshal(template, &t.Template); err != nil {
 				return nil, err
 			}
 		}
 		for _, st := range specialTokenTypes {
 			sv := SpecialVocabulary{Type: st}
 			// "add_<type>_token" is optional; when present it must decode into sv.AddToken
 			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
 				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
 					return nil, err
 				}
 			}
 			// "<type>_token" is either a plain string or an object carrying a "content"
 			// string; any other shape causes this special token type to be skipped
 			if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
 				var content string
 				if err := json.Unmarshal(bts, &content); err != nil {
 					var mm map[string]any
 					if err := json.Unmarshal(bts, &mm); err != nil {
 						continue
 					}
 					content, ok = mm["content"].(string)
 					if !ok {
 						continue
 					}
 				}
 				sv.Content = content
 			}
 			// only special tokens whose content is one of tokenizer.json's added tokens are kept
 			if id, ok := addedTokens[sv.Content]; ok {
 				sv.ID = id.ID
 				t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
 			}
 		}
 	}
 	return t, nil
 }
 type tokenizer struct {
 	Version     string  `json:"version"`
 	AddedTokens []token `json:"added_tokens"`
 	Model       struct {
 		Type   string         `json:"type"`
 		Vocab  map[string]int `json:"vocab"`
 		Merges []string       `json:"merges"`
 	} `json:"model"`
 	PreTokenizer struct {
 		PreTokenizers []struct {
@@ -159,108 +27,80 @@ type tokenizer struct {
 	} `json:"pre_tokenizer"`
 }
-type token struct {
+type TokenizerModel struct {
 	Type   string         `json:"type"`
 	Vocab  map[string]int `json:"vocab"`
 	Merges []string       `json:"merges"`
 	Tokens []Token
 }
 // Token is one vocabulary entry: its numeric ID, its text content and the
 // flags that Type uses to classify it.
 type Token struct {
 	ID          int    `json:"id"`
 	Content     string `json:"content"`
 	Special     bool   `json:"special"`
 	// UserDefined carries no json tag; parseTokens sets it to true for every
 	// entry taken from the tokenizer's added_tokens list.
 	UserDefined bool
 }
-type Vocabulary struct {
+func (t *Token) Type() int32 {
-	Model  string
+	switch {
-	Tokens []string
+	case t.Special:
-	Scores []float32
+		return tokenTypeControl
-	Types  []int32
+	case t.UserDefined:
 		return tokenTypeUserDefined
 	default:
 		return tokenTypeNormal
 	}
 }
-func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
+func (t *Tokenizer) maxID() int {
-	f, err := fsys.Open("tokenizer.json")
+	return max(
 		slices.Max(maps.Values(t.Model.Vocab)),
 		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
 			return cmp.Compare(a.ID, b.ID)
 		}).ID,
 	)
 }
 func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
 	f, err := os.Open(dirpath)
 	if err != nil {
-		return nil, err
+		panic(err)
 	}
 	defer f.Close()
-	var t tokenizer
+	var t Tokenizer
 	if err := json.NewDecoder(f).Decode(&t); err != nil {
-		return nil, err
+		return "", nil, nil, err
 	}
-	tokens := make(map[int]token, len(t.Model.Vocab))
+	tokens = make([]Token, t.maxID()+1)
 	for k, v := range t.Model.Vocab {
-		tokens[v] = token{
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
-			ID:      v,
+	}
-			Content: k,
+
 	for _, v := range t.AddedTokens {
 		v.UserDefined = true
 		tokens[v.ID] = v
 	}
 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
 		if pt.Type == "Split" && pt.Pattern.Regex != "" {
 			sha256sum.Write([]byte(pt.Pattern.Regex))
 		}
 	}
-	for _, token := range t.AddedTokens {
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
-		token.UserDefined = true
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
-		tokens[token.ID] = token
+		pre = "llama-bpe"
 	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
 		pre = "deepseek-llm"
 	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
 		pre = "deepseek-coder"
 	default:
 		slog.Warn("unknown pretokenizer, using default", "digest", digest)
 		pre = "default"
 	}
-	keys := maps.Keys(tokens)
+	return pre, tokens, t.Model.Merges, nil
 	slices.Sort(keys)
 	v := Vocabulary{Model: "gpt2"}
 	for _, k := range keys {
 		token := tokens[k]
 		v.Tokens = append(v.Tokens, token.Content)
 		v.Scores = append(v.Scores, float32(token.ID))
 		switch {
 		case token.Special:
 			v.Types = append(v.Types, tokenTypeControl)
 		case token.UserDefined:
 			v.Types = append(v.Types, tokenTypeUserDefined)
 		default:
 			v.Types = append(v.Types, tokenTypeNormal)
 		}
 	}
 	return &v, nil
 }
 // parseVocabulary loads the vocabulary from the first supported tokenizer
 // file present in fsys. Candidates are probed in priority order:
 // tokenizer.model (SentencePiece) first, then tokenizer.json.
 //
 // A candidate that does not exist is skipped; any other stat error is
 // returned as-is. When no candidate exists an error is returned.
 func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
 	patterns := []struct {
 		Pattern string
 		Func    func(fs.FS) (*Vocabulary, error)
 	}{
 		{"tokenizer.model", parseSentencePiece},
 		{"tokenizer.json", parseVocabularyFromTokenizer},
 	}
 	for _, pattern := range patterns {
 		if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
 			continue
 		} else if err != nil {
 			return nil, err
 		}
 		return pattern.Func(fsys)
 	}
 	// this function probes tokenizer files, not tensor files, so the error
 	// must name the tokenizer to point users at the right missing input
 	return nil, errors.New("unknown tokenizer format")
 }
 // SpecialVocabulary describes one special token (bos, eos, pad, ...) resolved
 // from the tokenizer configuration.
 type SpecialVocabulary struct {
 	Type     string
 	ID       int
 	Content  string
 	AddToken bool
 }
 // Key returns the name used for this special token type in output metadata.
 // Short type names that have a longer spelling are expanded; the remaining
 // known types are returned unchanged. An unrecognized type panics.
 func (sv SpecialVocabulary) Key() string {
 	switch sv.Type {
 	case "unk":
 		return "unknown"
 	case "pad":
 		return "padding"
 	case "sep":
 		//nolint:misspell // this is an upstream typo
 		return "seperator"
 	case "bos", "eos", "cls", "mask":
 		return sv.Type
 	default:
 		panic("unknown special vocabulary type")
 	}
 }
--- a/convert/tokenizer_spm.go
+++ b/convert/tokenizer_spm.go
@@ -1,113 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"slices"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 )
 // parseSentencePiece builds a "llama" Vocabulary from a SentencePiece
 // tokenizer.model in fsys.
 //
 // Pieces typed UNKNOWN, CONTROL, UNUSED or BYTE keep their SentencePiece
 // type. Every other piece is NORMAL, unless its text is listed under
 // additional_special_tokens in special_tokens_map.json, in which case it is
 // CONTROL. When added_tokens.json exists, its tokens are appended as
 // user-defined tokens with a score of -1000; their IDs must continue the
 // base vocabulary without gaps, otherwise an error is returned.
 func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 	ast, err := parseAdditionalSpecialTokens(fsys)
 	if err != nil {
 		return nil, err
 	}
 	// index the additional special tokens once so the per-piece membership
 	// test below is a map lookup instead of a linear scan per piece
 	special := make(map[string]struct{}, len(ast))
 	for _, s := range ast {
 		special[s] = struct{}{}
 	}
 	bts, err := fs.ReadFile(fsys, "tokenizer.model")
 	if err != nil {
 		return nil, err
 	}
 	var spm sentencepiece.ModelProto
 	if err := proto.Unmarshal(bts, &spm); err != nil {
 		return nil, err
 	}
 	v := Vocabulary{Model: "llama"}
 	for _, piece := range spm.GetPieces() {
 		v.Tokens = append(v.Tokens, piece.GetPiece())
 		v.Scores = append(v.Scores, piece.GetScore())
 		switch t := piece.GetType(); t {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
 			sentencepiece.ModelProto_SentencePiece_CONTROL,
 			sentencepiece.ModelProto_SentencePiece_UNUSED,
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			v.Types = append(v.Types, int32(t))
 		default:
 			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
 			if _, ok := special[piece.GetPiece()]; ok {
 				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
 			}
 			v.Types = append(v.Types, tt)
 		}
 	}
 	// added_tokens.json is optional; without it the base vocabulary is final
 	f, err := fsys.Open("added_tokens.json")
 	if errors.Is(err, os.ErrNotExist) {
 		return &v, nil
 	} else if err != nil {
 		return nil, err
 	}
 	defer f.Close()
 	var atm map[string]int
 	if err := json.NewDecoder(f).Decode(&atm); err != nil {
 		return nil, err
 	}
 	type addedToken struct {
 		id      int
 		content string
 	}
 	added := make([]addedToken, 0, len(atm))
 	for content, id := range atm {
 		added = append(added, addedToken{id, content})
 	}
 	// map iteration order is random, so order by ID before appending
 	slices.SortFunc(added, func(i, j addedToken) int {
 		return cmp.Compare(i.id, j.id)
 	})
 	n := len(v.Tokens)
 	for i, tok := range added {
 		// each added token must take the next free ID after the base vocabulary
 		if tok.id != i+n {
 			return nil, fmt.Errorf("invalid token id: %d", tok.id)
 		}
 		v.Tokens = append(v.Tokens, tok.content)
 		v.Scores = append(v.Scores, -1000.0)
 		v.Types = append(v.Types, tokenTypeUserDefined)
 	}
 	return &v, nil
 }
 func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
 	f, err := fsys.Open("special_tokens_map.json")
 	if errors.Is(err, os.ErrNotExist) {
 		return nil, nil
 	} else if err != nil {
 		return nil, err
 	}
 	defer f.Close()
 	var m struct {
 		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
 	}
 	if err := json.NewDecoder(f).Decode(&m); err != nil {
 		return nil, err
 	}
 	return m.AdditionalSpecialTokens, nil
 }
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -0,0 +1,287 @@
 package convert
 import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // torchWriterTo writes the data of one PyTorch tensor when its WriteTo
 // method is called.
 type torchWriterTo struct {
 	// t is the tensor's metadata; WriteTo reads its Name, Kind and Shape.
 	t *llm.Tensor
 	params *Params
 	// bo is the byte order handed to binary.Write.
 	bo     ByteOrder
 	// storage holds the tensor values; WriteTo accepts float, half and
 	// bfloat16 storages.
 	storage  pytorch.StorageInterface
 	// repacker is optional; when non-nil, WriteTo passes the tensor name,
 	// data and shape through it before encoding.
 	repacker func(string, []float32, []uint64) ([]float32, error)
 }
 // TorchFormat reads model parameters and tensors from PyTorch checkpoint
 // (.pth) directories. It carries no state.
 type TorchFormat struct{}
 // GetTensors loads every tensor from the PyTorch checkpoint files in dirpath
 // and returns their GGUF descriptions in file order.
 //
 // Files matching consolidated*.pth are used when present, otherwise
 // pytorch_model*.pth. Rotary inverse-frequency buffers and rank-0 tensors are
 // skipped. Rank-1 tensors are written as float32 (kind 0) and rank-2 tensors
 // as float16 (kind 1); Offset is the running sum of the encoded sizes.
 //
 // An error is returned when a file cannot be unpickled, when the checkpoint
 // is not a dictionary of string keys to tensors, when a tensor has a rank
 // other than 0, 1 or 2, or when a tensor name has no GGUF equivalent.
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")
 	var files []string
 	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
 		files = append(files, pt...)
 	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
 		files = append(files, pt...)
 	}
 	var offset uint64
 	var tensors []llm.Tensor
 	for _, fn := range files {
 		m, err := pytorch.Load(fn)
 		if err != nil {
 			slog.Error(fmt.Sprintf("error unpickling: %q", err))
 			return []llm.Tensor{}, err
 		}
 		// checked assertions: a checkpoint of an unexpected layout must
 		// surface as an error rather than a panic
 		dict, ok := m.(*types.Dict)
 		if !ok {
 			return nil, fmt.Errorf("unexpected checkpoint type %T in '%s'", m, fn)
 		}
 		for _, k := range dict.Keys() {
 			name, ok := k.(string)
 			if !ok {
 				return nil, fmt.Errorf("unexpected tensor key type %T in '%s'", k, fn)
 			}
 			if strings.HasSuffix(name, "self_attn.rotary_emb.inv_freq") {
 				continue
 			}
 			entry, _ := dict.Get(k)
 			src, ok := entry.(*pytorch.Tensor)
 			if !ok {
 				return nil, fmt.Errorf("unexpected value type %T for '%s'", entry, name)
 			}
 			tshape := src.Size
 			var size uint64
 			var kind uint32
 			switch len(tshape) {
 			case 0:
 				continue
 			case 1:
 				// convert to float32
 				kind = 0
 				size = uint64(tshape[0] * 4)
 			case 2:
 				// convert to float16
 				kind = 1
 				size = uint64(tshape[0] * tshape[1] * 2)
 			default:
 				// without this branch a higher-rank tensor would be recorded
 				// with size 0 (corrupting later offsets) and a rank above 4
 				// would index past the 4-slot shape below
 				return nil, fmt.Errorf("unsupported tensor rank %d for '%s'", len(tshape), name)
 			}
 			ggufName, err := tf.GetLayerName(name)
 			if err != nil {
 				slog.Error(err.Error())
 				return nil, err
 			}
 			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", name, ggufName, size, tshape))
 			// shape always has four slots; unused dimensions stay 0
 			shape := []uint64{0, 0, 0, 0}
 			for i := range tshape {
 				shape[i] = uint64(tshape[i])
 			}
 			tensor := llm.Tensor{
 				Name:   ggufName,
 				Kind:   kind,
 				Offset: offset, // calculate the offset
 				Shape:  shape,
 			}
 			tensor.WriterTo = torchWriterTo{
 				t:       &tensor,
 				params:  params,
 				bo:      params.ByteOrder,
 				storage: src.Source,
 			}
 			tensors = append(tensors, tensor)
 			offset += size
 		}
 	}
 	return tensors, nil
 }
 // getAltParams reads model hyperparameters from params.json in dirpath and
 // converts them into Params for a LlamaForCausalLM model.
 //
 // params.json carries no context length, so it is chosen from the other
 // values: 16384 when rope_theta is 1000000, 4096 when norm_eps is 1e-06,
 // and 2048 otherwise. The byte order is always little endian.
 func getAltParams(dirpath string) (*Params, error) {
 	f, err := os.Open(filepath.Join(dirpath, "params.json"))
 	if err != nil {
 		slog.Error("no params.json")
 		return nil, err
 	}
 	defer f.Close()
 	// on-disk field names differ from the ones Params uses
 	var raw struct {
 		HiddenSize     int     `json:"dim"`
 		AttentionHeads int     `json:"n_heads"`
 		KeyValHeads    int     `json:"n_kv_heads"`
 		HiddenLayers   int     `json:"n_layers"`
 		RopeTheta      float64 `json:"rope_theta"`
 		NormEPS        float64 `json:"norm_eps"`
 	}
 	if decodeErr := json.NewDecoder(f).Decode(&raw); decodeErr != nil {
 		return nil, decodeErr
 	}
 	out := &Params{
 		Architectures:  []string{"LlamaForCausalLM"},
 		HiddenSize:     raw.HiddenSize,
 		AttentionHeads: raw.AttentionHeads,
 		KeyValHeads:    raw.KeyValHeads,
 		HiddenLayers:   raw.HiddenLayers,
 		NormEPS:        raw.NormEPS,
 		ByteOrder:      binary.LittleEndian,
 	}
 	if raw.RopeTheta == 1000000 {
 		// Codellama
 		out.ContextSize = 16384
 	} else if raw.NormEPS == 1e-06 {
 		// llama2
 		slog.Debug("Found llama2 - setting context size to 4096")
 		out.ContextSize = 4096
 	} else {
 		out.ContextSize = 2048
 	}
 	return out, nil
 }
 // GetParams reads the model parameters for the checkpoint in dirpath.
 //
 // config.json is preferred. When it does not exist, params.json is read
 // through getAltParams instead. Any other open error or a JSON decode error
 // is returned. The byte order of the result is always little endian.
 func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
 	f, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		if os.IsNotExist(err) {
 			// try params.json instead
 			return getAltParams(dirpath)
 		}
 		return nil, err
 	}
 	// release the file handle on every return path
 	defer f.Close()
 	var params Params
 	if err := json.NewDecoder(f).Decode(&params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return &params, nil
 }
 // GetLayerName translates a PyTorch tensor name n into its GGUF name.
 //
 // Names without a layer index are resolved through an exact-match table.
 // Per-layer names are resolved by regular-expression rewrite, carrying the
 // layer number into "blk.<n>". Both the "layers.N..." and the
 // "model.layers.N..." spellings are covered. An error is returned when no
 // rule changes the name.
 func (m *TorchFormat) GetLayerName(n string) (string, error) {
 	exact := map[string]string{
 		"tok_embeddings.weight":     "token_embd.weight",
 		"output.weight":             "output.weight",
 		"norm.weight":               "output_norm.weight",
 		"rope.freqs":                "rope_freqs.weight",
 		"model.embed_tokens.weight": "token_embd.weight",
 		"lm_head.weight":            "output.weight",
 		"model.norm.weight":         "output_norm.weight",
 	}
 	if name, found := exact[n]; found {
 		return name, nil
 	}
 	rewrites := map[string]string{
 		"layers.(\\d+).attention_norm.weight":                 "blk.$1.attn_norm.weight",
 		"layers.(\\d+).attention_output_norm.weight":          "blk.$1.attn_norm.weight",
 		"layers.(\\d+).feed_forward.w2.weight":                "blk.$1.ffn_down.weight",
 		"layers.(\\d+).feed_forward.w1.weight":                "blk.$1.ffn_gate.weight",
 		"layers.(\\d+).feed_forward.w3.weight":                "blk.$1.ffn_up.weight",
 		"layers.(\\d+).ffn_norm.weight":                       "blk.$1.ffn_norm.weight",
 		"layers.(\\d+).attention.wk.weight":                   "blk.$1.attn_k.weight",
 		"layers.(\\d+).attention.wo.weight":                   "blk.$1.attn_output.weight",
 		"layers.(\\d+).attention.wq.weight":                   "blk.$1.attn_q.weight",
 		"layers.(\\d+).attention.wv.weight":                   "blk.$1.attn_v.weight",
 		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
 		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
 		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
 		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
 		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
 		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
 		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
 		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
 		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
 	}
 	// quick hack to rename the layers to gguf format: a rule applies when
 	// its substitution actually changes the name
 	for pattern, replacement := range rewrites {
 		renamed := regexp.MustCompile(pattern).ReplaceAllString(n, replacement)
 		if renamed == n {
 			continue
 		}
 		return renamed, nil
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo encodes the tensor's values to w and implements io.WriterTo.
 //
 // The values are taken from the float, half or bfloat16 storage, passed
 // through the optional repacker, and then written in the configured byte
 // order: as float32 when the tensor kind is 0, or converted to float16 bits
 // when the kind is 1.
 //
 // It returns the number of bytes written. On any error the count is 0,
 // because binary.Write does not report partial progress.
 func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	var f32s []float32
 	switch s := r.storage.(type) {
 	case *pytorch.FloatStorage:
 		f32s = s.Data
 	case *pytorch.HalfStorage:
 		f32s = s.Data
 	case *pytorch.BFloat16Storage:
 		f32s = s.Data
 	default:
 		return 0, fmt.Errorf("unknown data type: %T", s)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch r.t.Kind {
 	case 0:
 		if err := binary.Write(w, r.bo, f32s); err != nil {
 			return 0, err
 		}
 		// 4 bytes per float32 value
 		return int64(len(f32s)) * 4, nil
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err := binary.Write(w, r.bo, f16s); err != nil {
 			return 0, err
 		}
 		// 2 bytes per float16 value
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 }
 // GetModelArch returns the model implementation matching the single
 // architecture listed in params. Only "LlamaForCausalLM" is supported.
 // An empty architecture list, more than one entry, or any other
 // architecture name yields an error.
 func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	archs := params.Architectures
 	if len(archs) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(archs) > 1 {
 		return nil, fmt.Errorf("Unknown error")
 	}
 	if arch := archs[0]; arch != "LlamaForCausalLM" {
 		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
 	}
 	return &LlamaModel{
 		ModelData{
 			Name:   name,
 			Path:   dirPath,
 			Params: params,
 			Format: m,
 		},
 	}, nil
 }
--- a/docs/api.md
+++ b/docs/api.md
@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 - `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
 Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):
 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
-> [!IMPORTANT]
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 > It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 ### Examples
@@ -150,44 +148,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 #### Request (with suffix)
 ##### Request
 ```shell
 curl http://localhost:11434/api/generate -d '{
  "model": "codellama:code",
  "prompt": "def compute_gcd(a, b):",
  "suffix": "    return result",
  "options": {
    "temperature": 0
  },
  "stream": false
 }'
 ```
 ##### Response
 ```json
 {
  "model": "codellama:code",
  "created_at": "2024-07-22T20:47:51.147561Z",
  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
  "done": true,
  "done_reason": "stop",
  "context": [...],
  "total_duration": 1162761250,
  "load_duration": 6683708,
  "prompt_eval_count": 17,
  "prompt_eval_duration": 201222000,
  "eval_count": 63,
  "eval_duration": 953997000
 }
 ```
 #### Request (JSON mode)
 > [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
 ##### Request
@@ -336,7 +298,6 @@ curl http://localhost:11434/api/generate -d '{
    "num_predict": 100,
    "top_k": 20,
    "top_p": 0.9,
    "min_p": 0.0,
    "tfs_z": 0.5,
    "typical_p": 0.7,
    "repeat_last_n": 33,
@@ -419,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
 - `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
 The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
 - `tool_calls` (optional): a list of tools the model wants to use
 Advanced parameters (optional):
@@ -587,7 +546,7 @@ Final response:
 ##### Request
-Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64.
+Send a chat message with a conversation history.
 ```shell
 curl http://localhost:11434/api/chat -d '{
@@ -663,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```
 #### Chat request (with tools)
 ##### Request
 ```
 curl http://localhost:11434/api/chat -d '{
  "model": "llama3.1",
  "messages": [
    {
      "role": "user",
      "content": "What is the weather today in Paris?"
    }
  ],
  "stream": false,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The location to get the weather for, e.g. San Francisco, CA"
            },
            "format": {
              "type": "string",
              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
              "enum": ["celsius", "fahrenheit"]
            }
          },
          "required": ["location", "format"]
        }
      }
    }
  ]
 }'
 ```
 ##### Response
 ```json
 {
  "model": "llama3.1",
  "created_at": "2024-07-22T20:33:28.123648Z",
  "message": {
    "role": "assistant",
    "content": "",
    "tool_calls": [
      {
        "function": {
          "name": "get_current_weather",
          "arguments": {
            "format": "celsius",
            "location": "Paris, FR"
          }
        }
      }
    ]
  },
  "done_reason": "stop",
  "done": true,
  "total_duration": 885095291,
  "load_duration": 3753500,
  "prompt_eval_count": 122,
  "prompt_eval_duration": 328493000,
  "eval_count": 33,
  "eval_duration": 552222000
 }
 ```
 ## Create a Model
 ```shell
@@ -1140,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings
 ```shell
-POST /api/embed
+POST /api/embeddings
 ```
 Generate embeddings from a model
@@ -1148,11 +1034,10 @@ Generate embeddings from a model
 ### Parameters
 - `model`: name of model to generate embeddings from
- `input`: text or list of text to generate embeddings for
+- `prompt`: text to generate embeddings for
 Advanced parameters:
 - `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -1161,9 +1046,9 @@ Advanced parameters:
 #### Request
 ```shell
-curl http://localhost:11434/api/embed -d '{
+curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
-  "input": "Why is the sky blue?"
+  "prompt": "Here is an article about llamas..."
 }'
 ```
@@ -1171,38 +1056,10 @@ curl http://localhost:11434/api/embed -d '{
 ```json
 {
-  "model": "all-minilm",
+  "embedding": [
-  "embeddings": [[
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
+  ]
  ]],
  "total_duration": 14143917,
  "load_duration": 1019500,
  "prompt_eval_count": 8
 }
 ```
 #### Request (Multiple input)
 ```shell
 curl http://localhost:11434/api/embed -d '{
  "model": "all-minilm",
  "input": ["Why is the sky blue?", "Why is the grass green?"]
 }'
 ```
 #### Response
 ```json
 {
  "model": "all-minilm",
  "embeddings": [[
    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
  ],[
    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
  ]]
 }
 ```
@@ -1249,45 +1106,3 @@ A single JSON object will be returned.
  ]
 }
 ```
 ## Generate Embedding
 > Note: this endpoint has been superseded by `/api/embed`
 ```shell
 POST /api/embeddings
 ```
 Generate embeddings from a model
 ### Parameters
 - `model`: name of model to generate embeddings from
 - `prompt`: text to generate embeddings for
 Advanced parameters:
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 ### Examples
 #### Request
 ```shell
 curl http://localhost:11434/api/embeddings -d '{
  "model": "all-minilm",
  "prompt": "Here is an article about llamas..."
 }'
 ```
 #### Response
 ```json
 {
  "embedding": [
    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
  ]
 }
 ```
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -1,71 +1,71 @@
-# Ollama Docker image
+# Ollama Docker image
-
+
-### CPU only
+### CPU only
-
+
-```bash
+```bash
-docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
+```
-
+
-### Nvidia GPU
+### Nvidia GPU
-Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
+Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
-
+
-#### Install with Apt
+#### Install with Apt
-1.  Configure the repository
+1.  Configure the repository
-```bash
+```bash
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
-    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+    | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
-    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
-    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-sudo apt-get update
+sudo apt-get update
-```
+```
-2.  Install the NVIDIA Container Toolkit packages
+2.  Install the NVIDIA Container Toolkit packages
-```bash
+```bash
-sudo apt-get install -y nvidia-container-toolkit
+sudo apt-get install -y nvidia-container-toolkit
-```
+```
-
+
-#### Install with Yum or Dnf
+#### Install with Yum or Dnf
-1.  Configure the repository
+1.  Configure the repository
-
+    
-```bash
+```bash
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
-    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
+    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
-```
+```
-
+    
-2. Install the NVIDIA Container Toolkit packages
+2. Install the NVIDIA Container Toolkit packages
-
+    
-```bash
+```bash
-sudo yum install -y nvidia-container-toolkit
+sudo yum install -y nvidia-container-toolkit
-```
+```
-
+
-#### Configure Docker to use Nvidia driver
+#### Configure Docker to use Nvidia driver 
-```
+```
-sudo nvidia-ctk runtime configure --runtime=docker
+sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
+sudo systemctl restart docker
-```
+```
-
+
-#### Start the container
+#### Start the container
-
+
-```bash
+```bash
-docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
+```
-
+
-### AMD GPU
+### AMD GPU
-
+
-To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
+To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
-
+
-```
+```
-docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
+docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
-```
+```
-
+
-### Run model locally
+### Run model locally
-
+
-Now you can run a model:
+Now you can run a model:
-
+
-```
+```
-docker exec -it ollama ollama run llama3.1
+docker exec -it ollama ollama run llama3
-```
+```
-
+
-### Try different models
+### Try different models
-
+
-More models can be found on the [Ollama library](https://ollama.com/library).
+More models can be found on the [Ollama library](https://ollama.com/library).
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -111,10 +111,7 @@ On Windows, Ollama inherits your user and system environment variables.
 ## How do I use Ollama behind a proxy?
-Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
+Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variable, ensure it is set where `ollama serve` can access the value. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
 > [!NOTE]
 > Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server.
 ### How do I use Ollama behind a proxy in Docker?
@@ -230,7 +227,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
 To preload a model using the CLI, use the command:
 ```shell
-ollama run llama3.1 ""
+ollama run llama3 ""
 ```
 ## How do I keep a model loaded in memory or make it unload immediately?
@@ -275,8 +272,4 @@ The following server settings may be used to adjust how Ollama handles concurren
 - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time.  The default will auto-select either 4 or 1 based on available memory.
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512
-Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting.  Once ROCm v6.2 is available, Windows Radeon will follow the defaults above.  You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM.
+Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting.  Once ROCm v6 is available, Windows Radeon will follow the defaults above.  You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM.
 ## How does Ollama load models on multiple GPUs?
 Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models.  When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available.  If the model will entirely fit on any single GPU, Ollama will load the model on that GPU.  This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference.  If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -46,24 +46,13 @@ sudo modprobe nvidia_uvm`
 ## AMD Radeon
 Ollama supports the following AMD GPUs:
 ### Linux Support
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
 | AMD Instinct   | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50`                                                               |
-### Windows Support
+### Overrides
 With ROCm v6.1, the following GPUs are supported on Windows.
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |
 ### Overrides on Linux
 Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
 some cases you can force the system to try to use a similar LLVM target that is
 close.  For example, the Radeon RX 5400 is `gfx1034` (also known as 10.3.4)
@@ -74,7 +63,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the
 server.  If you have an unsupported AMD GPU you can experiment using the list of
 supported types below.
-At this time, the known supported GPU types on linux are the following LLVM Targets.
+At this time, the known supported GPU types are the following LLVM Targets.
 This table shows some example GPUs that map to these LLVM targets:
 | **LLVM Target** | **An Example GPU** |
 |-----------------|---------------------|
--- a/docs/images/ollama-keys.png
+++ b/docs/images/ollama-keys.png
--- a/docs/images/signup.png
+++ b/docs/images/signup.png
--- a/docs/import.md
+++ b/docs/import.md
@@ -1,129 +1,42 @@
-# Importing a model
+# Import
-## Table of Contents
+GGUF models and select Safetensors models can be imported directly into Ollama.
-  * [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
+## Import GGUF
  * [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
  * [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
  * [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
-## Importing a fine tuned adapter from Safetensors weights
+A binary GGUF file can be imported directly into Ollama through a Modelfile.
 First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
 ```dockerfile
 FROM <base model name>
 ADAPTER /path/to/safetensors/adapter/directory
 ```
 Make sure that you use the same base model in the `FROM` command as you used to create the adapter otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
 Now run `ollama create` from the directory where the `Modelfile` was created:
 ```bash
 ollama create my-model
 ```
 Lastly, test the model:
 ```bash
 ollama run my-model
 ```
 Ollama supports importing adapters based on several different model architectures including:
  * Llama (including Llama 2, Llama 3, and Llama 3.1);
  * Mistral (including Mistral 1, Mistral 2, and Mixtral); and
  * Gemma (including Gemma 1 and Gemma 2)
 You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
  * Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
  * [Unsloth](https://github.com/unslothai/unsloth)
  * [MLX](https://github.com/ml-explore/mlx)
 ## Importing a model from Safetensors weights
 First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
 ```dockerfile
 FROM /path/to/safetensors/directory
 ```
 If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
 Now run the `ollama create` command from the directory where you created the `Modelfile`:
 ```shell
 ollama create my-model
 ```
 Lastly, test the model:
 ```shell
 ollama run my-model
 ```
 Ollama supports importing models for several different architectures including:
  * Llama (including Llama 2, Llama 3, and Llama 3.1);
  * Mistral (including Mistral 1, Mistral 2, and Mixtral);
  * Gemma (including Gemma 1 and Gemma 2); and
  * Phi3
 This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.
 ## Importing a GGUF based model or adapter
 If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
  * converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp; 
  * converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
  * downloading a model or adapter from a place such as HuggingFace
 To import a GGUF model, create a `Modelfile` containing:
 ```dockerfile
 FROM /path/to/file.gguf
 ```
-For a GGUF adapter, create the `Modelfile` with:
+## Import Safetensors
 If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
 - LlamaForCausalLM
 - MistralForCausalLM
 - GemmaForCausalLM
 ```dockerfile
-FROM <model name>
+FROM /path/to/safetensors/directory
 ADAPTER /path/to/file.gguf
 ```
-When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
+For architectures not directly convertible by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
- * a model from Ollama
+## Automatic Quantization
 * a GGUF file
 * a Safetensors based model 
-Once you have created your `Modelfile`, use the `ollama create` command to build the model.
+> [!NOTE]
 > Automatic quantization requires v0.1.35 or higher.
-```shell
+Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
 ollama create my-model
 ```
 ## Quantizing a Model
 Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
 Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
 First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.
 ```dockerfile
 FROM /path/to/my/gemma/f16/model
 ```
 Use `ollama create` to then create the quantized model.
 ```shell
-$ ollama create --quantize q4_K_M mymodel
+$ ollama create -q Q4_K_M mymodel
 transferring model data
 quantizing F16 model to Q4_K_M
 creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
@@ -134,53 +47,42 @@ success
 ### Supported Quantizations
- `q4_0`
+- `Q4_0`
- `q4_1`
+- `Q4_1`
- `q5_0`
+- `Q5_0`
- `q5_1`
+- `Q5_1`
- `q8_0`
+- `Q8_0`
 #### K-means Quantizations
- `q3_K_S`
+- `Q3_K_S`
- `q3_K_M`
+- `Q3_K_M`
- `q3_K_L`
+- `Q3_K_L`
- `q4_K_S`
+- `Q4_K_S`
- `q4_K_M`
+- `Q4_K_M`
- `q5_K_S`
+- `Q5_K_S`
- `q5_K_M`
+- `Q5_K_M`
- `q6_K`
+- `Q6_K`
 ## Template Detection
-## Sharing your model on ollama.com
+> [!NOTE]
 > Template detection requires v0.1.42 or higher.
-You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
+Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
-First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
+```dockerfile
-
+FROM /path/to/my/gemma/model
 ![Sign-Up](images/signup.png)
 The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
 Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
 Follow the directions on the page to determine where your Ollama Public Key is located.
 ![Ollama Key](images/ollama-keys.png)
 Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
 To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
 your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).
 ```shell
 ollama cp mymodel myuser/mymodel
 ollama push myuser/mymodel
 ```
 Once your model has been pushed, other users can pull and run it by using the command:
 ```shell
-ollama run myuser/mymodel
+$ ollama create mymodel
 transferring model data
 using autodetected template gemma-instruct
 creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
 creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
 writing manifest
 success
 ```
 Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -20,12 +20,13 @@ GPU.
 ## Manual install
-### Download `ollama`
+### Download the `ollama` binary
-Download and extract the Linux package:
+Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
 sudo chmod +x /usr/bin/ollama
 ```
 ### Adding Ollama as a startup service (recommended)
@@ -95,7 +96,8 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by downloading the ollama binary:
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
 sudo chmod +x /usr/bin/ollama
 ```
 ## Installing specific versions
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -1,7 +1,6 @@
 # Ollama Model File
-> [!NOTE]
+> Note: `Modelfile` syntax is in development
 > `Modelfile` syntax is in development
 A model file is the blueprint to create and share models with Ollama.
@@ -141,7 +140,6 @@ PARAMETER <parameter> <parametervalue>
 | num_predict    | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)                                                                                                                                   | int        | num_predict 42       |
 | top_k          | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)                                                                        | int        | top_k 40             |
 | top_p          | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)                                                                 | float      | top_p 0.9            |
 | min_p          | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float      | min_p 0.05            |
 ### TEMPLATE
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -27,37 +27,6 @@ chat_completion = client.chat.completions.create(
    ],
    model='llama3',
 )
 response = client.chat.completions.create(
    model="llava",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtW
QIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mz
dvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
                },
            ],
        }
    ],
    max_tokens=300,
 )
 completion = client.completions.create(
    model="llama3",
    prompt="Say this is a test",
 )
 list_completion = client.models.list()
 model = client.models.retrieve("llama3")
 embeddings = client.embeddings.create(
    model="all-minilm",
    input=["why is the sky blue?", "why is the grass green?"],
 )
 ```
 ### OpenAI JavaScript library
@@ -73,44 +42,14 @@ const openai = new OpenAI({
 })
 const chatCompletion = await openai.chat.completions.create({
-    messages: [{ role: 'user', content: 'Say this is a test' }],
+  messages: [{ role: 'user', content: 'Say this is a test' }],
-    model: 'llama3',
+  model: 'llama3',
 })
 const response = await openai.chat.completions.create({
    model: "llava",
    messages: [
        {
        role: "user",
        content: [
            { type: "text", text: "What's in this image?" },
            {
            type: "image_url",
            image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wN
tLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LO
NA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
            },
        ],
        },
    ],
 })
 const completion = await openai.completions.create({
    model: "llama3",
    prompt: "Say this is a test.",
 })
 const listCompletion = await openai.models.list()
 const model = await openai.models.retrieve("llama3")
 const embedding = await openai.embeddings.create({
  model: "all-minilm",
  input: ["why is the sky blue?", "why is the grass green?"],
 })
 ```
 ### `curl`
-``` shell
+```
 curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
@@ -127,47 +66,6 @@ curl http://localhost:11434/v1/chat/completions \
        ]
    }'
 curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llava",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "What'\''s in this image?"
          },
          {
            "type": "image_url",
            "image_url": {
               "url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNt
LSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LON
A8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }'
 curl http://localhost:11434/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "llama3",
        "prompt": "Say this is a test"
    }'
 curl http://localhost:11434/v1/models
 curl http://localhost:11434/v1/models/llama3
 curl http://localhost:11434/v1/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "all-minilm",
        "input": ["why is the sky blue?", "why is the grass green?"]
    }'
 ```
 ## Endpoints
@@ -180,8 +78,8 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
- [x] Vision
+- [ ] Vision
- [x] Tools (streaming support coming soon)
+- [ ] Function calling
 - [ ] Logprobs
 #### Supported request fields
@@ -189,10 +87,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `model`
 - [x] `messages`
  - [x] Text `content`
-  - [x] Image `content`
+  - [ ] Array of `content` parts
    - [x] Base64 encoded image
    - [ ] Image URL
  - [x] Array of `content` parts
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `response_format`
@@ -202,72 +97,15 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
- [x] `tools`
+- [ ] `logit_bias`
 - [ ] `tools`
 - [ ] `tool_choice`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 ### `/v1/completions`
 #### Supported features
 - [x] Completions
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
 - [ ] Logprobs
 #### Supported request fields
 - [x] `model`
 - [x] `prompt`
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `seed`
 - [x] `stop`
 - [x] `stream`
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
 - [x] `suffix`
 - [ ] `best_of`
 - [ ] `echo`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 #### Notes
- `prompt` currently only accepts a string
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 ### `/v1/models`
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/models/{model}`
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/embeddings`
 #### Supported request fields
 - [x] `model`
 - [x] `input`
  - [x] string
  - [x] array of strings
  - [ ] array of tokens
  - [ ] array of token arrays
 - [ ] `encoding_format`
 - [ ] `dimensions`
 - [ ] `user`
 ## Models
--- a/docs/template.md
+++ b/docs/template.md
@@ -1,167 +0,0 @@
 # Template
 Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
 ## Basic Template Structure
 A basic Go template consists of three main parts:
 * **Layout**: The overall structure of the template.
 * **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
 * **Functions**: Custom functions or logic that can be used to manipulate the template's content.
 Here's an example of a simple chat template:
 ```gotmpl
 {{- range .Messages }}
 {{ .Role }}: {{ .Content }}
 {{- end }}
 ```
 In this example, we have:
 * A basic messages structure (layout)
 * Three variables: `Messages`, `Role`, and `Content` (variables)
 * A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
 ## Adding templates to your model
 By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
 Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
 To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
 ```dockerfile
 FROM llama3
 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
 {{ .System }}<|eot_id|>
 {{- end }}
 {{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
 {{ .Content }}<|eot_id|>
 {{- end }}<|start_header_id|>assistant<|end_header_id|>
 """
 ```
 ## Variables
 `System` (string): system prompt
 `Prompt` (string): user prompt
 `Response` (string): assistant response
 `Suffix` (string): text inserted after the assistant's response
 `Messages` (list): list of messages
 `Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
 `Messages[].Content` (string):  message content
 `Messages[].ToolCalls` (list): list of tools the model wants to call
 `Messages[].ToolCalls[].Function` (object): function to call
 `Messages[].ToolCalls[].Function.Name` (string): function name
 `Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
 `Tools` (list): list of tools the model can access
 `Tools[].Type` (string): schema type. `type` is always `function`
 `Tools[].Function` (object): function definition
 `Tools[].Function.Name` (string): function name
 `Tools[].Function.Description` (string): function description
 `Tools[].Function.Parameters` (object): function parameters
 `Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
 `Tools[].Function.Parameters.Required` (list): list of required properties
 `Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
 `Tools[].Function.Parameters.Properties[].Type` (string): property type
 `Tools[].Function.Parameters.Properties[].Description` (string): property description
 `Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
 ## Tips and Best Practices
 Keep the following tips and best practices in mind when working with Go templates:
 * **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
 * **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
 * **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
 ## Examples
 ### Example Messages
 #### ChatML
 ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
 ```gotmpl
 {{- range .Messages }}<|im_start|>{{ .Role }}
 {{ .Content }}<|im_end|>
 {{ end }}<|im_start|>assistant
 ```
 ### Example Tools
 Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
 #### Mistral
 Mistral v0.3 and Mixtral 8x22B support tool calling.
 ```gotmpl
 {{- range $index, $_ := .Messages }}
 {{- if eq .Role "user" }}
 {{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
 {{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
 {{ end }}{{ .Content }}[/INST]
 {{- else if eq .Role "assistant" }}
 {{- if .Content }} {{ .Content }}</s>
 {{- else if .ToolCalls }}[TOOL_CALLS] [
 {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
 {{- end }}]</s>
 {{- end }}
 {{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
 {{- end }}
 {{- end }}
 ```
 ### Example Fill-in-Middle
 Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
 #### CodeLlama
 CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
 ```gotmpl
 <PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
 ```
 > [!NOTE]
 > CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
 #### Codestral
 Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
 ```gotmpl
 [SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
 ```
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:
 ```shell
-journalctl -u ollama --no-pager
+journalctl -u ollama
 ```
 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
 const ollama = new Ollama({
  baseUrl: "http://localhost:11434",
-  model: "llama3.1",
+  model: "llama3",
 });
 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```
-That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
 ```bash
 npm install cheerio
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -23,8 +23,6 @@ Logs will often be helpful in diagnosing the problem (see
 * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
 * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
 Ollama uses unicode characters for progress indication, which may render as unknown squares in some older terminal fonts in Windows 10. If you see this, try changing your terminal font settings.
 ## API Access
 Here's a quick example showing API access from `powershell`
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -1,11 +1,11 @@
 package envconfig
 import (
 	"errors"
 	"fmt"
 	"log/slog"
 	"math"
 	"net"
 	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -14,16 +14,309 @@ import (
 	"time"
 )
-// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
+type OllamaHost struct {
-// Default is scheme "http" and host "127.0.0.1:11434"
+	Scheme string
-func Host() *url.URL {
+	Host   string
 	Port   string
 }
 // String renders the host as "scheme://host:port".
 //
 // Host and Port are combined with net.JoinHostPort rather than a plain
 // "%s:%s" so that an IPv6 literal such as "::1" is wrapped in brackets
 // ("http://[::1]:11434"); without the brackets the authority is ambiguous.
 // NOTE(review): this assumes Host is stored without brackets, which is how
 // the package's own test rebuilds the address (net.JoinHostPort(oh.Host, oh.Port)).
 func (o OllamaHost) String() string {
 	return fmt.Sprintf("%s://%s", o.Scheme, net.JoinHostPort(o.Host, o.Port))
 }
 // ErrInvalidHostPort is the sentinel error reported when the port given in
 // OLLAMA_HOST cannot be used.
 var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
 // Package-level settings. LoadConfig fills each of them from the environment
 // variable named in its comment.
 var (
 	// Set via OLLAMA_ORIGINS in the environment; LoadConfig also appends the built-in local origins
 	AllowOrigins []string
 	// Set via OLLAMA_DEBUG in the environment
 	Debug bool
 	// Experimental flash attention; set via OLLAMA_FLASH_ATTENTION in the environment
 	FlashAttention bool
 	// Set via OLLAMA_HOST in the environment
 	Host *OllamaHost
 	// Set via OLLAMA_KEEP_ALIVE in the environment
 	KeepAlive time.Duration
 	// Set via OLLAMA_LLM_LIBRARY in the environment
 	LLMLibrary string
 	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
 	MaxRunners int
 	// Set via OLLAMA_MAX_QUEUE in the environment
 	MaxQueuedRequests int
 	// Set via OLLAMA_MAX_VRAM in the environment
 	MaxVRAM uint64
 	// Set via OLLAMA_MODELS in the environment
 	ModelsDir string
 	// Set via OLLAMA_NOHISTORY in the environment
 	NoHistory bool
 	// Set via OLLAMA_NOPRUNE in the environment
 	NoPrune bool
 	// Set via OLLAMA_NUM_PARALLEL in the environment
 	NumParallel int
 	// Set via OLLAMA_RUNNERS_DIR in the environment
 	RunnersDir string
 	// Set via OLLAMA_SCHED_SPREAD in the environment
 	SchedSpread bool
 	// Set via OLLAMA_TMPDIR in the environment
 	TmpDir string
 	// Set via OLLAMA_INTEL_GPU in the environment
 	IntelGpu bool
 	// Set via CUDA_VISIBLE_DEVICES in the environment
 	CudaVisibleDevices string
 	// Set via HIP_VISIBLE_DEVICES in the environment
 	HipVisibleDevices string
 	// Set via ROCR_VISIBLE_DEVICES in the environment
 	RocrVisibleDevices string
 	// Set via GPU_DEVICE_ORDINAL in the environment
 	GpuDeviceOrdinal string
 	// Set via HSA_OVERRIDE_GFX_VERSION in the environment
 	HsaOverrideGfxVersion string
 )
 // EnvVar pairs an environment variable's name with its current value and a
 // short description; AsMap builds one per supported setting.
 type EnvVar struct {
 	// Name is the environment variable name, e.g. "OLLAMA_DEBUG".
 	Name        string
 	// Value is the setting's current value; Values formats it with "%v".
 	Value       any
 	// Description is the help text shown for the variable.
 	Description string
 }
 // AsMap returns every supported setting keyed by its environment variable
 // name, each paired with its current value and a short description.
 // The GPU-related variables are only reported on non-darwin platforms.
 func AsMap() map[string]EnvVar {
 	settings := make(map[string]EnvVar)
 	// register stores one entry, reusing the variable name as the map key.
 	register := func(name string, value any, description string) {
 		settings[name] = EnvVar{Name: name, Value: value, Description: description}
 	}
 	register("OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)")
 	register("OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention")
 	register("OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)")
 	register("OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")")
 	register("OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection")
 	register("OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU")
 	register("OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests")
 	register("OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM")
 	register("OLLAMA_MODELS", ModelsDir, "The path to the models directory")
 	register("OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history")
 	register("OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup")
 	register("OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests")
 	register("OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins")
 	register("OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners")
 	register("OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs")
 	register("OLLAMA_TMPDIR", TmpDir, "Location for temporary files")
 	if runtime.GOOS == "darwin" {
 		return settings
 	}
 	register("CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible")
 	register("HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible")
 	register("ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible")
 	register("GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible")
 	register("HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs")
 	register("OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection")
 	return settings
 }
 // Values flattens AsMap into a name -> value map, rendering every value with
 // the default "%v" formatting.
 func Values() map[string]string {
 	settings := AsMap()
 	rendered := make(map[string]string)
 	for name, entry := range settings {
 		rendered[name] = fmt.Sprintf("%v", entry.Value)
 	}
 	return rendered
 }
 // defaultAllowOrigins lists the local hosts that LoadConfig always allows.
 // Each entry is expanded into http and https origins, both bare and with a
 // "*" port wildcard.
 var defaultAllowOrigins = []string{
 	"localhost",
 	"127.0.0.1",
 	"0.0.0.0",
 }
 // Clean quotes and spaces from the value
 func clean(key string) string {
 	return strings.Trim(os.Getenv(key), "\"' ")
 }
 func init() {
 	// default values
 	NumParallel = 0 // Autoselect
 	MaxRunners = 0  // Autoselect
 	MaxQueuedRequests = 512
 	KeepAlive = 5 * time.Minute
 	LoadConfig()
 }
 // LoadConfig reads every supported environment variable and stores the result
 // in the package-level settings. It is called from init and may be called
 // again (the tests do so) after the environment has changed.
 //
 // Boolean and numeric settings are only overwritten when their variable is
 // non-empty and, where parsing applies, valid; otherwise the previous value
 // is kept. String settings, AllowOrigins, ModelsDir and Host are reassigned
 // on every call.
 //
 // AllowOrigins is rebuilt from scratch on each call. Previously the built-in
 // origins were appended to whatever an earlier call had left behind, so the
 // list grew on every reload when OLLAMA_ORIGINS was empty. OLLAMA_MAX_QUEUE
 // is now read through clean like every other setting, so surrounding quotes
 // or spaces no longer make it unparsable.
 func LoadConfig() {
 	// Any non-empty OLLAMA_DEBUG enables debugging unless it parses as an explicit false.
 	if debug := clean("OLLAMA_DEBUG"); debug != "" {
 		d, err := strconv.ParseBool(debug)
 		if err == nil {
 			Debug = d
 		} else {
 			Debug = true
 		}
 	}
 	// Unlike OLLAMA_DEBUG, an unparsable value leaves FlashAttention unchanged.
 	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
 		d, err := strconv.ParseBool(fa)
 		if err == nil {
 			FlashAttention = d
 		}
 	}
 	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
 	if runtime.GOOS == "windows" && RunnersDir == "" {
 		// On Windows we do not carry the payloads inside the main executable
 		appExe, err := os.Executable()
 		if err != nil {
 			slog.Error("failed to lookup executable path", "error", err)
 		}
 		cwd, err := os.Getwd()
 		if err != nil {
 			slog.Error("failed to lookup working directory", "error", err)
 		}
 		var paths []string
 		for _, root := range []string{filepath.Dir(appExe), cwd} {
 			paths = append(paths,
 				root,
 				filepath.Join(root, "windows-"+runtime.GOARCH),
 				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 			)
 		}
 		// Try a few variations to improve developer experience when building from source in the local tree
 		for _, p := range paths {
 			candidate := filepath.Join(p, "ollama_runners")
 			_, err := os.Stat(candidate)
 			if err == nil {
 				RunnersDir = candidate
 				break
 			}
 		}
 		if RunnersDir == "" {
 			slog.Error("unable to locate llm runner directory.  Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
 		}
 	}
 	TmpDir = clean("OLLAMA_TMPDIR")
 	userLimit := clean("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseUint(userLimit, 10, 64)
 		if err != nil {
 			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
 		} else {
 			MaxVRAM = avail
 		}
 	}
 	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
 	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
 		val, err := strconv.Atoi(onp)
 		if err != nil {
 			slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
 		} else {
 			NumParallel = val
 		}
 	}
 	// Any non-empty value (even "0" or "false") disables history.
 	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
 		NoHistory = true
 	}
 	if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
 		s, err := strconv.ParseBool(spread)
 		if err == nil {
 			SchedSpread = s
 		} else {
 			SchedSpread = true
 		}
 	}
 	// Any non-empty value (even "0" or "false") disables pruning.
 	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
 		NoPrune = true
 	}
 	// Build the origin list in a fresh slice so that repeated calls do not
 	// keep appending the built-in defaults to the previous result.
 	var allowed []string
 	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
 		allowed = strings.Split(origins, ",")
 	}
 	for _, allowOrigin := range defaultAllowOrigins {
 		allowed = append(allowed,
 			fmt.Sprintf("http://%s", allowOrigin),
 			fmt.Sprintf("https://%s", allowOrigin),
 			fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
 			fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
 		)
 	}
 	allowed = append(allowed,
 		"app://*",
 		"file://*",
 		"tauri://*",
 	)
 	AllowOrigins = allowed
 	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
 	if maxRunners != "" {
 		m, err := strconv.Atoi(maxRunners)
 		if err != nil {
 			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
 		} else {
 			MaxRunners = m
 		}
 	}
 	// The queue limit must be strictly positive; anything else is ignored.
 	if onp := clean("OLLAMA_MAX_QUEUE"); onp != "" {
 		p, err := strconv.Atoi(onp)
 		if err != nil || p <= 0 {
 			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
 		} else {
 			MaxQueuedRequests = p
 		}
 	}
 	ka := clean("OLLAMA_KEEP_ALIVE")
 	if ka != "" {
 		loadKeepAlive(ka)
 	}
 	var err error
 	ModelsDir, err = getModelsDir()
 	if err != nil {
 		slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
 	}
 	// getOllamaHost returns a usable host (with the default port) alongside
 	// ErrInvalidHostPort, so Host.Port can be logged in the error case.
 	Host, err = getOllamaHost()
 	if err != nil {
 		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
 	}
 	// An empty or unparsable value fails ParseBool and leaves IntelGpu unchanged.
 	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
 		IntelGpu = set
 	}
 	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
 	GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
 	HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
 }
 func getModelsDir() (string, error) {
 	if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
 		return models, nil
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
 	return filepath.Join(home, ".ollama", "models"), nil
 }
 func getOllamaHost() (*OllamaHost, error) {
 	defaultPort := "11434"
-	s := strings.TrimSpace(Var("OLLAMA_HOST"))
+	hostVar := os.Getenv("OLLAMA_HOST")
-	scheme, hostport, ok := strings.Cut(s, "://")
+	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
 	scheme, hostport, ok := strings.Cut(hostVar, "://")
 	switch {
 	case !ok:
-		scheme, hostport = "http", s
+		scheme, hostport = "http", hostVar
 	case scheme == "http":
 		defaultPort = "80"
 	case scheme == "https":
@@ -43,242 +336,38 @@ func Host() *url.URL {
 		}
 	}
-	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
+	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
-		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
+		return &OllamaHost{
 		return &url.URL{
 			Scheme: scheme,
-			Host:   net.JoinHostPort(host, defaultPort),
+			Host:   host,
-		}
+			Port:   defaultPort,
 		}, ErrInvalidHostPort
 	}
-	return &url.URL{
+	return &OllamaHost{
 		Scheme: scheme,
-		Host:   net.JoinHostPort(host, port),
+		Host:   host,
-	}
+		Port:   port,
 	}, nil
 }
-// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
+func loadKeepAlive(ka string) {
-func Origins() (origins []string) {
+	v, err := strconv.Atoi(ka)
 	if s := Var("OLLAMA_ORIGINS"); s != "" {
 		origins = strings.Split(s, ",")
 	}
 	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
 		origins = append(origins,
 			fmt.Sprintf("http://%s", origin),
 			fmt.Sprintf("https://%s", origin),
 			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
 			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
 		)
 	}
 	origins = append(origins,
 		"app://*",
 		"file://*",
 		"tauri://*",
 	)
 	return origins
 }
 // Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
 // Default is $HOME/.ollama/models
 func Models() string {
 	if s := Var("OLLAMA_MODELS"); s != "" {
 		return s
 	}
 	home, err := os.UserHomeDir()
 	if err != nil {
-		panic(err)
+		d, err := time.ParseDuration(ka)
-	}
+		if err == nil {
-
+			if d < 0 {
-	return filepath.Join(home, ".ollama", "models")
+				KeepAlive = time.Duration(math.MaxInt64)
 }
 // KeepAlive reports how long models stay loaded in memory, as configured by
 // the OLLAMA_KEEP_ALIVE environment variable. The value may be a Go duration
 // string or a bare integer number of seconds.
 // Unset or unparsable values give the 5 minute default. Negative values are
 // treated as infinite. Zero is treated as no keep alive.
 func KeepAlive() (keepAlive time.Duration) {
 	keepAlive = 5 * time.Minute
 	raw := Var("OLLAMA_KEEP_ALIVE")
 	if raw != "" {
 		// Duration syntax takes precedence; plain integers are the fallback.
 		parsed, durErr := time.ParseDuration(raw)
 		if durErr == nil {
 			keepAlive = parsed
 		} else {
 			seconds, intErr := strconv.ParseInt(raw, 10, 64)
 			if intErr == nil {
 				keepAlive = time.Duration(seconds) * time.Second
 			}
 		}
 	}
 	if keepAlive >= 0 {
 		return keepAlive
 	}
 	return time.Duration(math.MaxInt64)
 }
 // Bool returns a getter that reads environment variable k each time it is
 // called. An empty value yields false, a value accepted by strconv.ParseBool
 // yields that boolean, and any other non-empty value yields true.
 func Bool(k string) func() bool {
 	return func() bool {
 		raw := Var(k)
 		if raw == "" {
 			return false
 		}
 		if parsed, err := strconv.ParseBool(raw); err == nil {
 			return parsed
 		}
 		// Non-empty but unparsable counts as enabled.
 		return true
 	}
 }
 // Boolean settings. Each is a getter built by Bool, so the environment
 // variable is read again on every call.
 var (
 	// Debug enables additional debug information.
 	Debug = Bool("OLLAMA_DEBUG")
 	// FlashAttention enables the experimental flash attention feature.
 	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
 	// NoHistory disables readline history.
 	NoHistory = Bool("OLLAMA_NOHISTORY")
 	// NoPrune disables pruning of model blobs on startup.
 	NoPrune = Bool("OLLAMA_NOPRUNE")
 	// SchedSpread allows scheduling models across all GPUs.
 	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
 	// IntelGPU enables experimental Intel GPU detection.
 	IntelGPU = Bool("OLLAMA_INTEL_GPU")
 )
 // String returns a getter that looks up environment variable s through Var
 // each time it is called.
 func String(s string) func() string {
 	getter := func() string {
 		return Var(s)
 	}
 	return getter
 }
 // String settings. Each is a getter built by String that returns the named
 // environment variable as cleaned by Var.
 var (
 	// LLMLibrary reads OLLAMA_LLM_LIBRARY.
 	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
 	// TmpDir reads OLLAMA_TMPDIR.
 	TmpDir     = String("OLLAMA_TMPDIR")
 	// The remaining getters read the GPU runtime variables of the same name.
 	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
 	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 // RunnersDir returns the directory that holds the llm runners.
 //
 // A non-empty OLLAMA_RUNNERS_DIR always wins. On non-Windows platforms the
 // result is otherwise empty. On Windows a set of candidate roots derived
 // from the executable location and the working directory is probed for a
 // "lib/ollama/runners" subdirectory; if none exists an error is logged and
 // the empty string is returned.
 func RunnersDir() (p string) {
 	// This p shadows the named result; returning it sets the result directly.
 	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
 		return p
 	}
 	if runtime.GOOS != "windows" {
 		return
 	}
 	// Registered only on the Windows search path: every return below this
 	// point logs an error when no directory was found.
 	defer func() {
 		if p == "" {
 			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
 		}
 	}()
 	// On Windows we do not carry the payloads inside the main executable
 	exe, err := os.Executable()
 	if err != nil {
 		return
 	}
 	cwd, err := os.Getwd()
 	if err != nil {
 		return
 	}
 	// Roots: the executable's directory, its parent, and the working directory;
 	// each is tried bare, with "<os>-<arch>" and with "dist/<os>-<arch>".
 	var paths []string
 	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
 		paths = append(paths,
 			root,
 			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
 			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
 		)
 	}
 	// Try a few variations to improve developer experience when building from source in the local tree
 	for _, path := range paths {
 		candidate := filepath.Join(path, "lib", "ollama", "runners")
 		// The first candidate that exists wins.
 		if _, err := os.Stat(candidate); err == nil {
 			p = candidate
 			break
 		}
 	}
 	return p
 }
 func Uint(key string, defaultValue uint) func() uint {
 	return func() uint {
 		if s := Var(key); s != "" {
 			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
 				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
 			} else {
-				return uint(n)
+				KeepAlive = d
 			}
 		}
-
+	} else {
-		return defaultValue
+		d := time.Duration(v) * time.Second
 		if d < 0 {
 			KeepAlive = time.Duration(math.MaxInt64)
 		} else {
 			KeepAlive = d
 		}
 	}
 }
 // Unsigned integer settings. Each is a getter built by Uint; the second
 // argument is the default value passed to it.
 var (
 	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
 	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
 	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
 	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
 	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
 	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
 )
 // EnvVar pairs an environment variable's name with its current value and a
 // short description; AsMap builds one per supported setting.
 type EnvVar struct {
 	// Name is the environment variable name, e.g. "OLLAMA_DEBUG".
 	Name        string
 	// Value is the setting's current value; Values formats it with "%v".
 	Value       any
 	// Description is the help text shown for the variable.
 	Description string
 }
 // AsMap returns every supported setting keyed by its environment variable
 // name, each paired with the value its getter currently reports and a short
 // description. The GPU-related variables are only reported on non-darwin
 // platforms. Getters are invoked in the order listed.
 func AsMap() map[string]EnvVar {
 	settings := make(map[string]EnvVar)
 	// register stores one entry, reusing the variable name as the map key.
 	register := func(name string, value any, description string) {
 		settings[name] = EnvVar{Name: name, Value: value, Description: description}
 	}
 	register("OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)")
 	register("OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention")
 	register("OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)")
 	register("OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")")
 	register("OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection")
 	register("OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU")
 	register("OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests")
 	register("OLLAMA_MODELS", Models(), "The path to the models directory")
 	register("OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history")
 	register("OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup")
 	register("OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests")
 	register("OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins")
 	register("OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners")
 	register("OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs")
 	register("OLLAMA_TMPDIR", TmpDir(), "Location for temporary files")
 	if runtime.GOOS == "darwin" {
 		return settings
 	}
 	register("CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible")
 	register("HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible")
 	register("ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible")
 	register("GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible")
 	register("HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs")
 	register("OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection")
 	return settings
 }
 // Values flattens AsMap into a name -> value map, rendering every value with
 // the default "%v" formatting.
 func Values() map[string]string {
 	settings := AsMap()
 	rendered := make(map[string]string)
 	for name, entry := range settings {
 		rendered[name] = fmt.Sprintf("%v", entry.Value)
 	}
 	return rendered
 }
 // Var returns an environment variable stripped of leading and trailing quotes or spaces
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -1,234 +1,87 @@
 package envconfig
 import (
 	"fmt"
 	"math"
 	"net"
 	"testing"
 	"time"
-	"github.com/google/go-cmp/cmp"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
-func TestHost(t *testing.T) {
+func TestConfig(t *testing.T) {
-	cases := map[string]struct {
+	Debug = false // Reset whatever was loaded in init()
 	t.Setenv("OLLAMA_DEBUG", "")
 	LoadConfig()
 	require.False(t, Debug)
 	t.Setenv("OLLAMA_DEBUG", "false")
 	LoadConfig()
 	require.False(t, Debug)
 	t.Setenv("OLLAMA_DEBUG", "1")
 	LoadConfig()
 	require.True(t, Debug)
 	t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
 	LoadConfig()
 	require.True(t, FlashAttention)
 	t.Setenv("OLLAMA_KEEP_ALIVE", "")
 	LoadConfig()
 	require.Equal(t, 5*time.Minute, KeepAlive)
 	t.Setenv("OLLAMA_KEEP_ALIVE", "3")
 	LoadConfig()
 	require.Equal(t, 3*time.Second, KeepAlive)
 	t.Setenv("OLLAMA_KEEP_ALIVE", "1h")
 	LoadConfig()
 	require.Equal(t, 1*time.Hour, KeepAlive)
 	t.Setenv("OLLAMA_KEEP_ALIVE", "-1s")
 	LoadConfig()
 	require.Equal(t, time.Duration(math.MaxInt64), KeepAlive)
 	t.Setenv("OLLAMA_KEEP_ALIVE", "-1")
 	LoadConfig()
 	require.Equal(t, time.Duration(math.MaxInt64), KeepAlive)
 }
 func TestClientFromEnvironment(t *testing.T) {
 	type testCase struct {
 		value  string
 		expect string
-	}{
+		err    error
 		"empty":               {"", "127.0.0.1:11434"},
 		"only address":        {"1.2.3.4", "1.2.3.4:11434"},
 		"only port":           {":1234", ":1234"},
 		"address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
 		"hostname":            {"example.com", "example.com:11434"},
 		"hostname and port":   {"example.com:1234", "example.com:1234"},
 		"zero port":           {":0", ":0"},
 		"too large port":      {":66000", ":11434"},
 		"too small port":      {":-1", ":11434"},
 		"ipv6 localhost":      {"[::1]", "[::1]:11434"},
 		"ipv6 world open":     {"[::]", "[::]:11434"},
 		"ipv6 no brackets":    {"::1", "[::1]:11434"},
 		"ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
 		"extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
 		"extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
 		"extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
 		"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
 		"http":                {"http://1.2.3.4", "1.2.3.4:80"},
 		"http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
 		"https":               {"https://1.2.3.4", "1.2.3.4:443"},
 		"https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
 	}
-	for name, tt := range cases {
+	hostTestCases := map[string]*testCase{
-		t.Run(name, func(t *testing.T) {
+		"empty":               {value: "", expect: "127.0.0.1:11434"},
-			t.Setenv("OLLAMA_HOST", tt.value)
+		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
-			if host := Host(); host.Host != tt.expect {
+		"only port":           {value: ":1234", expect: ":1234"},
-				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
+		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
-			}
+		"hostname":            {value: "example.com", expect: "example.com:11434"},
-		})
+		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
-	}
+		"zero port":           {value: ":0", expect: ":0"},
-}
+		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
-
+		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
-func TestOrigins(t *testing.T) {
+		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
-	cases := []struct {
+		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
-		value  string
+		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
-		expect []string
+		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
-	}{
+		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
-		{"", []string{
+		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
-			"http://localhost",
+		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
-			"https://localhost",
+		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://10.0.0.1", []string{
 			"http://10.0.0.1",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://172.16.0.1,https://192.168.0.1", []string{
 			"http://172.16.0.1",
 			"https://192.168.0.1",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 		{"http://totally.safe,http://definitely.legit", []string{
 			"http://totally.safe",
 			"http://definitely.legit",
 			"http://localhost",
 			"https://localhost",
 			"http://localhost:*",
 			"https://localhost:*",
 			"http://127.0.0.1",
 			"https://127.0.0.1",
 			"http://127.0.0.1:*",
 			"https://127.0.0.1:*",
 			"http://0.0.0.0",
 			"https://0.0.0.0",
 			"http://0.0.0.0:*",
 			"https://0.0.0.0:*",
 			"app://*",
 			"file://*",
 			"tauri://*",
 		}},
 	}
 	for _, tt := range cases {
 		t.Run(tt.value, func(t *testing.T) {
 			t.Setenv("OLLAMA_ORIGINS", tt.value)
 			if diff := cmp.Diff(Origins(), tt.expect); diff != "" {
 				t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
 			}
 		})
 	}
 }
 func TestBool(t *testing.T) {
 	cases := map[string]bool{
 		"":      false,
 		"true":  true,
 		"false": false,
 		"1":     true,
 		"0":     false,
 		// invalid values
 		"random":    true,
 		"something": true,
 	}
-	for k, v := range cases {
+	for k, v := range hostTestCases {
 		t.Run(k, func(t *testing.T) {
-			t.Setenv("OLLAMA_BOOL", k)
+			t.Setenv("OLLAMA_HOST", v.value)
-			if b := Bool("OLLAMA_BOOL")(); b != v {
+			LoadConfig()
-				t.Errorf("%s: expected %t, got %t", k, v, b)
+
-			}
+			oh, err := getOllamaHost()
-		})
+			if err != v.err {
-	}
+				t.Fatalf("expected %s, got %s", v.err, err)
-}
+			}
-
+
-func TestUint(t *testing.T) {
+			if err == nil {
-	cases := map[string]uint{
+				host := net.JoinHostPort(oh.Host, oh.Port)
-		"0":    0,
+				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
 		"1":    1,
 		"1337": 1337,
 		// default values
 		"":       11434,
 		"-1":     11434,
 		"0o10":   11434,
 		"0x10":   11434,
 		"string": 11434,
 	}
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_UINT", k)
 			if i := Uint("OLLAMA_UINT", 11434)(); i != v {
 				t.Errorf("%s: expected %d, got %d", k, v, i)
 			}
 		})
 	}
 }
 // TestKeepAlive checks KeepAlive against duration strings, bare second
 // counts, negative values (infinite) and unparsable input (5 minute default).
 func TestKeepAlive(t *testing.T) {
 	const fallback = 5 * time.Minute
 	forever := time.Duration(math.MaxInt64)
 	expectations := map[string]time.Duration{
 		"":       fallback,
 		"1s":     time.Second,
 		"1m":     time.Minute,
 		"1h":     time.Hour,
 		"5m0s":   5 * time.Minute,
 		"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
 		"0":      time.Duration(0),
 		"60":     60 * time.Second,
 		"120":    2 * time.Minute,
 		"3600":   time.Hour,
 		"-0":     time.Duration(0),
 		"-1":     forever,
 		"-1m":    forever,
 		// invalid values
 		" ":   fallback,
 		"???": fallback,
 		"1d":  fallback,
 		"1y":  fallback,
 		"1w":  fallback,
 	}
 	for input, want := range expectations {
 		t.Run(input, func(t *testing.T) {
 			t.Setenv("OLLAMA_KEEP_ALIVE", input)
 			got := KeepAlive()
 			if got != want {
 				t.Errorf("%s: expected %s, got %s", input, want, got)
 			}
 		})
 	}
 }
 func TestVar(t *testing.T) {
 	cases := map[string]string{
 		"value":       "value",
 		" value ":     "value",
 		" 'value' ":   "value",
 		` "value" `:   "value",
 		" ' value ' ": " value ",
 		` " value " `: " value ",
 	}
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_VAR", k)
 			if s := Var("OLLAMA_VAR"); s != v {
 				t.Errorf("%s: expected %q, got %q", k, v, s)
 			}
 		})
 	}
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {
 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama3.1",
+		Model:    "llama3",
 		Messages: messages,
 	}
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -16,7 +16,7 @@ func main() {
 	// By default, GenerateRequest is streaming.
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 	}
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -15,7 +15,7 @@ func main() {
 	}
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 		// set streaming to false
--- a/examples/go-http-generate/README.md
+++ b/examples/go-http-generate/README.md
--- a/examples/langchain-python-rag-document/README.md
+++ b/examples/langchain-python-rag-document/README.md
@@ -4,14 +4,6 @@ This example provides an interface for asking questions to a PDF document.
 ## Setup
 1. Ensure you have the `llama3.1` model installed:
 ```
 ollama pull llama3.1
 ```
 2. Install the Python Requirements.
 ```
 pip install -r requirements.txt
 ```
--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -51,7 +51,7 @@ while True:
        template=template,
    )
-    llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
--- a/examples/langchain-python-rag-websummary/README.md
+++ b/examples/langchain-python-rag-websummary/README.md
@@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama2` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama2
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-rag-websummary/main.py
+++ b/examples/langchain-python-rag-websummary/main.py
@@ -5,8 +5,8 @@ from langchain.chains.summarize import load_summarize_chain
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 chain = load_summarize_chain(llm, chain_type="stuff")
-result = chain.invoke(docs)
+result = chain.invoke(docs) 
 print(result)
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama
 input = input("What is your question?")
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama3.1
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
--- a/examples/modelfile-mario/readme.md
+++ b/examples/modelfile-mario/readme.md
@@ -2,12 +2,12 @@
 # Example character: Mario
-This example shows how to create a basic character using Llama3.1 as the base model.
+This example shows how to create a basic character using Llama3 as the base model.
 To run this example:
 1. Download the Modelfile
-2. `ollama pull llama3.1` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:
 ```
-FROM llama3.1
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
--- a/examples/python-dockerit/dockerit.py
+++ b/examples/python-dockerit/dockerit.py
@@ -4,7 +4,7 @@ imageName = input("Enter the name of the image: ")
 client = docker.from_env()
 s = requests.Session()
 output=""
-with s.post('http://localhost:11434/api/generate', json={'model': 'mattw/dockerit', 'prompt': inputDescription}, stream=True) as r:
+with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r:
  for line in r.iter_lines():
    if line:
      j = json.loads(line)
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
@@ -2,7 +2,7 @@ import requests
 import json
 import random
-model = "llama3.1"
+model = "llama3"
 template = {
  "firstName": "",
  "lastName": "",
--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
@@ -12,7 +12,7 @@ countries = [
    "France",
 ]
 country = random.choice(countries)
-model = "llama3.1"
+model = "llama3"
 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Josh Yan	a548eb6003	a8db2a9	2024-07-10 13:10:58 -07:00
Josh Yan	f92818d90d	patch again	2024-07-10 13:06:40 -07:00
Josh Yan	1ef59057d0	patch llama.cpp	2024-07-10 13:02:37 -07:00
Josh Yan	106fe6b4ae	patch	2024-07-10 12:46:45 -07:00
Josh Yan	5fd359d117	added patch	2024-07-10 12:46:45 -07:00
Josh Yan	b0e4e8d76c	change	2024-07-10 12:46:44 -07:00
Josh Yan	e59453982d	logs	2024-07-10 12:46:22 -07:00
Josh Yan	369113970a	wooh	2024-07-10 12:46:14 -07:00
Josh Yan	26ed829415	test	2024-07-10 12:46:14 -07:00
Josh Yan	542134bf50	new	2024-07-10 12:46:09 -07:00
Josh Yan	9e0b8f1fe2	another change	2024-07-10 12:46:06 -07:00
Josh Yan	c498609ba3	cast	2024-07-10 12:45:47 -07:00
Josh Yan	c800a67f1b	cast	2024-07-10 12:45:12 -07:00
Josh Yan	dfc62648f3	cast	2024-07-10 12:45:12 -07:00
Josh Yan	24e8292e94	new changes	2024-07-10 12:45:10 -07:00
Josh Yan	c63b4ecbf7	quantize	2024-07-10 12:44:40 -07:00
Josh Yan	ee2b9b076c	stop spinner	2024-07-10 12:40:29 -07:00
Josh Yan	bec9100f32	tensor count	2024-07-10 12:40:29 -07:00
Josh Yan	1344843515	image	2024-07-10 12:40:29 -07:00
Josh Yan	e87eafe5cd	quantize percentage	2024-07-10 12:40:29 -07:00
Josh Yan	6bab0e2368	lint	2024-07-10 12:36:32 -07:00
Josh Yan	c4cccaf936	remove rebase err	2024-07-10 11:37:55 -07:00
Josh Yan	9fe5c393e4	hi	2024-07-10 11:35:01 -07:00
Josh Yan	007c988dba	rmv double msg	2024-07-10 11:34:31 -07:00
Josh Yan	91d21e7c7b	rmv double msg	2024-07-10 11:34:31 -07:00
Josh Yan	3e64284f69	percent	2024-07-10 11:34:31 -07:00
Josh Yan	39910f2ab2	percent	2024-07-10 11:34:29 -07:00
Josh Yan	96d0cd92f2	rebase	2024-07-10 11:31:53 -07:00
Josh Yan	3a724a7c80	isLocal firstdraft	2024-07-10 11:31:12 -07:00
Josh Yan	f520f0056e	rm config	2024-07-10 11:29:51 -07:00
Josh Yan	d25f85ede4	on disk copy	2024-07-10 11:29:49 -07:00
Josh Yan	b48420b74b	percent	2024-07-10 11:27:33 -07:00
Josh Yan	784958a1cb	transfer data	2024-07-10 11:24:50 -07:00
Josh Yan	ae65cc8dea	progress	2024-07-10 11:24:48 -07:00
Josh Yan	a037528bba	lint	2024-07-10 11:20:02 -07:00
Josh Yan	04bf41deb5	clean	2024-07-10 11:20:02 -07:00
Josh Yan	c23cec9547	removed cmt and prints	2024-07-10 11:20:02 -07:00
Josh Yan	8377dc48d0	removed client isLocal()	2024-07-10 11:20:02 -07:00
Josh Yan	3aee405dfa	lint	2024-07-10 11:20:02 -07:00
Josh Yan	9b3f47b674	lint	2024-07-10 11:20:02 -07:00
Josh Yan	f5441f01a2	lint	2024-07-10 11:20:02 -07:00
Josh Yan	ab165df43a	syscopy windows	2024-07-10 11:20:02 -07:00
Josh Yan	79cc4c9585	os copy	2024-07-10 11:20:02 -07:00
Josh Yan	bc3f59a6ad	rmv prints	2024-07-10 11:20:02 -07:00
Josh Yan	1a85cb904c	local copy	2024-07-10 11:20:02 -07:00
Josh Yan	10ea0987e9	isLocal firstdraft	2024-07-10 11:19:50 -07:00
Josh Yan	413d368a6a	clean	2024-07-10 11:19:32 -07:00
Josh Yan	cabf375059	rm bench	2024-07-10 11:19:32 -07:00
Josh Yan	ca0ee1d4fe	rm config	2024-07-10 11:19:32 -07:00
Josh Yan	1142999aab	rm config	2024-07-10 11:19:32 -07:00
Josh Yan	0d5a72aba9	clean	2024-07-10 11:19:32 -07:00
Josh Yan	ea837412c2	local path	2024-07-10 11:19:32 -07:00
Josh Yan	736ad6f438	still works	2024-07-10 11:19:32 -07:00
Josh Yan	64607d16a5	working	2024-07-10 11:19:32 -07:00
Josh Yan	a6cfe7f00b	benchmark	2024-07-10 11:19:32 -07:00
Josh Yan	c3b411a515	on disk copy	2024-07-10 11:19:32 -07:00
Josh Yan	928f37e3ae	start tests	2024-07-10 11:19:32 -07:00