Reincluding Numbers

Draft for Multi-Language Modelfile Creation
2024-05-29 12:22:36 -07:00 · 2024-05-29 11:51:57 -07:00
332 changed files with 5927 additions and 17977 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1 @@
 llm/ext_server/* linguist-vendored
 * text=auto
 *.go text eol=lf
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -147,7 +147,7 @@ jobs:
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -187,13 +187,6 @@ jobs:
  generate-windows-cuda:
    environment: release
    runs-on: windows
    strategy:
      matrix:
        cuda:
          - version: "11"
            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
          - version: "12"
            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
    env:
      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
    steps:
@@ -227,11 +220,11 @@ jobs:
        with:
          go-version-file: go.mod
          cache: true
-      - name: 'Install CUDA ${{ matrix.cuda.version }}'
+      - name: 'Install CUDA'
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading CUDA Installer"
-          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
          write-host "Installing CUDA"
          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
          write-host "Completed CUDA"
@@ -263,16 +256,15 @@ jobs:
          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
      - uses: actions/upload-artifact@v4
        with:
-          name: generate-windows-cuda-${{ matrix.cuda.version }}
+          name: generate-windows-cuda
          path: |
            llm/build/**/bin/*
            dist/windows-amd64/**
      - uses: actions/upload-artifact@v4
        with:
-          name: windows-cuda-deps-${{ matrix.cuda.version }}
+          name: windows-cuda-deps
          path: dist/deps/*
  # Import the prior generation steps and build the final windows assets
  build-windows:
    environment: release
@@ -322,16 +314,10 @@ jobs:
          name: generate-windows-cpu
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-11
+          name: generate-windows-cuda
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-12
+          name: windows-cuda-deps
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps-11
      - uses: actions/download-artifact@v4
        with:
          name: windows-cuda-deps-12
      - uses: actions/download-artifact@v4
        with:
          name: windows-rocm-deps
@@ -377,6 +363,7 @@ jobs:
      - run: |
          ./scripts/build_linux.sh
          ./scripts/build_docker.sh
          mv dist/deps/* dist/
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-amd64
@@ -450,7 +437,6 @@ jobs:
    env:
      OLLAMA_SKIP_IMAGE_BUILD: '1'
      PUSH: '1'
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
      - name: Set Version
@@ -472,25 +458,17 @@ jobs:
          merge-multiple: true
      - run: |
          ls -lh dist/
-          (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
+          (cd dist; sha256sum * > sha256sum.txt)
          mv sha256sum.txt dist/
          mv dist/linux-???64 .
          mv dist/linux-amd64-rocm .
          cat dist/sha256sum.txt
-      - name: Create or update Release
+      - uses: ncipollo/release-action@v1
-        run: |
+        with:
-          echo "Looking for existing release for ${{ env.RELEASE_VERSION }}"
+          name: ${{ env.RELEASE_VERSION }}
-          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${{ env.RELEASE_VERSION }}\") | .tagName")
+          allowUpdates: true
-          if [ -n "$OLD_TAG" ]; then
+          artifacts: 'dist/*'
-            echo "Updating release ${{ env.RELEASE_VERSION }} to point to new tag ${GITHUB_REF_NAME}"
+          draft: true
-            gh release edit ${OLD_TAG} --tag ${GITHUB_REF_NAME}
+          prerelease: true
-          else
+          omitBodyDuringUpdate: true
-            echo "Creating new release ${{ env.RELEASE_VERSION }} pointing to tag ${GITHUB_REF_NAME}"
+          generateReleaseNotes: true
-            gh release create ${GITHUB_REF_NAME} \
+          omitDraftDuringUpdate: true
-              --title ${{ env.RELEASE_VERSION }} \
+          omitPrereleaseDuringUpdate: true
-              --draft \
+          replacesArtifacts: true
              --generate-notes \
              --prerelease
          fi
          echo "Uploading artifacts for tag ${GITHUB_REF_NAME}"
          gh release upload ${GITHUB_REF_NAME} dist/* --clobber
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -34,13 +34,13 @@ jobs:
            git diff-tree -r --no-commit-id --name-only \
              $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
              ${{ github.event.pull_request.head.sha }} \
-              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
+              | xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
          }
          {
-            echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+            echo GENERATE=$(changed llm/)
-            echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+            echo GENERATE_CUDA=$(changed llm/)
-            echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+            echo GENERATE_ROCM=$(changed llm/)
          } >>$GITHUB_OUTPUT
  generate:
@@ -58,7 +58,6 @@ jobs:
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
@@ -80,7 +79,6 @@ jobs:
      - run: go generate -x ./...
        if: ${{ ! startsWith(matrix.os, 'windows-') }}
        name: 'Unix Go Generate'
      - run: go build .
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -126,7 +124,7 @@ jobs:
    strategy:
      matrix:
        rocm-version:
-          - '6.1.2'
+          - '6.0.2'
    runs-on: linux
    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
    steps:
@@ -169,7 +167,7 @@ jobs:
        run: |
          $ErrorActionPreference = "Stop"
          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP"
@@ -271,7 +269,7 @@ jobs:
          mkdir -p llm/build/darwin/$ARCH/stub/bin
          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
        if: ${{ startsWith(matrix.os, 'macos-') }}
-      - uses: golangci/golangci-lint-action@v6
+      - uses: golangci/golangci-lint-action@v4
        with:
          args: --timeout 8m0s -v
  test:
@@ -289,8 +287,6 @@ jobs:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
      OLLAMA_CPU_TARGET: 'static'
      OLLAMA_SKIP_CPU_GENERATE: '1'
      OLLAMA_SKIP_METAL_GENERATE: '1'
    steps:
      - uses: actions/checkout@v4
        with:
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -7,37 +7,11 @@ linters:
    - bodyclose
    - containedctx
    - contextcheck
    - errcheck
    - exportloopref
    - gci
    - gocheckcompilerdirectives
-    - gofmt
+    # FIXME: for some reason this errors on windows
-    - gofumpt
+    # - gofmt
-    - gosimple
+    # - goimports
    - govet
    - ineffassign
    - intrange
    - makezero
    - misspell
    - nilerr
    - nolintlint
    - nosprintfhostport
    - staticcheck
    - tenv
    - unconvert
    - unused
    - usestdlibvars
    - wastedassign
    - whitespace
 linters-settings:
  gci:
    sections: [standard, default, localmodule]
 severity:
  default-severity: error
  rules:
    - linters:
        - gofmt
        - goimports
        - intrange
        - usestdlibvars
      severity: info
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,37 +0,0 @@
 # Contributing to Ollama
 Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
 ## Set up
 See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
 ## Pull requests
 ### Ideal issues
 * [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
 * [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
 * [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
 ### Issues that are harder to review
 * New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
 * Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
 * Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
 ### Issues that may not be accepted
 * Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
 * Changes that add significant friction to the user experience
 * Changes that create a large future maintenance burden for maintainers and contributors
 ### Best practices
 * Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
 * Tests: please add test coverage to changes where possible.
 * Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
 ## Need help?
 If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
--- a/139
+++ b/139
@@ -1,10 +1,8 @@
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
-ARG CUDA_VERSION_11=11.3.1
+# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
-ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
+ARG CUDA_VERSION=11.3.1
-ARG CUDA_VERSION_12=12.4.0
+ARG ROCM_VERSION=6.0.2
 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 ARG ROCM_VERSION=6.1.2
 # Copy the minimal context we need to run the generate scripts
 FROM scratch AS llm-code
@@ -12,7 +10,7 @@ COPY .git .git
 COPY .gitmodules .gitmodules
 COPY llm llm
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -20,34 +18,9 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ARG CUDA_V11_ARCHITECTURES
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 ENV GOARCH amd64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
    CUDA_VARIANT="_v11" \
    bash gen_linux.sh
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG CUDA_V12_ARCHITECTURES
 ENV GOARCH amd64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
    CUDA_VARIANT="_v12" \
    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
    bash gen_linux.sh
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -55,32 +28,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ARG CUDA_V11_ARCHITECTURES
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 ENV GOARCH arm64 
 RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
    CUDA_VARIANT="_v11" \
    bash gen_linux.sh
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG CUDA_V12_ARCHITECTURES
 ENV GOARCH arm64 
 RUN --mount=type=cache,target=/root/.ccache \
    OLLAMA_SKIP_STATIC_GENERATE=1 \
    OLLAMA_SKIP_CPU_GENERATE=1 \
    CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
    CUDA_VARIANT="_v12" \
    OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
    bash gen_linux.sh
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
 ARG CMAKE_VERSION
@@ -92,11 +40,15 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG AMDGPU_TARGETS
-ENV GOARCH amd64 
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-RUN --mount=type=cache,target=/root/.ccache \
+RUN mkdir /tmp/scratch && \
-    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
+    for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
-RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
+        cp ${dep} /tmp/scratch/ || exit 1 ; \
-    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
+    done && \
    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
@@ -107,40 +59,32 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
 ENV GOARCH amd64 
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
-FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
+FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 COPY --from=llm-code / /go/src/github.com/ollama/ollama/
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
 ENV GOARCH arm64
 WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
    OLLAMA_CPU_TARGET="static" bash gen_linux.sh
 FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
-RUN --mount=type=cache,target=/root/.ccache \
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
    OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 # Intermediate stage used for ./scripts/build_linux.sh
@@ -151,16 +95,12 @@ COPY . .
 COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
+RUN go build -trimpath .
    go build -trimpath -o dist/linux-amd64/bin/ollama .
 # Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
@@ -169,36 +109,23 @@ ARG GOLANG_VERSION
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
 COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
+RUN go build -trimpath .
    go build -trimpath -o dist/linux-arm64/bin/ollama .
 # Strip out ROCm dependencies to keep the primary image lean
 FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
 RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa* 
 # Runtime stages
 FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
 COPY --from=amd64-libs-without-rocm /scratch/ /lib/
 RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
 COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
 RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 # Radeon images are much larger so we keep it distinct from the CPU/CUDA image
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
 RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 RUN ln -s /opt/rocm/lib /lib/ollama
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 [![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
-Get up and running with large language models.
+Get up and running with large language models locally.
 ### macOS
@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla
 ## Quickstart
-To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
+To run and chat with [Llama 3](https://ollama.com/library/llama3):
 ```
-ollama run llama3.1
+ollama run llama3
 ```
 ## Model library
@@ -49,14 +49,12 @@ Here are some example models that can be downloaded:
 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
+| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
-| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
+| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
 | Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
 | Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
@@ -66,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
-> [!NOTE]
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 ## Customize a model
@@ -99,16 +96,16 @@ See the [guide](docs/import.md) on importing models for more information.
 ### Customize a prompt
-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 Create a `Modelfile`:
 ```
-FROM llama3.1
+FROM llama3
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -143,7 +140,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model
 ```
-ollama pull llama3.1
+ollama pull llama3
 ```
 > This command can also be used to update a local model. Only the diff will be pulled.
@@ -151,13 +148,13 @@ ollama pull llama3.1
 ### Remove a model
 ```
-ollama rm llama3.1
+ollama rm llama3
 ```
 ### Copy a model
 ```
-ollama cp llama3.1 my-model
+ollama cp llama3 my-model
 ```
 ### Multiline input
@@ -174,23 +171,17 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 ### Multimodal models
 ```
-ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 ### Pass the prompt as an argument
 ```
-$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
+$ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 ### Show model information
 ```
 ollama show llama3.1
 ```
 ### List models on your computer
 ```
@@ -216,7 +207,7 @@ Next, start the server:
 Finally, in a separate shell, run a model:
 ```
-./ollama run llama3.1
+./ollama run llama3
 ```
 ## REST API
@@ -227,7 +218,7 @@ Ollama has a REST API for running and managing models.
 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "prompt":"Why is the sky blue?"
 }'
 ```
@@ -236,7 +227,7 @@ curl http://localhost:11434/api/generate -d '{
 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.1",
+  "model": "llama3",
  "messages": [
    { "role": "user", "content": "why is the sky blue?" }
  ]
@@ -294,14 +285,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
 - [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
 - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
 ### Terminal
@@ -324,8 +307,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
 - [gollama](https://github.com/sammcj/gollama)
 - [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
 ### Database
@@ -341,16 +322,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Libraries
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -368,7 +346,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
 ### Mobile
 - [Enchanted](https://github.com/AugustDev/enchanted)
@@ -395,15 +372,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot, like GitHub Copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
 - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server)
 ### Supported backends 
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,25 +0,0 @@
 # Security
 The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
 ## Reporting a vulnerability
 If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
 Please include the following details in your report:
 - A description of the vulnerability
 - Steps to reproduce the issue
 - Your assessment of the potential impact
 - Any possible mitigations
 ## Security best practices
 While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
 - Regularly updating to the latest version of Ollama
 - Securing access to hosted instances of Ollama
 - Monitoring systems for unusual activity
 ## Contact
 For any other questions or concerns related to security, please contact us at hello@ollama.com
--- a/api/client.go
+++ b/api/client.go
@@ -18,14 +18,16 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
 	"net/url"
 	"os"
 	"runtime"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
 )
@@ -63,12 +65,66 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
 	ollamaHost, err := GetOllamaHost()
 	if err != nil {
 		return nil, err
 	}
 	return &Client{
-		base: envconfig.Host(),
+		base: &url.URL{
 			Scheme: ollamaHost.Scheme,
 			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
 		},
 		http: http.DefaultClient,
 	}, nil
 }
 // OllamaHost holds the scheme, host and port that GetOllamaHost derives from
 // the OLLAMA_HOST environment variable.
 type OllamaHost struct {
 	// Scheme is the URL scheme; it is "http" when OLLAMA_HOST has no "://".
 	Scheme string
 	// Host is the host part without the port; it may be empty when only a
 	// port was given (for example ":1234").
 	Host   string
 	// Port is the port as a decimal string in the range 0-65535.
 	Port   string
 }
 // GetOllamaHost parses the OLLAMA_HOST environment variable into its scheme,
 // host and port. Surrounding whitespace and quotes are ignored. Missing parts
 // fall back to "http", "127.0.0.1" and a scheme-dependent default port
 // (11434 without a scheme, 80 for http://, 443 for https://).
 // It returns ErrInvalidHostPort when the port is not an integer in 0-65535.
 func GetOllamaHost() (OllamaHost, error) {
 	// Strip whitespace, then any surrounding quotes, then whitespace again.
 	raw := strings.TrimSpace(os.Getenv("OLLAMA_HOST"))
 	raw = strings.TrimSpace(strings.Trim(raw, "\"'"))
 	// The default port depends on whether an explicit scheme was supplied.
 	fallbackPort := "11434"
 	scheme, rest, hasScheme := strings.Cut(raw, "://")
 	if !hasScheme {
 		scheme, rest = "http", raw
 	} else if scheme == "http" {
 		fallbackPort = "80"
 	} else if scheme == "https" {
 		fallbackPort = "443"
 	}
 	// trim trailing slashes
 	rest = strings.TrimRight(rest, "/")
 	host, port, splitErr := net.SplitHostPort(rest)
 	if splitErr != nil {
 		// No usable "host:port" pair: treat the whole value as a host (or a
 		// bare, possibly bracketed, IP address) and use the default port.
 		port = fallbackPort
 		ip := net.ParseIP(strings.Trim(rest, "[]"))
 		switch {
 		case ip != nil:
 			host = ip.String()
 		case rest != "":
 			host = rest
 		default:
 			host = "127.0.0.1"
 		}
 	}
 	portNum, parseErr := strconv.ParseInt(port, 10, 32)
 	if parseErr != nil || portNum < 0 || portNum > 65535 {
 		return OllamaHost{}, ErrInvalidHostPort
 	}
 	result := OllamaHost{
 		Scheme: scheme,
 		Host:   host,
 		Port:   port,
 	}
 	return result, nil
 }
 func NewClient(base *url.URL, http *http.Client) *Client {
 	return &Client{
 		base: base,
@@ -173,7 +229,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}
 		if errorResponse.Error != "" {
-			return errors.New(errorResponse.Error)
+			return fmt.Errorf(errorResponse.Error)
 		}
 		if response.StatusCode >= http.StatusBadRequest {
@@ -298,9 +354,9 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }
-// ListRunning lists running models.
+// List running models.
-func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
+func (c *Client) ListRunning(ctx context.Context) (*ListResponse, error) {
-	var lr ProcessResponse
+	var lr ListResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
 		return nil, err
 	}
@@ -333,7 +389,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
 	return &resp, nil
 }
-// Heartbeat checks if the server has started and is responsive; if yes, it
+// Hearbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
@@ -342,16 +398,7 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	return nil
 }
-// Embed generates embeddings from a model.
+// Embeddings generates embeddings from a model.
 func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
 	// POST the request to /api/embed and decode the reply into a fresh response.
 	out := new(EmbedResponse)
 	err := c.do(ctx, http.MethodPost, "/api/embed", req, out)
 	if err != nil {
 		return nil, err
 	}
 	return out, nil
 }
 // Embeddings generates an embedding from a model.
 func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
 	var resp EmbeddingResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -1,7 +1,11 @@
 package api
 import (
 	"fmt"
 	"net"
 	"testing"
 	"github.com/stretchr/testify/assert"
 )
 func TestClientFromEnvironment(t *testing.T) {
@@ -42,4 +46,40 @@ func TestClientFromEnvironment(t *testing.T) {
 			}
 		})
 	}
 	hostTestCases := map[string]*testCase{
 		"empty":               {value: "", expect: "127.0.0.1:11434"},
 		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
 		"only port":           {value: ":1234", expect: ":1234"},
 		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
 		"hostname":            {value: "example.com", expect: "example.com:11434"},
 		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
 		"zero port":           {value: ":0", expect: ":0"},
 		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
 		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
 		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
 		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
 		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
 		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
 		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
 		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
 		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
 		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
 	}
 	for k, v := range hostTestCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
 			oh, err := GetOllamaHost()
 			if err != v.err {
 				t.Fatalf("expected %s, got %s", v.err, err)
 			}
 			if err == nil {
 				host := net.JoinHostPort(oh.Host, oh.Port)
 				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
 			}
 		})
 	}
 }
--- a/api/types.go
+++ b/api/types.go
@@ -2,6 +2,7 @@ package api
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"log/slog"
 	"math"
@@ -47,9 +48,6 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`
 	// Suffix is the text that comes after the inserted text.
 	Suffix string `json:"suffix"`
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`
@@ -100,25 +98,10 @@ type ChatRequest struct {
 	// following the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Tools is an optional list of tools the model has access to.
 	Tools `json:"tools,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 // Tools is a list of Tool values, as carried by ChatRequest.
 type Tools []Tool
 // String returns the JSON encoding of the list. A marshalling error is
 // discarded, in which case the result is the empty string.
 func (t Tools) String() string {
 	encoded, _ := json.Marshal(t)
 	return string(encoded)
 }
 // String returns the JSON encoding of the tool. A marshalling error is
 // discarded, in which case the result is the empty string.
 func (t Tool) String() string {
 	encoded, _ := json.Marshal(t)
 	return string(encoded)
 }
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
@@ -126,59 +109,6 @@ type Message struct {
 	Role    string      `json:"role"`
 	Content string      `json:"content"`
 	Images  []ImageData `json:"images,omitempty"`
 	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
 }
 // UnmarshalJSON decodes a Message from JSON and lower-cases its Role.
 func (m *Message) UnmarshalJSON(b []byte) error {
 	// plain has Message's fields but none of its methods, so decoding into it
 	// does not call back into this UnmarshalJSON.
 	type plain Message
 	var decoded plain
 	if err := json.Unmarshal(b, &decoded); err != nil {
 		return err
 	}
 	decoded.Role = strings.ToLower(decoded.Role)
 	*m = Message(decoded)
 	return nil
 }
 // ToolCall is a single entry of Message.ToolCalls; it wraps the function
 // being called.
 type ToolCall struct {
 	Function ToolCallFunction `json:"function"`
 }
 // ToolCallFunction names the function of a ToolCall and carries its arguments.
 type ToolCallFunction struct {
 	Name      string                    `json:"name"`
 	Arguments ToolCallFunctionArguments `json:"arguments"`
 }
 // ToolCallFunctionArguments maps argument names to their values.
 type ToolCallFunctionArguments map[string]any
 // String returns the JSON encoding of the arguments. A marshalling error is
 // discarded, in which case the result is the empty string.
 func (t *ToolCallFunctionArguments) String() string {
 	encoded, _ := json.Marshal(t)
 	return string(encoded)
 }
 // Tool is one element of Tools: a type string plus the function it describes.
 type Tool struct {
 	Type     string       `json:"type"`
 	Function ToolFunction `json:"function"`
 }
 // ToolFunction describes a function offered as a Tool: its name, a
 // description, and a description of its parameters.
 type ToolFunction struct {
 	Name        string `json:"name"`
 	Description string `json:"description"`
 	// Parameters lists the parameters by name (Properties) and which of them
 	// are required.
 	// NOTE(review): the layout looks like a JSON-Schema object — confirm.
 	Parameters  struct {
 		Type       string   `json:"type"`
 		Required   []string `json:"required"`
 		Properties map[string]struct {
 			Type        string   `json:"type"`
 			Description string   `json:"description"`
 			// Enum is omitted from the JSON when empty.
 			Enum        []string `json:"enum,omitempty"`
 		} `json:"properties"`
 	} `json:"parameters"`
 }
 // String returns the JSON encoding of the function description. A
 // marshalling error is discarded, in which case the result is the empty string.
 func (t *ToolFunction) String() string {
 	encoded, _ := json.Marshal(t)
 	return string(encoded)
 }
 // ChatResponse is the response returned by [Client.Chat]. Its fields are
@@ -214,7 +144,6 @@ type Options struct {
 	NumPredict       int      `json:"num_predict,omitempty"`
 	TopK             int      `json:"top_k,omitempty"`
 	TopP             float32  `json:"top_p,omitempty"`
 	MinP             float32  `json:"min_p,omitempty"`
 	TFSZ             float32  `json:"tfs_z,omitempty"`
 	TypicalP         float32  `json:"typical_p,omitempty"`
 	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
@@ -231,6 +160,7 @@ type Options struct {
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
 	UseNUMA   bool `json:"numa,omitempty"`
 	NumCtx    int  `json:"num_ctx,omitempty"`
 	NumBatch  int  `json:"num_batch,omitempty"`
 	NumGPU    int  `json:"num_gpu,omitempty"`
@@ -239,39 +169,11 @@ type Runner struct {
 	F16KV     bool `json:"f16_kv,omitempty"`
 	LogitsAll bool `json:"logits_all,omitempty"`
 	VocabOnly bool `json:"vocab_only,omitempty"`
-	UseMMap   *bool `json:"use_mmap,omitempty"`
+	UseMMap   bool `json:"use_mmap,omitempty"`
 	UseMLock  bool `json:"use_mlock,omitempty"`
 	NumThread int  `json:"num_thread,omitempty"`
 }
 // EmbedRequest is the request passed to [Client.Embed].
 type EmbedRequest struct {
 	// Model is the model name.
 	Model string `json:"model"`
 	// Input is the input to embed.
 	Input any `json:"input"`
 	// KeepAlive controls how long the model will stay loaded in memory following
 	// this request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 	// Truncate is optional; a nil pointer leaves the field out of the JSON.
 	// NOTE(review): presumably controls truncation of over-long input — confirm
 	// against the server handler.
 	Truncate *bool `json:"truncate,omitempty"`
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
 // EmbedResponse is the response from [Client.Embed].
 type EmbedResponse struct {
 	// Model is the model name.
 	Model      string      `json:"model"`
 	// Embeddings is a list of float32 vectors.
 	Embeddings [][]float32 `json:"embeddings"`
 	// The remaining fields are omitted from the JSON when zero.
 	// NOTE(review): they look like timing and token-count metrics for the
 	// request — confirm against the server handler.
 	TotalDuration   time.Duration `json:"total_duration,omitempty"`
 	LoadDuration    time.Duration `json:"load_duration,omitempty"`
 	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
 }
 // EmbeddingRequest is the request passed to [Client.Embeddings].
 type EmbeddingRequest struct {
 	// Model is the model name.
@@ -320,10 +222,7 @@ type DeleteRequest struct {
 type ShowRequest struct {
 	Model    string `json:"model"`
 	System   string `json:"system"`
 	// Template is deprecated
 	Template string `json:"template"`
 	Verbose  bool   `json:"verbose"`
 	Options map[string]interface{} `json:"options"`
@@ -340,9 +239,6 @@ type ShowResponse struct {
 	System     string       `json:"system,omitempty"`
 	Details    ModelDetails `json:"details,omitempty"`
 	Messages   []Message    `json:"messages,omitempty"`
 	ModelInfo     map[string]any `json:"model_info,omitempty"`
 	ProjectorInfo map[string]any `json:"projector_info,omitempty"`
 	ModifiedAt    time.Time      `json:"modified_at,omitempty"`
 }
 // CopyRequest is the request passed to [Client.Copy].
@@ -386,40 +282,19 @@ type PushRequest struct {
 // ListResponse is the response from [Client.List].
 type ListResponse struct {
-	Models []ListModelResponse `json:"models"`
+	Models []ModelResponse `json:"models"`
 }
-// ProcessResponse is the response from [Client.Process].
+// ModelResponse is a single model description in [ListResponse].
-type ProcessResponse struct {
+type ModelResponse struct {
 	Models []ProcessModelResponse `json:"models"`
 }
 // ListModelResponse is a single model description in [ListResponse].
 type ListModelResponse struct {
 	Name       string       `json:"name"`
 	Model      string       `json:"model"`
-	ModifiedAt time.Time    `json:"modified_at"`
+	ModifiedAt time.Time    `json:"modified_at,omitempty"`
 	Size       int64        `json:"size"`
 	Digest     string       `json:"digest"`
 	Details    ModelDetails `json:"details,omitempty"`
-}
+	ExpiresAt  time.Time    `json:"expires_at,omitempty"`
-
+	SizeVRAM   int64        `json:"size_vram,omitempty"`
 // ProcessModelResponse is a single model description in [ProcessResponse].
 type ProcessModelResponse struct {
 	Name      string       `json:"name"`
 	Model     string       `json:"model"`
 	Size      int64        `json:"size"`
 	Digest    string       `json:"digest"`
 	Details   ModelDetails `json:"details,omitempty"`
 	ExpiresAt time.Time    `json:"expires_at"`
 	SizeVRAM  int64        `json:"size_vram"`
 }
 // RetrieveModelResponse describes a single model, serialised with the JSON
 // keys id, object, created and owned_by.
 // NOTE(review): the field set resembles an OpenAI-style model object, with
 // Created presumably a Unix timestamp — confirm against the caller.
 type RetrieveModelResponse struct {
 	Id      string `json:"id"`
 	Object  string `json:"object"`
 	Created int64  `json:"created"`
 	OwnedBy string `json:"owned_by"`
 }
 type TokenResponse struct {
@@ -431,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`
-	// CreatedAt is the timestamp of the response.
+	//CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`
 	// Response is the textual response itself.
@@ -488,6 +363,8 @@ func (m *Metrics) Summary() {
 	}
 }
 // ErrInvalidHostPort is returned when the port taken from OLLAMA_HOST is not
 // an integer in the range 0-65535.
 var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
 func (opts *Options) FromMap(m map[string]interface{}) error {
 	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
 	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct
@@ -504,7 +381,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", key)
+			slog.Warn("invalid option provided", "option", opt.Name)
 			continue
 		}
@@ -560,17 +437,6 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 					slice[i] = str
 				}
 				field.Set(reflect.ValueOf(slice))
 			case reflect.Pointer:
 				var b bool
 				if field.Type() == reflect.TypeOf(&b) {
 					val, ok := val.(bool)
 					if !ok {
 						return fmt.Errorf("option %q must be of type boolean", key)
 					}
 					field.Set(reflect.ValueOf(&val))
 				} else {
 					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
 				}
 			default:
 				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
 			}
@@ -613,7 +479,8 @@ func DefaultOptions() Options {
 			LowVRAM:   false,
 			F16KV:     true,
 			UseMLock:  false,
-			UseMMap:   nil,
+			UseMMap:   true,
 			UseNUMA:   false,
 		},
 	}
 }
@@ -709,17 +576,6 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
 				case reflect.Slice:
 					// TODO: only string slices are supported right now
 					out[key] = vals
 				case reflect.Pointer:
 					var b bool
 					if field.Type() == reflect.TypeOf(&b) {
 						boolVal, err := strconv.ParseBool(vals[0])
 						if err != nil {
 							return nil, fmt.Errorf("invalid bool value %s", vals)
 						}
 						out[key] = &boolVal
 					} else {
 						return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
 					}
 				default:
 					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
 				}
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -2,7 +2,6 @@ package api
 import (
 	"encoding/json"
 	"errors"
 	"math"
 	"testing"
 	"time"
@@ -73,13 +72,13 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		},
 		{
 			"positive duration",
-			42 * time.Second,
+			time.Duration(42 * time.Second),
-			42 * time.Second,
+			time.Duration(42 * time.Second),
 		},
 		{
 			"another positive duration",
-			42 * time.Minute,
+			time.Duration(42 * time.Minute),
-			42 * time.Minute,
+			time.Duration(42 * time.Minute),
 		},
 		{
 			"zero duration",
@@ -106,128 +105,3 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		})
 	}
 }
 // TestUseMmapParsingFromJSON checks that Options.FromMap leaves UseMMap nil
 // when "use_mmap" is absent and sets it to the given boolean otherwise.
 func TestUseMmapParsingFromJSON(t *testing.T) {
 	enabled, disabled := true, false
 	cases := []struct {
 		name string
 		req  string
 		exp  *bool
 	}{
 		{name: "Undefined", req: `{ }`, exp: nil},
 		{name: "True", req: `{ "use_mmap": true }`, exp: &enabled},
 		{name: "False", req: `{ "use_mmap": false }`, exp: &disabled},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Decode the request into a generic map, as FromMap expects.
 			var decoded map[string]interface{}
 			require.NoError(t, json.Unmarshal([]byte(tc.req), &decoded))
 			opts := DefaultOptions()
 			require.NoError(t, opts.FromMap(decoded))
 			assert.Equal(t, tc.exp, opts.UseMMap)
 		})
 	}
 }
 // TestUseMmapFormatParams checks that FormatParams turns the textual
 // "use_mmap" parameter into a *bool and rejects values that are not booleans.
 func TestUseMmapFormatParams(t *testing.T) {
 	enabled, disabled := true, false
 	// param builds a request that carries a single use_mmap value.
 	param := func(value string) map[string][]string {
 		return map[string][]string{"use_mmap": {value}}
 	}
 	cases := []struct {
 		name string
 		req  map[string][]string
 		exp  *bool
 		err  error
 	}{
 		{name: "True", req: param("true"), exp: &enabled, err: nil},
 		{name: "False", req: param("false"), exp: &disabled, err: nil},
 		{name: "Numeric True", req: param("1"), exp: &enabled, err: nil},
 		{name: "Numeric False", req: param("0"), exp: &disabled, err: nil},
 		{name: "invalid string", req: param("foo"), exp: nil, err: errors.New("invalid bool value [foo]")},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			resp, err := FormatParams(tc.req)
 			require.Equal(t, tc.err, err)
 			// Nothing more to check when no value is expected.
 			if tc.exp == nil {
 				return
 			}
 			got, ok := resp["use_mmap"]
 			assert.True(t, ok, "resp: %v", resp)
 			assert.Equal(t, *tc.exp, *got.(*bool))
 		})
 	}
 }
 // TestMessage_UnmarshalJSON checks that decoding a Message lower-cases its role.
 func TestMessage_UnmarshalJSON(t *testing.T) {
 	cases := []struct {
 		input    string
 		expected string
 	}{
 		{input: `{"role": "USER", "content": "Hello!"}`, expected: "user"},
 		{input: `{"role": "System", "content": "Initialization complete."}`, expected: "system"},
 		{input: `{"role": "assistant", "content": "How can I help you?"}`, expected: "assistant"},
 		{input: `{"role": "TOOl", "content": "Access granted."}`, expected: "tool"},
 	}
 	for _, tc := range cases {
 		var decoded Message
 		err := json.Unmarshal([]byte(tc.input), &decoded)
 		if err != nil {
 			t.Errorf("Unexpected error: %v", err)
 		}
 		if got := decoded.Role; got != tc.expected {
 			t.Errorf("role not lowercased: got %v, expected %v", got, tc.expected)
 		}
 	}
 }
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -2,8 +2,8 @@
 package lifecycle
-import "errors"
+import "fmt"
 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -5,8 +5,6 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/envconfig"
 )
@@ -14,7 +12,7 @@ import (
 func InitLogging() {
 	level := slog.LevelInfo
-	if envconfig.Debug() {
+	if envconfig.Debug {
 		level = slog.LevelDebug
 	}
@@ -26,8 +24,7 @@ func InitLogging() {
 		logFile = os.Stderr
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
-		rotateLogs(AppLogFile)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
@@ -49,32 +46,3 @@ func InitLogging() {
 	slog.Info("ollama app started")
 }
 // rotateLogs shifts the existing generations of logFile up by one so that a
 // fresh log can be started: "<pre><ext>" becomes "<pre>-1<ext>", "<pre>-1<ext>"
 // becomes "<pre>-2<ext>", and so on up to LogRotationCount. A generation that
 // is in the way is removed first, so the oldest one is dropped. It does
 // nothing when logFile does not exist. Failures are logged, not returned.
 func rotateLogs(logFile string) {
 	if _, err := os.Stat(logFile); os.IsNotExist(err) {
 		return
 	}
 	// filepath.Ext looks only at the final path element and returns "" when it
 	// has no dot. This avoids the out-of-range slice that a bare LastIndex of
 	// "." produces for extension-less names, and ignores dots in directory names.
 	post := filepath.Ext(logFile)
 	pre := strings.TrimSuffix(logFile, post)
 	// Walk from the oldest slot to the newest so every rename targets a slot
 	// that has already been vacated.
 	for i := LogRotationCount; i > 0; i-- {
 		older := pre + "-" + strconv.Itoa(i) + post
 		newer := pre + "-" + strconv.Itoa(i-1) + post
 		if i == 1 {
 			// Generation 0 is the live log file itself.
 			newer = pre + post
 		}
 		if _, err := os.Stat(newer); err == nil {
 			if _, err := os.Stat(older); err == nil {
 				err := os.Remove(older)
 				if err != nil {
 					slog.Warn("Failed to remove older log", "older", older, "error", err)
 					continue
 				}
 			}
 			err := os.Rename(newer, older)
 			if err != nil {
 				slog.Warn("Failed to rotate log", "older", older, "newer", newer, "error", err)
 			}
 		}
 	}
 }
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"
 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
--- a/app/lifecycle/logging_test.go
+++ b/app/lifecycle/logging_test.go
@@ -1,44 +0,0 @@
 package lifecycle
 import (
 	"os"
 	"path/filepath"
 	"strconv"
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // TestRotateLogs exercises rotateLogs in a temporary directory: with no log
 // present, a first rotation, a repeated call with nothing new to rotate, and
 // then enough rotations to go past LogRotationCount generations.
 func TestRotateLogs(t *testing.T) {
 	dir := t.TempDir()
 	current := filepath.Join(dir, "testlog.log")
 	// rotated returns the path of the n-th rotated generation.
 	rotated := func(n int) string {
 		return filepath.Join(dir, "testlog-"+strconv.Itoa(n)+".log")
 	}
 	// No log exists
 	rotateLogs(current)
 	require.NoError(t, os.WriteFile(current, []byte("1"), 0o644))
 	assert.FileExists(t, current)
 	// The first pass performs the rotation; the second must be a no-op because
 	// no new log was written. Both leave the same files behind.
 	for pass := 0; pass < 2; pass++ {
 		rotateLogs(current)
 		assert.FileExists(t, rotated(1))
 		assert.NoFileExists(t, rotated(2))
 		assert.NoFileExists(t, current)
 	}
 	for i := 2; i <= LogRotationCount+1; i++ {
 		require.NoError(t, os.WriteFile(current, []byte(strconv.Itoa(i)), 0o644))
 		assert.FileExists(t, current)
 		rotateLogs(current)
 		assert.NoFileExists(t, current)
 		for j := 1; j < i; j++ {
 			assert.FileExists(t, rotated(j))
 		}
 		assert.NoFileExists(t, rotated(i+1))
 	}
 }
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@@ -21,7 +21,6 @@ var (
 	ServerLogFile  = "/tmp/ollama.log"
 	UpgradeLogFile = "/tmp/ollama_update.log"
 	Installer      = "OllamaSetup.exe"
 	LogRotationCount = 5
 )
 func init() {
@@ -70,6 +69,7 @@ func init() {
 				slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
 			}
 		}
 	} else if runtime.GOOS == "darwin" {
 		// TODO
 		AppName += ".app"
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -15,7 +15,7 @@ import (
 )
 func getCLIFullPath(command string) string {
-	var cmdPath string
+	cmdPath := ""
 	appExe, err := os.Executable()
 	if err == nil {
 		cmdPath = filepath.Join(filepath.Dir(appExe), command)
@@ -54,8 +54,8 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 		return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
 	}
-	rotateLogs(ServerLogFile)
+	// TODO - rotation
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
@@ -65,6 +65,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	if err != nil {
 		if !errors.Is(err, os.ErrNotExist) {
 			return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
 		}
 		if err := os.MkdirAll(logDir, 0o755); err != nil {
--- a/app/lifecycle/server_windows.go
+++ b/app/lifecycle/server_windows.go
@@ -24,8 +24,7 @@ func terminate(cmd *exec.Cmd) error {
 	if err != nil {
 		return err
 	}
-	//nolint:errcheck
+	defer dll.Release() // nolint: errcheck
 	defer dll.Release()
 	pid := cmd.Process.Pid
@@ -74,8 +73,7 @@ func isProcessExited(pid int) (bool, error) {
 	if err != nil {
 		return false, fmt.Errorf("failed to open process: %v", err)
 	}
-	//nolint:errcheck
+	defer windows.CloseHandle(hProcess) // nolint: errcheck
 	defer windows.CloseHandle(hProcess)
 	var exitCode uint32
 	err = windows.GetExitCodeProcess(hProcess, &exitCode)
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
@@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
@@ -79,7 +78,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	}
 	defer resp.Body.Close()
-	if resp.StatusCode == http.StatusNoContent {
+	if resp.StatusCode == 204 {
 		slog.Debug("check update response 204 (current version is up to date)")
 		return false, updateResp
 	}
@@ -88,7 +87,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 		slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
 	}
-	if resp.StatusCode != http.StatusOK {
+	if resp.StatusCode != 200 {
 		slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
 		return false, updateResp
 	}
@@ -115,7 +114,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 	if err != nil {
 		return fmt.Errorf("error checking update: %w", err)
 	}
-	if resp.StatusCode != http.StatusOK {
+	if resp.StatusCode != 200 {
 		return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
 	}
 	resp.Body.Close()
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -4,9 +4,9 @@ package lifecycle
 import (
 	"context"
-	"errors"
+	"fmt"
 )
 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -2,7 +2,6 @@ package lifecycle
 import (
 	"context"
 	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -65,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}
 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -87,11 +87,15 @@ DialogFontSize=12
 [Files]
 Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
+Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
+#if DirExists("..\dist\windows-amd64\rocm")
  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
 #endif
 [Icons]
 Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -99,7 +103,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
 [Run]
-Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
+Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
 [UninstallRun]
 ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@@ -118,10 +122,6 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
 Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
 ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
 [InstallDelete]
 Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 [Messages]
 WizardReady=Ollama Windows Preview
 ReadyLabel1=%nLet's get you up and running with your own large language models.
@@ -129,13 +129,13 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.1
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
 ;ClickFinish=%n
 [Registry]
 Root: HKCU; Subkey: "Environment"; \
-    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
+    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
-    Check: NeedsAddPath('{app}\bin')
+    Check: NeedsAddPath('{app}')
 [Code]
--- a/app/ollama_welcome.ps1
+++ b/app/ollama_welcome.ps1
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama3.1"
+write-host "`tollama run llama2"
 write-host ""
--- a/app/store/store.go
+++ b/app/store/store.go
@@ -29,6 +29,7 @@ func GetID() string {
 		initStore()
 	}
 	return store.ID
 }
 func GetFirstTimeRun() bool {
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -3,11 +3,11 @@
 package tray
 import (
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/app/tray/commontray"
 )
 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
 }
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -11,7 +11,9 @@ import (
 	"golang.org/x/sys/windows"
 )
-var quitOnce sync.Once
+var (
 	quitOnce sync.Once
 )
 func (t *winTray) Run() {
 	nativeLoop()
@@ -45,6 +47,7 @@ func nativeLoop() {
 		default:
 			pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
 			pDispatchMessage.Call(uintptr(unsafe.Pointer(m)))  //nolint:errcheck
 		}
 	}
 }
@@ -157,8 +160,8 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
 		lResult, _, _ = pDefWindowProc.Call(
 			uintptr(hWnd),
 			uintptr(message),
-			wParam,
+			uintptr(wParam),
-			lParam,
+			uintptr(lParam),
 		)
 	}
 	return
--- a/app/tray/wintray/menus.go
+++ b/app/tray/wintray/menus.go
@@ -11,8 +11,8 @@ import (
 )
 const (
-	updateAvailableMenuID = 1
+	updatAvailableMenuID = 1
-	updateMenuID          = updateAvailableMenuID + 1
+	updateMenuID         = updatAvailableMenuID + 1
 	separatorMenuID      = updateMenuID + 1
 	diagLogsMenuID       = separatorMenuID + 1
 	diagSeparatorMenuID  = diagLogsMenuID + 1
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
 func (t *winTray) UpdateAvailable(ver string) error {
 	if !t.updateNotified {
 		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
 			return fmt.Errorf("unable to create menu entries %w", err)
 		}
 		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -11,12 +11,10 @@ import (
 	"path/filepath"
 	"sort"
 	"sync"
 	"syscall"
 	"unsafe"
 	"golang.org/x/sys/windows"
 	"github.com/ollama/ollama/app/tray/commontray"
 	"golang.org/x/sys/windows"
 )
 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -188,7 +186,7 @@ func (t *winTray) initInstance() error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid = &notifyIconData{
-		Wnd:             t.window,
+		Wnd:             windows.Handle(t.window),
 		ID:              100,
 		Flags:           NIF_MESSAGE,
 		CallbackMessage: t.wmSystrayMessage,
@@ -199,6 +197,7 @@ func (t *winTray) initInstance() error {
 }
 func (t *winTray) createMenu() error {
 	menuHandle, _, err := pCreatePopupMenu.Call()
 	if menuHandle == 0 {
 		return err
@@ -247,7 +246,7 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
 	mi := menuItemInfo{
 		Mask:     MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE,
 		Type:     MFT_STRING,
-		ID:       menuItemId,
+		ID:       uint32(menuItemId),
 		TypeData: titlePtr,
 		Cch:      uint32(len(title)),
 	}
@@ -303,10 +302,11 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
 }
 func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
 	mi := menuItemInfo{
 		Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE,
 		Type: MFT_SEPARATOR,
-		ID:   menuItemId,
+		ID:   uint32(menuItemId),
 	}
 	mi.Size = uint32(unsafe.Sizeof(mi))
@@ -416,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
 			return "", err
 		}
 	}
@@ -426,6 +426,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 // Loads an image from file and shows it in tray.
 // Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx
 func (t *winTray) setIcon(src string) error {
 	h, err := t.loadIconFrom(src)
 	if err != nil {
 		return err
@@ -434,12 +435,7 @@ func (t *winTray) setIcon(src string) error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON | NIF_TIP
+	t.nid.Flags |= NIF_ICON
 	if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
 		copy(t.nid.Tip[:], toolTipUTF16)
 	} else {
 		return err
 	}
 	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
 	return t.nid.modify()
@@ -448,6 +444,7 @@ func (t *winTray) setIcon(src string) error {
 // Loads an image from file to be shown in tray or menu item.
 // LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx
 func (t *winTray) loadIconFrom(src string) (windows.Handle, error) {
 	// Save and reuse handles of loaded images
 	t.muLoadedImages.RLock()
 	h, ok := t.loadedImages[src]
--- a/app/tray/wintray/w32api.go
+++ b/app/tray/wintray/w32api.go
@@ -61,7 +61,6 @@ const (
 	MIIM_SUBMENU        = 0x00000004
 	MIM_APPLYTOSUBMENUS = 0x80000000
 	NIF_ICON            = 0x00000002
 	NIF_TIP             = 0x00000004
 	NIF_INFO            = 0x00000010
 	NIF_MESSAGE         = 0x00000001
 	SW_HIDE             = 0
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -5,7 +5,6 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -79,7 +78,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
 	parts := bytes.Split(publicKey, []byte(" "))
 	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
 	}
 	signedData, err := privateKey.Sign(rand.Reader, bts)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -20,9 +20,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"runtime"
 	"slices"
 	"strings"
 	"sync/atomic"
 	"syscall"
 	"time"
@@ -31,6 +29,7 @@ import (
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
 	"golang.org/x/exp/slices"
 	"golang.org/x/term"
 	"github.com/ollama/ollama/api"
@@ -79,7 +78,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	status := "transferring model data"
 	spinner := progress.NewSpinner(status)
 	p.Add(status, spinner)
 	defer p.Stop()
 	for i := range modelfile.Commands {
 		switch modelfile.Commands[i].Name {
@@ -114,7 +112,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = tempfile
 			}
-			digest, err := createBlob(cmd, client, path, spinner)
+			digest, err := createBlob(cmd, client, path)
 			if err != nil {
 				return err
 			}
@@ -164,6 +162,9 @@ func tempZipFiles(path string) (string, error) {
 	}
 	defer tempfile.Close()
 	zipfile := zip.NewWriter(tempfile)
 	defer zipfile.Close()
 	detectContentType := func(path string) (string, error) {
 		f, err := os.Open(path)
 		if err != nil {
@@ -204,12 +205,6 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
 	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
 		// covers adapters.safetensors
 		files = append(files, st...)
 	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
 		// covers adapter_model.safetensors
 		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
@@ -229,14 +224,6 @@ func tempZipFiles(path string) (string, error) {
 	}
 	files = append(files, js...)
 	// bert models require a nested config.json
 	// TODO(mxyng): merge this with the glob above
 	js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
 	if err != nil {
 		return "", err
 	}
 	files = append(files, js...)
 	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
 		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
 		// tokenizer.model might be a unresolved git lfs reference; error if it is
@@ -246,9 +233,6 @@ func tempZipFiles(path string) (string, error) {
 		files = append(files, tks...)
 	}
 	zipfile := zip.NewWriter(tempfile)
 	defer zipfile.Close()
 	for _, file := range files {
 		f, err := os.Open(file)
 		if err != nil {
@@ -266,11 +250,6 @@ func tempZipFiles(path string) (string, error) {
 			return "", err
 		}
 		zfi.Name, err = filepath.Rel(path, file)
 		if err != nil {
 			return "", err
 		}
 		zf, err := zipfile.CreateHeader(zfi)
 		if err != nil {
 			return "", err
@@ -284,20 +263,13 @@ func tempZipFiles(path string) (string, error) {
 	return tempfile.Name(), nil
 }
-func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
+func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
 		return "", err
 	}
 	defer bin.Close()
 	// Get file info to retrieve the size
 	fileInfo, err := bin.Stat()
 	if err != nil {
 		return "", err
 	}
 	fileSize := fileInfo.Size()
 	hash := sha256.New()
 	if _, err := io.Copy(hash, bin); err != nil {
 		return "", err
@@ -307,50 +279,46 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
 		return "", err
 	}
 	var pw progressWriter
 	status := "transferring model data 0%"
 	spinner.SetMessage(status)
 	done := make(chan struct{})
 	defer close(done)
 	go func() {
 		ticker := time.NewTicker(60 * time.Millisecond)
 		defer ticker.Stop()
 		for {
 			select {
 			case <-ticker.C:
 				spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
 			case <-done:
 				spinner.SetMessage("transferring model data 100%")
 				return
 			}
 		}
 	}()
 	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-	if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
+	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
 		return "", err
 	}
 	return digest, nil
 }
 type progressWriter struct {
 	n atomic.Int64
 }
 func (w *progressWriter) Write(p []byte) (n int, err error) {
 	w.n.Add(int64(len(p)))
 	return len(p), nil
 }
 func RunHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	name := args[0]
 	// check if the model exists on the server
 	show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 	var statusError api.StatusError
 	switch {
 	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
 		if err := PullHandler(cmd, []string{name}); err != nil {
 			return err
 		}
 		show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 		if err != nil {
 			return err
 		}
 	case err != nil:
 		return err
 	}
 	interactive := true
 	opts := runOptions{
 		Model:       args[0],
 		WordWrap:    os.Getenv("TERM") == "xterm-256color",
 		Options:     map[string]interface{}{},
 		MultiModal:  slices.Contains(show.Details.Families, "clip"),
 		ParentModel: show.Details.ParentModel,
 	}
 	format, err := cmd.Flags().GetString("format")
@@ -394,53 +362,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.WordWrap = !nowrap
-	// Fill out the rest of the options based on information about the
+	if !interactive {
-	// model.
+		return generate(cmd, opts)
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	name := args[0]
 	info, err := func() (*api.ShowResponse, error) {
 		showReq := &api.ShowRequest{Name: name}
 		info, err := client.Show(cmd.Context(), showReq)
 		var se api.StatusError
 		if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
 			if err := PullHandler(cmd, []string{name}); err != nil {
 				return nil, err
 			}
 			return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
 		}
 		return info, err
 	}()
 	if err != nil {
 		return err
 	}
 	opts.MultiModal = slices.Contains(info.Details.Families, "clip")
 	opts.ParentModel = info.Details.ParentModel
 	if interactive {
 		if err := loadModel(cmd, &opts); err != nil {
 			return err
 		}
 		for _, msg := range info.Messages {
 			switch msg.Role {
 			case "user":
 				fmt.Printf(">>> %s\n", msg.Content)
 			case "assistant":
 				state := &displayResponseState{}
 				displayResponse(msg.Content, opts.WordWrap, state)
 				fmt.Println()
 				fmt.Println()
 			}
 	}
 	return generateInteractive(cmd, opts)
 	}
 	return generate(cmd, opts)
 }
 func errFromUnknownKey(unknownKeyErr error) error {
@@ -653,6 +579,10 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	if len(args) != 1 {
 		return errors.New("missing model name")
 	}
 	license, errLicense := cmd.Flags().GetBool("license")
 	modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
 	parameters, errParams := cmd.Flags().GetBool("parameters")
@@ -695,6 +625,8 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 	if flagsSet > 1 {
 		return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
 	} else if flagsSet == 0 {
 		return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
 	}
 	req := api.ShowRequest{Name: args[0]}
@@ -703,7 +635,6 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 	if flagsSet == 1 {
 	switch showType {
 	case "license":
 		fmt.Println(resp.License)
@@ -718,124 +649,6 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 	}
 	return nil
 	}
 	showInfo(resp)
 	return nil
 }
 func showInfo(resp *api.ShowResponse) {
 	arch := resp.ModelInfo["general.architecture"].(string)
 	modelData := [][]string{
 		{"arch", arch},
 		{"parameters", resp.Details.ParameterSize},
 		{"quantization", resp.Details.QuantizationLevel},
 		{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
 		{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
 	}
 	mainTableData := [][]string{
 		{"Model"},
 		{renderSubTable(modelData, false)},
 	}
 	if resp.ProjectorInfo != nil {
 		projectorData := [][]string{
 			{"arch", "clip"},
 			{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
 		}
 		if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
 			projectorData = append(projectorData, []string{"projector type", projectorType.(string)})
 		}
 		projectorData = append(projectorData,
 			[]string{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))},
 			[]string{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))},
 		)
 		mainTableData = append(mainTableData,
 			[]string{"Projector"},
 			[]string{renderSubTable(projectorData, false)},
 		)
 	}
 	if resp.Parameters != "" {
 		mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
 	}
 	if resp.System != "" {
 		mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
 	}
 	if resp.License != "" {
 		mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
 	}
 	table := tablewriter.NewWriter(os.Stdout)
 	table.SetAutoWrapText(false)
 	table.SetBorder(false)
 	table.SetAlignment(tablewriter.ALIGN_LEFT)
 	for _, v := range mainTableData {
 		table.Append(v)
 	}
 	table.Render()
 }
 func renderSubTable(data [][]string, file bool) string {
 	var buf bytes.Buffer
 	table := tablewriter.NewWriter(&buf)
 	table.SetAutoWrapText(!file)
 	table.SetBorder(false)
 	table.SetNoWhiteSpace(true)
 	table.SetTablePadding("\t")
 	table.SetAlignment(tablewriter.ALIGN_LEFT)
 	for _, v := range data {
 		table.Append(v)
 	}
 	table.Render()
 	renderedTable := buf.String()
 	lines := strings.Split(renderedTable, "\n")
 	for i, line := range lines {
 		lines[i] = "\t" + line
 	}
 	return strings.Join(lines, "\n")
 }
 func twoLines(s string) [][]string {
 	lines := strings.Split(s, "\n")
 	res := [][]string{}
 	count := 0
 	for _, line := range lines {
 		line = strings.TrimSpace(line)
 		if line != "" {
 			count++
 			res = append(res, []string{line})
 			if count == 2 {
 				return res
 			}
 		}
 	}
 	return res
 }
 func formatParams(s string) string {
 	lines := strings.Split(s, "\n")
 	table := [][]string{}
 	for _, line := range lines {
 		table = append(table, strings.Fields(line))
 	}
 	return renderSubTable(table, false)
 }
 func CopyHandler(cmd *cobra.Command, args []string) error {
@@ -916,6 +729,7 @@ type runOptions struct {
 	WordWrap    bool
 	Format      string
 	System      string
 	Template    string
 	Images      []api.ImageData
 	Options     map[string]interface{}
 	MultiModal  bool
@@ -932,6 +746,7 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
 	if wordWrap && termWidth >= 10 {
 		for _, ch := range content {
 			if state.lineLength+1 > termWidth-5 {
 				if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
 					fmt.Printf("%s%c", state.wordBuffer, ch)
 					state.wordBuffer = ""
@@ -940,11 +755,7 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
 				}
 				// backtrack the length of the last word and clear to the end of the line
-				a := runewidth.StringWidth(state.wordBuffer)
+				fmt.Printf("\x1b[%dD\x1b[K\n", runewidth.StringWidth(state.wordBuffer))
 				if a > 0 {
 					fmt.Printf("\x1b[%dD", a)
 				}
 				fmt.Printf("\x1b[K\n")
 				fmt.Printf("%s%c", state.wordBuffer, ch)
 				chWidth := runewidth.RuneWidth(ch)
@@ -1109,6 +920,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		Images:    opts.Images,
 		Format:    opts.Format,
 		System:    opts.System,
 		Template:  opts.Template,
 		Options:   opts.Options,
 		KeepAlive: opts.KeepAlive,
 	}
@@ -1144,12 +956,18 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 	return nil
 }
-func RunServer(_ *cobra.Command, _ []string) error {
+func RunServer(cmd *cobra.Command, _ []string) error {
 	// retrieve the OLLAMA_HOST environment variable
 	ollamaHost, err := api.GetOllamaHost()
 	if err != nil {
 		return err
 	}
 	if err := initializeKeypair(); err != nil {
 		return err
 	}
-	ln, err := net.Listen("tcp", envconfig.Host().Host)
+	ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
 	if err != nil {
 		return err
 	}
@@ -1208,6 +1026,24 @@ func initializeKeypair() error {
 	return nil
 }
 //nolint:unused
 func waitForServer(ctx context.Context, client *api.Client) error {
 	// wait for the server to start
 	timeout := time.After(5 * time.Second)
 	tick := time.Tick(500 * time.Millisecond)
 	for {
 		select {
 		case <-timeout:
 			return errors.New("timed out waiting for server to start")
 		case <-tick:
 			if err := client.Heartbeat(ctx); err == nil {
 				return nil // server has started
 			}
 		}
 	}
 }
 func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@@ -1218,7 +1054,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
@@ -1414,10 +1250,7 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_NUM_PARALLEL"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
 				envVars["OLLAMA_SCHED_SPREAD"],
 				envVars["OLLAMA_TMPDIR"],
 				envVars["OLLAMA_FLASH_ATTENTION"],
 				envVars["OLLAMA_LLM_LIBRARY"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -1,7 +1,6 @@
 package cmd
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
@@ -9,15 +8,14 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
-	"slices"
+	"sort"
 	"strings"
 	"github.com/spf13/cobra"
-	"golang.org/x/exp/maps"
+	"golang.org/x/exp/slices"
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
@@ -29,29 +27,74 @@ const (
 	MultilineNone MultilineState = iota
 	MultilinePrompt
 	MultilineSystem
 	MultilineTemplate
 )
 func loadModel(cmd *cobra.Command, opts *runOptions) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}
 	p := progress.NewProgress(os.Stderr)
 	defer p.StopAndClear()
 	spinner := progress.NewSpinner("")
 	p.Add("", spinner)
-	client, err := api.ClientFromEnvironment()
+	showReq := api.ShowRequest{Name: opts.Model}
 	showResp, err := client.Show(cmd.Context(), &showReq)
 	if err != nil {
 		return err
 	}
 	opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
 	opts.ParentModel = showResp.Details.ParentModel
 	if len(showResp.Messages) > 0 {
 		opts.Messages = append(opts.Messages, showResp.Messages...)
 	}
 	chatReq := &api.ChatRequest{
 		Model:    opts.Model,
-		KeepAlive: opts.KeepAlive,
+		Messages: []api.Message{},
 	}
-	return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
+	if opts.KeepAlive != nil {
 		chatReq.KeepAlive = opts.KeepAlive
 	}
 	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
 		p.StopAndClear()
 		if len(opts.Messages) > 0 {
 			for _, msg := range opts.Messages {
 				switch msg.Role {
 				case "user":
 					fmt.Printf(">>> %s\n", msg.Content)
 				case "assistant":
 					state := &displayResponseState{}
 					displayResponse(msg.Content, opts.WordWrap, state)
 					fmt.Println()
 					fmt.Println()
 				}
 			}
 		}
 		return nil
 	})
 	if err != nil {
 		return err
 	}
 	return nil
 }
 func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 	opts.Messages = make([]api.Message, 0)
 	err := loadModel(cmd, &opts)
 	if err != nil {
 		return err
 	}
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
@@ -76,6 +119,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set parameter ...     Set a parameter")
 		fmt.Fprintln(os.Stderr, "  /set system <string>   Set system message")
 		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
 		fmt.Fprintln(os.Stderr, "  /set history           Enable history")
 		fmt.Fprintln(os.Stderr, "  /set nohistory         Disable history")
 		fmt.Fprintln(os.Stderr, "  /set wordwrap          Enable wordwrap")
@@ -121,7 +165,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
 		fmt.Fprintln(os.Stderr, "  /set parameter min_p <float>          Pick token based on top token probability * min_p")
 		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
 		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
 		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
@@ -141,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		return err
 	}
-	if envconfig.NoHistory() {
+	if envconfig.NoHistory {
 		scanner.HistoryDisable()
 	}
@@ -186,6 +229,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
 				fmt.Println("Set system message.")
 				sb.Reset()
 			case MultilineTemplate:
 				opts.Template = sb.String()
 				fmt.Println("Set prompt template.")
 				sb.Reset()
 			}
 			multiline = MultilineNone
@@ -304,13 +351,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					}
 					fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
 					opts.Options[args[2]] = fp[args[2]]
-				case "system":
+				case "system", "template":
 					if len(args) < 3 {
 						usageSet()
 						continue
 					}
 					if args[1] == "system" {
 						multiline = MultilineSystem
 					} else if args[1] == "template" {
 						multiline = MultilineTemplate
 					}
 					line := strings.Join(args[2:], " ")
 					line, ok := strings.CutPrefix(line, `"""`)
@@ -330,6 +381,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						continue
 					}
 					if args[1] == "system" {
 						opts.System = sb.String() // for display in modelfile
 						newMessage := api.Message{Role: "system", Content: sb.String()}
 						// Check if the slice is not empty and the last message is from 'system'
@@ -341,6 +393,11 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						}
 						fmt.Println("Set system message.")
 						sb.Reset()
 					} else if args[1] == "template" {
 						opts.Template = sb.String()
 						fmt.Println("Set prompt template.")
 						sb.Reset()
 					}
 					sb.Reset()
 					continue
@@ -361,6 +418,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				req := &api.ShowRequest{
 					Name:     opts.Model,
 					System:   opts.System,
 					Template: opts.Template,
 					Options:  opts.Options,
 				}
 				resp, err := client.Show(cmd.Context(), req)
@@ -371,7 +429,15 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				switch args[1] {
 				case "info":
-					showInfo(resp)
+					fmt.Println("Model details:")
 					if len(resp.Details.Families) > 0 {
 						fmt.Printf("Family              %s\n", strings.Join(resp.Details.Families, ", "))
 					} else if resp.Details.Family != "" {
 						fmt.Printf("Family              %s\n", resp.Details.Family)
 					}
 					fmt.Printf("Parameter Size      %s\n", resp.Details.ParameterSize)
 					fmt.Printf("Quantization Level  %s\n", resp.Details.QuantizationLevel)
 					fmt.Println("")
 				case "license":
 					if resp.License == "" {
 						fmt.Println("No license was specified for this model.")
@@ -404,9 +470,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						fmt.Println("No system message was specified for this model.")
 					}
 				case "template":
-					if resp.Template != "" {
+					switch {
 					case opts.Template != "":
 						fmt.Println(opts.Template + "\n")
 					case resp.Template != "":
 						fmt.Println(resp.Template)
-					} else {
+					default:
 						fmt.Println("No prompt template was specified for this model.")
 					}
 				default:
@@ -490,35 +559,35 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 }
 func buildModelfile(opts runOptions) string {
-	var f parser.File
+	var mf strings.Builder
-	f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
+	model := opts.ParentModel
-
+	if model == "" {
 		model = opts.Model
 	}
 	fmt.Fprintf(&mf, "FROM %s\n", model)
 	if opts.System != "" {
-		f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
 	}
-	keys := maps.Keys(opts.Options)
+	if opts.Template != "" {
-	slices.Sort(keys)
+		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
 	}
 	keys := make([]string, 0)
 	for k := range opts.Options {
 		keys = append(keys, k)
 	}
 	sort.Strings(keys)
 	for _, k := range keys {
-		v := opts.Options[k]
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
 		var cmds []parser.Command
 		switch t := v.(type) {
 		case []string:
 			for _, s := range t {
 				cmds = append(cmds, parser.Command{Name: k, Args: s})
 			}
 		default:
 			cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
 		}
 		f.Commands = append(f.Commands, cmds...)
 	}
 	fmt.Fprintln(&mf)
 	for _, msg := range opts.Messages {
-		f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
 	}
-	return f.String()
+	return mf.String()
 }
 func normalizeFilePath(fp string) string {
@@ -604,7 +673,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, errors.New("file size exceeds maximum limit (100MB)")
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
 	}
 	buf = make([]byte, info.Size())
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -1,9 +1,10 @@
 package cmd
 import (
 	"bytes"
 	"testing"
 	"text/template"
 	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
 	"github.com/ollama/ollama/api"
@@ -57,51 +58,59 @@ func TestModelfileBuilder(t *testing.T) {
 	opts := runOptions{
 		Model:    "hork",
 		System:   "You are part horse and part shark, but all hork. Do horklike things",
 		Template: "This is a template.",
 		Messages: []api.Message{
 			{Role: "user", Content: "Hey there hork!"},
 			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
 		},
-		Options: map[string]any{
+		Options: map[string]interface{}{},
 			"temperature":      0.9,
 			"seed":             42,
 			"penalize_newline": false,
 			"stop":             []string{"hi", "there"},
 		},
 	}
-	t.Run("model", func(t *testing.T) {
+	opts.Options["temperature"] = 0.9
-		expect := `FROM hork
+	opts.Options["seed"] = 42
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	opts.Options["penalize_newline"] = false
 	opts.Options["stop"] = []string{"hi", "there"}
 	mf := buildModelfile(opts)
 	expectedModelfile := `FROM {{.Model}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+	tmpl, err := template.New("").Parse(expectedModelfile)
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	assert.Nil(t, err)
-			t.Errorf("mismatch (-want +got):\n%s", diff)
+
-		}
+	var buf bytes.Buffer
-	})
+	err = tmpl.Execute(&buf, opts)
 	assert.Nil(t, err)
 	assert.Equal(t, buf.String(), mf)
 	t.Run("parent model", func(t *testing.T) {
 	opts.ParentModel = "horseshark"
-		expect := `FROM horseshark
+	mf = buildModelfile(opts)
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	expectedModelfile = `FROM {{.ParentModel}}
 SYSTEM """{{.System}}"""
 TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
+PARAMETER stop [hi there]
 PARAMETER stop there
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
+
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+MESSAGE user """Hey there hork!"""
 MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
+
-		if diff := cmp.Diff(expect, actual); diff != "" {
+	tmpl, err = template.New("").Parse(expectedModelfile)
-			t.Errorf("mismatch (-want +got):\n%s", diff)
+	assert.Nil(t, err)
-		}
+
-	})
+	var parentBuf bytes.Buffer
 	err = tmpl.Execute(&parentBuf, opts)
 	assert.Nil(t, err)
 	assert.Equal(t, parentBuf.String(), mf)
 }
--- a/cmd/start.go
+++ b/cmd/start.go
@@ -1,27 +0,0 @@
 //go:build darwin || windows
 package cmd
 import (
 	"context"
 	"errors"
 	"time"
 	"github.com/ollama/ollama/api"
 )
 // waitForServer polls the server with client.Heartbeat every 500ms until a
 // heartbeat succeeds, five seconds have elapsed, or ctx is done.
 //
 // It returns nil once a heartbeat succeeds, ctx.Err() if ctx ends first, and a
 // timeout error otherwise.
 func waitForServer(ctx context.Context, client *api.Client) error {
 	// Use an explicit timer and ticker (rather than time.After/time.Tick) so
 	// both are released as soon as this function returns.
 	deadline := time.NewTimer(5 * time.Second)
 	defer deadline.Stop()
 	ticker := time.NewTicker(500 * time.Millisecond)
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ctx.Done():
 			// The caller gave up; do not keep polling until the timeout.
 			return ctx.Err()
 		case <-deadline.C:
 			return errors.New("timed out waiting for server to start")
 		case <-ticker.C:
 			if err := client.Heartbeat(ctx); err == nil {
 				return nil // server has started
 			}
 		}
 	}
 }
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -2,7 +2,7 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -4,11 +4,11 @@ package cmd
 import (
 	"context"
-	"errors"
+	"fmt"
 	"github.com/ollama/ollama/api"
 )
 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 			// Finally look in the path
 			appExe, err = exec.LookPath(AppName)
 			if err != nil {
-				return errors.New("could not locate ollama app")
+				return fmt.Errorf("could not locate ollama app")
 			}
 		}
 	}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -1,228 +1,200 @@
 package convert
 import (
 	"cmp"
 	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"slices"
 	"strings"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 	"github.com/ollama/ollama/llm"
 )
-type ModelParameters struct {
+const (
 	_ int32 = iota
 	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 type Params struct {
 	Architectures     []string `json:"architectures"`
-	VocabSize     uint32   `json:"vocab_size"`
+	VocabSize         int      `json:"vocab_size"`
 	HiddenSize        int      `json:"hidden_size"`       // n_embd
 	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
 	ContextSize       int      `json:"max_position_embeddings"`
 	IntermediateSize  int      `json:"intermediate_size"`
 	AttentionHeads    int      `json:"num_attention_heads"` // n_head
 	KeyValHeads       int      `json:"num_key_value_heads"`
 	NormEPS           float64  `json:"rms_norm_eps"`
 	BoSTokenID        int      `json:"bos_token_id"`
 	EoSTokenID        int      `json:"eos_token_id"`
 	HeadDimension     int      `json:"head_dim"`
 	PaddingTokenID    int      `json:"pad_token_id"`
 	RopeFrequencyBase float64  `json:"rope_theta"`
 	Experts     int `json:"num_local_experts"`
 	ExpertsUsed int `json:"num_experts_per_tok"`
 	PreTokenizer string
 	ByteOrder
 }
-type AdapterParameters struct {
+type ByteOrder interface {
-	Alpha          uint32 `json:"lora_alpha"`
+	binary.ByteOrder
-	LoraLayers     uint32 `json:"lora_layers"`
+	binary.AppendByteOrder
 	LoraParameters struct {
 		Rank  uint32  `json:"rank"`
 		Alpha float32 `json:"alpha"`
 		Scale float32 `json:"scale"`
 	} `json:"lora_parameters"`
 }
-func (ModelParameters) KV(t *Tokenizer) llm.KV {
+type ModelArch interface {
-	kv := llm.KV{
+	GetTensors() error
-		"general.file_type":            uint32(1),
+	LoadVocab() error
-		"general.quantization_version": uint32(2),
+	WriteGGUF(io.WriteSeeker) error
 		"tokenizer.ggml.pre":           t.Pre,
 		"tokenizer.ggml.model":         t.Vocabulary.Model,
 		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
 		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
 		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
 	}
 	if len(t.Merges) > 0 {
 		kv["tokenizer.ggml.merges"] = t.Merges
 	}
 	if t.Template != "" {
 		kv["tokenizer.chat_template"] = t.Template
 	}
 	for _, sv := range t.SpecialVocabulary {
 		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
 		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
 	}
 	return kv
 }
-func (p AdapterParameters) KV() llm.KV {
+type ModelFormat interface {
-	var alpha float32
+	GetLayerName(string) (string, error)
-	if p.LoraParameters.Alpha == 0 {
+	GetTensors(string, *Params) ([]llm.Tensor, error)
-		alpha = float32(p.Alpha)
+	GetParams(string) (*Params, error)
-	} else {
+	GetModelArch(string, string, *Params) (ModelArch, error)
 		alpha = p.LoraParameters.Alpha
 	}
 	kv := llm.KV{
 		"adapter.lora.alpha": alpha,
 		"adapter.type":       "lora",
 		"general.file_type":  uint32(1),
 		"general.type":       "adapter",
 		"general.version":    "v0.2",
 	}
 	return kv
 }
-func (ModelParameters) specialTokenTypes() []string {
+type ModelData struct {
-	return []string{
+	Path    string
-		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
+	Name    string
-	}
+	Params  *Params
 	Vocab   *Vocab
 	Tensors []llm.Tensor
 	Format  ModelFormat
 }
-func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+func GetModelFormat(dirname string) (ModelFormat, error) {
-	return llm.WriteGGUF(ws, kv, ts)
+	files, err := filepath.Glob(filepath.Join(dirname, "*"))
 }
 // writeFile writes the key-values and tensors to ws by delegating to
 // llm.WriteGGUF, returning whatever error that call reports.
 func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 // ModelConverter is the set of operations a model architecture must provide
 // so that it can be converted: metadata, tensors, tensor-name rewriting,
 // special token types, and the final write.
 type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
 	// writeFile writes the model to the provided io.WriteSeeker
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 // moreParser is an optional interface for converters that need to read
 // additional files from the model file system beyond config.json.
 type moreParser interface {
 	// parseMore reads any extra files the converter needs from the given fs.FS.
 	parseMore(fs.FS) error
 }
 // AdapterConverter is the adapter counterpart of ModelConverter: its KV
 // receives the base model's key-values rather than a tokenizer.
 type AdapterConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(llm.KV) llm.KV
 	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
 	Tensors([]Tensor) []llm.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 	// writeFile writes the adapter to the provided io.WriteSeeker
 	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
-		return err
+		return nil, err
 	}
-	var p AdapterParameters
+	for _, fn := range files {
-	if err := json.Unmarshal(bts, &p); err != nil {
+		if strings.HasSuffix(fn, ".safetensors") {
-		return err
+			return &SafetensorFormat{}, nil
 		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
 			slog.Debug("model is torch")
 			return &TorchFormat{}, nil
 		}
 	}
-	arch, ok := baseKV["general.architecture"]
+	return nil, fmt.Errorf("couldn't determine model format")
-	if !ok {
+}
-		return errors.New("architecture not set for the base model")
+
 // Details on gguf's tokenizer can be found at:
 // https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
 type Vocab struct {
 	// Tokens, Scores and Types are parallel slices: entry i of each describes
 	// the same token.
 	Tokens []string
 	Scores []float32
 	Types  []int32
 	// NOTE(review): presumably BPE merge rules; not populated by the
 	// sentencepiece loader — confirm against the other loaders.
 	Merges []string
 }
 func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
 	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
 	if err != nil {
 		return nil, err
 	}
-	var conv AdapterConverter
+	// To regenerate sentencepiece from the protobufs use:
-	switch arch {
+	// protoc -I=./ --go_out=./ sentencepiece_model.proto
-	case "llama":
+	modelProto := &sentencepiece.ModelProto{}
-		conv = &llamaAdapter{}
+	if err := proto.Unmarshal(in, modelProto); err != nil {
-	case "gemma2":
+		return nil, err
-		conv = &gemma2Adapter{}
+	}
 	v := &Vocab{
 		Tokens: make([]string, 0),
 		Scores: make([]float32, 0),
 		Types:  make([]int32, 0),
 	}
 	pieces := modelProto.GetPieces()
 	for _, p := range pieces {
 		v.Tokens = append(v.Tokens, p.GetPiece())
 		v.Scores = append(v.Scores, p.GetScore())
 		t := p.GetType()
 		switch t {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
 		case sentencepiece.ModelProto_SentencePiece_CONTROL:
 		case sentencepiece.ModelProto_SentencePiece_UNUSED:
 		case sentencepiece.ModelProto_SentencePiece_BYTE:
 		default:
-		return errors.New("unsupported architecture")
+			t = sentencepiece.ModelProto_SentencePiece_NORMAL
 		}
 		v.Types = append(v.Types, int32(t))
 	}
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
+	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
-	if err != nil {
+
-		return err
+	// add any additional tokens
 	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
 	if os.IsNotExist(err) {
 		return v, nil
 	} else if err != nil {
 		return nil, err
 	}
-	if err := json.Unmarshal(bts, conv); err != nil {
+	slog.Info("reading user defined tokens")
-		return err
+
 	var extraTokenData map[string]int
 	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
 		return nil, err
 	}
-	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+	type token struct {
-}
+		key string
-
+		pos int
-// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
+	}
-// and files it finds in the input path.
+
-// Supported input model formats include safetensors.
+	extraTokens := make([]token, 0)
-// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
+	for k, id := range extraTokenData {
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
+		extraTokens = append(extraTokens, token{k, id})
-	bts, err := fs.ReadFile(fsys, "config.json")
+	}
-	if err != nil {
+
-		return err
+	slices.SortFunc(extraTokens, func(a, b token) int {
-	}
+		return cmp.Compare(a.pos, b.pos)
-
+	})
-	var p ModelParameters
+
-	if err := json.Unmarshal(bts, &p); err != nil {
+	numToks := len(v.Tokens)
-		return err
+
-	}
+	for cnt, t := range extraTokens {
-
+		// the token id should match the specific index for the total number of tokens
-	if len(p.Architectures) < 1 {
+		if t.pos != cnt+numToks {
-		return errors.New("unknown architecture")
+			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
-	}
+		}
-
+		v.Tokens = append(v.Tokens, t.key)
-	var conv ModelConverter
+		v.Scores = append(v.Scores, -1000.0)
-	switch p.Architectures[0] {
+		v.Types = append(v.Types, tokenTypeUserDefined)
-	case "LlamaForCausalLM", "MistralForCausalLM":
+	}
-		conv = &llamaModel{}
+	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
-	case "MixtralForCausalLM":
+
-		conv = &mixtralModel{}
+	if params.VocabSize > len(v.Tokens) {
-	case "GemmaForCausalLM":
+		missingTokens := params.VocabSize - len(v.Tokens)
-		conv = &gemmaModel{}
+		slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
-	case "Gemma2ForCausalLM":
+		for cnt := 0; cnt < missingTokens; cnt++ {
-		conv = &gemma2Model{}
+			v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
-	case "Phi3ForCausalLM":
+			v.Scores = append(v.Scores, -1)
-		conv = &phi3Model{}
+			v.Types = append(v.Types, tokenTypeUserDefined)
-	case "BertModel":
+		}
-		conv = &bertModel{}
+	}
-	default:
+
-		return errors.New("unsupported architecture")
+	return v, nil
 	}
 	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 	if t, ok := conv.(moreParser); ok {
 		if err := t.parseMore(fsys); err != nil {
 			return err
 		}
 	}
 	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
 	if err != nil {
 		return err
 	}
 	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
 		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
 		for i := range vocabSize - len(t.Vocabulary.Tokens) {
 			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
 			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
 			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
 		}
 	} else {
 		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
 	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
 	if err != nil {
 		return err
 	}
 	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
 }
--- a/convert/convert_bert.go
+++ b/convert/convert_bert.go
@@ -1,174 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/json"
 	"io/fs"
 	"path/filepath"
 	"slices"
 	"strings"
 	"github.com/ollama/ollama/llm"
 )
 // bertModel holds the config.json fields used when converting a BERT model.
 // Several settings are accepted under more than one JSON key; KV uses the
 // first non-zero value of each group.
 type bertModel struct {
 	ModelParameters
 	// Block count: n_layers, num_hidden_layers or n_layer.
 	NLayers               uint32  `json:"n_layers"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
 	NLayer                uint32  `json:"n_layer"`
 	// Context length: max_position_embeddings or n_ctx.
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	NCtx                  uint32  `json:"n_ctx"`
 	// Embedding length: hidden_size or n_embd.
 	HiddenSize            uint32  `json:"hidden_size"`
 	NEmbd                 uint32  `json:"n_embd"`
 	// Feed-forward length: intermediate_size or n_inner.
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NInner                uint32  `json:"n_inner"`
 	// Attention head count: num_attention_heads or n_head.
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NHead                 uint32  `json:"n_head"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	// Layer-norm epsilon: layer_norm_eps, layer_norm_epsilon or norm_epsilon.
 	LayerNormEPS          float32 `json:"layer_norm_eps"`
 	LayerNormEpsilon      float32 `json:"layer_norm_epsilon"`
 	NormEpsilon           float32 `json:"norm_epsilon"`
 	// PoolingType has no JSON tag; parseMore sets it to 1 (mean tokens) or
 	// 2 (CLS token) from the pooling module's config.
 	PoolingType uint32
 }
 // Compile-time checks that *bertModel implements both interfaces.
 var (
 	_ ModelConverter = (*bertModel)(nil)
 	_ moreParser     = (*bertModel)(nil)
 )
 // parseMore reads modules.json and, when it lists a module of type
 // "sentence_transformers.models.Pooling", that module's config.json, and
 // records the pooling strategy in p.PoolingType.
 //
 // PoolingType becomes 1 when pooling_mode_mean_tokens is true, otherwise 2
 // when pooling_mode_cls_token is true. It is left unchanged when no pooling
 // module is listed or neither flag is set. Read and JSON decoding errors are
 // returned unchanged.
 func (p *bertModel) parseMore(fsys fs.FS) error {
 	bts, err := fs.ReadFile(fsys, "modules.json")
 	if err != nil {
 		return err
 	}
 	var modules []struct {
 		Type string `json:"type"`
 		Path string `json:"path"`
 	}
 	if err := json.Unmarshal(bts, &modules); err != nil {
 		return err
 	}
 	// Only the first pooling module is considered.
 	var pooling string
 	for _, m := range modules {
 		if m.Type == "sentence_transformers.models.Pooling" {
 			pooling = m.Path
 			break
 		}
 	}
 	if pooling != "" {
 		// fs.FS names are slash-separated on every OS, while filepath.Join uses
 		// the host separator; normalise before the lookup so this also works on
 		// Windows.
 		name := filepath.ToSlash(filepath.Join(pooling, "config.json"))
 		bts, err := fs.ReadFile(fsys, name)
 		if err != nil {
 			return err
 		}
 		var pc struct {
 			PoolingModeCLSToken   bool `json:"pooling_mode_cls_token"`
 			PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
 		}
 		if err := json.Unmarshal(bts, &pc); err != nil {
 			return err
 		}
 		// Mean pooling takes precedence when both flags are set.
 		if pc.PoolingModeMeanTokens {
 			p.PoolingType = 1
 		} else if pc.PoolingModeCLSToken {
 			p.PoolingType = 2
 		}
 	}
 	return nil
 }
 // KV builds the key-values for a BERT model on top of the shared
 // ModelParameters defaults. Optional sizes are written only when one of their
 // JSON aliases is non-zero. The tokenizer's tokens are rewritten in place:
 // "[...]" tokens are kept, a leading "##" is stripped, and every other token
 // gets a "\u2581" prefix.
 func (p *bertModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType
 	kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
 	if ctxLen := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); ctxLen > 0 {
 		kv["bert.context_length"] = ctxLen
 	}
 	if embdLen := cmp.Or(p.HiddenSize, p.NEmbd); embdLen > 0 {
 		kv["bert.embedding_length"] = embdLen
 	}
 	if ffnLen := cmp.Or(p.IntermediateSize, p.NInner); ffnLen > 0 {
 		kv["bert.feed_forward_length"] = ffnLen
 	}
 	if heads := cmp.Or(p.NumAttentionHeads, p.NHead); heads > 0 {
 		kv["bert.attention.head_count"] = heads
 	}
 	if eps := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); eps > 0 {
 		kv["bert.attention.layer_norm_epsilon"] = eps
 	}
 	kv["tokenizer.ggml.model"] = "bert"
 	kv["tokenizer.ggml.token_type_count"] = uint32(2)
 	// convert to phantom space tokens
 	for idx := range t.Tokens {
 		tok := t.Tokens[idx]
 		switch {
 		case strings.HasPrefix(tok, "[") && strings.HasSuffix(tok, "]"):
 			// bracketed tokens are left as they are
 		case strings.HasPrefix(tok, "##"):
 			t.Tokens[idx] = tok[2:]
 		default:
 			t.Tokens[idx] = "\u2581" + tok
 		}
 	}
 	kv["tokenizer.ggml.tokens"] = t.Tokens
 	return kv
 }
 // Tensors converts the input tensors to llm.Tensor values, dropping
 // "embeddings.position_ids" and the two "pooler.dense" tensors.
 func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for _, src := range ts {
 		switch src.Name() {
 		case "embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias":
 			// not carried over to the output
 			continue
 		}
 		converted = append(converted, llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		})
 	}
 	return converted
 }
 // Replacements returns old/new string pairs used to rename tensors, in the
 // form expected by strings.NewReplacer.
 func (bertModel) Replacements() []string {
 	return []string{
 		// strings.NewReplacer compares old strings in argument order, so the
 		// longer "encoder.layers" must come before its prefix "encoder.layer";
 		// otherwise "encoder.layers.0" would be rewritten to "blks.0".
 		"encoder.layers", "blk",
 		"encoder.layer", "blk",
 		"embeddings.word_embeddings", "token_embd",
 		"embeddings.token_type_embeddings", "token_types",
 		"embeddings.LayerNorm", "token_embd_norm",
 		"embeddings.position_embeddings", "position_embd",
 		"attention.self.query", "attn_q",
 		"attention.self.key", "attn_k",
 		"attention.self.value", "attn_v",
 		"attention.output.dense", "attn_output",
 		"attention.output.LayerNorm", "attn_output_norm",
 		"intermediate.dense", "ffn_up",
 		"output.dense", "ffn_down",
 		"output.LayerNorm", "layer_output_norm",
 	}
 }
--- a/convert/convert_gemma.go
+++ b/convert/convert_gemma.go
@@ -1,100 +0,0 @@
 package convert
 import (
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // gemmaModel holds the config.json fields used when converting a Gemma model.
 // Each field is copied into the matching "gemma.*" key by KV.
 type gemmaModel struct {
 	ModelParameters
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	HiddenSize            uint32  `json:"hidden_size"`
 	HiddenLayers          uint32  `json:"num_hidden_layers"`
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RMSNormEPS            float32 `json:"rms_norm_eps"`
 	// HeadDim is written as both the attention key length and value length.
 	HeadDim               uint32  `json:"head_dim"`
 }
 // Compile-time check that *gemmaModel implements ModelConverter.
 var _ ModelConverter = (*gemmaModel)(nil)
 // KV returns the shared ModelParameters key-values overlaid with the
 // gemma-specific entries, including four fixed special token ids.
 func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	// Gather the gemma entries in one literal, then layer them over the
 	// shared defaults; keys are distinct so iteration order is irrelevant.
 	gemmaKV := llm.KV{
 		"general.architecture":                   "gemma",
 		"gemma.context_length":                   p.MaxPositionEmbeddings,
 		"gemma.embedding_length":                 p.HiddenSize,
 		"gemma.block_count":                      p.HiddenLayers,
 		"gemma.feed_forward_length":              p.IntermediateSize,
 		"gemma.attention.head_count":             p.NumAttentionHeads,
 		"gemma.attention.head_count_kv":          p.NumKeyValueHeads,
 		"gemma.attention.layer_norm_rms_epsilon": p.RMSNormEPS,
 		"gemma.attention.key_length":             p.HeadDim,
 		"gemma.attention.value_length":           p.HeadDim,
 		"tokenizer.ggml.eot_token_id":            uint32(107),
 		"tokenizer.ggml.middle_token_id":         uint32(68),
 		"tokenizer.ggml.prefix_token_id":         uint32(67),
 		"tokenizer.ggml.suffix_token_id":         uint32(69),
 	}
 	for key, value := range gemmaKV {
 		kv[key] = value
 	}
 	return kv
 }
 // Tensors converts the input tensors to llm.Tensor values. Tensors whose
 // name ends in "_norm.weight" get the addOne repacker installed first.
 func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for i := range ts {
 		src := ts[i]
 		isNormWeight := strings.HasSuffix(src.Name(), "_norm.weight")
 		if isNormWeight {
 			src.SetRepacker(p.addOne)
 		}
 		entry := llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		}
 		converted = append(converted, entry)
 	}
 	return converted
 }
 // Replacements returns old/new string pairs used to rename tensors, in the
 // form expected by strings.NewReplacer (even index: old, odd index: new).
 func (p *gemmaModel) Replacements() []string {
 	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 }
 // addOne is the repacker that Tensors installs on "_norm.weight" tensors. It
 // wraps data in a one-dimensional tensor of length shape[0], adds a tensor of
 // ones of the same length, and returns the result as a flat []float32.
 // NOTE(review): presumably compensates for how Gemma stores norm weights —
 // confirm against the model definition.
 func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, int(shape[0]))
 	n, err := n.Add(ones)
 	if err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 0)
 	if err != nil {
 		return nil, err
 	}
 	// Flatten the selected slices back into a single slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_gemma2.go
+++ b/convert/convert_gemma2.go
@@ -1,43 +0,0 @@
 package convert
 import (
 	"github.com/ollama/ollama/llm"
 )
 // gemma2Model extends gemmaModel with the extra config.json fields that KV
 // writes under the "gemma2.*" keys.
 type gemma2Model struct {
 	gemmaModel
 	SlidingWindow         uint32  `json:"sliding_window"`
 	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 }
 // KV returns the shared ModelParameters key-values overlaid with the
 // gemma2-specific entries, including four fixed special token ids.
 func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	// Gather the gemma2 entries in one literal, then layer them over the
 	// shared defaults; keys are distinct so iteration order is irrelevant.
 	gemma2KV := llm.KV{
 		"general.architecture":                    "gemma2",
 		"gemma2.context_length":                   p.MaxPositionEmbeddings,
 		"gemma2.embedding_length":                 p.HiddenSize,
 		"gemma2.block_count":                      p.HiddenLayers,
 		"gemma2.feed_forward_length":              p.IntermediateSize,
 		"gemma2.attention.head_count":             p.NumAttentionHeads,
 		"gemma2.attention.head_count_kv":          p.NumKeyValueHeads,
 		"gemma2.attention.layer_norm_rms_epsilon": p.RMSNormEPS,
 		"gemma2.attention.key_length":             p.HeadDim,
 		"gemma2.attention.value_length":           p.HeadDim,
 		"gemma2.attention.sliding_window":         p.SlidingWindow,
 		"gemma2.attn_logit_softcapping":           p.AttentionLogitSoftcap,
 		"gemma2.final_logit_softcapping":          p.FinalLogitSoftcap,
 		"tokenizer.ggml.eot_token_id":             uint32(107),
 		"tokenizer.ggml.middle_token_id":          uint32(68),
 		"tokenizer.ggml.prefix_token_id":          uint32(67),
 		"tokenizer.ggml.suffix_token_id":          uint32(69),
 	}
 	for key, value := range gemma2KV {
 		kv[key] = value
 	}
 	return kv
 }
 // Replacements returns the gemma2 tensor-name replacement pairs: the gemma2
 // overrides followed by the pairs inherited from gemmaModel.
 func (p *gemma2Model) Replacements() []string {
 	// The overrides must come first. strings.NewReplacer compares old strings
 	// in argument order, and gemmaModel already maps
 	// "post_attention_layernorm" (to "ffn_norm"); appended after the base
 	// list, the gemma2 mapping for that name would never be applied.
 	overrides := []string{
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
 	}
 	return append(overrides, p.gemmaModel.Replacements()...)
 }
--- a/convert/convert_gemma2_adapter.go
+++ b/convert/convert_gemma2_adapter.go
@@ -1,91 +0,0 @@
 package convert
 import (
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // gemma2Adapter converts a LoRA adapter for a gemma2 base model; it adds no
 // fields beyond the shared AdapterParameters.
 type gemma2Adapter struct {
 	AdapterParameters
 }
 // Compile-time check that *gemma2Adapter implements AdapterConverter.
 var _ AdapterConverter = (*gemma2Adapter)(nil)
 // KV returns the shared adapter key-values with the architecture set to
 // "gemma2". baseKV is not consulted.
 func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "gemma2"
 	return kv
 }
 // Tensors converts the adapter tensors to llm.Tensor values. A
 // "weight.lora_a" tensor with shape[0] > shape[1], or a "weight.lora_b"
 // tensor with shape[0] < shape[1], has its two dimensions swapped and the
 // repack function installed as its repacker.
 func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
 			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
 			shape[0], shape[1] = shape[1], shape[0]
 			t.SetRepacker(p.repack)
 		}
 		out = append(out, llm.Tensor{
 			Name: t.Name(),
 			Kind: t.Kind(),
 			// Emit the (possibly swapped) local shape, as llamaAdapter.Tensors
 			// does, instead of calling t.Shape() again; the result then does
 			// not depend on Shape() returning the same backing slice.
 			Shape:    shape,
 			WriterTo: t,
 		})
 	}
 	return out
 }
 // Replacements returns old/new string pairs used to rename adapter tensors,
 // in the form expected by strings.NewReplacer. The first pair strips the
 // "base_model.model." prefix.
 func (p *gemma2Adapter) Replacements() []string {
 	return []string{
 		"base_model.model.", "",
 		"model.layers", "blk",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"lora_A.weight", "weight.lora_a",
 		"lora_B.weight", "weight.lora_b",
 		"lora_a", "weight.lora_a",
 		"lora_b", "weight.lora_b",
 	}
 }
 // repack is the repacker Tensors installs on LoRA tensors whose dimensions it
 // swapped. It views data as a tensor of shape (shape[1], shape[0]), applies
 // the T(1, 0) / Reshape / Transpose sequence below, and returns the result
 // flattened into a single []float32. name is unused.
 func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	// dims is the incoming shape with its two axes exchanged.
 	dims := []int{int(shape[1]), int(shape[0])}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.T(1, 0); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// Flatten the selected rows back into a single slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@@ -1,213 +0,0 @@
 package convert
 import (
 	"cmp"
 	"fmt"
 	"math"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // llamaModel holds the config.json fields used when converting a Llama-family
 // model. Several settings are accepted under more than one JSON key; KV uses
 // the first non-zero value of each group.
 type llamaModel struct {
 	ModelParameters
 	// Block count: n_layers, num_hidden_layers or n_layer.
 	NLayers               uint32  `json:"n_layers"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
 	NLayer                uint32  `json:"n_layer"`
 	// Context length: max_position_embeddings or n_ctx.
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	NCtx                  uint32  `json:"n_ctx"`
 	// Embedding length: hidden_size or n_embd.
 	HiddenSize            uint32  `json:"hidden_size"`
 	NEmbd                 uint32  `json:"n_embd"`
 	// Feed-forward length: intermediate_size or n_inner.
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	NInner                uint32  `json:"n_inner"`
 	// Attention head count: num_attention_heads or n_head.
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NHead                 uint32  `json:"n_head"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RopeTheta             float32 `json:"rope_theta"`
 	RopeScaling           struct {
 		Type                            string  `json:"type"`
 		RopeType                        string  `json:"rope_type"`
 		Factor                          float32 `json:"factor"`
 		LowFrequencyFactor              float32 `json:"low_freq_factor"`
 		HighFrequencyFactor             float32 `json:"high_freq_factor"`
 		// The Hugging Face config key is "original_max_position_embeddings"
 		// (no "al"); the previous tag never matched, so KV always fell back
 		// to its default. The Go field name is kept for existing users.
 		OriginalMaxPositionalEmbeddings uint32  `json:"original_max_position_embeddings"`
 		// factors is computed by KV for "llama3" rope scaling and emitted by
 		// Tensors as "rope_freqs.weight".
 		factors ropeFactor
 	} `json:"rope_scaling"`
 	RMSNormEPS       float32 `json:"rms_norm_eps"`
 	// Layer-norm epsilon: layer_norm_eps, layer_norm_epsilon or norm_epsilon.
 	LayerNormEPS     float32 `json:"layer_norm_eps"`
 	LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
 	NormEpsilon      float32 `json:"norm_epsilon"`
 	// HeadDim, when set, is written as the attention key and value length.
 	HeadDim          uint32  `json:"head_dim"`
 }
 // Compile-time check that *llamaModel implements ModelConverter.
 var _ ModelConverter = (*llamaModel)(nil)
 // KV builds the key-values for a Llama-family model on top of the shared
 // ModelParameters defaults. Optional values are written only when non-zero.
 //
 // For "linear" rope scaling the type and factor are written. For "llama3"
 // rope scaling the per-dimension frequency factors are computed and stored
 // on p (replacing any earlier result) for Tensors to emit; this is skipped
 // when no attention head count is configured.
 func (p *llamaModel) KV(t *Tokenizer) llm.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
 	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
 	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
 		kv["llama.context_length"] = contextLength
 	}
 	// Resolve the aliased sizes once so every use below agrees on them.
 	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
 	if embeddingLength > 0 {
 		kv["llama.embedding_length"] = embeddingLength
 	}
 	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
 		kv["llama.feed_forward_length"] = feedForwardLength
 	}
 	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)
 	if headCount > 0 {
 		kv["llama.attention.head_count"] = headCount
 		kv["llama.rope.dimension_count"] = embeddingLength / headCount
 	}
 	if p.RopeTheta > 0 {
 		kv["llama.rope.freq_base"] = p.RopeTheta
 	}
 	if p.RopeScaling.Type == "linear" {
 		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
 		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
 	} else if p.RopeScaling.RopeType == "llama3" && headCount > 0 {
 		// headCount > 0 guards the division: a config that only sets n_head
 		// used to divide by a zero num_attention_heads here and panic.
 		dim := embeddingLength / headCount
 		// These values do not depend on the loop index.
 		factor := cmp.Or(p.RopeScaling.Factor, 8.0)
 		factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
 		factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
 		original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
 		lambdaLow := float32(original) / factorLow
 		lambdaHigh := float32(original) / factorHigh
 		// Build into a local so that calling KV again replaces the factors
 		// instead of appending a second copy.
 		var factors ropeFactor
 		for i := uint32(0); i < dim; i += 2 {
 			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
 			if lambda < float64(lambdaHigh) {
 				factors = append(factors, 1.0)
 			} else if lambda > float64(lambdaLow) {
 				factors = append(factors, factor)
 			} else {
 				// Between the two thresholds, blend smoothly between 1 and factor.
 				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
 				factors = append(factors, 1.0/((1-smooth)/factor+smooth))
 			}
 		}
 		p.RopeScaling.factors = factors
 	}
 	if p.NumKeyValueHeads > 0 {
 		kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
 	}
 	if p.RMSNormEPS > 0 {
 		kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	}
 	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
 		kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
 	}
 	if p.HeadDim > 0 {
 		kv["llama.attention.key_length"] = p.HeadDim
 		kv["llama.attention.value_length"] = p.HeadDim
 	}
 	return kv
 }
 // Tensors converts the input tensors to llm.Tensor values. When KV has
 // computed rope factors, a "rope_freqs.weight" tensor is emitted first.
 // Tensors named "...attn_q.weight" or "...attn_k.weight" get the repack
 // function installed as their repacker.
 func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	if factors := p.RopeScaling.factors; factors != nil {
 		ropeFreqs := llm.Tensor{
 			Name:     "rope_freqs.weight",
 			Kind:     0,
 			Shape:    []uint64{uint64(len(factors))},
 			WriterTo: factors,
 		}
 		converted = append(converted, ropeFreqs)
 	}
 	for _, src := range ts {
 		switch name := src.Name(); {
 		case strings.HasSuffix(name, "attn_q.weight"), strings.HasSuffix(name, "attn_k.weight"):
 			src.SetRepacker(p.repack)
 		}
 		converted = append(converted, llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		})
 	}
 	return converted
 }
 // Replacements returns old/new string pairs used to rename tensors, in the
 // form expected by strings.NewReplacer (even index: old, odd index: new).
 func (p *llamaModel) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		"input_layernorm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 }
 // repack is the repacker Tensors installs on attn_q and attn_k weights. It
 // reshapes data to (heads, 2, dims[0]/heads/2, dims[1:]...), permutes the
 // axes with T(0, 2, 1, 3), restores the original shape and returns the
 // result flattened into a single []float32.
 //
 // The head count is the attention head count for attn_q and the key/value
 // head count (falling back to the attention head count) for attn_k. An error
 // is returned for any other tensor name, for an empty shape, or when no head
 // count is configured.
 func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
 	}
 	if len(dims) == 0 {
 		// dims[0] is indexed below; fail cleanly instead of panicking.
 		return nil, fmt.Errorf("empty shape for repack: %s", name)
 	}
 	// Accept the n_head alias as well, matching how KV resolves head count.
 	attentionHeads := cmp.Or(p.NumAttentionHeads, p.NHead)
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight") {
 		heads = attentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight") {
 		heads = cmp.Or(p.NumKeyValueHeads, attentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
 	}
 	if heads == 0 {
 		// The reshape below divides by heads; integer division by zero panics.
 		return nil, fmt.Errorf("unknown head count for repack: %s", name)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// Flatten the selected rows back into a single slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_llama_adapter.go
+++ b/convert/convert_llama_adapter.go
@@ -1,169 +0,0 @@
 package convert
 import (
 	"cmp"
 	"fmt"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // llamaAdapter carries the parameters used when converting a LoRA adapter
 // (tensors named *.lora_a / *.lora_b) for a llama-architecture base model.
 type llamaAdapter struct {
 	AdapterParameters
 	// NumAttentionHeads is decoded from "num_attention_heads" and is also
 	// assigned by KV from the base model's "llama.attention.head_count".
 	NumAttentionHeads uint32 `json:"num_attention_heads"`
 	// NumKeyValueHeads is decoded from "num_key_value_heads"; the repack
 	// helpers fall back to NumAttentionHeads when it is zero.
 	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
 }
 var _ AdapterConverter = (*llamaAdapter)(nil)
 // KV returns the adapter metadata: the AdapterParameters key/values plus the
 // architecture name and the attention head counts copied from baseKV.
 //
 // As a side effect it records the base model's attention head count on p so
 // the repack helpers can use it. The value is only taken when baseKV holds a
 // uint32 under "llama.attention.head_count"; a missing or differently typed
 // entry leaves p.NumAttentionHeads unchanged instead of panicking.
 //
 // NOTE(review): NumKeyValueHeads is not populated from baseKV here, so repack
 // relies on the adapter config (or the NumAttentionHeads fallback) for K
 // projections — confirm that is intended.
 func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "llama"
 	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
 	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
 	if heads, ok := baseKV["llama.attention.head_count"].(uint32); ok {
 		p.NumAttentionHeads = heads
 	}
 	return kv
 }
 // Tensors wraps every input tensor in an llm.Tensor and attaches a repacker.
 //
 // A "weight.lora_a" tensor whose first dimension is larger than its second,
 // or a "weight.lora_b" tensor whose first dimension is smaller than its
 // second, has its two leading shape entries swapped in place and is assigned
 // repackAndTranspose; every other tensor is assigned repack.
 func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
 	var converted []llm.Tensor
 	for _, src := range ts {
 		dims := src.Shape()
 		pack := p.repack
 		if (strings.HasSuffix(src.Name(), "weight.lora_a") && dims[0] > dims[1]) ||
 			(strings.HasSuffix(src.Name(), "weight.lora_b") && dims[0] < dims[1]) {
 			// The swap writes through the slice returned by Shape().
 			dims[0], dims[1] = dims[1], dims[0]
 			pack = p.repackAndTranspose
 		}
 		src.SetRepacker(pack)
 		converted = append(converted, llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    dims,
 			WriterTo: src,
 		})
 	}
 	return converted
 }
 // Replacements lists adapter tensor-name rewrites as consecutive (old, new)
 // string pairs. The first pair maps the "base_model.model." prefix to the
 // empty string; the last four pairs map both the "lora_A.weight"/"lora_B.weight"
 // and the bare "lora_a"/"lora_b" spellings to "weight.lora_a"/"weight.lora_b".
 // Presumably consumed via strings.NewReplacer — TODO confirm at the call site.
 func (p *llamaAdapter) Replacements() []string {
 	return []string{
 		"base_model.model.", "",
 		"model.layers", "blk",
 		"self_attn.q_proj", "attn_q",
 		"self_attn.k_proj", "attn_k",
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.down_proj", "ffn_down",
 		"mlp.up_proj", "ffn_up",
 		"lora_A.weight", "weight.lora_a",
 		"lora_B.weight", "weight.lora_b",
 		"lora_a", "weight.lora_a",
 		"lora_b", "weight.lora_b",
 	}
 }
 // repack permutes the data of the attn_q / attn_k "lora_a" adapter tensors
 // and passes every other tensor through untouched.
 //
 // For the two handled names the data is viewed with the reversed shape
 // (shape[1], shape[0]), reshaped to (heads, 2, shape[1]/heads/2, shape[0]),
 // axes 1 and 2 are swapped, the result is reshaped back, transposed, and
 // flattened into a new []float32.
 //
 // The shape is only inspected after the passthrough decision, so tensors that
 // are returned unchanged may have any rank. For the handled names a shape with
 // fewer than two dimensions or a zero head count yields an error instead of a
 // runtime panic.
 func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
 		heads = p.NumAttentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	} else {
 		return data, nil
 	}
 	if len(shape) < 2 {
 		return nil, fmt.Errorf("cannot repack %s: expected 2 dimensions, got %d", name, len(shape))
 	}
 	if heads == 0 {
 		return nil, fmt.Errorf("cannot repack %s: head count is zero", name)
 	}
 	dims := []int{int(shape[1]), int(shape[0])}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// Concatenate the selected rows into one flat slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
 // repackAndTranspose is the repacker that Tensors assigns to adapter tensors
 // whose leading two shape entries it swapped. It optionally applies the same
 // head permutation as repack, then an additional axis swap, and returns the
 // flattened result as a new []float32.
 //
 // shape must have at least two entries; shape[1] and shape[0] are indexed
 // unconditionally.
 func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
 	// dims is the incoming shape with its two entries reversed.
 	dims := []int{int(shape[1]), int(shape[0])}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	// heads stays 0 for every tensor other than the attn_q / attn_k lora_a
 	// weights, which skips the head permutation below.
 	var heads uint32
 	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
 		heads = p.NumAttentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
 		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
 	}
 	if heads > 0 {
 		// View as (heads, 2, dims[0]/heads/2, dims[1]), swap axes 1 and 2,
 		// then restore the 2-D shape and transpose.
 		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
 			return nil, err
 		}
 		if err := n.T(0, 2, 1, 3); err != nil {
 			return nil, err
 		}
 		if err := n.Reshape(dims...); err != nil {
 			return nil, err
 		}
 		if err := n.Transpose(); err != nil {
 			return nil, err
 		}
 	}
 	// Swap the two axes, reshape back to dims, and transpose. The statement
 	// order matters here, so the sequence is kept exactly as written.
 	if err := n.T(1, 0); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// Concatenate the selected rows into one flat slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/convert_mixtral.go
+++ b/convert/convert_mixtral.go
@@ -1,94 +0,0 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"slices"
 	"strings"
 	"github.com/ollama/ollama/llm"
 )
 // mixtralModel extends llamaModel with the two expert-related settings that
 // KV emits as "llama.expert_count" and "llama.expert_used_count".
 type mixtralModel struct {
 	llamaModel
 	// NumLocalExperts is decoded from "num_local_experts"; Tensors also uses
 	// it to generate one name replacement per expert index.
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	// NumExpertsPerToken is decoded from "num_experts_per_tok".
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 // KV returns the embedded llamaModel's key/values, adding
 // "llama.expert_count" and "llama.expert_used_count" when the corresponding
 // field is non-zero.
 func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
 	kv := p.llamaModel.KV(t)
 	if n := p.NumLocalExperts; n > 0 {
 		kv["llama.expert_count"] = n
 	}
 	if n := p.NumExpertsPerToken; n > 0 {
 		kv["llama.expert_used_count"] = n
 	}
 	return kv
 }
 // Tensors merges the per-expert tensors of each layer into single stacked
 // tensors and delegates every remaining tensor to llamaModel.Tensors.
 //
 // Tensors whose name contains ".block_sparse_moe.experts." are removed from
 // ts and grouped by their rewritten name (layer prefix plus ffn_gate_exps /
 // ffn_down_exps / ffn_up_exps with the expert index stripped). Each group
 // becomes one llm.Tensor whose shape is the first member's shape with the
 // group size prepended.
 //
 // Merged tensors are emitted in sorted name order so the result does not
 // depend on Go's randomized map iteration order.
 func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
 		"w2", "ffn_down_exps",
 		"w3", "ffn_up_exps",
 	}
 	for i := range p.NumLocalExperts {
 		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
 	}
 	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
 	namer := strings.NewReplacer(oldnew...)
 	// named "merged" so the local does not shadow the experts type
 	merged := make(map[string]experts)
 	// merge experts into a single tensor while removing them from ts
 	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
 		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
 			return false
 		}
 		name := namer.Replace(t.Name())
 		merged[name] = append(merged[name], t)
 		return true
 	})
 	names := make([]string, 0, len(merged))
 	for n := range merged {
 		names = append(names, n)
 	}
 	slices.Sort(names)
 	out := make([]llm.Tensor, 0, len(names))
 	for _, n := range names {
 		e := merged[n]
 		// TODO(mxyng): sanity check experts
 		out = append(out, llm.Tensor{
 			Name:     n,
 			Kind:     e[0].Kind(),
 			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
 			WriterTo: e,
 		})
 	}
 	return append(out, p.llamaModel.Tensors(ts)...)
 }
 // Replacements returns the llamaModel replacement pairs followed by one extra
 // pair mapping "block_sparse_moe.gate" to "ffn_gate_inp".
 func (p *mixtralModel) Replacements() []string {
 	return append(
 		p.llamaModel.Replacements(),
 		"block_sparse_moe.gate", "ffn_gate_inp",
 	)
 }
 // experts is a group of per-expert tensors that is written out as one
 // stacked tensor.
 type experts []Tensor
 // WriteTo writes every tensor in e to w, in slice order, and returns the sum
 // of the byte counts reported by the individual writes. It stops at the first
 // error and returns it together with the count accumulated so far, as the
 // io.WriterTo contract requires.
 func (e experts) WriteTo(w io.Writer) (int64, error) {
 	var total int64
 	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
 	for _, t := range e {
 		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
 		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
 		// this accomplishes the same thing by writing each expert tensor in sequence
 		n, err := t.WriteTo(w)
 		total += n
 		if err != nil {
 			return total, err
 		}
 	}
 	return total, nil
 }
--- a/convert/convert_phi3.go
+++ b/convert/convert_phi3.go
@@ -1,123 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/binary"
 	"io"
 	"math"
 	"strings"
 	"sync"
 	"github.com/ollama/ollama/llm"
 )
 // phi3Model holds the configuration values decoded from a Phi-3 model's JSON
 // config. Several settings have two spellings (for example
 // "num_hidden_layers" and "n_layers"); KV picks the first non-zero one with
 // cmp.Or.
 type phi3Model struct {
 	ModelParameters
 	NumHiddenLayers   uint32  `json:"num_hidden_layers"`
 	NLayers           uint32  `json:"n_layers"`
 	HiddenSize        uint32  `json:"hidden_size"`
 	NEmbd             uint32  `json:"n_embd"`
 	IntermediateSize  uint32  `json:"intermediate_size"`
 	NumAttentionHeads uint32  `json:"num_attention_heads"`
 	NHead             uint32  `json:"n_head"`
 	NumKeyValueHeads  uint32  `json:"num_key_value_heads"`
 	NHeadKV           uint32  `json:"n_head_kv"`
 	RopeTheta         float32 `json:"rope_theta"`
 	// RopeScaling.Type selects the attn_factor formula in KV; the two factor
 	// lists are written out as extra tensors by Tensors.
 	RopeScaling       struct {
 		Type        string     `json:"type"`
 		LongFactor  ropeFactor `json:"long_factor"`
 		ShortFactor ropeFactor `json:"short_factor"`
 	} `json:"rope_scaling"`
 	RMSNormEPS                    float32 `json:"rms_norm_eps"`
 	// NOTE(review): NPositions is decoded but not read by KV or Tensors in
 	// this file — confirm whether it should back MaxPositionEmbeddings.
 	NPositions                    uint32  `json:"n_positions"`
 	MaxPositionEmbeddings         uint32  `json:"max_position_embeddings"`
 	OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
 	SlidingWindow                 uint32  `json:"sliding_window"`
 }
 var _ ModelConverter = (*phi3Model)(nil)
 // KV builds the GGUF key/value metadata for a Phi-3 model on top of the
 // ModelParameters key/values.
 //
 // Settings with two config spellings are resolved with cmp.Or (first
 // non-zero wins). "phi3.rope.dimension_count" is derived from the same
 // resolved hidden size and head count as "phi3.embedding_length" and
 // "phi3.attention.head_count", and is omitted when the head count is zero so
 // the integer division cannot panic.
 //
 // It panics when RopeScaling.Type is not one of "", "su", "longrope", "yarn".
 func (p *phi3Model) KV(t *Tokenizer) llm.KV {
 	hiddenSize := cmp.Or(p.HiddenSize, p.NEmbd)
 	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = hiddenSize
 	kv["phi3.feed_forward_length"] = p.IntermediateSize
 	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
 	kv["phi3.attention.head_count"] = headCount
 	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
 	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 	if headCount > 0 {
 		kv["phi3.rope.dimension_count"] = hiddenSize / headCount
 	}
 	kv["phi3.rope.freq_base"] = p.RopeTheta
 	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
 	kv["phi3.attention.sliding_window"] = p.SlidingWindow
 	// Ratio of the extended context length to the original one; only used by
 	// the scaling formulas below.
 	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
 	switch p.RopeScaling.Type {
 	case "":
 		// no scaling
 	case "su", "longrope":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
 	case "yarn":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
 	default:
 		panic("unknown rope scaling type")
 	}
 	return kv
 }
 // Tensors converts ts to llm.Tensor values in order. Immediately before the
 // first tensor whose name starts with "blk.0." it inserts, exactly once, the
 // two rope factor tensors "rope_factors_long.weight" and
 // "rope_factors_short.weight" built from RopeScaling.
 func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	var ropeOnce sync.Once
 	result := make([]llm.Tensor, 0, len(ts)+2)
 	// emitRopeFactors appends both factor tensors to result.
 	emitRopeFactors := func() {
 		result = append(result, llm.Tensor{
 			Name:     "rope_factors_long.weight",
 			Kind:     0,
 			Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
 			WriterTo: p.RopeScaling.LongFactor,
 		}, llm.Tensor{
 			Name:     "rope_factors_short.weight",
 			Kind:     0,
 			Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
 			WriterTo: p.RopeScaling.ShortFactor,
 		})
 	}
 	for _, src := range ts {
 		if strings.HasPrefix(src.Name(), "blk.0.") {
 			ropeOnce.Do(emitRopeFactors)
 		}
 		result = append(result, llm.Tensor{
 			Name:     src.Name(),
 			Kind:     src.Kind(),
 			Shape:    src.Shape(),
 			WriterTo: src,
 		})
 	}
 	return result
 }
 // Replacements lists Phi-3 tensor-name rewrites as consecutive (old, new)
 // string pairs. Note that "mlp.gate_up_proj" is mapped to "ffn_up" and the
 // combined "self_attn.qkv_proj" to "attn_qkv".
 // Presumably consumed via strings.NewReplacer — TODO confirm at the call site.
 func (p *phi3Model) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",
 		"input_layernorm", "attn_norm",
 		"self_attn.qkv_proj", "attn_qkv",
 		"self_attn.o_proj", "attn_output",
 		"mlp.down_proj", "ffn_down",
 		"mlp.gate_up_proj", "ffn_up",
 		"post_attention_layernorm", "ffn_norm",
 	}
 }
 type ropeFactor []float32
 func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
 	err := binary.Write(w, binary.LittleEndian, r)
 	return 0, err
 }
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@@ -1,181 +1,40 @@
 //go:build slow
 package convert
 import (
 	"bytes"
 	"crypto/sha256"
 	"encoding/binary"
 	"encoding/hex"
 	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
 	"io/fs"
 	"log/slog"
 	"math"
 	"os"
 	"path/filepath"
 	"slices"
 	"testing"
 	"golang.org/x/exp/maps"
 	"github.com/ollama/ollama/llm"
 )
-func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
 	t.Helper()
-	f, err := os.CreateTemp(t.TempDir(), "f16")
+	mf, err := GetModelFormat(p)
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
 	if err := ConvertModel(fsys, f); err != nil {
 		t.Fatal(err)
 	}
 	r, err := os.Open(f.Name())
 	if err != nil {
 		t.Fatal(err)
 	}
 	t.Cleanup(func() { r.Close() })
 	m, _, err := llm.DecodeGGML(r, math.MaxInt)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if _, err := r.Seek(0, io.SeekStart); err != nil {
+	params, err := mf.GetParams(p)
 		t.Fatal(err)
 	}
 	return r, m.KV(), m.Tensors()
 }
 func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
 	actual := make(map[string]string)
 	for k, v := range kv {
 		if s, ok := v.(json.Marshaler); !ok {
 			actual[k] = fmt.Sprintf("%v", v)
 		} else {
 			bts, err := json.Marshal(s)
 	if err != nil {
 		t.Fatal(err)
 	}
-			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
+	arch, err := mf.GetModelArch("", p, params)
 		}
 	}
 	for _, tensor := range tensors.Items {
 		sha256sum := sha256.New()
 		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
 		if _, err := io.Copy(sha256sum, sr); err != nil {
 			t.Fatal(err)
 		}
 		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
 	}
 	return actual
 }
 // TestMain registers a -level flag (default slog.LevelInfo), applies the
 // parsed value with slog.SetLogLoggerLevel, runs the tests, and exits with the
 // code returned by m.Run.
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")
 	flag.Parse()
 	slog.SetLogLoggerLevel(level)
 	os.Exit(m.Run())
 }
 func TestConvertFull(t *testing.T) {
 	cases := []string{
 		"Meta-Llama-3-8B-Instruct",
 		"Meta-Llama-3.1-8B-Instruct",
 		"Mistral-7B-Instruct-v0.2",
 		"Mixtral-8x7B-Instruct-v0.1",
 		"gemma-2b-it",
 		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
 		"Phi-3-mini-128k-instruct",
 		"all-MiniLM-L6-v2",
 		"gemma-2-9b-it",
 	}
 	for i := range cases {
 		tt := cases[i]
 		t.Run(tt, func(t *testing.T) {
 			t.Parallel()
 			p := filepath.Join("testdata", tt)
 			if testing.Short() {
 				t.Skip("skipping in short mode")
 			} else if _, err := os.Stat(p); err != nil {
 				t.Skipf("%s not found", p)
 			}
 			f, kv, tensors := convertFull(t, os.DirFS(p))
 			actual := generateResultsJSON(t, f, kv, tensors)
 			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
 	if err != nil {
 		t.Fatal(err)
 	}
-			var expect map[string]string
+	if err := arch.LoadVocab(); err != nil {
 			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
 		t.Fatal(err)
 	}
-			keys := maps.Keys(expect)
+	if err := arch.GetTensors(); err != nil {
-			slices.Sort(keys)
+		t.Fatal(err)
 			for _, k := range keys {
 				if v, ok := actual[k]; !ok {
 					t.Errorf("missing %s", k)
 				} else if v != expect[k] {
 					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
 	}
 			}
 		})
 	}
 }
 func TestConvertAdapter(t *testing.T) {
 	type AdapterCase struct {
 		Name     string
 		BaseKV   map[string]any
 		Expected map[string]string
 	}
 	cases := []AdapterCase{
 		{
 			Name: "discollama",
 			BaseKV: map[string]any{
 				"general.architecture":          "llama",
 				"llama.attention.head_count":    uint32(32),
 				"llama.attention.head_count_kv": uint32(8),
 			},
 			Expected: map[string]string{
 				"general.architecture":          "llama",
 				"general.file_type":             "1",
 				"general.parameter_count":       "106496",
 				"general.type":                  "adapter",
 				"general.version":               "v0.2",
 				"adapter.lora.alpha":            "16",
 				"adapter.type":                  "lora",
 				"llama.attention.head_count":    "32",
 				"llama.attention.head_count_kv": "8",
 				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
 				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
 			},
 		},
 	}
 	for _, c := range cases {
 		t.Run(c.Name, func(t *testing.T) {
 			t.Parallel()
 	f, err := os.CreateTemp(t.TempDir(), "f16")
 	if err != nil {
@@ -183,10 +42,7 @@ func TestConvertAdapter(t *testing.T) {
 	}
 	defer f.Close()
-			tempDir := t.TempDir()
+	if err := arch.WriteGGUF(f); err != nil {
 			generateLoraTestData(t, tempDir)
 			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
 		t.Fatal(err)
 	}
@@ -196,152 +52,52 @@ func TestConvertAdapter(t *testing.T) {
 	}
 	defer r.Close()
-			m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r)
 	if err != nil {
 		t.Fatal(err)
 	}
-			if _, err := r.Seek(0, io.SeekStart); err != nil {
+	return m.KV(), m.Tensors()
-				t.Fatal(err)
+}
 func TestConvertFull(t *testing.T) {
 	cases := []struct {
 		path    string
 		arch    string
 		tensors int
 		layers  int
 	}{
 		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
 		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
 		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
 		{"gemma-2b-it", "gemma", 164, 20},
 	}
-			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
+	for _, tt := range cases {
-
+		t.Run(tt.path, func(t *testing.T) {
-			keys := maps.Keys(c.Expected)
+			p := filepath.Join("testdata", tt.path)
-			slices.Sort(keys)
+			if _, err := os.Stat(p); err != nil {
-			for _, k := range keys {
+				t.Skipf("%s not found", p)
 				if v, ok := actual[k]; !ok {
 					t.Errorf("missing %s", k)
 				} else if v != c.Expected[k] {
 					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
 			}
 			kv, tensors := convertFull(t, p)
 			if kv.Architecture() != tt.arch {
 				t.Fatalf("expected llama, got %s", kv.Architecture())
 			}
 			if kv.FileType().String() != "F16" {
 				t.Fatalf("expected F16, got %s", kv.FileType())
 			}
 			if len(tensors) != tt.tensors {
 				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
 			}
 			layers := tensors.Layers()
 			if len(layers) != tt.layers {
 				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
 			}
 		})
 	}
 }
 // generateLoraTestData writes a synthetic adapter into tempDir for the adapter
 // conversion test: an "adapters.safetensors" file holding four F32 tensors
 // filled with ones, and an "adapter_config.json" file. Any failure aborts the
 // test via t.Fatal.
 func generateLoraTestData(t *testing.T, tempDir string) {
 	// tensorData is the per-tensor entry of the JSON header written below.
 	type tensorData struct {
 		Offsets []int  `json:"data_offsets"`
 		Type    string `json:"dtype"`
 		Shape   []int  `json:"shape"`
 	}
 	// Byte size of one 4096x8 float32 tensor.
 	offset := 4096 * 8 * 4
 	td := map[string]*tensorData{"__metadata__": nil}
 	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
 		Offsets: []int{0, offset},
 		Type:    "F32",
 		Shape:   []int{4096, 8},
 	}
 	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
 		Offsets: []int{offset, offset * 2},
 		Type:    "F32",
 		Shape:   []int{8, 4096},
 	}
 	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
 		Offsets: []int{offset * 2, offset * 3},
 		Type:    "F32",
 		Shape:   []int{4096, 8},
 	}
 	// The last tensor is smaller (8x1024), so its end offset is computed
 	// separately.
 	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
 		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
 		Type:    "F32",
 		Shape:   []int{8, 1024},
 	}
 	data, err := json.Marshal(td)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// File layout: little-endian int64 header length, the JSON header, then
 	// the raw tensor data.
 	var buf bytes.Buffer
 	l := int64(len(data))
 	err = binary.Write(&buf, binary.LittleEndian, l)
 	if err != nil {
 		t.Fatal(err)
 	}
 	_, err = buf.Write(data)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// write some data for the tensors
 	ones := make([]float32, 4096*8)
 	for i := range ones {
 		ones[i] = float32(1)
 	}
 	// Three 4096*8-element payloads cover the first three tensors.
 	for range 3 {
 		err = binary.Write(&buf, binary.LittleEndian, ones)
 		if err != nil {
 			t.Fatal(err)
 		}
 	}
 	// Payload for the 8x1024 tensor.
 	ones = make([]float32, 1024*8)
 	for i := range ones {
 		ones[i] = float32(1)
 	}
 	err = binary.Write(&buf, binary.LittleEndian, ones)
 	if err != nil {
 		t.Fatal(err)
 	}
 	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer fdata.Close()
 	_, err = fdata.Write(buf.Bytes())
 	if err != nil {
 		t.Fatal(err)
 	}
 	// Adapter configuration written verbatim to adapter_config.json.
 	configData := `
 {
    "adapter_path": "adapters-test",
    "batch_size": 8,
    "config": "config-tiny.json",
    "data": "../discollama-completion",
    "grad_checkpoint": null,
    "iters": 1000,
    "learning_rate": 1e-05,
    "lora_layers": 1,
    "lora_parameters": {
        "rank": 8,
        "alpha": 16,
        "dropout": 0.0,
        "scale": 2.0
    },
    "lr_schedule": null,
    "max_seq_length": 2048,
    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
    "resume_adapter_file": null,
    "save_every": 100,
    "seed": 0,
    "steps_per_eval": 200,
    "steps_per_report": 10,
    "test": false,
    "test_batches": 500,
    "train": true,
    "use_dora": false,
    "val_batches": 25
 }
 `
 	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
 	_, err = f.WriteString(configData)
 	if err != nil {
 		t.Fatal(err)
 	}
 }
--- a/convert/fs.go
+++ b/convert/fs.go
@@ -1,58 +0,0 @@
 package convert
 import (
 	"archive/zip"
 	"errors"
 	"io"
 	"io/fs"
 	"os"
 	"path/filepath"
 )
 type ZipReader struct {
 	r *zip.Reader
 	p string
 	// limit is the maximum size of a file that can be read directly
 	// from the zip archive. Files larger than this size will be extracted
 	limit int64
 }
 func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
 	return &ZipReader{r, p, limit}
 }
 func (z *ZipReader) Open(name string) (fs.File, error) {
 	r, err := z.r.Open(name)
 	if err != nil {
 		return nil, err
 	}
 	defer r.Close()
 	if fi, err := r.Stat(); err != nil {
 		return nil, err
 	} else if fi.Size() < z.limit {
 		return r, nil
 	}
 	if !filepath.IsLocal(name) {
 		return nil, zip.ErrInsecurePath
 	}
 	n := filepath.Join(z.p, name)
 	if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
 		w, err := os.Create(n)
 		if err != nil {
 			return nil, err
 		}
 		defer w.Close()
 		if _, err := io.Copy(w, r); err != nil {
 			return nil, err
 		}
 	} else if err != nil {
 		return nil, err
 	}
 	return os.Open(n)
 }
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -0,0 +1,103 @@
 package convert
 import (
 	"fmt"
 	"io"
 	"log/slog"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // GemmaModel converts a Gemma model; it adds no fields of its own and works
 // entirely on the embedded ModelData.
 type GemmaModel struct {
 	ModelData
 }
 // addOnes returns a new slice holding data with 1 added to every element.
 // data is treated as a one-dimensional tensor of length vectorSize.
 func addOnes(data []float32, vectorSize int) ([]float32, error) {
 	base := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
 	sum, err := base.Add(tensor.Ones(tensor.Float32, vectorSize))
 	if err != nil {
 		return nil, err
 	}
 	rows, err := native.SelectF32(sum, 0)
 	if err != nil {
 		return nil, err
 	}
 	// Flatten the selected rows into one slice.
 	var flat []float32
 	for _, row := range rows {
 		flat = append(flat, row...)
 	}
 	return flat, nil
 }
 // GetTensors loads the model's tensors through m.Format and appends them to
 // m.Tensors. Tensors whose name ends in "norm.weight" get m.Repack installed
 // as their repacker.
 //
 // It returns an error when loading fails, or when a norm tensor's writer is
 // not a safetensorWriterTo (previously an unchecked assertion that panicked).
 func (m *GemmaModel) GetTensors() error {
 	t, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			wt, ok := l.WriterTo.(safetensorWriterTo)
 			if !ok {
 				return fmt.Errorf("unsupported writer type %T for tensor %s", l.WriterTo, l.Name)
 			}
 			// wt is a copy; store it back so the repacker takes effect.
 			wt.repacker = m.Repack
 			l.WriterTo = wt
 		}
 		m.Tensors = append(m.Tensors, l)
 	}
 	return nil
 }
 // LoadVocab loads the vocabulary with LoadSentencePieceTokens and stores it
 // in m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *GemmaModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // Repack adds 1 to every element of data via addOnes, using shape[0] as the
 // vector length. The tensor name is ignored. shape must be non-empty.
 func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
 	return addOnes(data, int(shape[0]))
 }
 // WriteGGUF assembles the Gemma key/value metadata from m.Name, m.Params and
 // m.Vocab and encodes it together with m.Tensors to ws using a GGUF v3
 // encoder in m.Params.ByteOrder. m.Vocab must already be loaded.
 func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":                   "gemma",
 		"general.name":                           m.Name,
 		"gemma.context_length":                   uint32(m.Params.ContextSize),
 		"gemma.embedding_length":                 uint32(m.Params.HiddenSize),
 		"gemma.block_count":                      uint32(m.Params.HiddenLayers),
 		"gemma.feed_forward_length":              uint32(m.Params.IntermediateSize),
 		"gemma.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"gemma.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
 		// key and value length are both taken from HeadDimension
 		"gemma.attention.key_length":             uint32(m.Params.HeadDimension),
 		"gemma.attention.value_length":           uint32(m.Params.HeadDimension),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.scores":     m.Vocab.Scores,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID),
 		// the unknown-token id is hard-coded rather than read from Params
 		"tokenizer.ggml.unknown_token_id": uint32(3),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 	}
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -0,0 +1,158 @@
 package convert
 import (
 	"cmp"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 	"github.com/ollama/ollama/llm"
 )
 // LlamaModel converts a Llama model; it adds no fields of its own and works
 // entirely on the embedded ModelData.
 type LlamaModel struct {
 	ModelData
 }
 // GetTensors loads the model's tensors through m.Format and appends them to
 // m.Tensors. Tensors named blk.<n>.attn_q.weight or blk.<n>.attn_k.weight get
 // m.Repack installed as their repacker when the format is *TorchFormat or
 // *SafetensorFormat; for any other format they are appended unchanged.
 func (m *LlamaModel) GetTensors() error {
 	loaded, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	qkWeight, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, layer := range loaded {
 		if qkWeight.MatchString(layer.Name) {
 			// The writer is copied out, modified, and stored back.
 			switch m.Format.(type) {
 			case *TorchFormat:
 				w := layer.WriterTo.(torchWriterTo)
 				w.repacker = m.Repack
 				layer.WriterTo = w
 			case *SafetensorFormat:
 				w := layer.WriterTo.(safetensorWriterTo)
 				w.repacker = m.Repack
 				layer.WriterTo = w
 			}
 		}
 		m.Tensors = append(m.Tensors, layer)
 	}
 	return nil
 }
 // LoadVocab builds m.Vocab from the tokenizer.json file in m.Path: token
 // contents and types are copied from the parsed tokens, the merges are stored
 // on the vocab, and the pre-tokenizer name is stored in m.Params.PreTokenizer.
 //
 // When tokenizer.json does not exist it returns nil without touching m.Vocab.
 // NOTE(review): in that case m.Vocab keeps its previous value, which may be
 // nil — confirm callers such as WriteGGUF tolerate that.
 func (m *LlamaModel) LoadVocab() (err error) {
 	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
 	if errors.Is(err, os.ErrNotExist) {
 		return nil
 	} else if err != nil {
 		return err
 	}
 	m.Vocab = &Vocab{}
 	for _, t := range ts {
 		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
 		m.Vocab.Types = append(m.Vocab.Types, t.Type())
 	}
 	m.Vocab.Merges = merges
 	m.Params.PreTokenizer = pre
 	return nil
 }
 // WriteGGUF assembles the Llama key/value metadata from m.Name, m.Params and
 // m.Vocab and encodes it together with m.Tensors to ws using a GGUF v3
 // encoder in m.Params.ByteOrder.
 //
 // "tokenizer.ggml.merges" is written when the vocabulary has merges;
 // otherwise "tokenizer.ggml.scores" is written instead.
 //
 // It returns an error, rather than panicking, when no vocabulary has been
 // loaded (LoadVocab leaves m.Vocab unset if tokenizer.json is missing) or when
 // the attention head count is zero, which would make the rope dimension
 // division fail.
 func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 	if m.Vocab == nil {
 		return errors.New("vocabulary not loaded")
 	}
 	if m.Params.AttentionHeads == 0 {
 		return fmt.Errorf("invalid attention head count: %d", m.Params.AttentionHeads)
 	}
 	kv := llm.KV{
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
 		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
 		"llama.context_length":                   uint32(m.Params.ContextSize),
 		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
 		"llama.block_count":                      uint32(m.Params.HiddenLayers),
 		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
 		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
 		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "gpt2",
 		"tokenizer.ggml.pre":        m.Params.PreTokenizer,
 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}
 	if len(m.Vocab.Merges) > 0 {
 		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
 	} else {
 		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
 	}
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
 // Repack forwards to llamaRepack with this model's parameters.
 func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	return llamaRepack(name, m.Params, data, shape)
 }
 // llamaRepack permutes the data of an attention Q or K weight tensor.
 //
 // Zero entries in shape are dropped. The tensor is then viewed as
 // (heads, 2, dims[0]/heads/2, dims[1:]...), axes 1 and 2 are swapped, the
 // result is reshaped back to dims, transposed, and flattened into a new
 // []float32.
 //
 // name must end in "attn_q.weight" (uses params.AttentionHeads) or
 // "attn_k.weight" (uses params.KeyValHeads, falling back to AttentionHeads
 // when that is zero). Any other name, a shape with no non-zero dimension, or a
 // non-positive head count yields an error rather than a runtime panic.
 func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
 	var heads int
 	if strings.HasSuffix(name, "attn_q.weight") {
 		heads = params.AttentionHeads
 	} else if strings.HasSuffix(name, "attn_k.weight") {
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
 	} else {
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
 	var dims []int
 	for _, dim := range shape {
 		if dim != 0 {
 			dims = append(dims, int(dim))
 		}
 	}
 	// dims[0] is indexed and divided by heads below; reject inputs that
 	// would otherwise panic with an index or divide-by-zero error.
 	if len(dims) == 0 {
 		return nil, fmt.Errorf("cannot repack %s: empty shape", name)
 	}
 	if heads <= 0 {
 		return nil, fmt.Errorf("cannot repack %s: invalid head count %d", name, heads)
 	}
 	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
 		return nil, err
 	}
 	if err := n.T(0, 2, 1, 3); err != nil {
 		return nil, err
 	}
 	if err := n.Reshape(dims...); err != nil {
 		return nil, err
 	}
 	if err := n.Transpose(); err != nil {
 		return nil, err
 	}
 	ts, err := native.SelectF32(n, 1)
 	if err != nil {
 		return nil, err
 	}
 	// Concatenate the selected rows into one flat slice.
 	var f32s []float32
 	for _, t := range ts {
 		f32s = append(f32s, t...)
 	}
 	return f32s, nil
 }
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -0,0 +1,79 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MistralModel converts a Mistral model; it adds no fields of its own and
 // works entirely on the embedded ModelData.
 type MistralModel struct {
 	ModelData
 }
 // GetTensors loads the model's tensors through m.Format and appends them to
 // m.Tensors. Tensors named blk.<n>.attn_q.weight or blk.<n>.attn_k.weight get
 // m.Repack installed as their repacker; their writer is asserted to be a
 // safetensorWriterTo.
 func (m *MistralModel) GetTensors() error {
 	loaded, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	qkWeight, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, layer := range loaded {
 		if qkWeight.MatchString(layer.Name) {
 			// The writer is copied out, modified, and stored back.
 			w := layer.WriterTo.(safetensorWriterTo)
 			w.repacker = m.Repack
 			layer.WriterTo = w
 		}
 		m.Tensors = append(m.Tensors, layer)
 	}
 	return nil
 }
 // LoadVocab loads the vocabulary with LoadSentencePieceTokens and stores it
 // in m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *MistralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF assembles the key/value metadata for a Mistral model (written
 // with architecture "llama") from m.Name, m.Params and m.Vocab and encodes it
 // together with m.Tensors to ws using a GGUF v3 encoder in
 // m.Params.ByteOrder. m.Vocab must already be loaded and
 // m.Params.AttentionHeads must be non-zero, since it is used as a divisor.
 func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":                   "llama",
 		"general.name":                           m.Name,
 		"llama.context_length":                   uint32(m.Params.ContextSize),
 		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
 		"llama.block_count":                      uint32(m.Params.HiddenLayers),
 		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
 		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.scores":     m.Vocab.Scores,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 	}
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
 // Repack forwards to llamaRepack with this model's parameters.
 func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	return llamaRepack(name, m.Params, data, shape)
 }
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -0,0 +1,87 @@
 package convert
 import (
 	"io"
 	"regexp"
 	"github.com/ollama/ollama/llm"
 )
 // MixtralModel converts a Mixtral model; it adds no fields of its own and
 // works entirely on the embedded ModelData.
 type MixtralModel struct {
 	ModelData
 }
 // GetTensors loads the model's tensors through m.Format and appends them to
 // m.Tensors. Tensors named blk.<n>.attn_q.weight or blk.<n>.attn_k.weight get
 // m.Repack installed as their repacker; their writer is asserted to be a
 // safetensorWriterTo.
 func (m *MixtralModel) GetTensors() error {
 	loaded, err := m.Format.GetTensors(m.Path, m.Params)
 	if err != nil {
 		return err
 	}
 	qkWeight, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
 	if err != nil {
 		return err
 	}
 	for _, layer := range loaded {
 		if qkWeight.MatchString(layer.Name) {
 			// The writer is copied out, modified, and stored back.
 			w := layer.WriterTo.(safetensorWriterTo)
 			w.repacker = m.Repack
 			layer.WriterTo = w
 		}
 		m.Tensors = append(m.Tensors, layer)
 	}
 	return nil
 }
 // LoadVocab loads the vocabulary with LoadSentencePieceTokens and stores it
 // in m.Vocab. On failure m.Vocab is left untouched and the error is returned.
 func (m *MixtralModel) LoadVocab() error {
 	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
 	if err == nil {
 		m.Vocab = vocab
 	}
 	return err
 }
 // WriteGGUF assembles the key/value metadata for a Mixtral model (written
 // with architecture "llama", plus the expert counts) from m.Name, m.Params
 // and m.Vocab and encodes it together with m.Tensors to ws using a GGUF v3
 // encoder in m.Params.ByteOrder. m.Vocab must already be loaded and
 // m.Params.AttentionHeads must be non-zero, since it is used as a divisor.
 func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
 	kv := llm.KV{
 		"general.architecture":          "llama",
 		"general.name":                  m.Name,
 		"llama.block_count":             uint32(m.Params.HiddenLayers),
 		"llama.context_length":          uint32(m.Params.ContextSize),
 		"llama.embedding_length":        uint32(m.Params.HiddenSize),
 		"llama.feed_forward_length":     uint32(m.Params.IntermediateSize),
 		"llama.attention.head_count":    uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
 		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
 		"llama.expert_count":      uint32(m.Params.Experts),
 		"llama.expert_used_count": uint32(m.Params.ExpertsUsed),
 		"llama.vocab_size":           uint32(len(m.Vocab.Tokens)),
 		"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
 		"general.file_type":    uint32(1),
 		"tokenizer.ggml.model": "llama",
 		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
 		"tokenizer.ggml.scores":     m.Vocab.Scores,
 		"tokenizer.ggml.token_type": m.Vocab.Types,
 		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
 		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
 		"tokenizer.ggml.unknown_token_id": uint32(0),
 		"tokenizer.ggml.add_bos_token":    true,
 		"tokenizer.ggml.add_eos_token":    false,
 	}
 	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
 }
 // Repack forwards to llamaRepack with this model's parameters.
 func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	return llamaRepack(name, m.Params, data, shape)
 }
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -1,86 +0,0 @@
 package convert
 import (
 	"errors"
 	"io"
 	"io/fs"
 	"strings"
 )
 // Tensor is the common view of a tensor found by one of the format parsers
 // in this package.
 type Tensor interface {
 	// Name returns the tensor's name.
 	Name() string
 	// Shape returns the tensor's dimensions.
 	Shape() []uint64
 	// Kind returns the storage kind code (see tensorKindF32 / tensorKindF16).
 	Kind() uint32
 	// SetRepacker installs a function applied to the tensor data.
 	SetRepacker(repacker)
 	// WriteTo writes the tensor data to the given writer.
 	WriteTo(io.Writer) (int64, error)
 }
 // repacker rewrites a tensor's float32 values. It is called with the tensor
 // name, the values and the tensor shape, and returns the replacement values.
 type repacker func(string, []float32, []uint64) ([]float32, error)
 // Storage kind codes returned by tensorBase.Kind.
 const (
 	tensorKindF32 uint32 = iota
 	tensorKindF16
 )
 // tensorBase holds the state shared by the concrete tensor types: the tensor
 // name, its shape and an optional repacker.
 type tensorBase struct {
 	name  string
 	shape []uint64
 	repacker
 }
 // Name returns the tensor name.
 func (t tensorBase) Name() string {
 	return t.name
 }
 // Shape returns the tensor dimensions.
 func (t tensorBase) Shape() []uint64 {
 	return t.shape
 }
 // Kind picks the storage kind for the tensor. "token_types.weight" and any
 // name ending in ".ffn_gate_inp.weight" are always F32; otherwise tensors
 // with one dimension are F32 and tensors with more are F16. It panics when
 // the shape is empty.
 func (t tensorBase) Kind() uint32 {
 	alwaysF32 := t.name == "token_types.weight" ||
 		strings.HasSuffix(t.name, ".ffn_gate_inp.weight")
 	if alwaysF32 {
 		return tensorKindF32
 	}
 	dims := len(t.shape)
 	if dims == 0 {
 		panic("invalid tensor shape")
 	}
 	if dims == 1 {
 		return tensorKindF32
 	}
 	return tensorKindF16
 }
 // SetRepacker stores fn as the tensor's repacker.
 func (t *tensorBase) SetRepacker(fn repacker) {
 	t.repacker = fn
 }
 // parseTensors looks in fsys for the known weight-file naming patterns, in a
 // fixed order, and hands the files matching the first pattern that has any
 // hits to the corresponding parser. When no pattern matches it returns an
 // "unknown tensor format" error.
 func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	type parseFunc func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
 	candidates := []struct {
 		glob  string
 		parse parseFunc
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
 		{"adapters.safetensors", parseSafetensors},
 		{"adapter_model.safetensors", parseSafetensors},
 		{"pytorch_model-*-of-*.bin", parseTorch},
 		{"pytorch_model.bin", parseTorch},
 		{"consolidated.*.pth", parseTorch},
 	}
 	for _, candidate := range candidates {
 		files, err := fs.Glob(fsys, candidate.glob)
 		switch {
 		case err != nil:
 			return nil, err
 		case len(files) > 0:
 			// first pattern with any hit wins; later patterns are not tried
 			return candidate.parse(fsys, replacer, files...)
 		}
 	}
 	return nil, errors.New("unknown tensor format")
 }
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -1,151 +0,0 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"io/fs"
 	"slices"
 	"strings"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"golang.org/x/exp/maps"
 )
 // safetensorMetadata is one entry of the JSON header at the start of a
 // safetensors file, decoded from the "dtype", "shape" and "data_offsets" keys.
 type safetensorMetadata struct {
 	Type    string   `json:"dtype"`
 	Shape   []uint64 `json:"shape"`
 	Offsets []int64  `json:"data_offsets"`
 }
 // parseSafetensors reads the header of every safetensors file named in ps and
 // returns one Tensor per header entry that declares a dtype. Tensor names are
 // passed through replacer. Tensors are ordered by file (in the order given)
 // and, within a file, by sorted header key.
 func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		fileTensors, err := parseSafetensorsFile(fsys, replacer, p)
 		if err != nil {
 			return nil, err
 		}
 		ts = append(ts, fileTensors...)
 	}
 	return ts, nil
 }
 // parseSafetensorsFile parses the header of the single safetensors file p.
 // It exists as a separate function so the file is closed as soon as its
 // header has been read, rather than staying open until every file in a
 // multi-shard model has been processed.
 func parseSafetensorsFile(fsys fs.FS, replacer *strings.Replacer, p string) ([]Tensor, error) {
 	f, err := fsys.Open(p)
 	if err != nil {
 		return nil, err
 	}
 	defer f.Close()
 	// The file starts with a little-endian int64 giving the header length.
 	var n int64
 	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 		return nil, err
 	}
 	if n < 0 {
 		// a negative capacity would make the allocation below panic
 		return nil, fmt.Errorf("invalid safetensors header length %d in %s", n, p)
 	}
 	b := bytes.NewBuffer(make([]byte, 0, n))
 	if _, err = io.CopyN(b, f, n); err != nil {
 		return nil, err
 	}
 	var headers map[string]safetensorMetadata
 	if err := json.NewDecoder(b).Decode(&headers); err != nil {
 		return nil, err
 	}
 	keys := maps.Keys(headers)
 	slices.Sort(keys)
 	var ts []Tensor
 	for _, key := range keys {
 		value := headers[key]
 		if value.Type == "" {
 			// entries without a dtype are not tensors
 			continue
 		}
 		if len(value.Offsets) < 2 {
 			return nil, fmt.Errorf("tensor %q in %s has malformed data_offsets %v", key, p, value.Offsets)
 		}
 		begin := safetensorsPad(n, value.Offsets[0])
 		end := safetensorsPad(n, value.Offsets[1])
 		ts = append(ts, safetensor{
 			fs:     fsys,
 			path:   p,
 			dtype:  value.Type,
 			offset: begin,
 			size:   end - begin,
 			tensorBase: &tensorBase{
 				name:  replacer.Replace(key),
 				shape: value.Shape,
 			},
 		})
 	}
 	return ts, nil
 }
 // safetensorsPad returns offset shifted by the 8-byte header-length field
 // plus the n bytes of header, i.e. 8 + n + offset.
 func safetensorsPad(n, offset int64) int64 {
 	const headerLengthField = 8
 	return headerLengthField + n + offset
 }
 // safetensor describes one tensor stored in a safetensors file. It records
 // where the data lives rather than the data itself; WriteTo opens the file
 // again to read it.
 type safetensor struct {
 	// fs and path identify the file holding the tensor.
 	fs     fs.FS
 	path   string
 	// dtype is the header's "dtype" string; WriteTo handles F32, F16 and BF16.
 	dtype  string
 	// offset is the position in the file where the data starts and size is
 	// its length in bytes.
 	offset int64
 	size   int64
 	*tensorBase
 }
 // WriteTo reads the tensor's data from its file, decodes it to float32,
 // applies the repacker when one is set, and writes the result to w in
 // little-endian order as either float32 or float16 depending on st.Kind().
 //
 // On success it returns the number of bytes written to w, as the io.WriterTo
 // contract requires. On failure it returns 0 and the error; the number of
 // bytes that may already have reached w is not tracked.
 func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 	f, err := st.fs.Open(st.path)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	// Position the file at the start of the tensor data. fs.File is not
 	// required to be seekable, so fall back to reading and discarding.
 	if seeker, ok := f.(io.Seeker); ok {
 		if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
 			return 0, err
 		}
 	} else {
 		if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
 			return 0, err
 		}
 	}
 	// Decode the stored values to float32 regardless of the on-disk type.
 	var f32s []float32
 	switch st.dtype {
 	case "F32":
 		f32s = make([]float32, st.size/4)
 		if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, st.size/2)
 		if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
 			return 0, err
 		}
 		f32s = make([]float32, len(u16s))
 		for i := range u16s {
 			f32s[i] = float16.Frombits(u16s[i]).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, st.size)
 		if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", st.dtype)
 	}
 	if st.repacker != nil {
 		f32s, err = st.repacker(st.Name(), f32s, st.Shape())
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch st.Kind() {
 	case tensorKindF32:
 		if err = binary.Write(w, binary.LittleEndian, f32s); err != nil {
 			return 0, err
 		}
 		// 4 bytes per float32
 		return int64(len(f32s)) * 4, nil
 	case tensorKindF16:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err = binary.Write(w, binary.LittleEndian, f16s); err != nil {
 			return 0, err
 		}
 		// 2 bytes per float16
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
 	}
 }
--- a/convert/reader_torch.go
+++ b/convert/reader_torch.go
@@ -1,48 +0,0 @@
 package convert
 import (
 	"io"
 	"io/fs"
 	"strings"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 )
 // parseTorch loads every PyTorch checkpoint named in ps and returns one
 // Tensor per entry of the loaded dictionary, with names passed through
 // replacer.
 //
 // It panics if a loaded checkpoint is not a *types.Dict, if a key is not a
 // string, or if a value is not a *pytorch.Tensor.
 //
 // NOTE(review): fsys is not consulted; each path is handed directly to
 // pytorch.Load. Confirm that is intended for callers passing a non-OS fs.FS.
 func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
 	var ts []Tensor
 	for _, p := range ps {
 		pt, err := pytorch.Load(p)
 		if err != nil {
 			return nil, err
 		}
 		dict := pt.(*types.Dict)
 		for _, k := range dict.Keys() {
 			tensor := dict.MustGet(k).(*pytorch.Tensor)
 			var shape []uint64
 			// Range over the values: a single-variable range over a slice
 			// yields indices, which would record 0, 1, 2, ... as the shape.
 			for _, dim := range tensor.Size {
 				shape = append(shape, uint64(dim))
 			}
 			ts = append(ts, torch{
 				storage: tensor.Source,
 				tensorBase: &tensorBase{
 					name:  replacer.Replace(k.(string)),
 					shape: shape,
 				},
 			})
 		}
 	}
 	return ts, nil
 }
 // torch is a tensor loaded from a PyTorch checkpoint. storage is the Source
 // of the loaded pytorch.Tensor; name and shape live in the embedded
 // tensorBase.
 type torch struct {
 	storage pytorch.StorageInterface
 	*tensorBase
 }
 // WriteTo currently writes nothing: it ignores w and pt.storage and reports
 // zero bytes with a nil error.
 // NOTE(review): this looks like a placeholder — tensors parsed from PyTorch
 // checkpoints produce no data when written. Confirm before relying on it.
 func (pt torch) WriteTo(w io.Writer) (int64, error) {
 	return 0, nil
 }
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -0,0 +1,309 @@
 package convert
 import (
 	"bytes"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // safetensorWriterTo reads one tensor's data from a safetensors file on
 // demand and writes it out (see its WriteTo method).
 type safetensorWriterTo struct {
 	// t is the tensor being written; WriteTo reads its Name, Shape and Kind.
 	t *llm.Tensor
 	params *Params
 	// bo is the byte order used both for reading the file and for writing.
 	bo     ByteOrder
 	// filename and dtype identify the source file and the stored element type.
 	filename string
 	dtype    string
 	// offset is where the data starts in the file; size is its length in bytes.
 	offset, size int64
 	// repacker, when non-nil, is applied to the decoded float32 values.
 	repacker     func(string, []float32, []uint64) ([]float32, error)
 }
 // safetensorMetadata is one entry of a safetensors JSON header, decoded from
 // the "dtype", "shape" and "data_offsets" keys.
 type safetensorMetadata struct {
 	Type    string   `json:"dtype"`
 	Shape   []uint64 `json:"shape"`
 	Offsets []int64  `json:"data_offsets"`
 }
 // SafetensorFormat groups the safetensors-specific conversion methods
 // (GetTensors, GetParams, GetLayerName, GetModelArch). It carries no state.
 type SafetensorFormat struct{}
 // GetTensors collects the tensors from every "*.safetensors" file directly
 // inside dirpath. The running offset returned by readTensors for one file is
 // passed as the starting offset for the next file.
 func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	files, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
 	if err != nil {
 		return nil, err
 	}
 	var (
 		collected []llm.Tensor
 		next      uint64
 	)
 	for _, file := range files {
 		batch, end, err := m.readTensors(file, next, params)
 		if err != nil {
 			return nil, err
 		}
 		next = end
 		collected = append(collected, batch...)
 	}
 	return collected, nil
 }
 // readTensors parses the header of the safetensors file fn and returns one
 // llm.Tensor per usable entry, in sorted key order. offset is the output
 // offset assigned to the first tensor; the returned uint64 is the offset
 // following the last one, so it can be fed to the next file.
 //
 // Entries with an empty shape are skipped, two-dimensional tensors get kind 1
 // and all others kind 0. Names are translated with GetLayerName; an entry
 // whose name cannot be translated aborts the whole read with an error.
 func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
 	f, err := os.Open(fn)
 	if err != nil {
 		return nil, 0, err
 	}
 	defer f.Close()
 	// The file starts with an int64 header length followed by a JSON header.
 	var n int64
 	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
 		return nil, 0, err
 	}
 	if n < 0 {
 		// a negative capacity would make the allocation below panic
 		return nil, 0, fmt.Errorf("invalid safetensors header length %d in %s", n, fn)
 	}
 	b := bytes.NewBuffer(make([]byte, 0, n))
 	if _, err = io.CopyN(b, f, n); err != nil {
 		return nil, 0, err
 	}
 	var headers map[string]safetensorMetadata
 	if err := json.NewDecoder(b).Decode(&headers); err != nil {
 		return nil, 0, err
 	}
 	keys := make([]string, 0, len(headers))
 	for key := range headers {
 		// Rotary inverse-frequency buffers have no GGUF layer name and are
 		// dropped. The original filter only knew the "rotary_embd" spelling;
 		// "rotary_emb" is accepted as well.
 		// NOTE(review): "rotary_emb" is presumably the spelling checkpoints
 		// actually use — confirm against a real model.
 		if strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") ||
 			strings.HasSuffix(key, "self_attn.rotary_emb.inv_freq") {
 			continue
 		}
 		keys = append(keys, key)
 	}
 	slices.Sort(keys)
 	// pad turns a data_offsets value into a position in the file by adding the
 	// 8-byte length field and the n header bytes that precede the data.
 	pad := func(s int64) int64 {
 		return 8 + n + s
 	}
 	var tensors []llm.Tensor
 	for _, key := range keys {
 		value := headers[key]
 		var kind uint32
 		switch len(value.Shape) {
 		case 0:
 			// entries without a shape carry no tensor data
 			continue
 		case 2:
 			kind = 1
 		}
 		name, err := m.GetLayerName(key)
 		if err != nil {
 			return nil, 0, err
 		}
 		if len(value.Offsets) < 2 {
 			return nil, 0, fmt.Errorf("tensor '%s' in %s has malformed data_offsets %v", key, fn, value.Offsets)
 		}
 		shape := make([]uint64, len(value.Shape))
 		copy(shape, value.Shape)
 		t := llm.Tensor{
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
 			Shape:  shape,
 		}
 		begin, end := pad(value.Offsets[0]), pad(value.Offsets[1])
 		t.WriterTo = safetensorWriterTo{
 			t:        &t,
 			params:   params,
 			bo:       params.ByteOrder,
 			filename: fn,
 			dtype:    value.Type,
 			offset:   begin,
 			size:     end - begin,
 		}
 		offset += t.Size()
 		tensors = append(tensors, t)
 	}
 	return tensors, offset, nil
 }
 // GetParams decodes dirpath/config.json into a Params value and sets its
 // ByteOrder to little-endian before returning it.
 func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
 	cfg, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		return nil, err
 	}
 	defer cfg.Close()
 	params := &Params{}
 	if err := json.NewDecoder(cfg).Decode(params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return params, nil
 }
 // safetensorLayerRules lists the per-layer rename rules used by GetLayerName.
 // The patterns are compiled once at package initialisation instead of on
 // every call, and are tried in this fixed order. They are deliberately left
 // exactly as written (unanchored, with unescaped dots) so matching behaves as
 // it always has.
 var safetensorLayerRules = []struct {
 	re   *regexp.Regexp
 	repl string
 }{
 	{regexp.MustCompile("model.layers.(\\d+).input_layernorm.weight"), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.down_proj.weight"), "blk.$1.ffn_down.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.gate_proj.weight"), "blk.$1.ffn_gate.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).mlp.up_proj.weight"), "blk.$1.ffn_up.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).post_attention_layernorm.weight"), "blk.$1.ffn_norm.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.k_proj.weight"), "blk.$1.attn_k.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.o_proj.weight"), "blk.$1.attn_output.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.q_proj.weight"), "blk.$1.attn_q.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).self_attn.v_proj.weight"), "blk.$1.attn_v.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.gate.weight"), "blk.$1.ffn_gate_inp.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight"), "blk.$1.ffn_gate.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight"), "blk.$1.ffn_down.$2.weight"},
 	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight"), "blk.$1.ffn_up.$2.weight"},
 }
 // GetLayerName translates the tensor name n into its GGUF layer name. Three
 // names are mapped directly; every other name is run through the rename
 // rules and the first rule that changes it wins. An error is returned when
 // nothing applies.
 func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
 	switch n {
 	case "model.embed_tokens.weight":
 		return "token_embd.weight", nil
 	case "lm_head.weight":
 		return "output.weight", nil
 	case "model.norm.weight":
 		return "output_norm.weight", nil
 	}
 	for _, rule := range safetensorLayerRules {
 		// a rule applies when substituting it actually alters the name
 		if renamed := rule.re.ReplaceAllString(n, rule.repl); renamed != n {
 			return renamed, nil
 		}
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo reads the tensor's data from r.filename, decodes it to float32,
 // applies the repacker when one is set, and writes the result to w using
 // byte order r.bo: as float32 when the tensor kind is 0 and as float16 when
 // it is 1.
 //
 // On success n is the number of bytes written to w, as the io.WriterTo
 // contract requires. On failure it returns 0 and the error; the number of
 // bytes that may already have reached w is not tracked.
 func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	f, err := os.Open(r.filename)
 	if err != nil {
 		return 0, err
 	}
 	defer f.Close()
 	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
 		return 0, err
 	}
 	// Decode the stored values to float32 regardless of the on-disk type.
 	var f32s []float32
 	switch r.dtype {
 	case "F32":
 		f32s = make([]float32, r.size/4)
 		if err = binary.Read(f, r.bo, f32s); err != nil {
 			return 0, err
 		}
 	case "F16":
 		u16s := make([]uint16, r.size/2)
 		if err = binary.Read(f, r.bo, u16s); err != nil {
 			return 0, err
 		}
 		// size the destination once instead of growing it by append
 		f32s = make([]float32, len(u16s))
 		for i, bits := range u16s {
 			f32s[i] = float16.Frombits(bits).Float32()
 		}
 	case "BF16":
 		u8s := make([]uint8, r.size)
 		if err = binary.Read(f, r.bo, u8s); err != nil {
 			return 0, err
 		}
 		f32s = bfloat16.DecodeFloat32(u8s)
 	default:
 		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch r.t.Kind {
 	case 0:
 		if err = binary.Write(w, r.bo, f32s); err != nil {
 			return 0, err
 		}
 		// 4 bytes per float32
 		return int64(len(f32s)) * 4, nil
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err = binary.Write(w, r.bo, f16s); err != nil {
 			return 0, err
 		}
 		// 2 bytes per float16
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 }
 // GetModelArch returns the model implementation matching the single entry in
 // params.Architectures. Every supported architecture is built from the same
 // ModelData (name, directory, params and this format). It returns an error
 // when no architecture is listed, when more than one is listed, or when the
 // listed one is not supported.
 func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	archs := params.Architectures
 	if len(archs) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(archs) != 1 {
 		// more than one architecture listed
 		return nil, fmt.Errorf("Unknown error")
 	}
 	data := ModelData{
 		Name:   name,
 		Path:   dirPath,
 		Params: params,
 		Format: m,
 	}
 	switch archs[0] {
 	case "LlamaForCausalLM":
 		return &LlamaModel{data}, nil
 	case "MistralForCausalLM":
 		return &MistralModel{data}, nil
 	case "MixtralForCausalLM":
 		return &MixtralModel{data}, nil
 	case "GemmaForCausalLM":
 		return &GemmaModel{data}, nil
 	}
 	return nil, fmt.Errorf("Models based on '%s' are not yet supported", archs[0])
 }
--- a/convert/testdata/Meta-Llama-3-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "8192",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "500000",
  "llama.vocab_size": "128256",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.pre": "llama-bpe",
  "tokenizer.ggml.bos_token_id": "128000",
  "tokenizer.ggml.eos_token_id": "128009",
  "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
  "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
  "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
  "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
  "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
  "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
  "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
  "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
  "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
  "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
  "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
  "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
  "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
  "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
  "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
  "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
  "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
  "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
  "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
  "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
  "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
  "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
  "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
  "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
  "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
  "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
  "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
  "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
  "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
  "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
  "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
  "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
  "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
  "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
  "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
  "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
  "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
  "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
  "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
  "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
  "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
  "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
  "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
  "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
  "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
  "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
  "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
  "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
  "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
  "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
  "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
  "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
  "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
  "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
  "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
  "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
  "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
  "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
  "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
  "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
  "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
  "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
  "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
  "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
  "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
  "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
  "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
  "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
  "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
  "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
  "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
  "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
  "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
  "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
  "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
  "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
  "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
  "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
  "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
  "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
  "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
  "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
  "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
  "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
  "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
  "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
  "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
  "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
  "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
  "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
  "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
  "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
  "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
  "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
  "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
  "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
  "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
  "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
  "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
  "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
  "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
  "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
  "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
  "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
  "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
  "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
  "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
  "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
  "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
  "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
  "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
  "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
  "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
  "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
  "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
  "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
  "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
  "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
  "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
  "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
  "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
  "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
  "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
  "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
  "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
  "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
  "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
  "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
  "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
  "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
  "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
  "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
  "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
  "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
  "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
  "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
  "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
  "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
  "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
  "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
  "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
  "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
  "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
  "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
  "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
  "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
  "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
  "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
  "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
  "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
  "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
  "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
  "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
  "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
  "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
  "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
  "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
  "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
  "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
  "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
  "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
  "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
  "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
  "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
  "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
  "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
  "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
  "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
  "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
  "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
  "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
  "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
  "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
  "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
  "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
  "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
  "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
  "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
  "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
  "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
  "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
  "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
  "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
  "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
  "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
  "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
  "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
  "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
  "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
  "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
  "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
  "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
  "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
  "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
  "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
  "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
  "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
  "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
  "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
  "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
  "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
  "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
  "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
  "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
  "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
  "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
  "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
  "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
  "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
  "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
  "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
  "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
  "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
  "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
  "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
  "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
  "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
  "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
  "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
  "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
  "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
  "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
  "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
  "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
  "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
  "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
  "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
  "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
  "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
  "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
  "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
  "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
  "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
  "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
  "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
  "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
  "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
  "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
  "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
  "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
  "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
  "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
  "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
  "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
  "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
  "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
  "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
  "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
  "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
  "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
  "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
  "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
  "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
  "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
  "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
  "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
  "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
  "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
  "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
  "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
  "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
  "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
  "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
  "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
  "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
  "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
  "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
  "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
  "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
  "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
  "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
  "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
  "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
  "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
  "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
  "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
  "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
  "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
  "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
  "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
  "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
  "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
  "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
  "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
  "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
  "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
  "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
  "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
  "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
  "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
  "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
  "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
  "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
  "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
 }
--- a/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
@@ -1,3 +0,0 @@
 {
  "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
 }
--- a/convert/testdata/Mistral-7B-Instruct-v0.2.json
+++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json
@@ -1,313 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.rope.dimension_count": "128",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
  "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
  "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
  "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
  "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
  "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
  "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
  "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
  "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
  "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
  "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
  "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
  "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
  "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
  "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
  "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
  "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
  "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
  "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
  "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
  "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
  "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
  "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
  "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
  "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
  "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
  "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
  "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
  "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
  "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
  "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
  "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
  "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
  "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
  "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
  "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
  "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
  "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
  "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
  "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
  "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
  "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
  "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
  "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
  "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
  "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
  "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
  "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
  "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
  "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
  "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
  "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
  "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
  "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
  "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
  "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
  "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
  "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
  "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
  "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
  "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
  "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
  "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
  "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
  "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
  "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
  "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
  "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
  "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
  "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
  "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
  "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
  "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
  "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
  "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
  "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
  "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
  "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
  "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
  "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
  "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
  "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
  "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
  "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
  "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
  "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
  "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
  "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
  "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
  "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
  "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
  "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
  "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
  "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
  "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
  "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
  "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
  "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
  "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
  "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
  "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
  "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
  "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
  "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
  "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
  "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
  "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
  "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
  "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
  "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
  "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
  "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
  "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
  "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
  "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
  "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
  "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
  "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
  "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
  "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
  "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
  "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
  "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
  "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
  "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
  "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
  "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
  "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
  "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
  "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
  "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
  "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
  "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
  "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
  "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
  "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
  "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
  "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
  "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
  "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
  "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
  "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
  "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
  "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
  "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
  "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
  "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
  "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
  "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
  "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
  "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
  "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
  "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
  "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
  "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
  "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
  "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
  "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
  "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
  "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
  "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
  "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
  "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
  "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
  "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
  "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
  "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
  "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
  "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
  "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
  "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
  "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
  "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
  "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
  "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
  "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
  "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
  "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
  "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
  "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
  "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
  "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
  "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
  "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
  "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
  "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
  "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
  "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
  "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
  "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
  "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
  "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
  "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
  "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
  "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
  "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
  "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
  "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
  "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
  "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
  "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
  "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
  "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
  "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
  "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
  "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
  "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
  "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
  "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
  "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
  "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
  "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
  "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
  "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
  "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
  "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
  "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
  "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
  "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
  "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
  "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
  "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
  "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
  "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
  "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
  "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
  "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
  "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
  "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
  "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
  "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
  "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
  "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
  "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
  "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
  "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
  "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
  "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
  "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
  "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
  "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
  "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
  "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
  "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
  "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
  "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
  "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
  "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
  "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
  "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
  "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
  "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
  "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
  "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
  "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
  "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
  "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
  "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
  "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
  "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
  "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
  "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
  "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
  "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
  "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
  "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
  "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
  "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
  "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
  "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
  "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
  "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
  "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
  "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
  "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
  "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
  "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
  "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
  "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
  "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
  "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
  "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
  "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
  "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
  "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
  "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
  "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
  "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
  "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
  "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
  "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
 }
--- a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
+++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
@@ -1,348 +0,0 @@
 {
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "1e+06",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.expert_count": "8",
  "llama.expert_used_count": "2",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
  "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
  "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
  "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
  "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
  "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
  "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
  "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
  "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
  "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
  "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
  "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
  "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
  "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
  "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
  "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
  "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
  "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
  "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
  "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
  "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
  "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
  "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
  "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
  "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
  "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
  "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
  "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
  "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
  "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
  "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
  "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
  "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
  "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
  "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
  "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
  "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
  "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
  "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
  "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
  "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
  "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
  "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
  "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
  "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
  "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
  "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
  "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
  "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
  "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
  "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
  "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
  "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
  "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
  "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
  "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
  "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
  "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
  "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
  "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
  "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
  "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
  "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
  "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
  "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
  "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
  "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
  "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
  "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
  "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
  "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
  "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
  "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
  "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
  "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
  "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
  "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
  "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
  "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
  "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
  "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
  "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
  "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
  "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
  "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
  "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
  "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
  "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
  "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
  "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
  "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
  "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
  "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
  "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
  "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
  "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
  "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
  "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
  "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
  "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
  "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
  "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
  "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
  "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
  "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
  "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
  "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
  "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
  "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
  "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
  "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
  "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
  "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
  "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
  "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
  "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
  "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
  "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
  "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
  "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
  "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
  "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
  "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
  "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
  "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
  "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
  "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
  "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
  "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
  "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
  "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
  "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
  "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
  "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
  "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
  "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
  "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
  "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
  "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
  "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
  "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
  "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
  "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
  "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
  "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
  "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
  "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
  "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
  "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
  "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
  "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
  "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
  "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
  "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
  "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
  "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
  "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
  "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
  "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
  "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
  "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
  "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
  "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
  "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
  "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
  "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
  "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
  "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
  "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
  "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
  "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
  "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
  "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
  "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
  "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
  "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
  "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
  "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
  "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
  "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
  "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
  "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
  "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
  "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
  "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
  "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
  "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
  "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
  "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
  "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
  "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
  "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
  "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
  "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
  "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
  "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
  "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
  "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
  "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
  "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
  "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
  "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
  "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
  "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
  "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
  "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
  "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
  "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
  "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
  "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
  "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
  "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
  "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
  "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
  "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
  "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
  "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
  "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
  "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
  "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
  "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
  "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
  "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
  "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
  "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
  "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
  "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
  "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
  "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
  "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
  "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
  "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
  "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
  "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
  "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
  "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
  "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
  "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
  "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
  "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
  "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
  "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
  "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
  "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
  "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
  "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
  "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
  "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
  "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
  "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
  "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
  "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
  "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
  "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
  "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
  "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
  "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
  "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
  "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
  "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
  "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
  "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
  "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
  "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
  "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
  "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
  "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
  "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
  "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
  "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
  "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
  "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
  "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
  "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
  "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
  "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
  "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
  "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
  "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
  "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
  "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
  "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
  "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
  "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
  "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
  "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
  "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
  "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
  "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
  "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
  "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
  "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
  "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
  "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
  "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
  "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
  "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
  "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
  "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
  "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
  "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
  "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
  "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
  "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
  "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
  "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
  "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
  "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
  "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
  "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
  "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
  "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
  "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
  "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
  "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
  "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
  "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
  "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
  "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
  "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
  "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
  "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
  "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
 }
--- a/convert/testdata/Phi-3-mini-128k-instruct.json
+++ b/convert/testdata/Phi-3-mini-128k-instruct.json
@@ -1,225 +0,0 @@
 {
  "general.architecture": "phi3",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "phi3.block_count": "32",
  "phi3.context_length": "131072",
  "phi3.embedding_length": "3072",
  "phi3.feed_forward_length": "8192",
  "phi3.rope.scaling.original_context_length": "4096",
  "phi3.rope.dimension_count": "96",
  "phi3.rope.freq_base": "10000",
  "phi3.rope.scaling.attn_factor": "1.1902381",
  "phi3.attention.head_count": "32",
  "phi3.attention.head_count_kv": "32",
  "phi3.attention.layer_norm_rms_epsilon": "1e-05",
  "phi3.attention.sliding_window": "262144",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.pre": "default",
  "tokenizer.ggml.add_bos_token": "false",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "32000",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.padding_token_id": "32000",
  "tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
  "tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
  "tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
  "blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
  "blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
  "blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
  "blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
  "blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
  "blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
  "blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
  "blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
  "blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
  "blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
  "blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
  "blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
  "blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
  "blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
  "blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
  "blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
  "blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
  "blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
  "blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
  "blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
  "blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
  "blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
  "blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
  "blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
  "blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
  "blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
  "blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
  "blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
  "blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
  "blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
  "blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
  "blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
  "blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
  "blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
  "blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
  "blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
  "blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
  "blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
  "blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
  "blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
  "blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
  "blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
  "blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
  "blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
  "blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
  "blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
  "blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
  "blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
  "blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
  "blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
  "blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
  "blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
  "blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
  "blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
  "blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
  "blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
  "blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
  "blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
  "blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
  "blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
  "blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
  "blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
  "blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
  "blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
  "blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
  "blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
  "blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
  "blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
  "blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
  "blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
  "blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
  "blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
  "blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
  "blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
  "blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
  "blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
  "blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
  "blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
  "blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
  "blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
  "blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
  "blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
  "blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
  "blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
  "blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
  "blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
  "blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
  "blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
  "blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
  "blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
  "blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
  "blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
  "blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
  "blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
  "blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
  "blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
  "blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
  "blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
  "blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
  "blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
  "blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
  "blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
  "blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
  "blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
  "blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
  "blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
  "blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
  "blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
  "blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
  "blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
  "blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
  "blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
  "blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
  "blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
  "blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
  "blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
  "blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
  "blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
  "blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
  "blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
  "blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
  "blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
  "blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
  "blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
  "blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
  "blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
  "blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
  "blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
  "blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
  "blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
  "blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
  "blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
  "blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
  "blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
  "blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
  "blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
  "blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
  "blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
  "blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
  "blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
  "blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
  "blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
  "blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
  "blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
  "blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
  "blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
  "blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
  "blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
  "blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
  "blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
  "blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
  "blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
  "blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
  "blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
  "blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
  "blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
  "blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
  "blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
  "blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
  "blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
  "blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
  "blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
  "blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
  "blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
  "blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
  "blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
  "blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
  "blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
  "blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
  "blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
  "blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
  "blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
  "blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
  "blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
  "blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
  "blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
  "blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
  "blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
  "blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
  "blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
  "blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
  "blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
  "blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
  "blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
  "blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
  "blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
  "blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
  "blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
  "blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
  "blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
  "blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
  "blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
  "output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
  "output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
  "rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
  "rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
  "token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
 }
--- a/convert/testdata/all-MiniLM-L6-v2.json
+++ b/convert/testdata/all-MiniLM-L6-v2.json
@@ -1,124 +0,0 @@
 {
  "general.architecture": "bert",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "bert.attention.causal": "false",
  "bert.attention.head_count": "12",
  "bert.attention.layer_norm_epsilon": "1e-12",
  "bert.block_count": "6",
  "bert.context_length": "512",
  "bert.embedding_length": "384",
  "bert.feed_forward_length": "1536",
  "bert.pooling_type": "1",
  "tokenizer.ggml.model": "bert",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.unknown_token_id": "100",
  "tokenizer.ggml.cls_token_id": "101",
  "tokenizer.ggml.seperator_token_id": "102",
  "tokenizer.ggml.mask_token_id": "103",
  "tokenizer.ggml.token_type_count": "2",
  "tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
  "tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
  "tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
  "token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
  "position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
  "token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
  "token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
  "token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
  "blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
  "blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
  "blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
  "blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
  "blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
  "blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
  "blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
  "blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
  "blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
  "blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
  "blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
  "blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
  "blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
  "blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
  "blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
  "blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
  "blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
  "blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
  "blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
  "blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
  "blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
  "blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
  "blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
  "blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
  "blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
  "blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
  "blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
  "blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
  "blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
  "blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
  "blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
  "blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
  "blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
  "blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
  "blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
  "blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
  "blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
  "blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
  "blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
  "blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
  "blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
  "blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
  "blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
  "blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
  "blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
  "blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
  "blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
  "blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
  "blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
  "blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
  "blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
  "blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
  "blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
  "blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
  "blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
  "blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
  "blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
  "blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
  "blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
  "blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
  "blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
  "blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
  "blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
  "blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
  "blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
  "blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
  "blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
  "blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
  "blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
  "blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
  "blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
  "blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
  "blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
  "blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
  "blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
  "blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
  "blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
  "blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
  "blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
  "blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
  "blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
  "blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
  "blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
  "blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
  "blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
  "blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
  "blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
  "blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
  "blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
  "blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
  "blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
  "blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
  "blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
  "blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
  "blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
  "blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
 }
--- a/convert/testdata/gemma-2-9b-it.json
+++ b/convert/testdata/gemma-2-9b-it.json
@@ -1,6 +0,0 @@
 {
  "general.architecture": "gemma2",
  "gemma2.attention.sliding_window": "4096",
  "gemma2.attn_logit_softcapping": "50",
  "gemma2.final_logit_softcapping": "30"
 }
--- a/convert/testdata/gemma-2b-it.json
+++ b/convert/testdata/gemma-2b-it.json
@@ -1,188 +0,0 @@
 {
  "general.architecture": "gemma",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "gemma.block_count": "18",
  "gemma.context_length": "8192",
  "gemma.embedding_length": "2048",
  "gemma.feed_forward_length": "16384",
  "gemma.attention.head_count": "8",
  "gemma.attention.head_count_kv": "1",
  "gemma.attention.key_length": "256",
  "gemma.attention.value_length": "256",
  "gemma.attention.layer_norm_rms_epsilon": "1e-06",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "2",
  "tokenizer.ggml.eos_token_id": "1",
  "tokenizer.ggml.padding_token_id": "0",
  "tokenizer.ggml.unknown_token_id": "3",
  "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
  "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
  "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
  "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
  "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
  "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
  "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
  "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
  "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
  "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
  "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
  "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
  "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
  "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
  "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
  "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
  "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
  "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
  "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
  "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
  "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
  "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
  "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
  "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
  "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
  "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
  "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
  "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
  "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
  "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
  "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
  "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
  "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
  "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
  "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
  "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
  "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
  "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
  "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
  "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
  "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
  "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
  "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
  "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
  "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
  "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
  "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
  "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
  "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
  "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
  "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
  "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
  "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
  "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
  "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
  "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
  "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
  "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
  "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
  "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
  "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
  "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
  "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
  "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
  "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
  "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
  "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
  "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
  "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
  "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
  "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
  "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
  "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
  "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
  "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
  "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
  "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
  "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
  "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
  "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
  "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
  "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
  "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
  "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
  "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
  "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
  "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
  "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
  "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
  "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
  "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
  "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
  "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
  "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
  "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
  "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
  "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
  "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
  "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
  "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
  "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
  "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
  "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
  "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
  "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
  "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
  "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
  "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
  "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
  "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
  "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
  "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
  "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
  "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
  "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
  "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
  "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
  "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
  "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
  "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
  "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
  "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
  "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
  "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
  "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
  "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
  "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
  "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
  "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
  "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
  "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
  "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
  "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
  "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
  "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
  "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
  "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
  "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
  "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
  "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
  "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
  "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
  "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
  "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
  "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
  "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
  "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
  "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
  "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
  "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
  "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
  "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
  "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
  "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
  "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
  "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
  "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
  "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
  "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
  "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
  "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
  "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
  "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
  "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
  "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
  "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
  "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
 }
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -1,12 +1,10 @@
 package convert
 import (
 	"cmp"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"log/slog"
 	"os"
 	"slices"
@@ -14,140 +12,10 @@ import (
 	"golang.org/x/exp/maps"
 )
 const (
 	_ int32 = iota
 	tokenTypeNormal
 	tokenTypeUnknown
 	tokenTypeControl
 	tokenTypeUserDefined
 	tokenTypeUnused
 	tokenTypeByte
 )
 type Tokenizer struct {
 	*Vocabulary
 	SpecialVocabulary []*SpecialVocabulary
 	Merges            []string
 	Pre      string
 	Template string
 }
 func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
 	v, err := parseVocabulary(fsys)
 	if err != nil {
 		return nil, err
 	}
 	t := &Tokenizer{
 		Vocabulary: v,
 		Pre:        "default",
 	}
 	addedTokens := make(map[string]token)
 	if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		var tt tokenizer
 		if err := json.NewDecoder(f).Decode(&tt); err != nil {
 			return nil, err
 		}
 		for _, t := range tt.AddedTokens {
 			addedTokens[t.Content] = t
 		}
 		t.Merges = tt.Model.Merges
 		sha256sum := sha256.New()
 		for _, pt := range tt.PreTokenizer.PreTokenizers {
 			switch pt.Type {
 			case "Split":
 				if pt.Pattern.Regex != "" {
 					// create a checksum of all Split pretokenizers which should be sufficient
 					// to identify the pretokenizer
 					sha256sum.Write([]byte(pt.Pattern.Regex))
 				}
 			}
 		}
 		switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
 		case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
 			t.Pre = "llama-bpe"
 		case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
 			t.Pre = "deepseek-llm"
 		case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
 			t.Pre = "deepseek-coder"
 		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
 			// noop, empty pretokenizer
 		default:
 			slog.Warn("unknown pretokenizer, using default", "digest", digest)
 		}
 	}
 	if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
 	} else if err != nil {
 		return nil, err
 	} else {
 		defer f.Close()
 		var p map[string]json.RawMessage
 		if err := json.NewDecoder(f).Decode(&p); err != nil {
 			return nil, err
 		}
 		if template, ok := p["chat_template"]; ok {
 			if err := json.Unmarshal(template, &t.Template); err != nil {
 				return nil, err
 			}
 		}
 		for _, st := range specialTokenTypes {
 			sv := SpecialVocabulary{Type: st}
 			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
 				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
 					return nil, err
 				}
 			}
 			if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
 				var content string
 				if err := json.Unmarshal(bts, &content); err != nil {
 					var mm map[string]any
 					if err := json.Unmarshal(bts, &mm); err != nil {
 						continue
 					}
 					content, ok = mm["content"].(string)
 					if !ok {
 						continue
 					}
 				}
 				sv.Content = content
 			}
 			if id, ok := addedTokens[sv.Content]; ok {
 				sv.ID = id.ID
 				t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
 			}
 		}
 	}
 	return t, nil
 }
 type tokenizer struct {
 	Version     string         `json:"version"`
-	AddedTokens []token `json:"added_tokens"`
+	AddedTokens []Token        `json:"added_tokens"`
-	Model       struct {
+	Model       TokenizerModel `json:"model"`
 		Type   string         `json:"type"`
 		Vocab  map[string]int `json:"vocab"`
 		Merges []string       `json:"merges"`
 	} `json:"model"`
 	PreTokenizer struct {
 		PreTokenizers []struct {
@@ -159,108 +27,83 @@ type tokenizer struct {
 	} `json:"pre_tokenizer"`
 }
-type token struct {
+type TokenizerModel struct {
 	Type   string         `json:"type"`
 	Vocab  map[string]int `json:"vocab"`
 	Merges []string       `json:"merges"`
 	Tokens []Token
 }
 // Token is one vocabulary entry. It is the element type of the tokenizer's
 // "added_tokens" list and of the token slice built by parseTokens.
 type Token struct {
 	// ID is the token's integer id (JSON key "id").
 	ID          int    `json:"id"`
 	// Content is the token text (JSON key "content").
 	Content     string `json:"content"`
 	// Special is decoded from the JSON key "special".
 	Special     bool   `json:"special"`
 	// UserDefined carries no JSON tag; parseTokens sets it to true for
 	// every entry it copies out of AddedTokens.
 	UserDefined bool
 }
-type Vocabulary struct {
+func (t *Token) Type() int32 {
-	Model  string
+	switch {
-	Tokens []string
+	case t.Special:
-	Scores []float32
+		return tokenTypeControl
-	Types  []int32
+	case t.UserDefined:
 		return tokenTypeUserDefined
 	default:
 		return tokenTypeNormal
 	}
 }
-func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
+func (t *Tokenizer) maxID() int {
-	f, err := fsys.Open("tokenizer.json")
+	return max(
 		slices.Max(maps.Values(t.Model.Vocab)),
 		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
 			return cmp.Compare(a.ID, b.ID)
 		}).ID,
 	)
 }
 func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
 	f, err := os.Open(dirpath)
 	if err != nil {
-		return nil, err
+		panic(err)
 	}
 	defer f.Close()
-	var t tokenizer
+	var t Tokenizer
 	if err := json.NewDecoder(f).Decode(&t); err != nil {
-		return nil, err
+		return "", nil, nil, err
 	}
-	tokens := make(map[int]token, len(t.Model.Vocab))
+	tokens = make([]Token, t.maxID()+1)
 	for k, v := range t.Model.Vocab {
-		tokens[v] = token{
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
-			ID:      v,
+	}
-			Content: k,
+
 	for _, v := range t.AddedTokens {
 		v.UserDefined = true
 		tokens[v.ID] = v
 	}
 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
 		switch pt.Type {
 		case "Split":
 			if pt.Pattern.Regex != "" {
 				sha256sum.Write([]byte(pt.Pattern.Regex))
 			}
 		}
 	}
-	for _, token := range t.AddedTokens {
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
-		token.UserDefined = true
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
-		tokens[token.ID] = token
+		pre = "llama-bpe"
-	}
+	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
-
+		pre = "deepseek-llm"
-	keys := maps.Keys(tokens)
+	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
-	slices.Sort(keys)
+		pre = "deepseek-coder"
 	v := Vocabulary{Model: "gpt2"}
 	for _, k := range keys {
 		token := tokens[k]
 		v.Tokens = append(v.Tokens, token.Content)
 		v.Scores = append(v.Scores, float32(token.ID))
 		switch {
 		case token.Special:
 			v.Types = append(v.Types, tokenTypeControl)
 		case token.UserDefined:
 			v.Types = append(v.Types, tokenTypeUserDefined)
 	default:
-			v.Types = append(v.Types, tokenTypeNormal)
+		slog.Warn("unknown pretokenizer, using default", "digest", digest)
-		}
+		pre = "default"
 	}
-	return &v, nil
+	return pre, tokens, t.Model.Merges, nil
 }
 // parseVocabulary loads the vocabulary from whichever tokenizer file is
 // present in fsys. tokenizer.model is probed before tokenizer.json and the
 // first file that exists decides which parser runs. A stat failure other
 // than "does not exist" is returned as-is; if neither file exists an error
 // is returned.
 func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
 	type parser func(fs.FS) (*Vocabulary, error)
 	candidates := []struct {
 		name  string
 		parse parser
 	}{
 		{name: "tokenizer.model", parse: parseSentencePiece},
 		{name: "tokenizer.json", parse: parseVocabularyFromTokenizer},
 	}
 	for _, c := range candidates {
 		_, err := fs.Stat(fsys, c.name)
 		switch {
 		case err == nil:
 			return c.parse(fsys)
 		case errors.Is(err, os.ErrNotExist):
 			// not this format; probe the next candidate
 		default:
 			return nil, err
 		}
 	}
 	return nil, errors.New("unknown tensor format")
 }
 // SpecialVocabulary describes one special token: its short type name (for
 // example "bos" or "pad"), its id, its text, and the AddToken flag.
 type SpecialVocabulary struct {
 	Type     string
 	ID       int
 	Content  string
 	AddToken bool
 }
 // Key translates the short type name into its long-form key. "bos", "eos",
 // "cls" and "mask" map to themselves; "unk", "sep" and "pad" are expanded.
 // Any other type panics.
 func (sv SpecialVocabulary) Key() string {
 	keys := map[string]string{
 		"bos":  "bos",
 		"eos":  "eos",
 		"cls":  "cls",
 		"mask": "mask",
 		"unk":  "unknown",
 		//nolint:misspell // this is an upstream typo
 		"sep": "seperator",
 		"pad": "padding",
 	}
 	if key, found := keys[sv.Type]; found {
 		return key
 	}
 	panic("unknown special vocabulary type")
 }
--- a/convert/tokenizer_spm.go
+++ b/convert/tokenizer_spm.go
@@ -1,113 +0,0 @@
 package convert
 import (
 	"cmp"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"slices"
 	"google.golang.org/protobuf/proto"
 	"github.com/ollama/ollama/convert/sentencepiece"
 )
 // parseSentencePiece builds a "llama" Vocabulary from the SentencePiece
 // model stored in tokenizer.model and then appends the entries of the
 // optional added_tokens.json.
 //
 // Pieces typed UNKNOWN, CONTROL, UNUSED or BYTE keep their type. Every
 // other piece becomes NORMAL, unless its text is listed in the additional
 // special tokens, in which case it becomes CONTROL.
 //
 // Added tokens must continue the id sequence directly after the model's
 // pieces with no gaps; otherwise an "invalid token id" error is returned.
 func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 	specials, err := parseAdditionalSpecialTokens(fsys)
 	if err != nil {
 		return nil, err
 	}
 	raw, err := fs.ReadFile(fsys, "tokenizer.model")
 	if err != nil {
 		return nil, err
 	}
 	var model sentencepiece.ModelProto
 	if err := proto.Unmarshal(raw, &model); err != nil {
 		return nil, err
 	}
 	vocab := &Vocabulary{Model: "llama"}
 	for _, piece := range model.GetPieces() {
 		kind := piece.GetType()
 		switch kind {
 		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
 			sentencepiece.ModelProto_SentencePiece_CONTROL,
 			sentencepiece.ModelProto_SentencePiece_UNUSED,
 			sentencepiece.ModelProto_SentencePiece_BYTE:
 			// passed through unchanged
 		default:
 			kind = sentencepiece.ModelProto_SentencePiece_NORMAL
 			if slices.Contains(specials, piece.GetPiece()) {
 				kind = sentencepiece.ModelProto_SentencePiece_CONTROL
 			}
 		}
 		vocab.Tokens = append(vocab.Tokens, piece.GetPiece())
 		vocab.Scores = append(vocab.Scores, piece.GetScore())
 		vocab.Types = append(vocab.Types, int32(kind))
 	}
 	f, err := fsys.Open("added_tokens.json")
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
 			// no extra tokens: the model's own pieces are the whole vocabulary
 			return vocab, nil
 		}
 		return nil, err
 	}
 	defer f.Close()
 	var added map[string]int
 	if err := json.NewDecoder(f).Decode(&added); err != nil {
 		return nil, err
 	}
 	type entry struct {
 		id      int
 		content string
 	}
 	var entries []entry
 	for content, id := range added {
 		entries = append(entries, entry{id: id, content: content})
 	}
 	slices.SortFunc(entries, func(a, b entry) int {
 		return cmp.Compare(a.id, b.id)
 	})
 	base := len(vocab.Tokens)
 	for offset, e := range entries {
 		// ids must be exactly base, base+1, base+2, ...
 		if e.id != base+offset {
 			return nil, fmt.Errorf("invalid token id: %d", e.id)
 		}
 		vocab.Tokens = append(vocab.Tokens, e.content)
 		vocab.Scores = append(vocab.Scores, -1000.0)
 		vocab.Types = append(vocab.Types, tokenTypeUserDefined)
 	}
 	return vocab, nil
 }
 func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
 	f, err := fsys.Open("special_tokens_map.json")
 	if errors.Is(err, os.ErrNotExist) {
 		return nil, nil
 	} else if err != nil {
 		return nil, err
 	}
 	defer f.Close()
 	var m struct {
 		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
 	}
 	if err := json.NewDecoder(f).Decode(&m); err != nil {
 		return nil, err
 	}
 	return m.AdditionalSpecialTokens, nil
 }
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -0,0 +1,288 @@
 package convert
 import (
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"github.com/nlpodyssey/gopickle/pytorch"
 	"github.com/nlpodyssey/gopickle/types"
 	"github.com/x448/float16"
 	"github.com/ollama/ollama/llm"
 )
 // torchWriterTo serialises the data of one tensor loaded from a PyTorch
 // checkpoint; see its WriteTo method.
 type torchWriterTo struct {
 	// t is the tensor descriptor; WriteTo reads its Name, Shape and Kind.
 	t *llm.Tensor
 	// params is the conversion parameter set supplied by GetTensors.
 	params *Params
 	// bo is the byte order handed to binary.Write.
 	bo     ByteOrder
 	// storage is the unpickled backing storage; WriteTo accepts
 	// FloatStorage, HalfStorage and BFloat16Storage.
 	storage  pytorch.StorageInterface
 	// repacker, when non-nil, is applied to the float32 data (with the
 	// tensor name and shape) before encoding.
 	repacker func(string, []float32, []uint64) ([]float32, error)
 }
 // TorchFormat groups the methods that read PyTorch checkpoint directories:
 // GetTensors, GetParams, GetLayerName and GetModelArch. It holds no state.
 type TorchFormat struct{}
 // GetTensors loads every tensor from the PyTorch checkpoint files in
 // dirpath and returns their descriptors in file order.
 //
 // Files matching consolidated*.pth are used when present; otherwise files
 // matching pytorch_model*.pth are used. Entries whose name ends in
 // "self_attn.rotary_emb.inv_freq" and rank-0 tensors are skipped. Rank-1
 // tensors are emitted as kind 0 (float32, 4 bytes per element) and rank-2
 // tensors as kind 1 (float16, 2 bytes per element). Each tensor's Offset is
 // the running total of the encoded sizes of the tensors before it.
 //
 // An error is returned, rather than a panic or a silently corrupt layout,
 // when a glob pattern is malformed, a file cannot be unpickled, the pickle
 // does not have the expected dict-of-tensors layout, a tensor has a rank
 // above 2, or a tensor name has no GGUF equivalent.
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")
 	var files []string
 	for _, pattern := range []string{"consolidated*.pth", "pytorch_model*.pth"} {
 		matches, err := filepath.Glob(filepath.Join(dirpath, pattern))
 		if err != nil {
 			return nil, err
 		}
 		if len(matches) > 0 {
 			// the first pattern that matches anything wins
 			files = matches
 			break
 		}
 	}
 	var offset uint64
 	var tensors []llm.Tensor
 	for _, fn := range files {
 		m, err := pytorch.Load(fn)
 		if err != nil {
 			slog.Error(fmt.Sprintf("error unpickling: %q", err))
 			return nil, err
 		}
 		dict, ok := m.(*types.Dict)
 		if !ok {
 			return nil, fmt.Errorf("unexpected checkpoint contents in '%s': %T", fn, m)
 		}
 		for _, k := range dict.Keys() {
 			name, ok := k.(string)
 			if !ok {
 				return nil, fmt.Errorf("unexpected tensor key type %T in '%s'", k, fn)
 			}
 			if strings.HasSuffix(name, "self_attn.rotary_emb.inv_freq") {
 				continue
 			}
 			entry, _ := dict.Get(k)
 			pt, ok := entry.(*pytorch.Tensor)
 			if !ok {
 				return nil, fmt.Errorf("entry '%s' in '%s' is not a tensor: %T", name, fn, entry)
 			}
 			tshape := pt.Size
 			var size uint64
 			var kind uint32
 			switch len(tshape) {
 			case 0:
 				continue
 			case 1:
 				// convert to float32
 				kind = 0
 				size = uint64(tshape[0] * 4)
 			case 2:
 				// convert to float16
 				kind = 1
 				size = uint64(tshape[0] * tshape[1] * 2)
 			default:
 				// A higher rank would otherwise be recorded with size 0,
 				// leaving every later offset wrong.
 				return nil, fmt.Errorf("unsupported tensor rank %d for '%s'", len(tshape), name)
 			}
 			ggufName, err := tf.GetLayerName(name)
 			if err != nil {
 				slog.Error(err.Error())
 				return nil, err
 			}
 			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", name, ggufName, size, tshape))
 			// the shape always has four entries; unused trailing dimensions stay 0
 			shape := []uint64{0, 0, 0, 0}
 			for i := range tshape {
 				shape[i] = uint64(tshape[i])
 			}
 			tensor := llm.Tensor{
 				Name:   ggufName,
 				Kind:   kind,
 				Offset: offset, // calculate the offset
 				Shape:  shape[:],
 			}
 			tensor.WriterTo = torchWriterTo{
 				t:       &tensor,
 				params:  params,
 				bo:      params.ByteOrder,
 				storage: pt.Source,
 			}
 			tensors = append(tensors, tensor)
 			offset += size
 		}
 	}
 	return tensors, nil
 }
 // getAltParams reads model hyperparameters from params.json in dirpath and
 // maps them onto a Params with the single architecture "LlamaForCausalLM"
 // and little-endian byte order.
 //
 // params.json carries no context size, so one is chosen from the other
 // values: 16384 when rope_theta is 1000000 (Codellama), 4096 when norm_eps
 // is 1e-06 (llama2), and 2048 otherwise.
 func getAltParams(dirpath string) (*Params, error) {
 	file, err := os.Open(filepath.Join(dirpath, "params.json"))
 	if err != nil {
 		slog.Error("no params.json")
 		return nil, err
 	}
 	defer file.Close()
 	var raw struct {
 		HiddenSize     int     `json:"dim"`
 		AttentionHeads int     `json:"n_heads"`
 		KeyValHeads    int     `json:"n_kv_heads"`
 		HiddenLayers   int     `json:"n_layers"`
 		RopeTheta      float64 `json:"rope_theta"`
 		NormEPS        float64 `json:"norm_eps"`
 	}
 	if err := json.NewDecoder(file).Decode(&raw); err != nil {
 		return nil, err
 	}
 	params := &Params{
 		Architectures:  []string{"LlamaForCausalLM"},
 		HiddenSize:     raw.HiddenSize,
 		AttentionHeads: raw.AttentionHeads,
 		KeyValHeads:    raw.KeyValHeads,
 		HiddenLayers:   raw.HiddenLayers,
 		NormEPS:        raw.NormEPS,
 		ByteOrder:      binary.LittleEndian,
 	}
 	if raw.RopeTheta == 1000000 {
 		// Codellama
 		params.ContextSize = 16384
 	} else if raw.NormEPS == 1e-06 {
 		// llama2
 		slog.Debug("Found llama2 - setting context size to 4096")
 		params.ContextSize = 4096
 	} else {
 		params.ContextSize = 2048
 	}
 	return params, nil
 }
 // GetParams decodes the model parameters from config.json in dirpath and
 // sets the byte order to little-endian. When config.json does not exist it
 // falls back to params.json via getAltParams; any other open error, or a
 // decode error, is returned.
 func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
 	f, err := os.Open(filepath.Join(dirpath, "config.json"))
 	if err != nil {
 		if os.IsNotExist(err) {
 			// try params.json instead
 			return getAltParams(dirpath)
 		}
 		return nil, err
 	}
 	// release the file handle on every return path
 	defer f.Close()
 	var params Params
 	if err := json.NewDecoder(f).Decode(&params); err != nil {
 		return nil, err
 	}
 	params.ByteOrder = binary.LittleEndian
 	return &params, nil
 }
 // torchDirectLayerNames maps checkpoint tensor names that have a fixed GGUF
 // equivalent (no layer index involved).
 var torchDirectLayerNames = map[string]string{
 	"tok_embeddings.weight":     "token_embd.weight",
 	"output.weight":             "output.weight",
 	"norm.weight":               "output_norm.weight",
 	"rope.freqs":                "rope_freqs.weight",
 	"model.embed_tokens.weight": "token_embd.weight",
 	"lm_head.weight":            "output.weight",
 	"model.norm.weight":         "output_norm.weight",
 }
 // torchLayerRules rewrites per-layer tensor names; $1 is the layer index.
 // The patterns are compiled once, anchored to the whole name, and have
 // their dots escaped, so only exact names are rewritten.
 var torchLayerRules = []struct {
 	re   *regexp.Regexp
 	repl string
 }{
 	{regexp.MustCompile(`^layers\.(\d+)\.attention_norm\.weight$`), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.attention_output_norm\.weight$`), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.feed_forward\.w2\.weight$`), "blk.$1.ffn_down.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.feed_forward\.w1\.weight$`), "blk.$1.ffn_gate.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.feed_forward\.w3\.weight$`), "blk.$1.ffn_up.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.ffn_norm\.weight$`), "blk.$1.ffn_norm.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.attention\.wk\.weight$`), "blk.$1.attn_k.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.attention\.wo\.weight$`), "blk.$1.attn_output.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.attention\.wq\.weight$`), "blk.$1.attn_q.weight"},
 	{regexp.MustCompile(`^layers\.(\d+)\.attention\.wv\.weight$`), "blk.$1.attn_v.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.input_layernorm\.weight$`), "blk.$1.attn_norm.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.mlp\.down_proj\.weight$`), "blk.$1.ffn_down.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.mlp\.gate_proj\.weight$`), "blk.$1.ffn_gate.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.mlp\.up_proj\.weight$`), "blk.$1.ffn_up.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.post_attention_layernorm\.weight$`), "blk.$1.ffn_norm.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.self_attn\.k_proj\.weight$`), "blk.$1.attn_k.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.self_attn\.o_proj\.weight$`), "blk.$1.attn_output.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.self_attn\.q_proj\.weight$`), "blk.$1.attn_q.weight"},
 	{regexp.MustCompile(`^model\.layers\.(\d+)\.self_attn\.v_proj\.weight$`), "blk.$1.attn_v.weight"},
 }
 // GetLayerName translates a checkpoint tensor name n into its GGUF name.
 // Fixed names are looked up directly; per-layer names are matched against
 // torchLayerRules and keep their layer index. An error is returned when n
 // matches neither table.
 func (m *TorchFormat) GetLayerName(n string) (string, error) {
 	if v, ok := torchDirectLayerNames[n]; ok {
 		return v, nil
 	}
 	for _, rule := range torchLayerRules {
 		if rule.re.MatchString(n) {
 			return rule.re.ReplaceAllString(n, rule.repl), nil
 		}
 	}
 	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
 }
 // WriteTo encodes the tensor's data to w in byte order r.bo and returns the
 // number of bytes written, as io.WriterTo requires.
 //
 // The data is taken from the storage as float32 values; FloatStorage,
 // HalfStorage and BFloat16Storage are accepted and any other storage type
 // is an error. If a repacker is set it is applied first. Kind 0 writes the
 // values as float32; kind 1 converts each value to float16 and writes the
 // 16-bit patterns; any other kind is an error.
 //
 // On failure n is 0: binary.Write does not report how much of the payload
 // reached w before the error.
 func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
 	var f32s []float32
 	switch s := r.storage.(type) {
 	case *pytorch.FloatStorage:
 		f32s = s.Data
 	case *pytorch.HalfStorage:
 		f32s = s.Data
 	case *pytorch.BFloat16Storage:
 		f32s = s.Data
 	default:
 		return 0, fmt.Errorf("unknown data type: %T", s)
 	}
 	if r.repacker != nil {
 		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
 		if err != nil {
 			return 0, err
 		}
 	}
 	switch r.t.Kind {
 	case 0:
 		if err := binary.Write(w, r.bo, f32s); err != nil {
 			return 0, err
 		}
 		// 4 bytes per float32
 		return int64(len(f32s)) * 4, nil
 	case 1:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
 		}
 		if err := binary.Write(w, r.bo, f16s); err != nil {
 			return 0, err
 		}
 		// 2 bytes per float16
 		return int64(len(f16s)) * 2, nil
 	default:
 		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
 	}
 }
 // GetModelArch returns the model implementation for the architecture named
 // in params. Exactly one architecture must be listed and it must be
 // "LlamaForCausalLM"; the returned LlamaModel records name, dirPath, params
 // and this format. An empty list, an unsupported architecture, or more than
 // one entry each produce an error.
 func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
 	archs := params.Architectures
 	if len(archs) == 0 {
 		return nil, fmt.Errorf("No architecture specified to convert")
 	}
 	if len(archs) != 1 {
 		return nil, fmt.Errorf("Unknown error")
 	}
 	if arch := archs[0]; arch != "LlamaForCausalLM" {
 		return nil, fmt.Errorf("Models based on '%s' are not yet supported", arch)
 	}
 	data := ModelData{
 		Name:   name,
 		Path:   dirPath,
 		Params: params,
 		Format: m,
 	}
 	return &LlamaModel{data}, nil
 }
--- a/docs/api.md
+++ b/docs/api.md
@@ -12,7 +12,6 @@
 - [Pull a Model](#pull-a-model)
 - [Push a Model](#push-a-model)
 - [Generate Embeddings](#generate-embeddings)
 - [List Running Models](#list-running-models)
 ## Conventions
@@ -26,7 +25,7 @@ All durations are returned in nanoseconds.
 ### Streaming responses
-Certain endpoints stream responses as JSON objects. Streaming can be disabled by providing `{"stream": false}` for these endpoints.
+Certain endpoints stream responses as JSON objects and can optionally return non-streamed responses.
 ## Generate a completion
@@ -40,7 +39,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 - `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
 Advanced parameters (optional):
@@ -58,8 +56,7 @@ Advanced parameters (optional):
 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
-> [!IMPORTANT]
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 > It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 ### Examples
@@ -150,44 +147,8 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 #### Request (with suffix)
 ##### Request
 ```shell
 curl http://localhost:11434/api/generate -d '{
  "model": "codellama:code",
  "prompt": "def compute_gcd(a, b):",
  "suffix": "    return result",
  "options": {
    "temperature": 0
  },
  "stream": false
 }'
 ```
 ##### Response
 ```json
 {
  "model": "codellama:code",
  "created_at": "2024-07-22T20:47:51.147561Z",
  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
  "done": true,
  "done_reason": "stop",
  "context": [...],
  "total_duration": 1162761250,
  "load_duration": 6683708,
  "prompt_eval_count": 17,
  "prompt_eval_duration": 201222000,
  "eval_count": 63,
  "eval_duration": 953997000
 }
 ```
 #### Request (JSON mode)
 > [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
 ##### Request
@@ -288,7 +249,7 @@ curl http://localhost:11434/api/generate -d '{
 #### Request (Reproducible outputs)
-For reproducible outputs, set `seed` to a number:
+For reproducible outputs, set `temperature` to 0 and `seed` to a number:
 ##### Request
@@ -297,7 +258,8 @@ curl http://localhost:11434/api/generate -d '{
  "model": "mistral",
  "prompt": "Why is the sky blue?",
  "options": {
-    "seed": 123
+    "seed": 123,
    "temperature": 0
  }
 }'
 ```
@@ -336,7 +298,6 @@ curl http://localhost:11434/api/generate -d '{
    "num_predict": 100,
    "top_k": 20,
    "top_p": 0.9,
    "min_p": 0.0,
    "tfs_z": 0.5,
    "typical_p": 0.7,
    "repeat_last_n": 33,
@@ -419,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
 - `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
 The `message` object has the following fields:
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
 - `tool_calls` (optional): a list of tools the model wants to use
 Advanced parameters (optional):
@@ -587,7 +546,7 @@ Final response:
 ##### Request
-Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64.
+Send a chat message with a conversation history.
 ```shell
 curl http://localhost:11434/api/chat -d '{
@@ -663,79 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```
 #### Chat request (with tools)
 ##### Request
 ```
 curl http://localhost:11434/api/chat -d '{
  "model": "llama3.1",
  "messages": [
    {
      "role": "user",
      "content": "What is the weather today in Paris?"
    }
  ],
  "stream": false,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a location",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The location to get the weather for, e.g. San Francisco, CA"
            },
            "format": {
              "type": "string",
              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
              "enum": ["celsius", "fahrenheit"]
            }
          },
          "required": ["location", "format"]
        }
      }
    }
  ]
 }'
 ```
 ##### Response
 ```json
 {
  "model": "llama3.1",
  "created_at": "2024-07-22T20:33:28.123648Z",
  "message": {
    "role": "assistant",
    "content": "",
    "tool_calls": [
      {
        "function": {
          "name": "get_current_weather",
          "arguments": {
            "format": "celsius",
            "location": "Paris, FR"
          }
        }
      }
    ]
  },
  "done_reason": "stop",
  "done": true,
  "total_duration": 885095291,
  "load_duration": 3753500,
  "prompt_eval_count": 122,
  "prompt_eval_duration": 328493000,
  "eval_count": 33,
  "eval_duration": 552222000
 }
 ```
 ## Create a Model
 ```shell
@@ -891,12 +777,11 @@ A single JSON object will be returned.
 POST /api/show
 ```
-Show information about a model including details, modelfile, template, parameters, license, system prompt.
+Show information about a model including details, modelfile, template, parameters, license, and system prompt.
 ### Parameters
 - `name`: name of the model to show
 - `verbose`: (optional) if set to `true`, returns full data for verbose response fields
 ### Examples
@@ -913,40 +798,14 @@ curl http://localhost:11434/api/show -d '{
 ```json
 {
  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
-  "parameters": "num_keep                       24\nstop                           \"<|start_header_id|>\"\nstop                           \"<|end_header_id|>\"\nstop                           \"<|eot_id|>\"",
+  "parameters": "num_ctx                        4096\nstop                           \u003c/s\u003e\nstop                           USER:\nstop                           ASSISTANT:",
-  "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
+  "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: ",
  "details": {
    "parent_model": "",
    "format": "gguf",
    "family": "llama",
-    "families": [
+    "families": ["llama", "clip"],
-      "llama"
+    "parameter_size": "7B",
    ],
    "parameter_size": "8.0B",
    "quantization_level": "Q4_0"
  },
  "model_info": {
    "general.architecture": "llama",
    "general.file_type": 2,
    "general.parameter_count": 8030261248,
    "general.quantization_version": 2,
    "llama.attention.head_count": 32,
    "llama.attention.head_count_kv": 8,
    "llama.attention.layer_norm_rms_epsilon": 0.00001,
    "llama.block_count": 32,
    "llama.context_length": 8192,
    "llama.embedding_length": 4096,
    "llama.feed_forward_length": 14336,
    "llama.rope.dimension_count": 128,
    "llama.rope.freq_base": 500000,
    "llama.vocab_size": 128256,
    "tokenizer.ggml.bos_token_id": 128000,
    "tokenizer.ggml.eos_token_id": 128009,
    "tokenizer.ggml.merges": [],            // populates if `verbose=true`
    "tokenizer.ggml.model": "gpt2",
    "tokenizer.ggml.pre": "llama-bpe",
    "tokenizer.ggml.token_type": [],        // populates if `verbose=true`
    "tokenizer.ggml.tokens": []             // populates if `verbose=true`
  }
 }
 ```
@@ -1139,121 +998,6 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings
 ```shell
 POST /api/embed
 ```
 Generate embeddings from a model
 ### Parameters
 - `model`: name of model to generate embeddings from
 - `input`: text or list of text to generate embeddings for
 Advanced parameters:
 - `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 ### Examples
 #### Request
 ```shell
 curl http://localhost:11434/api/embed -d '{
  "model": "all-minilm",
  "input": "Why is the sky blue?"
 }'
 ```
 #### Response
 ```json
 {
  "model": "all-minilm",
  "embeddings": [[
    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
  ]],
  "total_duration": 14143917,
  "load_duration": 1019500,
  "prompt_eval_count": 8
 }
 ```
 #### Request (Multiple input)
 ```shell
 curl http://localhost:11434/api/embed -d '{
  "model": "all-minilm",
  "input": ["Why is the sky blue?", "Why is the grass green?"]
 }'
 ```
 #### Response
 ```json
 {
  "model": "all-minilm",
  "embeddings": [[
    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
  ],[
    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
  ]]
 }
 ```
 ## List Running Models
 ```shell
 GET /api/ps
 ```
 List models that are currently loaded into memory.
 #### Examples
 ### Request
 ```shell
 curl http://localhost:11434/api/ps
 ```
 #### Response
 A single JSON object will be returned.
 ```json
 {
  "models": [
    {
      "name": "mistral:latest",
      "model": "mistral:latest",
      "size": 5137025024,
      "digest": "2ae6f6dd7a3dd734790bbbf58b8909a606e0e7e97e94b7604e0aa7ae4490e6d8",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "7.2B",
        "quantization_level": "Q4_0"
      },
      "expires_at": "2024-06-04T14:38:31.83753-07:00",
      "size_vram": 5137025024
    }
  ]
 }
 ```
 ## Generate Embedding
 > Note: this endpoint has been superseded by `/api/embed`
 ```shell
 POST /api/embeddings
 ```
--- a/docs/development.md
+++ b/docs/development.md
@@ -104,7 +104,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./...
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
 go build .
 ```
@@ -114,18 +114,15 @@ If you have Docker available, you can build linux binaries with `./scripts/build
 ### Windows
-Note: The Windows build for Ollama is still under development.
+Note: The windows build for Ollama is still under development.
-First, install required tools:
+Install required tools:
 - MSVC toolchain - C/C++ and cmake as minimal requirements
 - Go version 1.22 or higher
 - MinGW (pick one variant) with GCC.
  - [MinGW-w64](https://www.mingw-w64.org/)
  - [MSYS2](https://www.msys2.org/)
 - The `ThreadJob` Powershell module: `Install-Module -Name ThreadJob -Scope CurrentUser`
 Then, build the `ollama` binary:
 ```powershell
 $env:CGO_ENABLED="1"
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114
 Now you can run a model:
 ```
-docker exec -it ollama ollama run llama3.1
+docker exec -it ollama ollama run llama3
 ```
 ### Try different models
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -111,10 +111,7 @@ On Windows, Ollama inherits your user and system environment variables.
 ## How do I use Ollama behind a proxy?
-Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
+Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` is configured. When using either variable, ensure it is set where `ollama serve` can access the value. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
 > [!NOTE]
 > Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server.
 ### How do I use Ollama behind a proxy in Docker?
@@ -230,7 +227,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
 To preload a model using the CLI, use the command:
 ```shell
-ollama run llama3.1 ""
+ollama run llama3 ""
 ```
 ## How do I keep a model loaded in memory or make it unload immediately?
@@ -260,23 +257,3 @@ If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` AP
 ## How do I manage the maximum number of requests the Ollama server can queue?
 If too many requests are sent to the server, it will respond with a 503 error indicating the server is overloaded.  You can adjust how many requests may be queued by setting `OLLAMA_MAX_QUEUE`.
 ## How does Ollama handle concurrent requests?
 Ollama supports two levels of concurrent processing.  If your system has sufficient available memory (system memory when using CPU inference, or VRAM for GPU inference) then multiple models can be loaded at the same time.  For a given model, if there is sufficient available memory when the model is loaded, it is configured to allow parallel request processing.
 If there is insufficient available memory to load a new model request while one or more models are already loaded, all new requests will be queued until the new model can be loaded.  As prior models become idle, one or more will be unloaded to make room for the new model.  Queued requests will be processed in order.  When using GPU inference new models must be able to completely fit in VRAM to allow concurrent model loads.
 Parallel request processing for a given model results in increasing the context size by the number of parallel requests.  For example, a 2K context with 4 parallel requests will result in an 8K context and additional memory allocation.
 The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms:
 - `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory.  The default is 3 * the number of GPUs or 3 for CPU inference.
 - `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time.  The default will auto-select either 4 or 1 based on available memory.
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512
 Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting.  Once ROCm v6.2 is available, Windows Radeon will follow the defaults above.  You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPU's VRAM.
 ## How does Ollama load models on multiple GPUs?
 Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models.  When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available.  If the model will entirely fit on any single GPU, Ollama will load the model on that GPU.  This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference.  If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
--- a/docs/gpu.md
+++ b/docs/gpu.md
@@ -8,7 +8,7 @@ Check your compute compatibility to see if your card is supported:
 | Compute Capability | Family              | Cards                                                                                                       |
 | ------------------ | ------------------- | ----------------------------------------------------------------------------------------------------------- |
 | 9.0                | NVIDIA              | `H100`                                                                                                      |
-| 8.9                | GeForce RTX 40xx    | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060`  |
+| 8.9                | GeForce RTX 40xx    | `RTX 4090` `RTX 4080` `RTX 4070 Ti` `RTX 4060 Ti`                                                           |
 |                    | NVIDIA Professional | `L4` `L40` `RTX 6000`                                                                                       |
 | 8.6                | GeForce RTX 30xx    | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060`         |
 |                    | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2`                          |
@@ -18,7 +18,7 @@ Check your compute compatibility to see if your card is supported:
 |                    | Quadro              | `RTX 8000` `RTX 6000` `RTX 5000` `RTX 4000`                                                                 |
 | 7.0                | NVIDIA              | `TITAN V` `V100` `Quadro GV100`                                                                             |
 | 6.1                | NVIDIA TITAN        | `TITAN Xp` `TITAN X`                                                                                        |
-|                    | GeForce GTX         | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050 Ti` `GTX 1050`                       |
+|                    | GeForce GTX         | `GTX 1080 Ti` `GTX 1080` `GTX 1070 Ti` `GTX 1070` `GTX 1060` `GTX 1050`                                     |
 |                    | Quadro              | `P6000` `P5200` `P4200` `P3200` `P5000` `P4000` `P3000` `P2200` `P2000` `P1000` `P620` `P600` `P500` `P520` |
 |                    | Tesla               | `P40` `P4`                                                                                                  |
 | 6.0                | NVIDIA              | `Tesla P100` `Quadro GP100`                                                                                 |
@@ -46,24 +46,13 @@ sudo modprobe nvidia_uvm`
 ## AMD Radeon
 Ollama supports the following AMD GPUs:
 ### Linux Support
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800` `Vega 64` `Vega 56`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
 | AMD Instinct   | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50`                                                               |
-### Windows Support
+### Overrides
 With ROCm v6.1, the following GPUs are supported on Windows.
 | Family         | Cards and accelerators                                                                                                               |
 | -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
 | AMD Radeon RX  | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900XT` `6800 XT` `6800`    |
 | AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |
 ### Overrides on Linux
 Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
 some cases you can force the system to try to use a similar LLVM target that is
 close.  For example, the Radeon RX 5400 is `gfx1034` (also known as 10.3.4)
@@ -74,7 +63,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the
 server.  If you have an unsupported AMD GPU you can experiment using the list of
 supported types below.
-At this time, the known supported GPU types on linux are the following LLVM Targets.
+At this time, the known supported GPU types are the following LLVM Targets.
 This table shows some example GPUs that map to these LLVM targets:
 | **LLVM Target** | **An Example GPU** |
 |-----------------|---------------------|
--- a/docs/images/ollama-keys.png
+++ b/docs/images/ollama-keys.png
--- a/docs/images/signup.png
+++ b/docs/images/signup.png
--- a/docs/import.md
+++ b/docs/import.md
@@ -1,186 +1,170 @@
-# Importing a model
+# Import a model
-## Table of Contents
+This guide walks through importing a GGUF, PyTorch or Safetensors model.
-  * [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
+## Importing (GGUF)
  * [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
  * [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
  * [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
-## Importing a fine tuned adapter from Safetensors weights
+### Step 1: Write a `Modelfile`
-First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
+Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
-```dockerfile
+```
-FROM <base model name>
+FROM ./mistral-7b-v0.1.Q4_0.gguf
 ADAPTER /path/to/safetensors/adapter/directory
 ```
-Make sure that you use the same base model in the `FROM` command as you used to create the adapter otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
+(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
-Now run `ollama create` from the directory where the `Modelfile` was created:
+```
-
+FROM ./mistral-7b-v0.1.Q4_0.gguf
-```bash
+TEMPLATE "[INST] {{ .Prompt }} [/INST]"
 ollama create my-model
 ```
-Lastly, test the model:
+### Step 2: Create the Ollama model
-```bash
+Finally, create a model from your `Modelfile`:
-ollama run my-model
+
 ```
 ollama create example -f Modelfile
 ```
-Ollama supports importing adapters based on several different model architectures including:
+### Step 3: Run your model
-  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+Next, test the model with `ollama run`:
  * Mistral (including Mistral 1, Mistral 2, and Mixtral); and
  * Gemma (including Gemma 1 and Gemma 2)
-You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
+```
-
+ollama run example "What is your favourite condiment?"
   * Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
  * [Unsloth](https://github.com/unslothai/unsloth)
  * [MLX](https://github.com/ml-explore/mlx)
 ## Importing a model from Safetensors weights
 First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
 ```dockerfile
 FROM /path/to/safetensors/directory
 ```
-If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
+## Importing (PyTorch & Safetensors)
-Now run the `ollama create` command from the directory where you created the `Modelfile`:
+> Importing from PyTorch and Safetensors is a longer process than importing from GGUF. Improvements that make it easier are a work in progress.
 ### Setup
 First, clone the `ollama/ollama` repo:
 ```
 git clone git@github.com:ollama/ollama.git ollama
 cd ollama
 ```
 and then fetch its `llama.cpp` submodule:
 ```shell
-ollama create my-model
+git submodule init
 git submodule update llm/llama.cpp
 ```
-Lastly, test the model:
+Next, install the Python dependencies:
-```shell
+```
-ollama run my-model
+python3 -m venv llm/llama.cpp/.venv
 source llm/llama.cpp/.venv/bin/activate
 pip install -r llm/llama.cpp/requirements.txt
 ```
-Ollama supports importing models for several different architectures including:
+Then build the `quantize` tool:
-  * Llama (including Llama 2, Llama 3, and Llama 3.1);
+```
-  * Mistral (including Mistral 1, Mistral 2, and Mixtral);
+make -C llm/llama.cpp quantize
  * Gemma (including Gemma 1 and Gemma 2); and
  * Phi3
 This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.
 ## Importing a GGUF based model or adapter
 If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
  * converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp; 
  * converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
  * downloading a model or adapter from a place such as HuggingFace
 To import a GGUF model, create a `Modelfile` containing:
 ```dockerfile
 FROM /path/to/file.gguf
 ```
-For a GGUF adapter, create the `Modelfile` with:
+### Clone the HuggingFace repository (optional)
-```dockerfile
+If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
-FROM <model name>
+
-ADAPTER /path/to/file.gguf
+Install [Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage), verify it's installed, and then clone the model's repository:
 ```
 git lfs install
 git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model
 ```
-When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
+### Convert the model
- * a model from Ollama
+> Note: some model architectures require using specific convert scripts. For example, Qwen models require running `convert-hf-to-gguf.py` instead of `convert.py`
 * a GGUF file
 * a Safetensors based model 
-Once you have created your `Modelfile`, use the `ollama create` command to build the model.
+```
-
+python llm/llama.cpp/convert.py ./model --outtype f16 --outfile converted.bin
 ```shell
 ollama create my-model
 ```
-## Quantizing a Model
+### Quantize the model
-Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
+```
-
+llm/llama.cpp/quantize converted.bin quantized.bin q4_0
 Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
 First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.
 ```dockerfile
 FROM /path/to/my/gemma/f16/model
 ```
-Use `ollama create` to then create the quantized model.
+### Step 3: Write a `Modelfile`
-```shell
+Next, create a `Modelfile` for your model:
-$ ollama create --quantize q4_K_M mymodel
+
-transferring model data
+```
-quantizing F16 model to Q4_K_M
+FROM quantized.bin
-creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
+TEMPLATE "[INST] {{ .Prompt }} [/INST]"
 creating new layer sha256:0853f0ad24e5865173bbf9ffcc7b0f5d56b66fd690ab1009867e45e7d2c4db0f
 writing manifest
 success
 ```
-### Supported Quantizations
+### Step 4: Create the Ollama model
- `q4_0`
+Finally, create a model from your `Modelfile`:
 - `q4_1`
 - `q5_0`
 - `q5_1`
 - `q8_0`
-#### K-means Quantizations
+```
 ollama create example -f Modelfile
 ```
 ### Step 5: Run your model
 Next, test the model with `ollama run`:
 ```
 ollama run example "What is your favourite condiment?"
 ```
 ## Publishing your model (optional – early alpha)
 Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
 1. Create [an account](https://ollama.com/signup)
 2. Copy your Ollama public key:
  - macOS: `cat ~/.ollama/id_ed25519.pub | pbcopy`
  - Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
  - Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
 3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
 Next, copy your model to your username's namespace:
 ```
 ollama cp example <your username>/example
 ```
 > Note: model names may only contain lowercase letters, digits, and the characters `.`, `-`, and `_`.
 Then push the model:
 ```
 ollama push <your username>/example
 ```
 After publishing, your model will be available at `https://ollama.com/<your username>/example`.
 ## Quantization reference
 The quantization options are as follows (from highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants.
 - `q2_K`
 - `q3_K`
 - `q3_K_S`
 - `q3_K_M`
 - `q3_K_L`
 - `q4_0` (recommended)
 - `q4_1`
 - `q4_K`
 - `q4_K_S`
 - `q4_K_M`
 - `q5_0`
 - `q5_1`
 - `q5_K`
 - `q5_K_S`
 - `q5_K_M`
 - `q6_K`
-
+- `q8_0`
-
+- `f16`
 ## Sharing your model on ollama.com
 You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
 First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
 ![Sign-Up](images/signup.png)
 The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
 Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
 Follow the directions on the page to determine where your Ollama Public Key is located.
 ![Ollama Key](images/ollama-keys.png)
 Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
 To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
 your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).
 ```shell
 ollama cp mymodel myuser/mymodel
 ollama push myuser/mymodel
 ```
 Once your model has been pushed, other users can pull and run it by using the command:
 ```shell
 ollama run myuser/mymodel
 ```
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -20,12 +20,13 @@ GPU.
 ## Manual install
-### Download `ollama`
+### Download the `ollama` binary
-Download and extract the Linux package:
+Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
 sudo chmod +x /usr/bin/ollama
 ```
 ### Adding Ollama as a startup service (recommended)
@@ -95,17 +96,8 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by downloading the ollama binary:
 ```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
-```
+sudo chmod +x /usr/bin/ollama
 ## Installing specific versions
 Use `OLLAMA_VERSION` environment variable with the install script to install a specific version of Ollama, including pre-releases. You can find the version numbers in the [releases page](https://github.com/ollama/ollama/releases). 
 For example:
 ```
 curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
 ```
 ## Viewing logs
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -1,7 +1,6 @@
 # Ollama Model File
-> [!NOTE]
+> Note: `Modelfile` syntax is in development
 > `Modelfile` syntax is in development
 A model file is the blueprint to create and share models with Ollama.
@@ -141,7 +140,6 @@ PARAMETER <parameter> <parametervalue>
 | num_predict    | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)                                                                                                                                   | int        | num_predict 42       |
 | top_k          | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)                                                                        | int        | top_k 40             |
 | top_p          | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)                                                                 | float      | top_p 0.9            |
 | min_p          | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float      | min_p 0.05            |
 ### TEMPLATE
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -27,37 +27,6 @@ chat_completion = client.chat.completions.create(
    ],
    model='llama3',
 )
 response = client.chat.completions.create(
    model="llava",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtW
QIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mz
dvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
                },
            ],
        }
    ],
    max_tokens=300,
 )
 completion = client.completions.create(
    model="llama3",
    prompt="Say this is a test",
 )
 list_completion = client.models.list()
 model = client.models.retrieve("llama3")
 embeddings = client.embeddings.create(
    model="all-minilm",
    input=["why is the sky blue?", "why is the grass green?"],
 )
 ```
 ### OpenAI JavaScript library
@@ -76,41 +45,11 @@ const chatCompletion = await openai.chat.completions.create({
  messages: [{ role: 'user', content: 'Say this is a test' }],
  model: 'llama3',
 })
 const response = await openai.chat.completions.create({
    model: "llava",
    messages: [
        {
        role: "user",
        content: [
            { type: "text", text: "What's in this image?" },
            {
            type: "image_url",
            image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wN
tLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LO
NA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
            },
        ],
        },
    ],
 })
 const completion = await openai.completions.create({
    model: "llama3",
    prompt: "Say this is a test.",
 })
 const listCompletion = await openai.models.list()
 const model = await openai.models.retrieve("llama3")
 const embedding = await openai.embeddings.create({
  model: "all-minilm",
  input: ["why is the sky blue?", "why is the grass green?"],
 })
 ```
 ### `curl`
-``` shell
+```
 curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
@@ -126,48 +65,6 @@ curl http://localhost:11434/v1/chat/completions \
            }
        ]
    }'
 curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llava",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "What'\''s in this image?"
          },
          {
            "type": "image_url",
            "image_url": {
               "url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNt
LSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LON
A8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
            }
          }
        ]
      }
    ],
    "max_tokens": 300
  }'
 curl http://localhost:11434/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "llama3",
        "prompt": "Say this is a test"
    }'
 curl http://localhost:11434/v1/models
 curl http://localhost:11434/v1/models/llama3
 curl http://localhost:11434/v1/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "all-minilm",
        "input": ["why is the sky blue?", "why is the grass green?"]
    }'
 ```
 ## Endpoints
@@ -180,8 +77,8 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
- [x] Vision
+- [ ] Vision
- [x] Tools (streaming support coming soon)
+- [ ] Function calling
 - [ ] Logprobs
 #### Supported request fields
@@ -189,10 +86,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `model`
 - [x] `messages`
  - [x] Text `content`
-  - [x] Image `content`
+  - [ ] Array of `content` parts
    - [x] Base64 encoded image
    - [ ] Image URL
  - [x] Array of `content` parts
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `response_format`
@@ -202,72 +96,17 @@ curl http://localhost:11434/v1/embeddings \
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
- [x] `tools`
+- [ ] `logit_bias`
 - [ ] `tools`
 - [ ] `tool_choice`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 ### `/v1/completions`
 #### Supported features
 - [x] Completions
 - [x] Streaming
 - [x] JSON mode
 - [x] Reproducible outputs
 - [ ] Logprobs
 #### Supported request fields
 - [x] `model`
 - [x] `prompt`
 - [x] `frequency_penalty`
 - [x] `presence_penalty`
 - [x] `seed`
 - [x] `stop`
 - [x] `stream`
 - [x] `temperature`
 - [x] `top_p`
 - [x] `max_tokens`
 - [x] `suffix`
 - [ ] `best_of`
 - [ ] `echo`
 - [ ] `logit_bias`
 - [ ] `user`
 - [ ] `n`
 #### Notes
- `prompt` currently only accepts a string
+- Setting `seed` will always set `temperature` to `0`
-
+- `finish_reason` will always be `stop`
-### `/v1/models`
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/models/{model}`
 #### Notes
 - `created` corresponds to when the model was last modified
 - `owned_by` corresponds to the ollama username, defaulting to `"library"`
 ### `/v1/embeddings`
 #### Supported request fields
 - [x] `model`
 - [x] `input`
  - [x] string
  - [x] array of strings
  - [ ] array of tokens
  - [ ] array of token arrays
 - [ ] `encoding format`
 - [ ] `dimensions`
 - [ ] `user`
 ## Models
--- a/docs/template.md
+++ b/docs/template.md
@@ -1,167 +0,0 @@
 # Template
 Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
 ## Basic Template Structure
 A basic Go template consists of three main parts:
 * **Layout**: The overall structure of the template.
 * **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
 * **Functions**: Custom functions or logic that can be used to manipulate the template's content.
 Here's an example of a simple chat template:
 ```gotmpl
 {{- range .Messages }}
 {{ .Role }}: {{ .Content }}
 {{- end }}
 ```
 In this example, we have:
 * A basic messages structure (layout)
 * Three variables: `Messages`, `Role`, and `Content` (variables)
 * A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
 ## Adding templates to your model
 By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
 Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
 To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
 ```dockerfile
 FROM llama3
 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
 {{ .System }}<|eot_id|>
 {{- end }}
 {{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
 {{ .Content }}<|eot_id|>
 {{- end }}<|start_header_id|>assistant<|end_header_id|>
 """
 ```
 ## Variables
 `System` (string): system prompt
 `Prompt` (string): user prompt
 `Response` (string): assistant response
 `Suffix` (string): text inserted after the assistant's response
 `Messages` (list): list of messages
 `Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
 `Messages[].Content` (string):  message content
 `Messages[].ToolCalls` (list): list of tools the model wants to call
 `Messages[].ToolCalls[].Function` (object): function to call
 `Messages[].ToolCalls[].Function.Name` (string): function name
 `Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
 `Tools` (list): list of tools the model can access
 `Tools[].Type` (string): schema type. `type` is always `function`
 `Tools[].Function` (object): function definition
 `Tools[].Function.Name` (string): function name
 `Tools[].Function.Description` (string): function description
 `Tools[].Function.Parameters` (object): function parameters
 `Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
 `Tools[].Function.Parameters.Required` (list): list of required properties
 `Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
 `Tools[].Function.Parameters.Properties[].Type` (string): property type
 `Tools[].Function.Parameters.Properties[].Description` (string): property description
 `Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
 ## Tips and Best Practices
 Keep the following tips and best practices in mind when working with Go templates:
 * **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
 * **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
 * **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
 ## Examples
 ### Example Messages
 #### ChatML
 ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
 ```gotmpl
 {{- range .Messages }}<|im_start|>{{ .Role }}
 {{ .Content }}<|im_end|>
 {{ end }}<|im_start|>assistant
 ```
 ### Example Tools
 Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.
 #### Mistral
 Mistral v0.3 and Mixtral 8x22B support tool calling.
 ```gotmpl
 {{- range $index, $_ := .Messages }}
 {{- if eq .Role "user" }}
 {{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
 {{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
 {{ end }}{{ .Content }}[/INST]
 {{- else if eq .Role "assistant" }}
 {{- if .Content }} {{ .Content }}</s>
 {{- else if .ToolCalls }}[TOOL_CALLS] [
 {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
 {{- end }}]</s>
 {{- end }}
 {{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
 {{- end }}
 {{- end }}
 ```
 ### Example Fill-in-Middle
 Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
 #### CodeLlama
 CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
 ```gotmpl
 <PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
 ```
 > [!NOTE]
 > CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
 #### Codestral
 Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
 ```gotmpl
 [SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
 ```
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
 On **Linux** systems with systemd, the logs can be found with this command:
 ```shell
-journalctl -u ollama --no-pager
+journalctl -u ollama
 ```
 When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
@@ -22,7 +22,7 @@ docker logs <container-name>
 If manually running `ollama serve` in a terminal, the logs will be on that terminal.
 When you run Ollama on **Windows**, there are a few different locations. You can view them in the explorer window by hitting `<cmd>+R` and typing in:
- `explorer %LOCALAPPDATA%\Ollama` to view logs.  The most recent server logs will be in `server.log` and older logs will be in `server-#.log` 
+- `explorer %LOCALAPPDATA%\Ollama` to view logs
 - `explorer %LOCALAPPDATA%\Programs\Ollama` to browse the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` to browse where models and configuration are stored
 - `explorer %TEMP%` where temporary executable files are stored in one or more `ollama*` directories
@@ -70,18 +70,13 @@ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
 If your system is configured with the "noexec" flag where Ollama stores its temporary executable files, you can specify an alternate location by setting OLLAMA_TMPDIR to a location writable by the user ollama runs as. For example OLLAMA_TMPDIR=/usr/share/ollama/
-## NVIDIA GPU Discovery
+## Container fails to run on NVIDIA GPU
-When Ollama starts up, it takes inventory of the GPUs present in the system to determine compatibility and how much VRAM is available.  Sometimes this discovery can fail to find your GPUs.  In general, running the latest driver will yield the best results.
+Make sure you've set up the container runtime first as described in [docker.md](./docker.md)
-### Linux NVIDIA Troubleshooting
+Sometimes the container runtime can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem
-If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker.md](./docker.md)
+- Is the uvm driver not loaded? `sudo nvidia-modprobe -u`
 Sometimes Ollama can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem:
 - If you are using a container, is the container runtime working?  Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama won't be able to see your NVIDIA GPU.
 - Is the uvm driver loaded? `sudo nvidia-modprobe -u`
 - Try reloading the nvidia_uvm driver - `sudo rmmod nvidia_uvm` then `sudo modprobe nvidia_uvm`
 - Try rebooting
 - Make sure you're running the latest nvidia drivers
@@ -89,8 +84,3 @@ Sometimes the Ollama can have difficulties initializing the GPU. When you check
 If none of those resolve the problem, gather additional information and file an issue:
 - Set `CUDA_ERROR_LEVEL=50` and try again to get more diagnostic logs
 - Check dmesg for any errors `sudo dmesg | grep -i nvrm` and `sudo dmesg | grep -i nvidia`
 ## Windows Terminal Errors
 Older versions of Windows 10 (e.g., 21H1) are known to have a bug where the standard terminal program does not display control characters correctly.  This can result in a long sequence of strings like `←[?25h←[?25l` being displayed, sometimes erroring with `The parameter is incorrect`.  To resolve this problem, please update to Win 10 22H1 or newer.
--- a/docs/tutorials/langchainjs.md
+++ b/docs/tutorials/langchainjs.md
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
 const ollama = new Ollama({
  baseUrl: "http://localhost:11434",
-  model: "llama3.1",
+  model: "llama3",
 });
 const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
 console.log(answer);
 ```
-That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
 ```bash
 npm install cheerio
--- a/docs/tutorials/langchainpy.md
+++ b/docs/tutorials/langchainpy.md
@@ -45,7 +45,7 @@ all_splits = text_splitter.split_documents(data)
 ```
 It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install chromadb`
-We also need to pull embedding model: `ollama pull nomic-embed-text`
+
 ```python
 from langchain.embeddings import OllamaEmbeddings
 from langchain.vectorstores import Chroma
@@ -68,8 +68,7 @@ The next thing is to send the question and the relevant parts of the docs to the
 ```python
 from langchain.chains import RetrievalQA
 qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
-res = qachain.invoke({"query": question})
+qachain.invoke({"query": question})
 print(res['result'])
 ```
 The answer received from this chain was:
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -19,12 +19,10 @@ Logs will often be helpful in diagnosing the problem (see
 ## System Requirements
-* Windows 10 22H2 or newer, Home or Pro
+* Windows 10 or newer, Home or Pro
 * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
 * AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
 Ollama uses unicode characters for progress indication, which may render as unknown squares in some older terminal fonts in Windows 10. If you see this, try changing your terminal font settings.
 ## API Access
 Here's a quick example showing API access from `powershell`
@@ -41,8 +39,8 @@ server.
 Ollama on Windows stores files in a few different locations.  You can view them in
 the explorer window by hitting `<cmd>+R` and type in:
 - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
-    - *app.log* contains most resent logs from the GUI application
+    - *app.log* contains logs from the GUI application
-    - *server.log* contains the most recent server logs
+    - *server.log* contains the server logs
    - *upgrade.log* contains log output for upgrades
 - `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -3,236 +3,40 @@ package envconfig
 import (
 	"fmt"
 	"log/slog"
 	"math"
 	"net"
 	"net/url"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
 )
 // Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
 // Default is scheme "http" and host "127.0.0.1:11434"
 //
 // The fallback port depends on how the scheme was written: "11434" when the
 // value carries no "://" prefix (or a scheme other than http/https), "80" for
 // an explicit "http://", and "443" for an explicit "https://". A port that
 // does not parse as an integer in [0, 65535] is replaced by that fallback and
 // a warning is logged.
 func Host() *url.URL {
 	defaultPort := "11434"
 	s := strings.TrimSpace(Var("OLLAMA_HOST"))
 	scheme, hostport, ok := strings.Cut(s, "://")
 	switch {
 	case !ok:
 		// no "://" separator: the whole value is host[:port], served over http
 		scheme, hostport = "http", s
 	case scheme == "http":
 		defaultPort = "80"
 	case scheme == "https":
 		defaultPort = "443"
 	}
 	// trim trailing slashes
 	hostport = strings.TrimRight(hostport, "/")
 	host, port, err := net.SplitHostPort(hostport)
 	if err != nil {
 		// hostport has no usable ":port" part; start from the loopback default
 		host, port = "127.0.0.1", defaultPort
 		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
 			// bare IP literal, optionally wrapped in brackets
 			host = ip.String()
 		} else if hostport != "" {
 			// anything else non-empty is used verbatim as the host
 			host = hostport
 		}
 	}
 	// reject ports that are not numeric or fall outside 0..65535
 	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
 		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
 		return &url.URL{
 			Scheme: scheme,
 			Host:   net.JoinHostPort(host, defaultPort),
 		}
 	}
 	return &url.URL{
 		Scheme: scheme,
 		Host:   net.JoinHostPort(host, port),
 	}
 }
 // Origins builds the list of allowed origins. Entries from the comma-separated
 // OLLAMA_ORIGINS environment variable come first, followed by http/https forms
 // (with and without a wildcard port) of the local addresses, and finally the
 // app://, file:// and tauri:// wildcard schemes.
 func Origins() (origins []string) {
 	configured := Var("OLLAMA_ORIGINS")
 	if configured != "" {
 		origins = strings.Split(configured, ",")
 	}
 	localHosts := []string{"localhost", "127.0.0.1", "0.0.0.0"}
 	for i := range localHosts {
 		bare := localHosts[i]
 		// same host with "*" in the port position
 		anyPort := net.JoinHostPort(bare, "*")
 		origins = append(origins, fmt.Sprintf("http://%s", bare))
 		origins = append(origins, fmt.Sprintf("https://%s", bare))
 		origins = append(origins, fmt.Sprintf("http://%s", anyPort))
 		origins = append(origins, fmt.Sprintf("https://%s", anyPort))
 	}
 	return append(origins, "app://*", "file://*", "tauri://*")
 }
 // Models reports the models directory. A non-empty OLLAMA_MODELS environment
 // variable wins; otherwise the result is $HOME/.ollama/models.
 // It panics when the user's home directory cannot be determined.
 func Models() string {
 	override := Var("OLLAMA_MODELS")
 	if override == "" {
 		homeDir, err := os.UserHomeDir()
 		if err != nil {
 			panic(err)
 		}
 		return filepath.Join(homeDir, ".ollama", "models")
 	}
 	return override
 }
 // KeepAlive reports how long models stay loaded in memory, as configured by the
 // OLLAMA_KEEP_ALIVE environment variable. The value may be a Go duration string
 // or a plain integer number of seconds; anything else leaves the default in place.
 // Negative values are treated as infinite. Zero is treated as no keep alive.
 // Default is 5 minutes.
 func KeepAlive() (keepAlive time.Duration) {
 	keepAlive = 5 * time.Minute
 	raw := Var("OLLAMA_KEEP_ALIVE")
 	if raw != "" {
 		parsed, durErr := time.ParseDuration(raw)
 		if durErr == nil {
 			keepAlive = parsed
 		} else {
 			// not a duration string: fall back to whole seconds
 			secs, intErr := strconv.ParseInt(raw, 10, 64)
 			if intErr == nil {
 				keepAlive = time.Duration(secs) * time.Second
 			}
 		}
 	}
 	if keepAlive >= 0 {
 		return keepAlive
 	}
 	// negative means "never unload"
 	return time.Duration(math.MaxInt64)
 }
 // Bool returns a getter that evaluates the environment variable k on every call.
 // An empty value yields false; a value strconv.ParseBool accepts yields its
 // parsed result; any other non-empty value yields true.
 func Bool(k string) func() bool {
 	return func() bool {
 		raw := Var(k)
 		if raw == "" {
 			return false
 		}
 		parsed, err := strconv.ParseBool(raw)
 		// a set-but-unparsable value counts as enabled
 		return err != nil || parsed
 	}
 }
 var (
-	// Debug enabled additional debug information.
+	// Set via OLLAMA_ORIGINS in the environment
-	Debug = Bool("OLLAMA_DEBUG")
+	AllowOrigins []string
-	// FlashAttention enables the experimental flash attention feature.
+	// Set via OLLAMA_DEBUG in the environment
-	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
+	Debug bool
-	// NoHistory disables readline history.
+	// Experimental flash attention
-	NoHistory = Bool("OLLAMA_NOHISTORY")
+	FlashAttention bool
-	// NoPrune disables pruning of model blobs on startup.
+	// Set via OLLAMA_KEEP_ALIVE in the environment
-	NoPrune = Bool("OLLAMA_NOPRUNE")
+	KeepAlive string
-	// SchedSpread allows scheduling models across all GPUs.
+	// Set via OLLAMA_LLM_LIBRARY in the environment
-	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
+	LLMLibrary string
-	// IntelGPU enables experimental Intel GPU detection.
+	// Set via OLLAMA_MAX_LOADED_MODELS in the environment
-	IntelGPU = Bool("OLLAMA_INTEL_GPU")
+	MaxRunners int
-)
+	// Set via OLLAMA_MAX_QUEUE in the environment
-
+	MaxQueuedRequests int
-func String(s string) func() string {
+	// Set via OLLAMA_MAX_VRAM in the environment
-	return func() string {
+	MaxVRAM uint64
-		return Var(s)
+	// Set via OLLAMA_NOHISTORY in the environment
-	}
+	NoHistory bool
-}
+	// Set via OLLAMA_NOPRUNE in the environment
-
+	NoPrune bool
-var (
+	// Set via OLLAMA_NUM_PARALLEL in the environment
-	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+	NumParallel int
-	TmpDir     = String("OLLAMA_TMPDIR")
+	// Set via OLLAMA_RUNNERS_DIR in the environment
-
+	RunnersDir string
-	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+	// Set via OLLAMA_TMPDIR in the environment
-	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+	TmpDir string
 	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
 	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 // RunnersDir returns the directory holding the llm runners.
 // A non-empty OLLAMA_RUNNERS_DIR environment variable is returned as-is.
 // Otherwise, on non-Windows systems the result is the empty string; on Windows
 // the function probes a set of locations relative to the executable and the
 // working directory for a "lib/ollama/runners" subdirectory and returns the
 // first one that exists. If nothing is found an error is logged and the empty
 // string is returned.
 func RunnersDir() (p string) {
 	// this p shadows the named result; the explicit return hands it back directly
 	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
 		return p
 	}
 	if runtime.GOOS != "windows" {
 		return
 	}
 	// registered only on the Windows path: logs whenever the search below comes up empty,
 	// including the early returns on os.Executable / os.Getwd failure
 	defer func() {
 		if p == "" {
 			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
 		}
 	}()
 	// On Windows we do not carry the payloads inside the main executable
 	exe, err := os.Executable()
 	if err != nil {
 		return
 	}
 	cwd, err := os.Getwd()
 	if err != nil {
 		return
 	}
 	// candidate roots: the executable's directory, its parent, and the working directory,
 	// each also tried with "<os>-<arch>" and "dist/<os>-<arch>" appended
 	var paths []string
 	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
 		paths = append(paths,
 			root,
 			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
 			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
 		)
 	}
 	// Try a few variations to improve developer experience when building from source in the local tree
 	for _, path := range paths {
 		candidate := filepath.Join(path, "lib", "ollama", "runners")
 		if _, err := os.Stat(candidate); err == nil {
 			// first existing candidate wins
 			p = candidate
 			break
 		}
 	}
 	return p
 }
 // Uint returns a getter that reads the environment variable key on every call
 // and parses it as a base-10 unsigned integer. An empty value yields
 // defaultValue; an unparsable value logs a warning and also yields defaultValue.
 func Uint(key string, defaultValue uint) func() uint {
 	return func() uint {
 		raw := Var(key)
 		if raw == "" {
 			return defaultValue
 		}
 		parsed, err := strconv.ParseUint(raw, 10, 64)
 		if err == nil {
 			return uint(parsed)
 		}
 		slog.Warn("invalid environment variable, using default", "key", key, "value", raw, "default", defaultValue)
 		return defaultValue
 	}
 }
 // Unsigned integer settings. Each is a getter built by Uint, so the environment
 // variable is re-read on every call.
 var (
 	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
 	// Default is 0.
 	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
 	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
 	// Default is 0.
 	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
 	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
 	// Default is 512.
 	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
 	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
 	// Default is 0.
 	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
 )
 type EnvVar struct {
@@ -242,32 +46,23 @@ type EnvVar struct {
 }
 // AsMap returns the known environment settings keyed by variable name. Each
 // entry is an EnvVar built from three positional values: the variable name,
 // its current value, and a short description (field names are declared on
 // EnvVar, outside this hunk).
 func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
+	return map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
+		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
+		"OLLAMA_HOST":              {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
+		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
+		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
+		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
+		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
+		"OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, ""},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
+		"OLLAMA_MODELS":            {"OLLAMA_MODELS", "", "The path to the models directory"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
+		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
+		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
+		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
+		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, ""},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
+		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
 	}
 	if runtime.GOOS != "darwin" {
 		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
 		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
 		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
 		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 	}
 	return ret
 }
 func Values() map[string]string {
@@ -278,7 +73,140 @@ func Values() map[string]string {
 	return vals
 }
-// Var returns an environment variable stripped of leading and trailing quotes or spaces
+var defaultAllowOrigins = []string{
-func Var(key string) string {
+	"localhost",
-	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
+	"127.0.0.1",
 	"0.0.0.0",
 }
 // Clean quotes and spaces from the value
 func clean(key string) string {
 	return strings.Trim(os.Getenv(key), "\"' ")
 }
 // init seeds the numeric settings with their built-in defaults and then lets
 // LoadConfig override them from the environment.
 func init() {
 	// Defaults must be in place before LoadConfig runs, because LoadConfig only
 	// overwrites these settings when the matching variable holds a valid value.
 	NumParallel, MaxRunners, MaxQueuedRequests = 1, 1, 512
 	LoadConfig()
 }
 // LoadConfig populates the package-level settings from the OLLAMA_*
 // environment variables. Every value is read through clean, so surrounding
 // quotes and spaces are ignored.
 //
 // Settings that are parsed (Debug, FlashAttention, MaxVRAM, NumParallel,
 // NoHistory, NoPrune, MaxRunners, MaxQueuedRequests) are only overwritten when
 // the variable is non-empty; invalid numeric values are logged and ignored.
 // The plain string settings (RunnersDir, TmpDir, LLMLibrary, KeepAlive) are
 // always overwritten, and AllowOrigins is rebuilt from scratch on every call
 // so that calling LoadConfig repeatedly does not accumulate duplicates.
 func LoadConfig() {
 	if debug := clean("OLLAMA_DEBUG"); debug != "" {
 		d, err := strconv.ParseBool(debug)
 		if err == nil {
 			Debug = d
 		} else {
 			// Any non-empty value that is not a recognised boolean turns debug on.
 			Debug = true
 		}
 	}
 	if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
 		// Unlike OLLAMA_DEBUG, an unparsable value is silently ignored here.
 		d, err := strconv.ParseBool(fa)
 		if err == nil {
 			FlashAttention = d
 		}
 	}
 	RunnersDir = clean("OLLAMA_RUNNERS_DIR")
 	if runtime.GOOS == "windows" && RunnersDir == "" {
 		// On Windows we do not carry the payloads inside the main executable
 		appExe, err := os.Executable()
 		if err != nil {
 			slog.Error("failed to lookup executable path", "error", err)
 		}
 		cwd, err := os.Getwd()
 		if err != nil {
 			slog.Error("failed to lookup working directory", "error", err)
 		}
 		// Candidate roots are searched in order: next to the executable first,
 		// then relative to the current working directory.
 		var paths []string
 		for _, root := range []string{filepath.Dir(appExe), cwd} {
 			paths = append(paths,
 				filepath.Join(root),
 				filepath.Join(root, "windows-"+runtime.GOARCH),
 				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 			)
 		}
 		// Try a few variations to improve developer experience when building from source in the local tree
 		for _, p := range paths {
 			candidate := filepath.Join(p, "ollama_runners")
 			_, err := os.Stat(candidate)
 			if err == nil {
 				RunnersDir = candidate
 				break
 			}
 		}
 		if RunnersDir == "" {
 			slog.Error("unable to locate llm runner directory.  Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
 		}
 	}
 	TmpDir = clean("OLLAMA_TMPDIR")
 	userLimit := clean("OLLAMA_MAX_VRAM")
 	if userLimit != "" {
 		avail, err := strconv.ParseUint(userLimit, 10, 64)
 		if err != nil {
 			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
 		} else {
 			MaxVRAM = avail
 		}
 	}
 	LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
 	if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
 		val, err := strconv.Atoi(onp)
 		if err != nil || val <= 0 {
 			slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
 		} else {
 			NumParallel = val
 		}
 	}
 	// NOTE(review): the two flags below are enabled by ANY non-empty value,
 	// including "0" and "false". Kept as-is to preserve existing behaviour.
 	if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
 		NoHistory = true
 	}
 	if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
 		NoPrune = true
 	}
 	// Build the origin list in a fresh slice. Appending straight onto the
 	// existing AllowOrigins would add another copy of the defaults on every
 	// call and keep stale user origins once OLLAMA_ORIGINS is unset.
 	var allowed []string
 	if origins := clean("OLLAMA_ORIGINS"); origins != "" {
 		allowed = strings.Split(origins, ",")
 	}
 	for _, allowOrigin := range defaultAllowOrigins {
 		allowed = append(allowed,
 			fmt.Sprintf("http://%s", allowOrigin),
 			fmt.Sprintf("https://%s", allowOrigin),
 			fmt.Sprintf("http://%s:*", allowOrigin),
 			fmt.Sprintf("https://%s:*", allowOrigin),
 		)
 	}
 	AllowOrigins = allowed
 	maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
 	if maxRunners != "" {
 		m, err := strconv.Atoi(maxRunners)
 		if err != nil {
 			slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
 		} else {
 			MaxRunners = m
 		}
 	}
 	// Read through clean like every other variable so that a quoted or padded
 	// value (e.g. OLLAMA_MAX_QUEUE="256") is accepted instead of rejected.
 	if onp := clean("OLLAMA_MAX_QUEUE"); onp != "" {
 		p, err := strconv.Atoi(onp)
 		if err != nil || p <= 0 {
 			slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
 		} else {
 			MaxQueuedRequests = p
 		}
 	}
 	KeepAlive = clean("OLLAMA_KEEP_ALIVE")
 }
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -1,235 +1,23 @@
 package envconfig
 import (
 	"math"
 	"testing"
 	"time"
-	"github.com/google/go-cmp/cmp"
+	"github.com/stretchr/testify/require"
 )
-func TestHost(t *testing.T) {
+func TestConfig(t *testing.T) {
-	cases := map[string]struct {
+	Debug = false // Reset whatever was loaded in init()
-		value  string
+	t.Setenv("OLLAMA_DEBUG", "")
-		expect string
+	LoadConfig()
-	}{
+	require.False(t, Debug)
-		"empty":               {"", "127.0.0.1:11434"},
+	t.Setenv("OLLAMA_DEBUG", "false")
-		"only address":        {"1.2.3.4", "1.2.3.4:11434"},
+	LoadConfig()
-		"only port":           {":1234", ":1234"},
+	require.False(t, Debug)
-		"address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
+	t.Setenv("OLLAMA_DEBUG", "1")
-		"hostname":            {"example.com", "example.com:11434"},
+	LoadConfig()
-		"hostname and port":   {"example.com:1234", "example.com:1234"},
+	require.True(t, Debug)
-		"zero port":           {":0", ":0"},
+	t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
-		"too large port":      {":66000", ":11434"},
+	LoadConfig()
-		"too small port":      {":-1", ":11434"},
+	require.True(t, FlashAttention)
 		"ipv6 localhost":      {"[::1]", "[::1]:11434"},
 		"ipv6 world open":     {"[::]", "[::]:11434"},
 		"ipv6 no brackets":    {"::1", "[::1]:11434"},
 		"ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
 		"extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
 		"extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
 		"extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
 		"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
 		"http":                {"http://1.2.3.4", "1.2.3.4:80"},
 		"http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
 		"https":               {"https://1.2.3.4", "1.2.3.4:443"},
 		"https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
 	}
 	for name, tt := range cases {
 		t.Run(name, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", tt.value)
 			if host := Host(); host.Host != tt.expect {
 				t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
 			}
 		})
 	}
 }
 // TestOrigins checks that Origins returns the origins listed in
 // OLLAMA_ORIGINS (in order) followed by the built-in default origins.
 func TestOrigins(t *testing.T) {
 	// Origins that are expected at the end of every result, regardless of
 	// what OLLAMA_ORIGINS contains.
 	defaults := []string{
 		"http://localhost",
 		"https://localhost",
 		"http://localhost:*",
 		"https://localhost:*",
 		"http://127.0.0.1",
 		"https://127.0.0.1",
 		"http://127.0.0.1:*",
 		"https://127.0.0.1:*",
 		"http://0.0.0.0",
 		"https://0.0.0.0",
 		"http://0.0.0.0:*",
 		"https://0.0.0.0:*",
 		"app://*",
 		"file://*",
 		"tauri://*",
 	}
 	// withDefaults returns a fresh slice holding the given user origins
 	// followed by the defaults, without aliasing the defaults slice.
 	withDefaults := func(user ...string) []string {
 		out := make([]string, 0, len(user)+len(defaults))
 		out = append(out, user...)
 		return append(out, defaults...)
 	}
 	cases := []struct {
 		value  string
 		expect []string
 	}{
 		{"", withDefaults()},
 		{"http://10.0.0.1", withDefaults("http://10.0.0.1")},
 		{"http://172.16.0.1,https://192.168.0.1", withDefaults(
 			"http://172.16.0.1",
 			"https://192.168.0.1",
 		)},
 		{"http://totally.safe,http://definitely.legit", withDefaults(
 			"http://totally.safe",
 			"http://definitely.legit",
 		)},
 	}
 	for _, tt := range cases {
 		t.Run(tt.value, func(t *testing.T) {
 			t.Setenv("OLLAMA_ORIGINS", tt.value)
 			// cmp.Diff(x, y) reports "-x +y", so the expected value must come
 			// first for the "(-want +got)" label below to be truthful.
 			if diff := cmp.Diff(tt.expect, Origins()); diff != "" {
 				t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
 			}
 		})
 	}
 }
 // TestBool checks how Bool interprets the raw value of an environment
 // variable: empty and explicit false values yield false, everything else
 // (including unparsable text) yields true.
 func TestBool(t *testing.T) {
 	expectations := map[string]bool{
 		"":      false,
 		"true":  true,
 		"false": false,
 		"1":     true,
 		"0":     false,
 		// invalid values
 		"random":    true,
 		"something": true,
 	}
 	for input, want := range expectations {
 		t.Run(input, func(t *testing.T) {
 			t.Setenv("OLLAMA_BOOL", input)
 			got := Bool("OLLAMA_BOOL")()
 			if got == want {
 				return
 			}
 			t.Errorf("%s: expected %t, got %t", input, want, got)
 		})
 	}
 }
 // TestUint checks that Uint parses plain decimal values and falls back to the
 // supplied default for empty, negative, prefixed, or non-numeric input.
 func TestUint(t *testing.T) {
 	const fallback uint = 11434
 	expectations := map[string]uint{
 		"0":    0,
 		"1":    1,
 		"1337": 1337,
 		// default values
 		"":       fallback,
 		"-1":     fallback,
 		"0o10":   fallback,
 		"0x10":   fallback,
 		"string": fallback,
 	}
 	for input, want := range expectations {
 		t.Run(input, func(t *testing.T) {
 			t.Setenv("OLLAMA_UINT", input)
 			got := Uint("OLLAMA_UINT", 11434)()
 			if got == want {
 				return
 			}
 			t.Errorf("%s: expected %d, got %d", input, want, got)
 		})
 	}
 }
 // TestKeepAlive checks the duration KeepAlive derives from OLLAMA_KEEP_ALIVE:
 // Go duration strings, bare integers treated as seconds, negative values
 // mapped to the maximum duration, and a five-minute fallback otherwise.
 func TestKeepAlive(t *testing.T) {
 	const fallback = 5 * time.Minute
 	forever := time.Duration(math.MaxInt64)
 	expectations := map[string]time.Duration{
 		"":       fallback,
 		"1s":     time.Second,
 		"1m":     time.Minute,
 		"1h":     time.Hour,
 		"5m0s":   5 * time.Minute,
 		"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
 		"0":      time.Duration(0),
 		"60":     60 * time.Second,
 		"120":    2 * time.Minute,
 		"3600":   time.Hour,
 		"-0":     time.Duration(0),
 		"-1":     forever,
 		"-1m":    forever,
 		// invalid values
 		" ":   fallback,
 		"???": fallback,
 		"1d":  fallback,
 		"1y":  fallback,
 		"1w":  fallback,
 	}
 	for input, want := range expectations {
 		t.Run(input, func(t *testing.T) {
 			t.Setenv("OLLAMA_KEEP_ALIVE", input)
 			got := KeepAlive()
 			if got == want {
 				return
 			}
 			t.Errorf("%s: expected %s, got %s", input, want, got)
 		})
 	}
 }
 // TestVar checks that Var strips outer whitespace first and then one layer of
 // surrounding quotes, leaving whitespace inside the quotes intact.
 func TestVar(t *testing.T) {
 	expectations := map[string]string{
 		"value":       "value",
 		" value ":     "value",
 		" 'value' ":   "value",
 		` "value" `:   "value",
 		" ' value ' ": " value ",
 		` " value " `: " value ",
 	}
 	for raw, want := range expectations {
 		t.Run(raw, func(t *testing.T) {
 			t.Setenv("OLLAMA_VAR", raw)
 			got := Var("OLLAMA_VAR")
 			if got == want {
 				return
 			}
 			t.Errorf("%s: expected %q, got %q", raw, want, got)
 		})
 	}
 }
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -35,7 +35,7 @@ func main() {
 	ctx := context.Background()
 	req := &api.ChatRequest{
-		Model:    "llama3.1",
+		Model:    "llama3",
 		Messages: messages,
 	}
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -16,7 +16,7 @@ func main() {
 	// By default, GenerateRequest is streaming.
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 	}
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -15,7 +15,7 @@ func main() {
 	}
 	req := &api.GenerateRequest{
-		Model:  "gemma2",
+		Model:  "gemma",
 		Prompt: "how many planets are there?",
 		// set streaming to false
--- a/examples/go-http-generate/README.md
+++ b/examples/go-http-generate/README.md
--- a/examples/langchain-python-rag-document/README.md
+++ b/examples/langchain-python-rag-document/README.md
@@ -4,14 +4,6 @@ This example provides an interface for asking questions to a PDF document.
 ## Setup
 1. Ensure you have the `llama3.1` model installed:
 ```
 ollama pull llama3.1
 ```
 2. Install the Python Requirements.
 ```
 pip install -r requirements.txt
 ```
--- a/examples/langchain-python-rag-document/main.py
+++ b/examples/langchain-python-rag-document/main.py
@@ -51,7 +51,7 @@ while True:
        template=template,
    )
-    llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
--- a/examples/langchain-python-rag-privategpt/ingest.py
+++ b/examples/langchain-python-rag-privategpt/ingest.py
@@ -77,21 +77,13 @@ LOADER_MAPPING = {
 def load_single_document(file_path: str) -> List[Document]:
-    if os.path.getsize(file_path) != 0:
+    ext = "." + file_path.rsplit(".", 1)[-1]
        filename, ext = os.path.splitext(file_path)
    if ext in LOADER_MAPPING:
        loader_class, loader_args = LOADER_MAPPING[ext]
            try:
        loader = loader_class(file_path, **loader_args)
                if loader:
        return loader.load()
            except:
                print(f"Corrupted file {file_path}. Ignoring it.")
        else:
            print(f"Unsupported file {file_path}. Ignoring it.")
    else:
        print(f"Empty file {file_path}. Ignoring it.")
    raise ValueError(f"Unsupported file extension '{ext}'")
 def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
    """
@@ -108,7 +100,6 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum
        results = []
        with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
            for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
                if docs:
                results.extend(docs)
                pbar.update()
--- a/examples/langchain-python-rag-privategpt/requirements.txt
+++ b/examples/langchain-python-rag-privategpt/requirements.txt
@@ -12,4 +12,3 @@ pandoc==2.3
 pypandoc==1.11
 tqdm==4.66.1
 sentence_transformers==2.2.2
 numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability
--- a/examples/langchain-python-rag-websummary/README.md
+++ b/examples/langchain-python-rag-websummary/README.md
@@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama2` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama2
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-rag-websummary/main.py
+++ b/examples/langchain-python-rag-websummary/main.py
@@ -5,7 +5,7 @@ from langchain.chains.summarize import load_summarize_chain
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 chain = load_summarize_chain(llm, chain_type="stuff")
 result = chain.invoke(docs) 
--- a/examples/langchain-python-simple/README.md
+++ b/examples/langchain-python-simple/README.md
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ## Running the Example
-1. Ensure you have the `llama3.1` model installed:
+1. Ensure you have the `llama3` model installed:
   ```bash
-   ollama pull llama3.1
+   ollama pull llama3
   ```
 2. Install the Python Requirements.
--- a/examples/langchain-python-simple/main.py
+++ b/examples/langchain-python-simple/main.py
@@ -1,6 +1,6 @@
 from langchain.llms import Ollama
 input = input("What is your question?")
-llm = Ollama(model="llama3.1")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
--- a/examples/modelfile-mario/Modelfile
+++ b/examples/modelfile-mario/Modelfile
@@ -1,4 +1,4 @@
-FROM llama3.1
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Roy Han	c79fd5c168	Reincluding Numbers	2024-05-29 12:22:36 -07:00
Roy Han	73fb9ea36e	Draft for Multi-Language Modelfile Creation	2024-05-29 11:51:57 -07:00