addressing new comments after merge

Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-15 14:17:23 -07:00
307 changed files with 8609 additions and 61553 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,9 +1,8 @@
 .vscode
 ollama
 app
-macapp
 dist
-llm/llama.cpp
+scripts
+llm/llama.cpp/ggml
+llm/llama.cpp/gguf
 .env
-.cache
-test_data
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-llm/ext_server/* linguist-vendored
--- a/.github/ISSUE_TEMPLATE/10_model_request.yml
+++ b/.github/ISSUE_TEMPLATE/10_model_request.yml
@@ -1,18 +0,0 @@
-name: Model request
-description: Request a new model for the library
-labels: [mr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your Model request is [already available](https://ollama.com/search) or that you cannot [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
-        Tell us about which Model you'd like to see in the library!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What model would you like?
-      description: Please provide a link to the model.
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a model request!
--- a/.github/ISSUE_TEMPLATE/20_feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/20_feature_request.yml
@@ -1,41 +0,0 @@
-name: Feature request
-description: Propose a new feature
-labels: [needs-triage, fr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
-        Tell us about your idea!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What are you trying to do?
-      description: Tell us about the problem you're trying to solve.
-    validations:
-      required: false
-  - type: textarea
-    id: solution
-    attributes:
-      label: How should we solve this?
-      description: If you have an idea of how you'd like to see this feature work, let us know.
-    validations:
-      required: false
-  - type: textarea
-    id: alternative
-    attributes:
-      label: What is the impact of not solving this?
-      description: (How) Are you currently working around the issue?
-    validations:
-      required: false
-  - type: textarea
-    id: context
-    attributes:
-      label: Anything else?
-      description: Any additional context to share, e.g., links
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a feature request!
--- a/.github/ISSUE_TEMPLATE/90_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/90_bug_report.yml
@@ -1,125 +0,0 @@
-name: Bug report
-description: File a bug report. If you need help, please join our Discord server.
-labels: [needs-triage, bug]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
-  - type: textarea
-    id: what-happened
-    attributes:
-      label: What is the issue?
-      description: What happened? What did you expect to happen?
-    validations:
-      required: true
-  - type: textarea
-    id: what-was-expected
-    attributes:
-      label: What did you expect to see?
-      description: What did you expect to see/happen instead?
-    validations:
-      required: true
-  - type: textarea
-    id: steps
-    attributes:
-      label: Steps to reproduce
-      description: What are the steps you took that hit this issue?
-    validations:
-      required: false
-  - type: textarea
-    id: changes
-    attributes:
-      label: Are there any recent changes that introduced the issue?
-      description: If so, what are those changes?
-    validations:
-      required: false
-  - type: dropdown
-    id: os
-    attributes:
-      label: OS
-      description: What OS are you using? You may select more than one.
-      multiple: true
-      options:
-        - Linux
-        - macOS
-        - Windows
-        - Other
-    validations:
-      required: false
-  - type: dropdown
-    id: architecture
-    attributes:
-      label: Architecture
-      description: What architecture are you using? You may select more than one.
-      multiple: true
-      options:
-        - arm64
-        - amd64
-        - x86
-        - Other
-  - type: dropdown
-    id: platform
-    attributes:
-      label: Platform
-      description: What platform are you using? You may select more than one.
-      multiple: true
-      options:
-        - Docker
-        - WSL
-        - WSL2
-    validations:
-      required: false
-  - type: input
-    id: ollama-version
-    attributes:
-      label: Ollama version
-      description: What Ollama version are you using? (`ollama --version`)
-      placeholder: e.g., 1.14.4
-    validations:
-      required: false
-  - type: dropdown
-    id: gpu
-    attributes:
-      label: GPU
-      description: What GPU, if any, are you using? You may select more than one.
-      multiple: true
-      options:
-        - Nvidia
-        - AMD
-        - Intel
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: gpu-info
-    attributes:
-      label: GPU info
-      description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
-    validations:
-      required: false
-  - type: dropdown
-    id: cpu
-    attributes:
-      label: CPU
-      description: What CPU are you using? You may select more than one.
-      multiple: true
-      options:
-        - Intel
-        - AMD
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: other-software
-    attributes:
-      label: Other software
-      description: What other software are you using that might be related to this issue?
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a bug report!
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,8 +0,0 @@
-blank_issues_enabled: true
-contact_links:
-  - name: Help
-    url: https://discord.com/invite/ollama
-    about: Please join our Discord server for help using Ollama
-  - name: Troubleshooting
-    url: https://github.com/ollama/ollama/blob/main/docs/faq.md#faq
-    about: See the FAQ for common issues and solutions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,463 +0,0 @@
-name: release
-
-on:
-  push:
-    tags:
-      - 'v*'
-
-jobs:
-  # Full build of the Mac assets
-  build-darwin:
-    runs-on: macos-latest
-    environment: release
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: |
-          echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-          echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
-      - name: key
-        env:
-          MACOS_SIGNING_KEY: ${{ secrets.MACOS_SIGNING_KEY }}
-          MACOS_SIGNING_KEY_PASSWORD: ${{ secrets.MACOS_SIGNING_KEY_PASSWORD }}
-        run: |
-          echo $MACOS_SIGNING_KEY | base64 --decode > certificate.p12
-          security create-keychain -p password build.keychain
-          security default-keychain -s build.keychain
-          security unlock-keychain -p password build.keychain
-          security import certificate.p12 -k build.keychain -P $MACOS_SIGNING_KEY_PASSWORD -T /usr/bin/codesign
-          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - name: Build Darwin
-        env:
-          APPLE_IDENTITY: ${{ secrets.APPLE_IDENTITY }}
-          APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }}
-          APPLE_TEAM_ID: ${{ vars.APPLE_TEAM_ID }}
-          APPLE_ID: ${{ vars.APPLE_ID }}
-        run: |
-          ./scripts/build_darwin.sh
-        
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-darwin
-          path: |
-            dist/*arwin*
-            !dist/*-cov
-
-
-  # Windows builds take a long time to both install the dependencies and build, so parallelize
-  # CPU generation step
-  generate-windows-cpu:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get ./...
-      - run: |
-          $gopath=(get-command go).source | split-path -parent
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$env:PATH"
-          go generate -x ./...
-        name: go generate
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cpu
-          path: llm/llama.cpp/build/**/lib/*
-
-  # ROCm generation step
-  generate-windows-rocm:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - name: "Install ROCm"
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading AMD HIP Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
-          write-host "Installing AMD HIP"
-          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
-          write-host "Completed AMD HIP"
-      - name: "Verify ROCm"
-        run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-      - run: go get ./...
-      - run: |
-          $gopath=(get-command go).source | split-path -parent
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$env:PATH"
-          $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-          go generate -x ./...
-        name: go generate
-      - name: "gather rocm dependencies"
-        run: |
-          $HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-          md "dist\deps\bin\rocblas\library"
-          cp "${HIP_PATH}\bin\hipblas.dll" "dist\deps\bin\"
-          cp "${HIP_PATH}\bin\rocblas.dll" "dist\deps\bin\"
-          cp "${HIP_PATH}\bin\rocblas\library\*" "dist\deps\bin\rocblas\library\"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-rocm
-          path: llm/llama.cpp/build/**/lib/*
-      - uses: actions/upload-artifact@v4
-        with:
-          name: windows-rocm-deps
-          path: dist/deps/*
-
-  # CUDA generation step
-  generate-windows-cuda:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      # TODO - consider replacing this action with a ps1 snippet to install
-      # This actions seems to fail sometimes with "no tools in cache" but a re-run of the failed job clears it
-      # https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-      - name: "Install CUDA"
-        uses: Jimver/cuda-toolkit@v0.2.14
-        id: cuda-toolkit
-        with:
-          cuda: '11.3.1'      
-      - name: "Verify CUDA"
-        run: nvcc -V
-      - run: go get ./...
-      - name: go generate
-        run: |
-          $gopath=(get-command go).source | split-path -parent
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$env:PATH"
-          $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          go generate -x ./...
-      - name: "gather cuda dependencies"
-        run: |
-          $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
-          md "dist\deps"
-          cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
-          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cuda
-          path: llm/llama.cpp/build/**/lib/*
-      - uses: actions/upload-artifact@v4
-        with:
-          name: windows-cuda-deps
-          path: dist/deps/*
-
-  # Import the prior generation steps and build the final windows assets
-  build-windows:
-    environment: release
-    runs-on: windows
-    needs:
-      - generate-windows-cuda
-      - generate-windows-rocm
-      - generate-windows-cpu
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - uses: 'google-github-actions/auth@v2'
-        with:
-          project_id: 'ollama'
-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
-      - name: install Windows SDK 8.1 to get signtool
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading SDK"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-          write-host "Win SDK 8.1 installed"
-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-      - name: install signing plugin
-        run: |
-          $ErrorActionPreference = "Stop"
-          write-host "downloading plugin"
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-          write-host "Installing plugin"
-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-          write-host "plugin installed"
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get
-      - uses: actions/download-artifact@v4
-        with:
-          name: generate-windows-cpu
-          path: llm/llama.cpp/build
-      - uses: actions/download-artifact@v4
-        with:
-          name: generate-windows-cuda
-          path: llm/llama.cpp/build
-      - uses: actions/download-artifact@v4
-        with:
-          name: windows-cuda-deps
-          path: dist/deps
-      - uses: actions/download-artifact@v4
-        with:
-          name: windows-rocm-deps
-          path: dist/deps
-      - uses: actions/download-artifact@v4
-        with:
-          name: generate-windows-rocm
-          path: llm/llama.cpp/build
-      - run: dir llm/llama.cpp/build
-      - run: |
-          $gopath=(get-command go).source | split-path -parent
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$env:PATH"
-          $env:OLLAMA_SKIP_GENERATE="1"
-          $env:NVIDIA_DIR=$(resolve-path ".\dist\deps")
-          $env:HIP_PATH=$(resolve-path ".\dist\deps")
-          & .\scripts\build_windows.ps1
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-windows
-          path: dist/*.exe
-
-  # Linux x86 assets built using the container based build 
-  build-linux-amd64:
-    environment: release
-    runs-on: linux
-    env:
-      OLLAMA_SKIP_MANIFEST_CREATE: "1"
-      BUILD_ARCH: amd64
-      PUSH: "1"
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - run: |
-          ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
-          mv dist/deps/* dist/
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-amd64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-
-  # Linux ARM assets built using the container based build
-  # (at present, docker isn't pre-installed on arm ubunutu images)
-  build-linux-arm64:
-    environment: release
-    runs-on: linux-arm64
-    env:
-      OLLAMA_SKIP_MANIFEST_CREATE: "1"
-      BUILD_ARCH: arm64
-      PUSH: "1"
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: "Install Docker"
-        run: |
-          # Add Docker's official GPG key:
-          env
-          uname -a
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-
-          # Add the repository to Apt sources:
-          echo \
-            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - run: |
-          ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-arm64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-
-  # Aggregate all the assets and ship a release
-  release: 
-    needs:
-      - build-darwin
-      - build-windows
-      - build-linux-amd64
-      - build-linux-arm64
-    runs-on: linux
-    environment: release
-    permissions:
-      contents: write
-    env:
-      OLLAMA_SKIP_IMAGE_BUILD: "1"
-      PUSH: "1"
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set Version
-        shell: bash
-        run: |
-          echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-          echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - run: ./scripts/build_docker.sh
-      - name: Retrieve built artifact
-        uses: actions/download-artifact@v4
-        with:
-          path: dist
-          pattern: dist-*
-          merge-multiple: true
-      - run: |
-          ls -lh dist/
-          (cd dist; sha256sum * > sha256sum.txt)
-          cat dist/sha256sum.txt
-      - uses: ncipollo/release-action@v1
-        with:
-          name: ${{ env.RELEASE_VERSION }}
-          allowUpdates: true
-          artifacts: "dist/*"
-          draft: true
-          prerelease: true
-          omitBodyDuringUpdate: true
-          generateReleaseNotes: true
-          omitDraftDuringUpdate: true
-          omitPrereleaseDuringUpdate: true
-          replacesArtifacts: true
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,177 +0,0 @@
-name: test
-
-on:
-  pull_request:
-    paths:
-      - '**/*'
-      - '!docs/**'
-      - '!examples/**'
-      - '!README.md'
-
-jobs:
-  generate:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get ./...
-      - run: |
-          $gopath=(get-command go).source | split-path -parent
-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
-          cd $env:GITHUB_WORKSPACE
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$env:PATH"
-          go generate -x ./...
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        name: "Windows Go Generate"
-      - run: go generate -x ./...
-        if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        name: "Unix Go Generate"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: llm/llama.cpp/build/**/lib/*
-  generate-cuda:
-    strategy:
-      matrix:
-        cuda-version:
-          - '11.8.0'
-    runs-on: linux
-    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl
-          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
-            | tar -zx -C /usr --strip-components 1
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          go generate -x ./...
-        env:
-          OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: cuda-${{ matrix.cuda-version }}-libraries
-          path: llm/llama.cpp/build/**/lib/*
-  generate-rocm:
-    strategy:
-      matrix:
-        rocm-version:
-          - '6.0'
-    runs-on: linux
-    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl rocm-libs
-          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
-            | tar -zx -C /usr --strip-components 1
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          go generate -x ./...
-        env:
-          OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: rocm-${{ matrix.rocm-version }}-libraries
-          path: llm/llama.cpp/build/**/lib/*
-  lint:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-          - os: macos-latest
-            arch: amd64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: false
-      - run: |
-          mkdir -p llm/llama.cpp/build/linux/${{ matrix.arch }}/stub/lib/
-          touch llm/llama.cpp/build/linux/${{ matrix.arch }}/stub/lib/stub.so
-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
-      - run: |
-          mkdir -p llm/llama.cpp/build/darwin/${{ matrix.arch }}/stub/lib/
-          touch llm/llama.cpp/build/darwin/${{ matrix.arch }}/stub/lib/stub.dylib
-          touch llm/llama.cpp/ggml-metal.metal
-        if: ${{ startsWith(matrix.os, 'macos-') }}
-      - run: |
-          mkdir -p llm/llama.cpp/build/windows/${{ matrix.arch }}/stub/lib/
-          touch llm/llama.cpp/build/windows/${{ matrix.arch }}/stub/lib/stub.dll
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-      - uses: golangci/golangci-lint-action@v3
-  test:
-    needs: generate
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-          cache: true
-      - run: go get
-      - uses: actions/download-artifact@v4
-        with:
-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: llm/llama.cpp/build
-      - run: go build
-      - run: go test -v ./...
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-binaries
-          path: ollama
--- a/.gitignore
+++ b/.gitignore
@@ -6,8 +6,3 @@
 dist
 ollama
 ggml-metal.metal
-.cache
-*.exe
-.idea
-test_data
-*.crt
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,10 @@
-[submodule "llama.cpp"]
-	path = llm/llama.cpp
-	url = https://github.com/ggerganov/llama.cpp.git
-	shallow = true
+[submodule "llm/llama.cpp/ggml"]
+    path = llm/llama.cpp/ggml
+    url = https://github.com/ggerganov/llama.cpp.git
+    ignore = dirty
+    shallow = true
+[submodule "llm/llama.cpp/gguf"]
+    path = llm/llama.cpp/gguf
+    url = https://github.com/ggerganov/llama.cpp.git
+    ignore = dirty
+    shallow = true
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -1,27 +0,0 @@
-run:
-  timeout: 5m
-linters:
-  enable:
-    - asasalint
-    - bidichk
-    - bodyclose
-    - containedctx
-    - contextcheck
-    - exportloopref
-    - gocheckcompilerdirectives
-    # FIXME: for some reason this errors on windows
-    # - gofmt
-    # - goimports
-    - misspell
-    - nilerr
-    - unused
-linters-settings:
-  errcheck:
-    # exclude the following functions since we don't generally
-    # need to be concerned with the returned errors
-    exclude-functions:
-      - encoding/binary.Read
-      - (*os.File).Seek
-      - (*bufio.Writer).WriteString
-      - (*github.com/spf13/pflag.FlagSet).Set
-      - (*github.com/ollama/ollama/llm.readSeekOffset).Seek
--- a/140
+++ b/140
@@ -1,137 +1,23 @@
-ARG GOLANG_VERSION=1.22.1
-ARG CMAKE_VERSION=3.22.1
-# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
-ARG CUDA_VERSION=11.3.1
-ARG ROCM_VERSION=6.0
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

-# Copy the minimal context we need to run the generate scripts
-FROM scratch AS llm-code
-COPY .git .git
-COPY .gitmodules .gitmodules
-COPY llm llm
+ARG TARGETARCH
+ARG GOFLAGS="'-ldflags=-w -s'"

-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+WORKDIR /go/src/github.com/jmorganca/ollama
+RUN apt-get update && apt-get install -y git build-essential cmake
+ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz
+RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz

-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV LIBRARY_PATH /opt/amdgpu/lib64
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-ARG CGO_CFLAGS
-ARG AMDGPU_TARGETS
-RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-RUN mkdir /tmp/scratch && \
-    for dep in $(cat /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
-        cp ${dep} /tmp/scratch/ || exit 1 ; \
-    done && \
-    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
-    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
-    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
-
-
-FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
-ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-ARG OLLAMA_CUSTOM_CPU_DEFS
-ARG CGO_CFLAGS
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
-RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
-RUN OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
-RUN OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
-
-FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
-ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-# Note, we only build the "base" CPU variant on arm since avx/avx2 are x86 features
-ARG OLLAMA_CUSTOM_CPU_DEFS
-ARG CGO_CFLAGS
-RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
-
-# Intermediate stage used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
-COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN go build -trimpath .
+ENV GOARCH=$TARGETARCH
+ENV GOFLAGS=$GOFLAGS
+RUN /usr/local/go/bin/go generate ./... \
+    && /usr/local/go/bin/go build .

-# Intermediate stage used for ./scripts/build_linux.sh
-FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
-ENV CGO_ENABLED 1
-ARG GOLANG_VERSION
-WORKDIR /go/src/github.com/ollama/ollama
-COPY . .
-COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-RUN mkdir -p /go/src/github.com/ollama/ollama/dist/deps/
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN go build -trimpath .
-
-# Runtime stages
-FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
+FROM ubuntu:22.04
 RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
-FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
-RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
-
-# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
-RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
-
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM runtime-$TARGETARCH
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
-ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
-ENV NVIDIA_VISIBLE_DEVICES=all
-
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -0,0 +1,32 @@
+
+# centos7 amd64 dependencies
+FROM --platform=linux/amd64 nvidia/cuda:11.8.0-devel-centos7 AS base-amd64
+RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl && \
+    yum update -y && \
+    yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236 wget
+RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6-linux-x86_64.sh" -O cmake-installer.sh && chmod +x cmake-installer.sh && ./cmake-installer.sh --skip-license --prefix=/usr/local
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+
+# centos8 arm64 dependencies
+FROM --platform=linux/arm64 nvidia/cuda:11.4.3-devel-centos8 AS base-arm64
+RUN sed -i -e 's/mirrorlist/#mirrorlist/g' -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
+RUN yum install -y git cmake
+
+FROM base-${TARGETARCH}
+ARG TARGETARCH
+ARG GOFLAGS="'-ldflags -w -s'"
+
+# install go
+ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz
+RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
+
+# build the final binary
+WORKDIR /go/src/github.com/jmorganca/ollama
+COPY . .
+
+ENV GOOS=linux
+ENV GOARCH=$TARGETARCH
+ENV GOFLAGS=$GOFLAGS
+
+RUN /usr/local/go/bin/go generate ./... && \
+    /usr/local/go/bin/go build .
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 <div align="center">
-  <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" height="200px" srcset="https://github.com/jmorganca/ollama/assets/3325447/56ea1849-1284-4645-8970-956de6e51c3c">
+    <img alt="logo" height="200px" src="https://github.com/jmorganca/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+  </picture>
 </div>

 # Ollama
@@ -10,32 +13,23 @@ Get up and running with large language models locally.

 ### macOS

-[Download](https://ollama.com/download/Ollama-darwin.zip)
+[Download](https://ollama.ai/download/Ollama-darwin.zip)

-### Windows preview
-
-[Download](https://ollama.com/download/OllamaSetup.exe)
-
-### Linux
+### Linux & WSL2

 ```
-curl -fsSL https://ollama.com/install.sh | sh
+curl https://ollama.ai/install.sh | sh
 ```

-[Manual install instructions](https://github.com/ollama/ollama/blob/main/docs/linux.md)
+[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)

-### Docker
+### Windows

-The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
-
-### Libraries
-
- [ollama-python](https://github.com/ollama/ollama-python)
- [ollama-js](https://github.com/ollama/ollama-js)
+coming soon

 ## Quickstart

-To run and chat with [Llama 2](https://ollama.com/library/llama2):
+To run and chat with [Llama 2](https://ollama.ai/library/llama2):

 ```
 ollama run llama2
@@ -43,37 +37,30 @@ ollama run llama2

 ## Model library

-Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library')
+Ollama supports a list of open-source models available on [ollama.ai/library](https://ollama.ai/library 'ollama model library')

-Here are some example models that can be downloaded:
+Here are some example open-source models that can be downloaded:

 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 2            | 7B         | 3.8GB | `ollama run llama2`            |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
-| Dolphin Phi        | 2.7B       | 1.6GB | `ollama run dolphin-phi`       |
-| Phi-2              | 2.7B       | 1.7GB | `ollama run phi`               |
-| Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
-| Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
+| Llama 2            | 7B         | 3.8GB | `ollama run llama2`            |
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
 | Llama 2 13B        | 13B        | 7.3GB | `ollama run llama2:13b`        |
 | Llama 2 70B        | 70B        | 39GB  | `ollama run llama2:70b`        |
 | Orca Mini          | 3B         | 1.9GB | `ollama run orca-mini`         |
 | Vicuna             | 7B         | 3.8GB | `ollama run vicuna`            |
-| LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
-| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
-| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |

-> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.

-## Customize a model
+## Customize your own model

-### Import from GGUF
+### Import from GGUF or GGML

-Ollama supports importing GGUF models in the Modelfile:
+Ollama supports importing GGUF and GGML file formats in the Modelfile. This means if you have a model that is not in the Ollama library, you can create it, iterate on it, and upload it to the Ollama library to share with others when you are ready.

-1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
+1. Create a file named Modelfile, and add a `FROM` instruction with the local filepath to the model you want to import.

   ```
   FROM ./vicuna-33b.Q4_0.gguf
@@ -82,22 +69,18 @@ Ollama supports importing GGUF models in the Modelfile:
 2. Create the model in Ollama

   ```
-   ollama create example -f Modelfile
+   ollama create name -f path_to_modelfile
   ```

 3. Run the model

   ```
-   ollama run example
+   ollama run name
   ```

-### Import from PyTorch or Safetensors
-
-See the [guide](docs/import.md) on importing models for more information.
-
 ### Customize a prompt

-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama2` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama2` model:

 ```
 ollama pull llama2
@@ -111,7 +94,7 @@ FROM llama2
 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1

-# set the system message
+# set the system prompt
 SYSTEM """
 You are Mario from Super Mario Bros. Answer as Mario, the assistant, only.
 """
@@ -134,10 +117,6 @@ For more examples, see the [examples](examples) directory. For more information

 `ollama create` is used to create a model from a Modelfile.

-```
-ollama create mymodel -f ./Modelfile
-```
-
 ### Pull a model

 ```
@@ -169,17 +148,10 @@ For multiline input, you can wrap text with `"""`:
 I'm a basic program that prints the famous "Hello, world!" message to the console.
 ```

-### Multimodal models
-
-```
->>> What's in this image? /Users/jmorgan/Desktop/smile.png
-The image features a yellow smiley face, which is likely the central focus of the picture.
-```
-
 ### Pass in prompt as arguments

 ```
-$ ollama run llama2 "Summarize this file: $(cat README.md)"
+$ ollama run llama2 "summarize this file:" "$(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```

@@ -198,25 +170,17 @@ ollama list
 Install `cmake` and `go`:

 ```
-brew install cmake go
+brew install cmake
+brew install go
 ```

-Then generate dependencies:
+Then generate dependencies and build:

 ```
 go generate ./...
-```
-
-Then build the binary:
-
-```
 go build .
 ```

-More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
-
-### Running local builds
-
 Next, start the server:

 ```
@@ -231,143 +195,30 @@ Finally, in a separate shell, run a model:

 ## REST API

-Ollama has a REST API for running and managing models.
+> See the [API documentation](docs/api.md) for all endpoints.

-### Generate a response
+Ollama has an API for running and managing models. For example, to generate text from a model:

 ```
-curl http://localhost:11434/api/generate -d '{
+curl -X POST http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt":"Why is the sky blue?"
 }'
 ```

-### Chat with a model
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "mistral",
-  "messages": [
-    { "role": "user", "content": "why is the sky blue?" }
-  ]
-}'
-```
-
-See the [API documentation](./docs/api.md) for all endpoints.
-
 ## Community Integrations

-### Web & Desktop
-
- [LibreChat](https://github.com/danny-avila/LibreChat)
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Saddle](https://github.com/jikkuatwork/saddle)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
- [Open WebUI](https://github.com/open-webui/open-webui)
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
- [big-AGI](https://github.com/enricoros/big-AGI/blob/main/docs/config-local-ollama.md)
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
- [Amica](https://github.com/semperai/amica)
- [chatd](https://github.com/BruceMacD/chatd)
- [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI)
- [Dify.AI](https://github.com/langgenius/dify)
- [MindMac](https://mindmac.app)
- [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui)
- [Msty](https://msty.app)
- [Chatbox](https://github.com/Bin-Huang/Chatbox)
- [WinForm Ollama Copilot](https://github.com/tgraupmann/WinForm_Ollama_Copilot)
- [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama)
- [Alpaca WebUI](https://github.com/mmo80/alpaca-webui)
- [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
- [OpenAOE](https://github.com/InternLM/OpenAOE)
- [Odin Runes](https://github.com/leonid20000/OdinRunes)
- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-
-### Terminal
-
- [oterm](https://github.com/ggozad/oterm)
- [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
- [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ollero.nvim](https://github.com/marco-souza/ollero.nvim)
- [ollama-chat.nvim](https://github.com/gerazov/ollama-chat.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel)
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
- [cmdh](https://github.com/pgibler/cmdh)
- [ooo](https://github.com/npahlfer/ooo)
- [tenere](https://github.com/pythops/tenere)
- [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
- [typechat-cli](https://github.com/anaisbetts/typechat-cli)
- [ShellOracle](https://github.com/djcopley/ShellOracle)
- [tlm](https://github.com/yusufcanb/tlm)
-
-### Database
-
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md)
-
-### Package managers
-
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
-
-### Libraries
-
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
- [LangChainDart](https://github.com/davidmigloz/langchain_dart)
- [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama)
- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
- [Elixir LangChain](https://github.com/brainlid/langchain)
- [Ollama for R - rollama](https://github.com/JBGruber/rollama)
- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
- [Testcontainers](https://testcontainers.com/modules/ollama/)
-
-### Mobile
-
- [Enchanted](https://github.com/AugustDev/enchanted)
- [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
-
-### Extensions & Plugins
-
 - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
 - [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
 - [Continue](https://github.com/continuedev/continue)
 - [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
- [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin)
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
+- [LiteLLM](https://github.com/BerriAI/litellm)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
+- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
+- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
+- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
+- [Dumbar](https://github.com/JerrySievert/Dumbar)
+- [Emacs client](https://github.com/zweifisch/ollama)
--- a/api/client.go
+++ b/api/client.go
@@ -5,7 +5,6 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -15,13 +14,16 @@ import (
 	"runtime"
 	"strings"

-	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/version"
+	"github.com/jmorganca/ollama/version"
 )

+const DefaultHost = "127.0.0.1:11434"
+
+var envHost = os.Getenv("OLLAMA_HOST")
+
 type Client struct {
 	base *url.URL
-	http *http.Client
+	http http.Client
 }

 func checkError(resp *http.Response, body []byte) error {
@@ -41,57 +43,54 @@ func checkError(resp *http.Response, body []byte) error {
 }

 func ClientFromEnvironment() (*Client, error) {
-	defaultPort := "11434"
-
 	scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
-	switch {
-	case !ok:
+	if !ok {
 		scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
-	case scheme == "http":
-		defaultPort = "80"
-	case scheme == "https":
-		defaultPort = "443"
 	}

-	// trim trailing slashes
-	hostport = strings.TrimRight(hostport, "/")
-
 	host, port, err := net.SplitHostPort(hostport)
 	if err != nil {
-		host, port = "127.0.0.1", defaultPort
-		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+		host, port = "127.0.0.1", "11434"
+		if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
 			host = ip.String()
-		} else if hostport != "" {
-			host = hostport
 		}
 	}

-	return &Client{
+	client := Client{
 		base: &url.URL{
 			Scheme: scheme,
 			Host:   net.JoinHostPort(host, port),
 		},
-		http: http.DefaultClient,
-	}, nil
+	}
+
+	mockRequest, err := http.NewRequest("HEAD", client.base.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+
+	proxyURL, err := http.ProxyFromEnvironment(mockRequest)
+	if err != nil {
+		return nil, err
+	}
+
+	client.http = http.Client{
+		Transport: &http.Transport{
+			Proxy: http.ProxyURL(proxyURL),
+		},
+	}
+
+	return &client, nil
 }

 func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
 	var reqBody io.Reader
 	var data []byte
 	var err error
-
-	switch reqData := reqData.(type) {
-	case io.Reader:
-		// reqData is already an io.Reader
-		reqBody = reqData
-	case nil:
-		// noop
-	default:
+	if reqData != nil {
 		data, err = json.Marshal(reqData)
 		if err != nil {
 			return err
 		}
-
 		reqBody = bytes.NewReader(data)
 	}

@@ -128,7 +127,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	return nil
 }

-const maxBufferSize = 512 * format.KiloByte
+const maxBufferSize = 512 * 1000 // 512KB

 func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
 	var buf *bytes.Buffer
@@ -204,19 +203,6 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
 	})
 }

-type ChatResponseFunc func(ChatResponse) error
-
-func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
-	return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
-		var resp ChatResponse
-		if err := json.Unmarshal(bts, &resp); err != nil {
-			return err
-		}
-
-		return fn(resp)
-	})
-}
-
 type PullProgressFunc func(ProgressResponse) error

 func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
@@ -292,37 +278,3 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	}
 	return nil
 }
-func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
-	var resp EmbeddingResponse
-	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
-		return nil, err
-	}
-	return &resp, nil
-}
-
-func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
-	if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
-		var statusError StatusError
-		if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
-			return err
-		}
-
-		if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func (c *Client) Version(ctx context.Context) (string, error) {
-	var version struct {
-		Version string `json:"version"`
-	}
-
-	if err := c.do(ctx, http.MethodGet, "/api/version", nil, &version); err != nil {
-		return "", err
-	}
-
-	return version.Version, nil
-}
--- a/api/client.py
+++ b/api/client.py
@@ -0,0 +1,225 @@
+import os
+import json
+import requests
+
+BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
+
+# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses.
+# The final response object will include statistics and additional data from the request. Use the callback function to override
+# the default handler.
+def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None):
+    """Stream a completion from /api/generate.
+
+    Args:
+        model_name: Sent as the "model" field of the request.
+        prompt: Sent as the "prompt" field of the request.
+        system, template, context, options: Optional request fields; any that
+            are None are left out of the payload.
+        callback: Optional callable invoked with every decoded JSON chunk. When
+            given, nothing is printed and no response text is accumulated.
+
+    Returns:
+        A (text, context) tuple: the concatenated "response" values of the
+        non-final chunks (empty string when a callback is used) and the
+        "context" value of the chunk whose "done" is truthy. Returns
+        (None, None) if the HTTP request fails.
+    """
+    candidate_fields = {
+        "model": model_name,
+        "prompt": prompt,
+        "system": system,
+        "template": template,
+        "context": context,
+        "options": options
+    }
+    # Only send the fields the caller actually supplied
+    body = {name: value for name, value in candidate_fields.items() if value is not None}
+
+    try:
+        with requests.post(f"{BASE_URL}/api/generate", json=body, stream=True) as reply:
+            reply.raise_for_status()
+
+            text = ""
+            last_context = None
+
+            for raw in reply.iter_lines():
+                if not raw:
+                    continue
+
+                # Each non-empty line is one JSON object
+                chunk = json.loads(raw)
+
+                if callback:
+                    callback(chunk)
+                elif not chunk.get("done"):
+                    # Default handler: echo the piece and remember it
+                    piece = chunk.get("response", "")
+                    text += piece
+                    print(piece, end="", flush=True)
+
+                # The final chunk carries the context to return
+                if chunk.get("done"):
+                    last_context = chunk.get("context")
+
+            return text, last_context
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return None, None
+
+# Create a model from a Modelfile. Use the callback function to override the default handler.
+def create(model_name, model_path, callback=None):
+    """Create a model via /api/create, streaming status updates.
+
+    Args:
+        model_name: Sent as the "name" field of the request.
+        model_path: Sent as the "path" field of the request.
+        callback: Optional callable invoked with each decoded JSON chunk; when
+            omitted, each chunk's "status" value is printed instead.
+
+    Returns:
+        None. Request failures are printed rather than raised.
+    """
+    def _print_status(chunk):
+        # Default handler used when the caller gives no callback
+        print(f"Status: {chunk.get('status')}")
+
+    handle = callback if callback else _print_status
+    body = {"name": model_name, "path": model_path}
+
+    try:
+        # stream=True so each status line is handled as it arrives
+        with requests.post(f"{BASE_URL}/api/create", json=body, stream=True) as reply:
+            reply.raise_for_status()
+
+            for raw in reply.iter_lines():
+                if raw:
+                    handle(json.loads(raw))
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+
+# Pull a model from the model registry. Cancelled pulls are resumed from where they left off, and multiple
+# calls will share the same download progress. Use the callback function to override the default handler.
+def pull(model_name, insecure=False, callback=None):
+    """Pull a model via /api/pull, streaming progress updates.
+
+    Args:
+        model_name: Sent as the "name" field of the request.
+        insecure: Sent as the "insecure" field of the request.
+        callback: Optional callable invoked with each decoded JSON chunk. When
+            given, it fully replaces the default handler and nothing is printed.
+
+    Returns:
+        None. Request failures are printed rather than raised.
+    """
+    try:
+        url = f"{BASE_URL}/api/pull"
+        payload = {
+            "name": model_name,
+            "insecure": insecure
+        }
+
+        # stream=True so each progress line is handled as it arrives
+        with requests.post(url, json=payload, stream=True) as response:
+            response.raise_for_status()
+
+            for line in response.iter_lines():
+                if not line:
+                    continue
+
+                # Each non-empty line is one JSON progress object
+                chunk = json.loads(line)
+
+                if callback:
+                    callback(chunk)
+                    continue
+
+                # Default handler: status first, then layer details when present
+                print(chunk.get('status', ''), end='', flush=True)
+                if 'digest' in chunk:
+                    # total/completed may be absent (the Go API type marks them
+                    # omitempty, so zero values are dropped); default to 0
+                    print(f" - Digest: {chunk['digest']}", end='', flush=True)
+                    print(f" - Total: {chunk.get('total', 0)}", end='', flush=True)
+                    print(f" - Completed: {chunk.get('completed', 0)}", end='\n', flush=True)
+                else:
+                    print()
+    except requests.exceptions.RequestException as e:
+        print(f"An error occurred: {e}")
+
+# Push a model to the model registry. Use the callback function to override the default handler.
+def push(model_name, insecure=False, callback=None):
+    """Push a model via /api/push, streaming progress updates.
+
+    Args:
+        model_name: Sent as the "name" field of the request.
+        insecure: Sent as the "insecure" field of the request.
+        callback: Optional callable invoked with each decoded JSON chunk. When
+            given, it fully replaces the default handler and nothing is printed.
+
+    Returns:
+        None. Request failures are printed rather than raised.
+    """
+    try:
+        url = f"{BASE_URL}/api/push"
+        payload = {
+            "name": model_name,
+            "insecure": insecure
+        }
+
+        # stream=True so each progress line is handled as it arrives
+        with requests.post(url, json=payload, stream=True) as response:
+            response.raise_for_status()
+
+            for line in response.iter_lines():
+                if not line:
+                    continue
+
+                # Each non-empty line is one JSON progress object
+                chunk = json.loads(line)
+
+                if callback:
+                    callback(chunk)
+                    continue
+
+                # Default handler: status first, then layer details when present
+                print(chunk.get('status', ''), end='', flush=True)
+                if 'digest' in chunk:
+                    # total/completed may be absent (the Go API type marks them
+                    # omitempty, so zero values are dropped); default to 0
+                    print(f" - Digest: {chunk['digest']}", end='', flush=True)
+                    print(f" - Total: {chunk.get('total', 0)}", end='', flush=True)
+                    print(f" - Completed: {chunk.get('completed', 0)}", end='\n', flush=True)
+                else:
+                    print()
+    except requests.exceptions.RequestException as e:
+        print(f"An error occurred: {e}")
+
+# List models that are available locally.
+def list():
+    """Return the "models" entry of the /api/tags response.
+
+    Returns:
+        The value under "models" in the decoded JSON (an empty list when the
+        key is missing), or None if the request fails.
+    """
+    try:
+        reply = requests.get(f"{BASE_URL}/api/tags")
+        reply.raise_for_status()
+        return reply.json().get('models', [])
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return None
+
+# Copy a model. Creates a model with another name from an existing model.
+def copy(source, destination):
+    """Copy a model by POSTing to /api/copy.
+
+    Args:
+        source: Sent as the "source" field of the request.
+        destination: Sent as the "destination" field of the request.
+
+    Returns:
+        "Copy successful" when the server answers with a success status,
+        otherwise None (the error is printed).
+    """
+    body = {"source": source, "destination": destination}
+    try:
+        requests.post(f"{BASE_URL}/api/copy", json=body).raise_for_status()
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return None
+    else:
+        return "Copy successful"
+
+# Delete a model and its data.
+def delete(model_name):
+    """Delete a model by sending a DELETE request to /api/delete.
+
+    Args:
+        model_name: Sent as the "name" field of the request.
+
+    Returns:
+        "Delete successful" when the server answers with a success status,
+        otherwise None (the error is printed).
+    """
+    try:
+        requests.delete(f"{BASE_URL}/api/delete", json={"name": model_name}).raise_for_status()
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return None
+    else:
+        return "Delete successful"
+
+# Show info about a model.
+def show(model_name):
+    """Fetch model information from /api/show.
+
+    Args:
+        model_name: Sent as the "name" field of the request.
+
+    Returns:
+        The decoded JSON response, or None if the request fails.
+    """
+    try:
+        reply = requests.post(f"{BASE_URL}/api/show", json={"name": model_name})
+        reply.raise_for_status()
+        return reply.json()
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return None
+
+def heartbeat():
+    """Check whether the server answers a HEAD request on its root URL.
+
+    Returns:
+        "Ollama is running" on a success status, otherwise
+        "Ollama is not running" (the error is printed).
+    """
+    try:
+        requests.head(f"{BASE_URL}/").raise_for_status()
+    except requests.exceptions.RequestException as err:
+        print(f"An error occurred: {err}")
+        return "Ollama is not running"
+    else:
+        return "Ollama is running"
+
+
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -1,43 +0,0 @@
-package api
-
-import "testing"
-
-func TestClientFromEnvironment(t *testing.T) {
-	type testCase struct {
-		value  string
-		expect string
-		err    error
-	}
-
-	testCases := map[string]*testCase{
-		"empty":                      {value: "", expect: "http://127.0.0.1:11434"},
-		"only address":               {value: "1.2.3.4", expect: "http://1.2.3.4:11434"},
-		"only port":                  {value: ":1234", expect: "http://:1234"},
-		"address and port":           {value: "1.2.3.4:1234", expect: "http://1.2.3.4:1234"},
-		"scheme http and address":    {value: "http://1.2.3.4", expect: "http://1.2.3.4:80"},
-		"scheme https and address":   {value: "https://1.2.3.4", expect: "https://1.2.3.4:443"},
-		"scheme, address, and port":  {value: "https://1.2.3.4:1234", expect: "https://1.2.3.4:1234"},
-		"hostname":                   {value: "example.com", expect: "http://example.com:11434"},
-		"hostname and port":          {value: "example.com:1234", expect: "http://example.com:1234"},
-		"scheme http and hostname":   {value: "http://example.com", expect: "http://example.com:80"},
-		"scheme https and hostname":  {value: "https://example.com", expect: "https://example.com:443"},
-		"scheme, hostname, and port": {value: "https://example.com:1234", expect: "https://example.com:1234"},
-		"trailing slash":             {value: "example.com/", expect: "http://example.com:11434"},
-		"trailing slash port":        {value: "example.com:1234/", expect: "http://example.com:1234"},
-	}
-
-	for k, v := range testCases {
-		t.Run(k, func(t *testing.T) {
-			t.Setenv("OLLAMA_HOST", v.value)
-
-			client, err := ClientFromEnvironment()
-			if err != v.err {
-				t.Fatalf("expected %s, got %s", v.err, err)
-			}
-
-			if client.base.String() != v.expect {
-				t.Fatalf("expected %s, got %s", v.expect, client.base.String())
-			}
-		})
-	}
-}
--- a/api/types.go
+++ b/api/types.go
@@ -3,10 +3,10 @@ package api
 import (
 	"encoding/json"
 	"fmt"
+	"log"
 	"math"
 	"os"
 	"reflect"
-	"strconv"
 	"strings"
 	"time"
 )
@@ -31,50 +31,101 @@ func (e StatusError) Error() string {
 	}
 }

-type ImageData []byte
-
 type GenerateRequest struct {
-	Model     string      `json:"model"`
-	Prompt    string      `json:"prompt"`
-	System    string      `json:"system"`
-	Template  string      `json:"template"`
-	Context   []int       `json:"context,omitempty"`
-	Stream    *bool       `json:"stream,omitempty"`
-	Raw       bool        `json:"raw,omitempty"`
-	Format    string      `json:"format"`
-	KeepAlive *Duration   `json:"keep_alive,omitempty"`
-	Images    []ImageData `json:"images,omitempty"`
+	Model    string `json:"model"`
+	Prompt   string `json:"prompt"`
+	System   string `json:"system"`
+	Template string `json:"template"`
+	Context  []int  `json:"context,omitempty"`
+	Stream   *bool  `json:"stream,omitempty"`

 	Options map[string]interface{} `json:"options"`
 }

-type ChatRequest struct {
-	Model     string    `json:"model"`
-	Messages  []Message `json:"messages"`
-	Stream    *bool     `json:"stream,omitempty"`
-	Format    string    `json:"format"`
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
+type EmbeddingRequest struct {
+	Model  string `json:"model"`
+	Prompt string `json:"prompt"`

 	Options map[string]interface{} `json:"options"`
 }

-type Message struct {
-	Role    string      `json:"role"` // one of ["system", "user", "assistant"]
-	Content string      `json:"content"`
-	Images  []ImageData `json:"images,omitempty"`
+type EmbeddingResponse struct {
+	Embedding []float64 `json:"embedding"`
 }

-type ChatResponse struct {
+type CreateRequest struct {
+	Name   string `json:"name"`
+	Path   string `json:"path"`
+	Stream *bool  `json:"stream,omitempty"`
+}
+
+type DeleteRequest struct {
+	Name string `json:"name"`
+}
+
+type ShowRequest struct {
+	Name string `json:"name"`
+}
+
+type ShowResponse struct {
+	License    string `json:"license,omitempty"`
+	Modelfile  string `json:"modelfile,omitempty"`
+	Parameters string `json:"parameters,omitempty"`
+	Template   string `json:"template,omitempty"`
+	System     string `json:"system,omitempty"`
+}
+
+type CopyRequest struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+}
+
+type PullRequest struct {
+	Name     string `json:"name"`
+	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
+}
+
+type ProgressResponse struct {
+	Status    string `json:"status"`
+	Digest    string `json:"digest,omitempty"`
+	Total     int64  `json:"total,omitempty"`
+	Completed int64  `json:"completed,omitempty"`
+}
+
+type PushRequest struct {
+	Name     string `json:"name"`
+	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
+}
+
+type ListResponse struct {
+	Models []ModelResponse `json:"models"`
+}
+
+type ModelResponse struct {
+	Name       string    `json:"name"`
+	ModifiedAt time.Time `json:"modified_at"`
+	Size       int64     `json:"size"`
+	Digest     string    `json:"digest"`
+}
+
+type TokenResponse struct {
+	Token string `json:"token"`
+}
+
+type GenerateResponse struct {
 	Model     string    `json:"model"`
 	CreatedAt time.Time `json:"created_at"`
-	Message   Message   `json:"message"`
+	Response  string    `json:"response"`

-	Done bool `json:"done"`
+	Done    bool  `json:"done"`
+	Context []int `json:"context,omitempty"`

-	Metrics
-}
-
-type Metrics struct {
 	TotalDuration      time.Duration `json:"total_duration,omitempty"`
 	LoadDuration       time.Duration `json:"load_duration,omitempty"`
 	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
@@ -83,13 +134,58 @@ type Metrics struct {
 	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
 }

-// Options specified in GenerateRequest, if you add a new option here add it to the API docs also
-type Options struct {
-	Runner
+func (r *GenerateResponse) Summary() {
+	if r.TotalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "total duration:       %v\n", r.TotalDuration)
+	}

-	// Predict options used at runtime
-	NumKeep          int      `json:"num_keep,omitempty"`
-	Seed             int      `json:"seed,omitempty"`
+	if r.LoadDuration > 0 {
+		fmt.Fprintf(os.Stderr, "load duration:        %v\n", r.LoadDuration)
+	}
+
+	if r.PromptEvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", r.PromptEvalCount)
+	}
+
+	if r.PromptEvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", r.PromptEvalDuration)
+		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(r.PromptEvalCount)/r.PromptEvalDuration.Seconds())
+	}
+
+	if r.EvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", r.EvalCount)
+	}
+
+	if r.EvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", r.EvalDuration)
+		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(r.EvalCount)/r.EvalDuration.Seconds())
+	}
+}
+
+type Options struct {
+	Seed int `json:"seed,omitempty"`
+
+	// Backend options
+	UseNUMA bool `json:"numa,omitempty"`
+
+	// Model options
+	NumCtx             int     `json:"num_ctx,omitempty"`
+	NumKeep            int     `json:"num_keep,omitempty"`
+	NumBatch           int     `json:"num_batch,omitempty"`
+	NumGQA             int     `json:"num_gqa,omitempty"`
+	NumGPU             int     `json:"num_gpu,omitempty"`
+	MainGPU            int     `json:"main_gpu,omitempty"`
+	LowVRAM            bool    `json:"low_vram,omitempty"`
+	F16KV              bool    `json:"f16_kv,omitempty"`
+	LogitsAll          bool    `json:"logits_all,omitempty"`
+	VocabOnly          bool    `json:"vocab_only,omitempty"`
+	UseMMap            bool    `json:"use_mmap,omitempty"`
+	UseMLock           bool    `json:"use_mlock,omitempty"`
+	EmbeddingOnly      bool    `json:"embedding_only,omitempty"`
+	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
+	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
+
+	// Predict options
 	NumPredict       int      `json:"num_predict,omitempty"`
 	TopK             int      `json:"top_k,omitempty"`
 	TopP             float32  `json:"top_p,omitempty"`
@@ -105,174 +201,8 @@ type Options struct {
 	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
 	PenalizeNewline  bool     `json:"penalize_newline,omitempty"`
 	Stop             []string `json:"stop,omitempty"`
-}

-// Runner options which must be set when the model is loaded into memory
-type Runner struct {
-	UseNUMA            bool    `json:"numa,omitempty"`
-	NumCtx             int     `json:"num_ctx,omitempty"`
-	NumBatch           int     `json:"num_batch,omitempty"`
-	NumGQA             int     `json:"num_gqa,omitempty"`
-	NumGPU             int     `json:"num_gpu,omitempty"`
-	MainGPU            int     `json:"main_gpu,omitempty"`
-	LowVRAM            bool    `json:"low_vram,omitempty"`
-	F16KV              bool    `json:"f16_kv,omitempty"`
-	LogitsAll          bool    `json:"logits_all,omitempty"`
-	VocabOnly          bool    `json:"vocab_only,omitempty"`
-	UseMMap            bool    `json:"use_mmap,omitempty"`
-	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
-	NumThread          int     `json:"num_thread,omitempty"`
-}
-
-type EmbeddingRequest struct {
-	Model     string    `json:"model"`
-	Prompt    string    `json:"prompt"`
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
-
-	Options map[string]interface{} `json:"options"`
-}
-
-type EmbeddingResponse struct {
-	Embedding []float64 `json:"embedding"`
-}
-
-type CreateRequest struct {
-	Model     string `json:"model"`
-	Path      string `json:"path"`
-	Modelfile string `json:"modelfile"`
-	Stream    *bool  `json:"stream,omitempty"`
-
-	// Name is deprecated, see Model
-	Name string `json:"name"`
-}
-
-type DeleteRequest struct {
-	Model string `json:"model"`
-
-	// Name is deprecated, see Model
-	Name string `json:"name"`
-}
-
-type ShowRequest struct {
-	Model    string `json:"model"`
-	System   string `json:"system"`
-	Template string `json:"template"`
-
-	Options map[string]interface{} `json:"options"`
-
-	// Name is deprecated, see Model
-	Name string `json:"name"`
-}
-
-type ShowResponse struct {
-	License    string       `json:"license,omitempty"`
-	Modelfile  string       `json:"modelfile,omitempty"`
-	Parameters string       `json:"parameters,omitempty"`
-	Template   string       `json:"template,omitempty"`
-	System     string       `json:"system,omitempty"`
-	Details    ModelDetails `json:"details,omitempty"`
-	Messages   []Message    `json:"messages,omitempty"`
-}
-
-type CopyRequest struct {
-	Source      string `json:"source"`
-	Destination string `json:"destination"`
-}
-
-type PullRequest struct {
-	Model    string `json:"model"`
-	Insecure bool   `json:"insecure,omitempty"`
-	Username string `json:"username"`
-	Password string `json:"password"`
-	Stream   *bool  `json:"stream,omitempty"`
-
-	// Name is deprecated, see Model
-	Name string `json:"name"`
-}
-
-type ProgressResponse struct {
-	Status    string `json:"status"`
-	Digest    string `json:"digest,omitempty"`
-	Total     int64  `json:"total,omitempty"`
-	Completed int64  `json:"completed,omitempty"`
-}
-
-type PushRequest struct {
-	Model    string `json:"model"`
-	Insecure bool   `json:"insecure,omitempty"`
-	Username string `json:"username"`
-	Password string `json:"password"`
-	Stream   *bool  `json:"stream,omitempty"`
-
-	// Name is deprecated, see Model
-	Name string `json:"name"`
-}
-
-type ListResponse struct {
-	Models []ModelResponse `json:"models"`
-}
-
-type ModelResponse struct {
-	Name       string       `json:"name"`
-	Model      string       `json:"model"`
-	ModifiedAt time.Time    `json:"modified_at"`
-	Size       int64        `json:"size"`
-	Digest     string       `json:"digest"`
-	Details    ModelDetails `json:"details,omitempty"`
-}
-
-type TokenResponse struct {
-	Token string `json:"token"`
-}
-
-type GenerateResponse struct {
-	Model     string    `json:"model"`
-	CreatedAt time.Time `json:"created_at"`
-	Response  string    `json:"response"`
-
-	Done    bool  `json:"done"`
-	Context []int `json:"context,omitempty"`
-
-	Metrics
-}
-
-type ModelDetails struct {
-	ParentModel       string   `json:"parent_model"`
-	Format            string   `json:"format"`
-	Family            string   `json:"family"`
-	Families          []string `json:"families"`
-	ParameterSize     string   `json:"parameter_size"`
-	QuantizationLevel string   `json:"quantization_level"`
-}
-
-func (m *Metrics) Summary() {
-	if m.TotalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "total duration:       %v\n", m.TotalDuration)
-	}
-
-	if m.LoadDuration > 0 {
-		fmt.Fprintf(os.Stderr, "load duration:        %v\n", m.LoadDuration)
-	}
-
-	if m.PromptEvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", m.PromptEvalCount)
-	}
-
-	if m.PromptEvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
-		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
-	}
-
-	if m.EvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", m.EvalCount)
-	}
-
-	if m.EvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", m.EvalDuration)
-		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
-	}
+	NumThread int `json:"num_thread,omitempty"`
 }

 var ErrInvalidOpts = fmt.Errorf("invalid options")
@@ -308,39 +238,44 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 						// when JSON unmarshals numbers, it uses float64, not int
 						field.SetInt(int64(t))
 					default:
-						return fmt.Errorf("option %q must be of type integer", key)
+						log.Printf("could not convert model parameter %v of type %T to int, skipped", key, val)
 					}
 				case reflect.Bool:
 					val, ok := val.(bool)
 					if !ok {
-						return fmt.Errorf("option %q must be of type boolean", key)
+						log.Printf("could not convert model parameter %v of type %T to bool, skipped", key, val)
+						continue
 					}
 					field.SetBool(val)
 				case reflect.Float32:
 					// JSON unmarshals to float64
 					val, ok := val.(float64)
 					if !ok {
-						return fmt.Errorf("option %q must be of type float32", key)
+						log.Printf("could not convert model parameter %v of type %T to float32, skipped", key, val)
+						continue
 					}
 					field.SetFloat(val)
 				case reflect.String:
 					val, ok := val.(string)
 					if !ok {
-						return fmt.Errorf("option %q must be of type string", key)
+						log.Printf("could not convert model parameter %v of type %T to string, skipped", key, val)
+						continue
 					}
 					field.SetString(val)
 				case reflect.Slice:
 					// JSON unmarshals to []interface{}, not []string
 					val, ok := val.([]interface{})
 					if !ok {
-						return fmt.Errorf("option %q must be of type array", key)
+						log.Printf("could not convert model parameter %v of type %T to slice, skipped", key, val)
+						continue
 					}
 					// convert []interface{} to []string
 					slice := make([]string, len(val))
 					for i, item := range val {
 						str, ok := item.(string)
 						if !ok {
-							return fmt.Errorf("option %q must be of an array of strings", key)
+							log.Printf("could not convert model parameter %v of type %T to slice of strings, skipped", key, item)
+							continue
 						}
 						slice[i] = str
 					}
@@ -364,7 +299,7 @@ func DefaultOptions() Options {
 	return Options{
 		// options set on request to runner
 		NumPredict:       -1,
-		NumKeep:          0,
+		NumKeep:          -1,
 		Temperature:      0.8,
 		TopK:             40,
 		TopP:             0.9,
@@ -380,21 +315,20 @@ func DefaultOptions() Options {
 		PenalizeNewline:  true,
 		Seed:             -1,

-		Runner: Runner{
-			// options set when the model is loaded
-			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
-			NumBatch:           512,
-			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
-			NumGQA:             1,
-			NumThread:          0, // let the runtime decide
-			LowVRAM:            false,
-			F16KV:              true,
-			UseMLock:           false,
-			UseMMap:            true,
-			UseNUMA:            false,
-		},
+		// options set when the model is loaded
+		NumCtx:             2048,
+		RopeFrequencyBase:  10000.0,
+		RopeFrequencyScale: 1.0,
+		NumBatch:           512,
+		NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
+		NumGQA:             1,
+		NumThread:          0, // let the runtime decide
+		LowVRAM:            false,
+		F16KV:              true,
+		UseMLock:           false,
+		UseMMap:            true,
+		UseNUMA:            false,
+		EmbeddingOnly:      true,
 	}
 }

@@ -413,79 +347,16 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
 	switch t := v.(type) {
 	case float64:
 		if t < 0 {
-			d.Duration = time.Duration(math.MaxInt64)
-		} else {
-			d.Duration = time.Duration(t * float64(time.Second))
+			t = math.MaxFloat64
 		}
+
+		d.Duration = time.Duration(t)
 	case string:
 		d.Duration, err = time.ParseDuration(t)
 		if err != nil {
 			return err
 		}
-		if d.Duration < 0 {
-			d.Duration = time.Duration(math.MaxInt64)
-		}
 	}

 	return nil
 }
-
-// FormatParams converts specified parameter options to their correct types
-func FormatParams(params map[string][]string) (map[string]interface{}, error) {
-	opts := Options{}
-	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
-	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct
-
-	// build map of json struct tags to their types
-	jsonOpts := make(map[string]reflect.StructField)
-	for _, field := range reflect.VisibleFields(typeOpts) {
-		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
-		if jsonTag != "" {
-			jsonOpts[jsonTag] = field
-		}
-	}
-
-	out := make(map[string]interface{})
-	// iterate params and set values based on json struct tags
-	for key, vals := range params {
-		if opt, ok := jsonOpts[key]; !ok {
-			return nil, fmt.Errorf("unknown parameter '%s'", key)
-		} else {
-			field := valueOpts.FieldByName(opt.Name)
-			if field.IsValid() && field.CanSet() {
-				switch field.Kind() {
-				case reflect.Float32:
-					floatVal, err := strconv.ParseFloat(vals[0], 32)
-					if err != nil {
-						return nil, fmt.Errorf("invalid float value %s", vals)
-					}
-
-					out[key] = float32(floatVal)
-				case reflect.Int:
-					intVal, err := strconv.ParseInt(vals[0], 10, 64)
-					if err != nil {
-						return nil, fmt.Errorf("invalid int value %s", vals)
-					}
-
-					out[key] = intVal
-				case reflect.Bool:
-					boolVal, err := strconv.ParseBool(vals[0])
-					if err != nil {
-						return nil, fmt.Errorf("invalid bool value %s", vals)
-					}
-
-					out[key] = boolVal
-				case reflect.String:
-					out[key] = vals[0]
-				case reflect.Slice:
-					// TODO: only string slices are supported right now
-					out[key] = vals
-				default:
-					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
-				}
-			}
-		}
-	}
-
-	return out, nil
-}
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -1,50 +0,0 @@
-package api
-
-import (
-	"encoding/json"
-	"math"
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestKeepAliveParsingFromJSON(t *testing.T) {
-	tests := []struct {
-		name string
-		req  string
-		exp  *Duration
-	}{
-		{
-			name: "Positive Integer",
-			req:  `{ "keep_alive": 42 }`,
-			exp:  &Duration{42 * time.Second},
-		},
-		{
-			name: "Positive Integer String",
-			req:  `{ "keep_alive": "42m" }`,
-			exp:  &Duration{42 * time.Minute},
-		},
-		{
-			name: "Negative Integer",
-			req:  `{ "keep_alive": -1 }`,
-			exp:  &Duration{math.MaxInt64},
-		},
-		{
-			name: "Negative Integer String",
-			req:  `{ "keep_alive": "-1m" }`,
-			exp:  &Duration{math.MaxInt64},
-		},
-	}
-
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			var dec ChatRequest
-			err := json.Unmarshal([]byte(test.req), &dec)
-			require.NoError(t, err)
-
-			assert.Equal(t, test.exp, dec.KeepAlive)
-		})
-	}
-}
--- a/macapp/.eslintrc.json
+++ b/macapp/.eslintrc.json
--- a/app/.gitignore
+++ b/app/.gitignore
@@ -1 +1,92 @@
-ollama.syso
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+.DS_Store
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# TypeScript v1 declaration files
+typings/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variables file
+.env
+.env.test
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+
+# next.js build output
+.next
+
+# nuxt.js build output
+.nuxt
+
+# vuepress build output
+.vuepress/dist
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# Webpack
+.webpack/
+
+# Vite
+.vite/
+
+# Electron-Forge
+out/
--- a/app/README.md
+++ b/app/README.md
@@ -1,22 +1,21 @@
-# Ollama App
+# Desktop

-## Linux
+This app builds upon Ollama to provide a desktop experience for running models.

-TODO
+## Developing

-## MacOS
-
-TODO
-
-## Windows
-
-If you want to build the installer, youll need to install
- https://jrsoftware.org/isinfo.php
-
-
-In the top directory of this repo, run the following powershell script
-to build the ollama CLI, ollama app, and ollama installer.
+First, build the `ollama` binary:

 ```
-powershell -ExecutionPolicy Bypass -File .\scripts\build_windows.ps1
+cd ..
+go build .
 ```
+
+Then run the desktop app with `npm start`:
+
+```
+cd app
+npm install
+npm start
+```
+
--- a/app/assets/app.ico
+++ b/app/assets/app.ico
--- a/app/assets/assets.go
+++ b/app/assets/assets.go
@@ -1,17 +0,0 @@
-package assets
-
-import (
-	"embed"
-	"io/fs"
-)
-
-//go:embed *.ico
-var icons embed.FS
-
-func ListIcons() ([]string, error) {
-	return fs.Glob(icons, "*")
-}
-
-func GetIcon(filename string) ([]byte, error) {
-	return icons.ReadFile(filename)
-}
--- a/macapp/assets/icon.icns
+++ b/macapp/assets/icon.icns
--- a/macapp/assets/iconDarkTemplate.png
+++ b/macapp/assets/iconDarkTemplate.png
--- a/macapp/assets/iconDarkTemplate@2x.png
+++ b/macapp/assets/iconDarkTemplate@2x.png
--- a/macapp/assets/iconDarkUpdateTemplate.png
+++ b/macapp/assets/iconDarkUpdateTemplate.png
--- a/macapp/assets/iconDarkUpdateTemplate@2x.png
+++ b/macapp/assets/iconDarkUpdateTemplate@2x.png
--- a/macapp/assets/iconTemplate.png
+++ b/macapp/assets/iconTemplate.png
--- a/macapp/assets/iconTemplate@2x.png
+++ b/macapp/assets/iconTemplate@2x.png
--- a/macapp/assets/iconUpdateTemplate.png
+++ b/macapp/assets/iconUpdateTemplate.png
--- a/macapp/assets/iconUpdateTemplate@2x.png
+++ b/macapp/assets/iconUpdateTemplate@2x.png
--- a/app/assets/setup.bmp
+++ b/app/assets/setup.bmp
--- a/app/assets/tray.ico
+++ b/app/assets/tray.ico
--- a/app/assets/tray_upgrade.ico
+++ b/app/assets/tray_upgrade.ico
--- a/macapp/forge.config.ts
+++ b/macapp/forge.config.ts
@@ -47,6 +47,16 @@ const config: ForgeConfig = {
  },
  rebuildConfig: {},
  makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
+  publishers: [
+    new PublisherGithub({
+      repository: {
+        name: 'ollama',
+        owner: 'jmorganca',
+      },
+      draft: false,
+      prerelease: true,
+    }),
+  ],
  hooks: {
    readPackageJson: async (_, packageJson) => {
      return { ...packageJson, version: process.env.VERSION || packageJson.version }
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@@ -1,9 +0,0 @@
-//go:build !windows
-
-package lifecycle
-
-import "fmt"
-
-func GetStarted() error {
-	return fmt.Errorf("GetStarted not implemented")
-}
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@@ -1,44 +0,0 @@
-package lifecycle
-
-import (
-	"fmt"
-	"log/slog"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"syscall"
-)
-
-func GetStarted() error {
-	const CREATE_NEW_CONSOLE = 0x00000010
-	var err error
-	bannerScript := filepath.Join(AppDir, "ollama_welcome.ps1")
-	args := []string{
-		// TODO once we're signed, the execution policy bypass should be removed
-		"powershell", "-noexit", "-ExecutionPolicy", "Bypass", "-nologo", "-file", bannerScript,
-	}
-	args[0], err = exec.LookPath(args[0])
-	if err != nil {
-		return err
-	}
-
-	// Make sure the script actually exists
-	_, err = os.Stat(bannerScript)
-	if err != nil {
-		return fmt.Errorf("getting started banner script error %s", err)
-	}
-
-	slog.Info(fmt.Sprintf("opening getting started terminal with %v", args))
-	attrs := &os.ProcAttr{
-		Files: []*os.File{os.Stdin, os.Stdout, os.Stderr},
-		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
-	}
-	proc, err := os.StartProcess(args[0], args, attrs)
-
-	if err != nil {
-		return fmt.Errorf("unable to start getting started shell %w", err)
-	}
-
-	slog.Debug(fmt.Sprintf("getting started terminal PID: %d", proc.Pid))
-	return proc.Release()
-}
--- a/app/lifecycle/lifecycle.go
+++ b/app/lifecycle/lifecycle.go
@@ -1,92 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"fmt"
-	"log"
-	"log/slog"
-	"os"
-	"os/signal"
-	"syscall"
-
-	"github.com/ollama/ollama/app/store"
-	"github.com/ollama/ollama/app/tray"
-)
-
-func Run() {
-	InitLogging()
-
-	ctx, cancel := context.WithCancel(context.Background())
-	var done chan int
-
-	t, err := tray.NewTray()
-	if err != nil {
-		log.Fatalf("Failed to start: %s", err)
-	}
-	callbacks := t.GetCallbacks()
-
-	signals := make(chan os.Signal, 1)
-	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
-
-	go func() {
-		slog.Debug("starting callback loop")
-		for {
-			select {
-			case <-callbacks.Quit:
-				slog.Debug("quit called")
-				t.Quit()
-			case <-signals:
-				slog.Debug("shutting down due to signal")
-				t.Quit()
-			case <-callbacks.Update:
-				err := DoUpgrade(cancel, done)
-				if err != nil {
-					slog.Warn(fmt.Sprintf("upgrade attempt failed: %s", err))
-				}
-			case <-callbacks.ShowLogs:
-				ShowLogs()
-			case <-callbacks.DoFirstUse:
-				err := GetStarted()
-				if err != nil {
-					slog.Warn(fmt.Sprintf("Failed to launch getting started shell: %s", err))
-				}
-			}
-		}
-	}()
-
-	// Are we first use?
-	if !store.GetFirstTimeRun() {
-		slog.Debug("First time run")
-		err = t.DisplayFirstUseNotification()
-		if err != nil {
-			slog.Debug(fmt.Sprintf("XXX failed to display first use notification %v", err))
-		}
-		store.SetFirstTimeRun(true)
-	} else {
-		slog.Debug("Not first time, skipping first run notification")
-	}
-
-	if IsServerRunning(ctx) {
-		slog.Info("Detected another instance of ollama running, exiting")
-		os.Exit(1)
-	} else {
-		done, err = SpawnServer(ctx, CLIName)
-		if err != nil {
-			// TODO - should we retry in a backoff loop?
-			// TODO - should we pop up a warning and maybe add a menu item to view application logs?
-			slog.Error(fmt.Sprintf("Failed to spawn ollama server %s", err))
-			done = make(chan int, 1)
-			done <- 1
-		}
-	}
-
-	StartBackgroundUpdaterChecker(ctx, t.UpdateAvailable)
-
-	t.Run()
-	cancel()
-	slog.Info("Waiting for ollama server to shutdown...")
-	if done != nil {
-		<-done
-	}
-	slog.Info("Ollama app exiting")
-}
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@@ -1,46 +0,0 @@
-package lifecycle
-
-import (
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-)
-
-func InitLogging() {
-	level := slog.LevelInfo
-
-	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
-		level = slog.LevelDebug
-	}
-
-	var logFile *os.File
-	var err error
-	// Detect if we're a GUI app on windows, and if not, send logs to console
-	if os.Stderr.Fd() != 0 {
-		// Console app detected
-		logFile = os.Stderr
-		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
-	} else {
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
-		if err != nil {
-			slog.Error(fmt.Sprintf("failed to create server log %v", err))
-			return
-		}
-	}
-	handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-
-	slog.SetDefault(slog.New(handler))
-
-	slog.Info("ollama app started")
-}
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@@ -1,9 +0,0 @@
-//go:build !windows
-
-package lifecycle
-
-import "log/slog"
-
-func ShowLogs() {
-	slog.Warn("ShowLogs not yet implemented")
-}
--- a/app/lifecycle/logging_windows.go
+++ b/app/lifecycle/logging_windows.go
@@ -1,19 +0,0 @@
-package lifecycle
-
-import (
-	"fmt"
-	"log/slog"
-	"os/exec"
-	"syscall"
-)
-
-func ShowLogs() {
-	cmd_path := "c:\\Windows\\system32\\cmd.exe"
-	slog.Debug(fmt.Sprintf("viewing logs with start %s", AppDataDir))
-	cmd := exec.Command(cmd_path, "/c", "start", AppDataDir)
-	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: false, CreationFlags: 0x08000000}
-	err := cmd.Start()
-	if err != nil {
-		slog.Error(fmt.Sprintf("Failed to open log dir: %s", err))
-	}
-}
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@@ -1,79 +0,0 @@
-package lifecycle
-
-import (
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"strings"
-)
-
-var (
-	AppName    = "ollama app"
-	CLIName    = "ollama"
-	AppDir     = "/opt/Ollama"
-	AppDataDir = "/opt/Ollama"
-	// TODO - should there be a distinct log dir?
-	UpdateStageDir = "/tmp"
-	AppLogFile     = "/tmp/ollama_app.log"
-	ServerLogFile  = "/tmp/ollama.log"
-	UpgradeLogFile = "/tmp/ollama_update.log"
-	Installer      = "OllamaSetup.exe"
-)
-
-func init() {
-	if runtime.GOOS == "windows" {
-		AppName += ".exe"
-		CLIName += ".exe"
-		// Logs, configs, downloads go to LOCALAPPDATA
-		localAppData := os.Getenv("LOCALAPPDATA")
-		AppDataDir = filepath.Join(localAppData, "Ollama")
-		UpdateStageDir = filepath.Join(AppDataDir, "updates")
-		AppLogFile = filepath.Join(AppDataDir, "app.log")
-		ServerLogFile = filepath.Join(AppDataDir, "server.log")
-		UpgradeLogFile = filepath.Join(AppDataDir, "upgrade.log")
-
-		// Executables are stored in APPDATA
-		AppDir = filepath.Join(localAppData, "Programs", "Ollama")
-
-		// Make sure we have PATH set correctly for any spawned children
-		paths := strings.Split(os.Getenv("PATH"), ";")
-		// Start with whatever we find in the PATH/LD_LIBRARY_PATH
-		found := false
-		for _, path := range paths {
-			d, err := filepath.Abs(path)
-			if err != nil {
-				continue
-			}
-			if strings.EqualFold(AppDir, d) {
-				found = true
-			}
-		}
-		if !found {
-			paths = append(paths, AppDir)
-
-			pathVal := strings.Join(paths, ";")
-			slog.Debug("setting PATH=" + pathVal)
-			err := os.Setenv("PATH", pathVal)
-			if err != nil {
-				slog.Error(fmt.Sprintf("failed to update PATH: %s", err))
-			}
-		}
-
-		// Make sure our logging dir exists
-		_, err := os.Stat(AppDataDir)
-		if errors.Is(err, os.ErrNotExist) {
-			if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
-				slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
-			}
-		}
-
-	} else if runtime.GOOS == "darwin" {
-		// TODO
-		AppName += ".app"
-		// } else if runtime.GOOS == "linux" {
-		// TODO
-	}
-}
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -1,139 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"io"
-	"log/slog"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-func getCLIFullPath(command string) string {
-	cmdPath := ""
-	appExe, err := os.Executable()
-	if err == nil {
-		cmdPath = filepath.Join(filepath.Dir(appExe), command)
-		_, err := os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-	cmdPath, err = exec.LookPath(command)
-	if err == nil {
-		_, err := os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-	pwd, err := os.Getwd()
-	if err == nil {
-		cmdPath = filepath.Join(pwd, command)
-		_, err = os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
-	}
-
-	return command
-}
-
-func SpawnServer(ctx context.Context, command string) (chan int, error) {
-	done := make(chan int)
-
-	logDir := filepath.Dir(ServerLogFile)
-	_, err := os.Stat(logDir)
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(logDir, 0o755); err != nil {
-			return done, fmt.Errorf("create ollama server log dir %s: %v", logDir, err)
-		}
-	}
-
-	cmd := getCmd(ctx, getCLIFullPath(command))
-	// send stdout and stderr to a file
-	stdout, err := cmd.StdoutPipe()
-	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stdout pipe %s", err)
-	}
-	stderr, err := cmd.StderrPipe()
-	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stderr pipe %s", err)
-	}
-	stdin, err := cmd.StdinPipe()
-	if err != nil {
-		return done, fmt.Errorf("failed to spawn server stdin pipe %s", err)
-	}
-
-	// TODO - rotation
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
-	if err != nil {
-		return done, fmt.Errorf("failed to create server log %w", err)
-	}
-	go func() {
-		defer logFile.Close()
-		io.Copy(logFile, stdout) //nolint:errcheck
-	}()
-	go func() {
-		defer logFile.Close()
-		io.Copy(logFile, stderr) //nolint:errcheck
-	}()
-
-	// run the command and wait for it to finish
-	if err := cmd.Start(); err != nil {
-		return done, fmt.Errorf("failed to start server %w", err)
-	}
-	if cmd.Process != nil {
-		slog.Info(fmt.Sprintf("started ollama server with pid %d", cmd.Process.Pid))
-	}
-	slog.Info(fmt.Sprintf("ollama server logs %s", ServerLogFile))
-
-	go func() {
-		// Keep the server running unless we're shuttind down the app
-		crashCount := 0
-		for {
-			cmd.Wait() //nolint:errcheck
-			stdin.Close()
-			var code int
-			if cmd.ProcessState != nil {
-				code = cmd.ProcessState.ExitCode()
-			}
-
-			select {
-			case <-ctx.Done():
-				slog.Debug(fmt.Sprintf("server shutdown with exit code %d", code))
-				done <- code
-				return
-			default:
-				crashCount++
-				slog.Warn(fmt.Sprintf("server crash %d - exit code %d - respawning", crashCount, code))
-				time.Sleep(500 * time.Millisecond)
-				if err := cmd.Start(); err != nil {
-					slog.Error(fmt.Sprintf("failed to restart server %s", err))
-					// Keep trying, but back off if we keep failing
-					time.Sleep(time.Duration(crashCount) * time.Second)
-				}
-			}
-		}
-	}()
-	return done, nil
-}
-
-func IsServerRunning(ctx context.Context) bool {
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		slog.Info("unable to connect to server")
-		return false
-	}
-	err = client.Heartbeat(ctx)
-	if err != nil {
-		slog.Debug(fmt.Sprintf("heartbeat from server: %s", err))
-		slog.Info("unable to connect to server")
-		return false
-	}
-	return true
-}
--- a/app/lifecycle/server_unix.go
+++ b/app/lifecycle/server_unix.go
@@ -1,12 +0,0 @@
-//go:build !windows
-
-package lifecycle
-
-import (
-	"context"
-	"os/exec"
-)
-
-func getCmd(ctx context.Context, cmd string) *exec.Cmd {
-	return exec.CommandContext(ctx, cmd, "serve")
-}
--- a/app/lifecycle/server_windows.go
+++ b/app/lifecycle/server_windows.go
@@ -1,13 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"os/exec"
-	"syscall"
-)
-
-func getCmd(ctx context.Context, exePath string) *exec.Cmd {
-	cmd := exec.CommandContext(ctx, exePath, "serve")
-	cmd.SysProcAttr = &syscall.SysProcAttr{HideWindow: true, CreationFlags: 0x08000000}
-	return cmd
-}
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -1,228 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"crypto/rand"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"log/slog"
-	"mime"
-	"net/http"
-	"net/url"
-	"os"
-	"path"
-	"path/filepath"
-	"runtime"
-	"strings"
-	"time"
-
-	"github.com/ollama/ollama/auth"
-	"github.com/ollama/ollama/version"
-)
-
-var (
-	UpdateCheckURLBase  = "https://ollama.com/api/update"
-	UpdateDownloaded    = false
-	UpdateCheckInterval = 60 * 60 * time.Second
-)
-
-// TODO - maybe move up to the API package?
-type UpdateResponse struct {
-	UpdateURL     string `json:"url"`
-	UpdateVersion string `json:"version"`
-}
-
-func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
-	var updateResp UpdateResponse
-
-	requestURL, err := url.Parse(UpdateCheckURLBase)
-	if err != nil {
-		return false, updateResp
-	}
-
-	query := requestURL.Query()
-	query.Add("os", runtime.GOOS)
-	query.Add("arch", runtime.GOARCH)
-	query.Add("version", version.Version)
-	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
-
-	nonce, err := auth.NewNonce(rand.Reader, 16)
-	if err != nil {
-		return false, updateResp
-	}
-
-	query.Add("nonce", nonce)
-	requestURL.RawQuery = query.Encode()
-
-	data := []byte(fmt.Sprintf("%s,%s", http.MethodGet, requestURL.RequestURI()))
-	signature, err := auth.Sign(ctx, data)
-	if err != nil {
-		return false, updateResp
-	}
-
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL.String(), nil)
-	if err != nil {
-		slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
-		return false, updateResp
-	}
-	req.Header.Set("Authorization", signature)
-	req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
-
-	slog.Debug("checking for available update", "requestURL", requestURL)
-	resp, err := http.DefaultClient.Do(req)
-	if err != nil {
-		slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
-		return false, updateResp
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode == 204 {
-		slog.Debug("check update response 204 (current version is up to date)")
-		return false, updateResp
-	}
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
-	}
-
-	if resp.StatusCode != 200 {
-		slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
-		return false, updateResp
-	}
-	err = json.Unmarshal(body, &updateResp)
-	if err != nil {
-		slog.Warn(fmt.Sprintf("malformed response checking for update: %s", err))
-		return false, updateResp
-	}
-	// Extract the version string from the URL in the github release artifact path
-	updateResp.UpdateVersion = path.Base(path.Dir(updateResp.UpdateURL))
-
-	slog.Info("New update available at " + updateResp.UpdateURL)
-	return true, updateResp
-}
-
-func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
-	// Do a head first to check etag info
-	req, err := http.NewRequestWithContext(ctx, http.MethodHead, updateResp.UpdateURL, nil)
-	if err != nil {
-		return err
-	}
-
-	resp, err := http.DefaultClient.Do(req)
-	if err != nil {
-		return fmt.Errorf("error checking update: %w", err)
-	}
-	if resp.StatusCode != 200 {
-		return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
-	}
-	resp.Body.Close()
-	etag := strings.Trim(resp.Header.Get("etag"), "\"")
-	if etag == "" {
-		slog.Debug("no etag detected, falling back to filename based dedup")
-		etag = "_"
-	}
-	filename := Installer
-	_, params, err := mime.ParseMediaType(resp.Header.Get("content-disposition"))
-	if err == nil {
-		filename = params["filename"]
-	}
-
-	stageFilename := filepath.Join(UpdateStageDir, etag, filename)
-
-	// Check to see if we already have it downloaded
-	_, err = os.Stat(stageFilename)
-	if err == nil {
-		slog.Info("update already downloaded")
-		return nil
-	}
-
-	cleanupOldDownloads()
-
-	req.Method = http.MethodGet
-	resp, err = http.DefaultClient.Do(req)
-	if err != nil {
-		return fmt.Errorf("error checking update: %w", err)
-	}
-	defer resp.Body.Close()
-	etag = strings.Trim(resp.Header.Get("etag"), "\"")
-	if etag == "" {
-		slog.Debug("no etag detected, falling back to filename based dedup") // TODO probably can get rid of this redundant log
-		etag = "_"
-	}
-
-	stageFilename = filepath.Join(UpdateStageDir, etag, filename)
-
-	_, err = os.Stat(filepath.Dir(stageFilename))
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(filepath.Dir(stageFilename), 0o755); err != nil {
-			return fmt.Errorf("create ollama dir %s: %v", filepath.Dir(stageFilename), err)
-		}
-	}
-
-	payload, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return fmt.Errorf("failed to read body response: %w", err)
-	}
-	fp, err := os.OpenFile(stageFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-	if err != nil {
-		return fmt.Errorf("write payload %s: %w", stageFilename, err)
-	}
-	defer fp.Close()
-	if n, err := fp.Write(payload); err != nil || n != len(payload) {
-		return fmt.Errorf("write payload %s: %d vs %d -- %w", stageFilename, n, len(payload), err)
-	}
-	slog.Info("new update downloaded " + stageFilename)
-
-	UpdateDownloaded = true
-	return nil
-}
-
-func cleanupOldDownloads() {
-	files, err := os.ReadDir(UpdateStageDir)
-	if err != nil && errors.Is(err, os.ErrNotExist) {
-		// Expected behavior on first run
-		return
-	} else if err != nil {
-		slog.Warn(fmt.Sprintf("failed to list stage dir: %s", err))
-		return
-	}
-	for _, file := range files {
-		fullname := filepath.Join(UpdateStageDir, file.Name())
-		slog.Debug("cleaning up old download: " + fullname)
-		err = os.RemoveAll(fullname)
-		if err != nil {
-			slog.Warn(fmt.Sprintf("failed to cleanup stale update download %s", err))
-		}
-	}
-}
-
-func StartBackgroundUpdaterChecker(ctx context.Context, cb func(string) error) {
-	go func() {
-		// Don't blast an update message immediately after startup
-		// time.Sleep(30 * time.Second)
-		time.Sleep(3 * time.Second)
-
-		for {
-			available, resp := IsNewReleaseAvailable(ctx)
-			if available {
-				err := DownloadNewRelease(ctx, resp)
-				if err != nil {
-					slog.Error(fmt.Sprintf("failed to download new release: %s", err))
-				}
-				err = cb(resp.UpdateVersion)
-				if err != nil {
-					slog.Warn(fmt.Sprintf("failed to register update available with tray: %s", err))
-				}
-			}
-			select {
-			case <-ctx.Done():
-				slog.Debug("stopping background update checker")
-				return
-			default:
-				time.Sleep(UpdateCheckInterval)
-			}
-		}
-	}()
-}
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@@ -1,12 +0,0 @@
-//go:build !windows
-
-package lifecycle
-
-import (
-	"context"
-	"fmt"
-)
-
-func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return fmt.Errorf("DoUpgrade not yet implemented")
-}
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@@ -1,80 +0,0 @@
-package lifecycle
-
-import (
-	"context"
-	"fmt"
-	"log/slog"
-	"os"
-	"os/exec"
-	"path/filepath"
-)
-
-func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	files, err := filepath.Glob(filepath.Join(UpdateStageDir, "*", "*.exe")) // TODO generalize for multiplatform
-	if err != nil {
-		return fmt.Errorf("failed to lookup downloads: %s", err)
-	}
-	if len(files) == 0 {
-		return fmt.Errorf("no update downloads found")
-	} else if len(files) > 1 {
-		// Shouldn't happen
-		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
-	}
-	installerExe := files[0]
-
-	slog.Info("starting upgrade with " + installerExe)
-	slog.Info("upgrade log file " + UpgradeLogFile)
-
-	// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
-	installArgs := []string{
-		"/CLOSEAPPLICATIONS",                    // Quit the tray app if it's still running
-		"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
-		"/FORCECLOSEAPPLICATIONS",               // Force close the tray app - might be needed
-	}
-	// When we're not in debug mode, make the upgrade as quiet as possible (no GUI, no prompts)
-	// TODO - temporarily disable since we're pinning in debug mode for the preview
-	// if debug := os.Getenv("OLLAMA_DEBUG"); debug == "" {
-	installArgs = append(installArgs,
-		"/SP", // Skip the "This will install... Do you wish to continue" prompt
-		"/SUPPRESSMSGBOXES",
-		"/SILENT",
-		"/VERYSILENT",
-	)
-	// }
-
-	// Safeguard in case we have requests in flight that need to drain...
-	slog.Info("Waiting for server to shutdown")
-	cancel()
-	if done != nil {
-		<-done
-	} else {
-		// Shouldn't happen
-		slog.Warn("done chan was nil, not actually waiting")
-	}
-
-	slog.Debug(fmt.Sprintf("starting installer: %s %v", installerExe, installArgs))
-	os.Chdir(filepath.Dir(UpgradeLogFile)) //nolint:errcheck
-	cmd := exec.Command(installerExe, installArgs...)
-
-	if err := cmd.Start(); err != nil {
-		return fmt.Errorf("unable to start ollama app %w", err)
-	}
-
-	if cmd.Process != nil {
-		err = cmd.Process.Release()
-		if err != nil {
-			slog.Error(fmt.Sprintf("failed to release server process: %s", err))
-		}
-	} else {
-		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return fmt.Errorf("installer process did not start")
-	}
-
-	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
-
-	slog.Info("Installer started in background, exiting")
-
-	os.Exit(0)
-	// Not reached
-	return nil
-}
--- a/app/main.go
+++ b/app/main.go
@@ -1,12 +0,0 @@
-package main
-
-// Compile with the following to get rid of the cmd pop up on windows
-// go build -ldflags="-H windowsgui" .
-
-import (
-	"github.com/ollama/ollama/app/lifecycle"
-)
-
-func main() {
-	lifecycle.Run()
-}
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -1,159 +0,0 @@
-; Inno Setup Installer for Ollama
-;
-; To build the installer use the build script invoked from the top of the source tree
-; 
-; powershell -ExecutionPolicy Bypass -File .\scripts\build_windows.ps
-
-
-#define MyAppName "Ollama"
-#if GetEnv("PKG_VERSION") != ""
-  #define MyAppVersion GetEnv("PKG_VERSION")
-#else
-  #define MyAppVersion "0.0.0"
-#endif
-#define MyAppPublisher "Ollama"
-#define MyAppURL "https://ollama.com/"
-#define MyAppExeName "ollama app.exe"
-#define MyIcon ".\assets\app.ico"
-
-[Setup]
-; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications.
-; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
-AppId={{44E83376-CE68-45EB-8FC1-393500EB558C}
-AppName={#MyAppName}
-AppVersion={#MyAppVersion}
-VersionInfoVersion={#MyAppVersion}
-;AppVerName={#MyAppName} {#MyAppVersion}
-AppPublisher={#MyAppPublisher}
-AppPublisherURL={#MyAppURL}
-AppSupportURL={#MyAppURL}
-AppUpdatesURL={#MyAppURL}
-ArchitecturesAllowed=x64 arm64
-ArchitecturesInstallIn64BitMode=x64 arm64
-DefaultDirName={localappdata}\Programs\{#MyAppName}
-DefaultGroupName={#MyAppName}
-DisableProgramGroupPage=yes
-PrivilegesRequired=lowest
-OutputBaseFilename="OllamaSetup"
-SetupIconFile={#MyIcon}
-UninstallDisplayIcon={uninstallexe}
-Compression=lzma2
-SolidCompression=no
-WizardStyle=modern
-ChangesEnvironment=yes
-OutputDir=..\dist\
-
-; Disable logging once everything's battle tested
-; Filename will be %TEMP%\Setup Log*.txt
-SetupLogging=yes
-CloseApplications=yes
-RestartApplications=no
-
-; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
-WizardSmallImageFile=.\assets\setup.bmp
-
-; TODO verifty actual min windows version...
-; OG Win 10
-MinVersion=10.0.10240
-
-; First release that supports WinRT UI Composition for win32 apps
-; MinVersion=10.0.17134
-; First release with XAML Islands - possible UI path forward
-; MinVersion=10.0.18362
-
-; quiet...
-DisableDirPage=yes
-DisableFinishedPage=yes
-DisableReadyMemo=yes
-DisableReadyPage=yes
-DisableStartupPrompt=yes
-DisableWelcomePage=yes
-
-; TODO - percentage can't be set less than 100, so how to make it shorter?
-; WizardSizePercent=100,80
-
-#if GetEnv("KEY_CONTAINER")
-SignTool=MySignTool
-SignedUninstaller=yes
-#endif
-
-SetupMutex=OllamaSetupMutex
-
-[Languages]
-Name: "english"; MessagesFile: "compiler:Default.isl"
-
-[LangOptions]
-DialogFontSize=12
-
-[Files]
-Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
-Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-; Assumes v5.7, may need adjustments for v6
-#if GetEnv("HIP_PATH") != ""
-  Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
-  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
-  ; amdhip64.dll dependency comes from the driver and must be installed already
-  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
-#endif
-
-
-[Icons]
-Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
-Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
-Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
-
-[Run]
-Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
-
-[UninstallRun]
-; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
-; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ollama.exe /f /t"; Flags: runhidden
-Filename: "taskkill"; Parameters: "/im ""{#MyAppExeName}"" /f /t"; Flags: runhidden
-Filename: "taskkill"; Parameters: "/im ""ollama.exe"" /f /t"; Flags: runhidden
-; HACK!  need to give the server and app enough time to exit
-; TODO - convert this to a Pascal code script so it waits until they're no longer running, then completes
-Filename: "{cmd}"; Parameters: "/c timeout 5"; Flags: runhidden
-
-[UninstallDelete]
-Type: filesandordirs; Name: "{%TEMP}\ollama*"
-Type: filesandordirs; Name: "{%LOCALAPPDATA}\Ollama"
-Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
-Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
-Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
-; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
-
-[Messages]
-WizardReady=Ollama Windows Preview
-ReadyLabel1=%nLet's get you up and running with your own large language models.
-SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.
-
-
-;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama2
-;ClickFinish=%n
-
-[Registry]
-Root: HKCU; Subkey: "Environment"; \
-    ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
-    Check: NeedsAddPath('{app}')
-
-[Code]
-
-function NeedsAddPath(Param: string): boolean;
-var
-  OrigPath: string;
-begin
-  if not RegQueryStringValue(HKEY_CURRENT_USER,
-    'Environment',
-    'Path', OrigPath)
-  then begin
-    Result := True;
-    exit;
-  end;
-  { look for the path with leading and trailing semicolon }
-  { Pos() returns 0 if not found }
-  Result := Pos(';' + ExpandConstant(Param) + ';', ';' + OrigPath + ';') = 0;
-end;
--- a/app/ollama.rc
+++ b/app/ollama.rc
@@ -1,29 +0,0 @@
-#include <winver.h>
-
-VS_VERSION_INFO VERSIONINFO
- FILEFLAGSMASK 0x3fL
-#ifdef _DEBUG
- FILEFLAGS 0x1L
-#else
- FILEFLAGS 0x0L
-#endif
- FILEOS 0x40004L
- FILETYPE 0x1L
- FILESUBTYPE 0x0L
-BEGIN
-    BLOCK "StringFileInfo"
-    BEGIN
-        BLOCK "040904b0"
-        BEGIN
-            VALUE "FileDescription", "Ollama"
-            VALUE "InternalName", "Ollama"
-            VALUE "OriginalFilename", "ollama app.exe"
-            VALUE "ProductName", "Ollama"
-        END
-    END
-
-    BLOCK "VarFileInfo"
-    BEGIN
-        VALUE "Translation", 0x409, 1200
-    END
-END
--- a/app/ollama_welcome.ps1
+++ b/app/ollama_welcome.ps1
@@ -1,8 +0,0 @@
-# TODO - consider ANSI colors and maybe ASCII art...
-write-host ""
-write-host "Welcome to Ollama!"
-write-host ""
-write-host "Run your first model:"
-write-host ""
-write-host "`tollama run llama2"
-write-host ""
--- a/macapp/package-lock.json
+++ b/macapp/package-lock.json
--- a/macapp/package.json
+++ b/macapp/package.json
@@ -46,7 +46,7 @@
    "chmodr": "^1.2.0",
    "copy-webpack-plugin": "^11.0.0",
    "css-loader": "^6.8.1",
-    "electron": "25.9.2",
+    "electron": "25.2.0",
    "eslint": "^8.43.0",
    "eslint-plugin-import": "^2.27.5",
    "fork-ts-checker-webpack-plugin": "^7.3.0",
--- a/macapp/postcss.config.js
+++ b/macapp/postcss.config.js
--- a/macapp/src/app.css
+++ b/macapp/src/app.css
--- a/macapp/src/app.tsx
+++ b/macapp/src/app.tsx
--- a/macapp/src/declarations.d.ts
+++ b/macapp/src/declarations.d.ts
--- a/macapp/src/index.html
+++ b/macapp/src/index.html
--- a/macapp/src/index.ts
+++ b/macapp/src/index.ts
@@ -162,56 +162,13 @@ app.on('before-quit', () => {
  }
 })

-const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
-  process.arch
-}&version=${app.getVersion()}&id=${id()}`
-
-let latest = ''
-async function isNewReleaseAvailable() {
-  try {
-    const response = await fetch(updateURL)
-
-    if (!response.ok) {
-      return false
-    }
-
-    if (response.status === 204) {
-      return false
-    }
-
-    const data = await response.json()
-
-    const url = data?.url
-    if (!url) {
-      return false
-    }
-
-    if (latest === url) {
-      return false
-    }
-
-    latest = url
-
-    return true
-  } catch (error) {
-    logger.error(`update check failed - ${error}`)
-    return false
-  }
-}
-
-async function checkUpdate() {
-  const available = await isNewReleaseAvailable()
-  if (available) {
-    logger.info('checking for update')
-    autoUpdater.checkForUpdates()
-  }
-}
-
 function init() {
  if (app.isPackaged) {
-    checkUpdate()
+    autoUpdater.checkForUpdates()
    setInterval(() => {
-      checkUpdate()
+      if (!updateAvailable) {
+        autoUpdater.checkForUpdates()
+      }
    }, 60 * 60 * 1000)
  }

@@ -289,7 +246,11 @@ function id(): string {
  return uuid
 }

-autoUpdater.setFeedURL({ url: updateURL })
+autoUpdater.setFeedURL({
+  url: `https://ollama.ai/api/update?os=${process.platform}&arch=${
+    process.arch
+  }&version=${app.getVersion()}&id=${id()}`,
+})

 autoUpdater.on('error', e => {
  logger.error(`update check failed - ${e.message}`)
--- a/macapp/src/install.ts
+++ b/macapp/src/install.ts
--- a/macapp/src/ollama.svg
+++ b/macapp/src/ollama.svg
--- a/macapp/src/preload.ts
+++ b/macapp/src/preload.ts
--- a/macapp/src/renderer.tsx
+++ b/macapp/src/renderer.tsx
--- a/app/store/store.go
+++ b/app/store/store.go
@@ -1,98 +0,0 @@
-package store
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"sync"
-
-	"github.com/google/uuid"
-)
-
-type Store struct {
-	ID           string `json:"id"`
-	FirstTimeRun bool   `json:"first-time-run"`
-}
-
-var (
-	lock  sync.Mutex
-	store Store
-)
-
-func GetID() string {
-	lock.Lock()
-	defer lock.Unlock()
-	if store.ID == "" {
-		initStore()
-	}
-	return store.ID
-
-}
-
-func GetFirstTimeRun() bool {
-	lock.Lock()
-	defer lock.Unlock()
-	if store.ID == "" {
-		initStore()
-	}
-	return store.FirstTimeRun
-}
-
-func SetFirstTimeRun(val bool) {
-	lock.Lock()
-	defer lock.Unlock()
-	if store.FirstTimeRun == val {
-		return
-	}
-	store.FirstTimeRun = val
-	writeStore(getStorePath())
-}
-
-// lock must be held
-func initStore() {
-	storeFile, err := os.Open(getStorePath())
-	if err == nil {
-		defer storeFile.Close()
-		err = json.NewDecoder(storeFile).Decode(&store)
-		if err == nil {
-			slog.Debug(fmt.Sprintf("loaded existing store %s - ID: %s", getStorePath(), store.ID))
-			return
-		}
-	} else if !errors.Is(err, os.ErrNotExist) {
-		slog.Debug(fmt.Sprintf("unexpected error searching for store: %s", err))
-	}
-	slog.Debug("initializing new store")
-	store.ID = uuid.New().String()
-	writeStore(getStorePath())
-}
-
-func writeStore(storeFilename string) {
-	ollamaDir := filepath.Dir(storeFilename)
-	_, err := os.Stat(ollamaDir)
-	if errors.Is(err, os.ErrNotExist) {
-		if err := os.MkdirAll(ollamaDir, 0o755); err != nil {
-			slog.Error(fmt.Sprintf("create ollama dir %s: %v", ollamaDir, err))
-			return
-		}
-	}
-	payload, err := json.Marshal(store)
-	if err != nil {
-		slog.Error(fmt.Sprintf("failed to marshal store: %s", err))
-		return
-	}
-	fp, err := os.OpenFile(storeFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-	if err != nil {
-		slog.Error(fmt.Sprintf("write store payload %s: %v", storeFilename, err))
-		return
-	}
-	defer fp.Close()
-	if n, err := fp.Write(payload); err != nil || n != len(payload) {
-		slog.Error(fmt.Sprintf("write store payload %s: %d vs %d -- %v", storeFilename, n, len(payload), err))
-		return
-	}
-	slog.Debug("Store contents: " + string(payload))
-	slog.Info(fmt.Sprintf("wrote store: %s", storeFilename))
-}
--- a/app/store/store_darwin.go
+++ b/app/store/store_darwin.go
@@ -1,13 +0,0 @@
-package store
-
-import (
-	"os"
-	"path/filepath"
-)
-
-func getStorePath() string {
-	// TODO - system wide location?
-
-	home := os.Getenv("HOME")
-	return filepath.Join(home, "Library", "Application Support", "Ollama", "config.json")
-}
--- a/app/store/store_linux.go
+++ b/app/store/store_linux.go
@@ -1,16 +0,0 @@
-package store
-
-import (
-	"os"
-	"path/filepath"
-)
-
-func getStorePath() string {
-	if os.Geteuid() == 0 {
-		// TODO where should we store this on linux for system-wide operation?
-		return "/etc/ollama/config.json"
-	}
-
-	home := os.Getenv("HOME")
-	return filepath.Join(home, ".ollama", "config.json")
-}
--- a/app/store/store_windows.go
+++ b/app/store/store_windows.go
@@ -1,11 +0,0 @@
-package store
-
-import (
-	"os"
-	"path/filepath"
-)
-
-func getStorePath() string {
-	localAppData := os.Getenv("LOCALAPPDATA")
-	return filepath.Join(localAppData, "Ollama", "config.json")
-}
--- a/macapp/tailwind.config.js
+++ b/macapp/tailwind.config.js
--- a/app/tray/commontray/types.go
+++ b/app/tray/commontray/types.go
@@ -1,24 +0,0 @@
-package commontray
-
-var (
-	Title   = "Ollama"
-	ToolTip = "Ollama"
-
-	UpdateIconName = "tray_upgrade"
-	IconName       = "tray"
-)
-
-type Callbacks struct {
-	Quit       chan struct{}
-	Update     chan struct{}
-	DoFirstUse chan struct{}
-	ShowLogs   chan struct{}
-}
-
-type OllamaTray interface {
-	GetCallbacks() Callbacks
-	Run()
-	UpdateAvailable(ver string) error
-	DisplayFirstUseNotification() error
-	Quit()
-}
--- a/app/tray/tray.go
+++ b/app/tray/tray.go
@@ -1,33 +0,0 @@
-package tray
-
-import (
-	"fmt"
-	"runtime"
-
-	"github.com/ollama/ollama/app/assets"
-	"github.com/ollama/ollama/app/tray/commontray"
-)
-
-func NewTray() (commontray.OllamaTray, error) {
-	extension := ".png"
-	if runtime.GOOS == "windows" {
-		extension = ".ico"
-	}
-	iconName := commontray.UpdateIconName + extension
-	updateIcon, err := assets.GetIcon(iconName)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err)
-	}
-	iconName = commontray.IconName + extension
-	icon, err := assets.GetIcon(iconName)
-	if err != nil {
-		return nil, fmt.Errorf("failed to load icon %s: %w", iconName, err)
-	}
-
-	tray, err := InitPlatformTray(icon, updateIcon)
-	if err != nil {
-		return nil, err
-	}
-
-	return tray, nil
-}
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -1,13 +0,0 @@
-//go:build !windows
-
-package tray
-
-import (
-	"fmt"
-
-	"github.com/ollama/ollama/app/tray/commontray"
-)
-
-func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
-}
--- a/app/tray/tray_windows.go
+++ b/app/tray/tray_windows.go
@@ -1,10 +0,0 @@
-package tray
-
-import (
-	"github.com/ollama/ollama/app/tray/commontray"
-	"github.com/ollama/ollama/app/tray/wintray"
-)
-
-func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return wintray.InitTray(icon, updateIcon)
-}
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@@ -1,184 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"fmt"
-	"log/slog"
-	"sync"
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-)
-
-var (
-	quitOnce sync.Once
-)
-
-func (t *winTray) Run() {
-	nativeLoop()
-}
-
-func nativeLoop() {
-	// Main message pump.
-	slog.Debug("starting event handling loop")
-	m := &struct {
-		WindowHandle windows.Handle
-		Message      uint32
-		Wparam       uintptr
-		Lparam       uintptr
-		Time         uint32
-		Pt           point
-		LPrivate     uint32
-	}{}
-	for {
-		ret, _, err := pGetMessage.Call(uintptr(unsafe.Pointer(m)), 0, 0, 0)
-
-		// If the function retrieves a message other than WM_QUIT, the return value is nonzero.
-		// If the function retrieves the WM_QUIT message, the return value is zero.
-		// If there is an error, the return value is -1
-		// https://msdn.microsoft.com/en-us/library/windows/desktop/ms644936(v=vs.85).aspx
-		switch int32(ret) {
-		case -1:
-			slog.Error(fmt.Sprintf("get message failure: %v", err))
-			return
-		case 0:
-			return
-		default:
-			pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
-			pDispatchMessage.Call(uintptr(unsafe.Pointer(m)))  //nolint:errcheck
-
-		}
-	}
-}
-
-// WindowProc callback function that processes messages sent to a window.
-// https://msdn.microsoft.com/en-us/library/windows/desktop/ms633573(v=vs.85).aspx
-func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam uintptr) (lResult uintptr) {
-	const (
-		WM_RBUTTONUP   = 0x0205
-		WM_LBUTTONUP   = 0x0202
-		WM_COMMAND     = 0x0111
-		WM_ENDSESSION  = 0x0016
-		WM_CLOSE       = 0x0010
-		WM_DESTROY     = 0x0002
-		WM_MOUSEMOVE   = 0x0200
-		WM_LBUTTONDOWN = 0x0201
-	)
-	switch message {
-	case WM_COMMAND:
-		menuItemId := int32(wParam)
-		// https://docs.microsoft.com/en-us/windows/win32/menurc/wm-command#menus
-		switch menuItemId {
-		case quitMenuID:
-			select {
-			case t.callbacks.Quit <- struct{}{}:
-			// should not happen but in case not listening
-			default:
-				slog.Error("no listener on Quit")
-			}
-		case updateMenuID:
-			select {
-			case t.callbacks.Update <- struct{}{}:
-			// should not happen but in case not listening
-			default:
-				slog.Error("no listener on Update")
-			}
-		case diagLogsMenuID:
-			select {
-			case t.callbacks.ShowLogs <- struct{}{}:
-			// should not happen but in case not listening
-			default:
-				slog.Error("no listener on ShowLogs")
-			}
-		default:
-			slog.Debug(fmt.Sprintf("Unexpected menu item id: %d", menuItemId))
-		}
-	case WM_CLOSE:
-		boolRet, _, err := pDestroyWindow.Call(uintptr(t.window))
-		if boolRet == 0 {
-			slog.Error(fmt.Sprintf("failed to destroy window: %s", err))
-		}
-		err = t.wcex.unregister()
-		if err != nil {
-			slog.Error(fmt.Sprintf("failed to uregister windo %s", err))
-		}
-	case WM_DESTROY:
-		// same as WM_ENDSESSION, but throws 0 exit code after all
-		defer pPostQuitMessage.Call(uintptr(int32(0))) //nolint:errcheck
-		fallthrough
-	case WM_ENDSESSION:
-		t.muNID.Lock()
-		if t.nid != nil {
-			err := t.nid.delete()
-			if err != nil {
-				slog.Error(fmt.Sprintf("failed to delete nid: %s", err))
-			}
-		}
-		t.muNID.Unlock()
-	case t.wmSystrayMessage:
-		switch lParam {
-		case WM_MOUSEMOVE, WM_LBUTTONDOWN:
-			// Ignore these...
-		case WM_RBUTTONUP, WM_LBUTTONUP:
-			err := t.showMenu()
-			if err != nil {
-				slog.Error(fmt.Sprintf("failed to show menu: %s", err))
-			}
-		case 0x405: // TODO - how is this magic value derived for the notification left click
-			if t.pendingUpdate {
-				select {
-				case t.callbacks.Update <- struct{}{}:
-				// should not happen but in case not listening
-				default:
-					slog.Error("no listener on Update")
-				}
-			} else {
-				select {
-				case t.callbacks.DoFirstUse <- struct{}{}:
-				// should not happen but in case not listening
-				default:
-					slog.Error("no listener on DoFirstUse")
-				}
-			}
-		case 0x404: // Middle click or close notification
-			// slog.Debug("doing nothing on close of first time notification")
-		default:
-			// 0x402 also seems common - what is it?
-			slog.Debug(fmt.Sprintf("unmanaged app message, lParm: 0x%x", lParam))
-		}
-	case t.wmTaskbarCreated: // on explorer.exe restarts
-		t.muNID.Lock()
-		err := t.nid.add()
-		if err != nil {
-			slog.Error(fmt.Sprintf("failed to refresh the taskbar on explorer restart: %s", err))
-		}
-		t.muNID.Unlock()
-	default:
-		// Calls the default window procedure to provide default processing for any window messages that an application does not process.
-		// https://msdn.microsoft.com/en-us/library/windows/desktop/ms633572(v=vs.85).aspx
-		lResult, _, _ = pDefWindowProc.Call(
-			uintptr(hWnd),
-			uintptr(message),
-			uintptr(wParam),
-			uintptr(lParam),
-		)
-	}
-	return
-}
-
-func (t *winTray) Quit() {
-	quitOnce.Do(quit)
-}
-
-func quit() {
-	boolRet, _, err := pPostMessage.Call(
-		uintptr(wt.window),
-		WM_CLOSE,
-		0,
-		0,
-	)
-	if boolRet == 0 {
-		slog.Error(fmt.Sprintf("failed to post close message on shutdown %s", err))
-	}
-}
--- a/app/tray/wintray/menus.go
+++ b/app/tray/wintray/menus.go
@@ -1,71 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"fmt"
-	"log/slog"
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-)
-
-const (
-	updatAvailableMenuID = 1
-	updateMenuID         = updatAvailableMenuID + 1
-	separatorMenuID      = updateMenuID + 1
-	diagLogsMenuID       = separatorMenuID + 1
-	diagSeparatorMenuID  = diagLogsMenuID + 1
-	quitMenuID           = diagSeparatorMenuID + 1
-)
-
-func (t *winTray) initMenus() error {
-	if err := t.addOrUpdateMenuItem(diagLogsMenuID, 0, diagLogsMenuTitle, false); err != nil {
-		return fmt.Errorf("unable to create menu entries %w\n", err)
-	}
-	if err := t.addSeparatorMenuItem(diagSeparatorMenuID, 0); err != nil {
-		return fmt.Errorf("unable to create menu entries %w", err)
-	}
-	if err := t.addOrUpdateMenuItem(quitMenuID, 0, quitMenuTitle, false); err != nil {
-		return fmt.Errorf("unable to create menu entries %w\n", err)
-	}
-	return nil
-}
-
-func (t *winTray) UpdateAvailable(ver string) error {
-	if !t.updateNotified {
-		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
-			return fmt.Errorf("unable to create menu entries %w", err)
-		}
-		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
-			return fmt.Errorf("unable to create menu entries %w", err)
-		}
-		if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil {
-			return fmt.Errorf("unable to create menu entries %w", err)
-		}
-		iconFilePath, err := iconBytesToFilePath(wt.updateIcon)
-		if err != nil {
-			return fmt.Errorf("unable to write icon data to temp file: %w", err)
-		}
-		if err := wt.setIcon(iconFilePath); err != nil {
-			return fmt.Errorf("unable to set icon: %w", err)
-		}
-		t.updateNotified = true
-
-		t.pendingUpdate = true
-		// Now pop up the notification
-		t.muNID.Lock()
-		defer t.muNID.Unlock()
-		copy(t.nid.InfoTitle[:], windows.StringToUTF16(updateTitle))
-		copy(t.nid.Info[:], windows.StringToUTF16(fmt.Sprintf(updateMessage, ver)))
-		t.nid.Flags |= NIF_INFO
-		t.nid.Timeout = 10
-		t.nid.Size = uint32(unsafe.Sizeof(*wt.nid))
-		err = t.nid.modify()
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
--- a/app/tray/wintray/messages.go
+++ b/app/tray/wintray/messages.go
@@ -1,15 +0,0 @@
-//go:build windows
-
-package wintray
-
-const (
-	firstTimeTitle   = "Ollama is running"
-	firstTimeMessage = "Click here to get started"
-	updateTitle      = "Update available"
-	updateMessage    = "Ollama version %s is ready to install"
-
-	quitMenuTitle            = "Quit Ollama"
-	updateAvailableMenuTitle = "An update is available"
-	updateMenutTitle         = "Restart to update"
-	diagLogsMenuTitle        = "View logs"
-)
--- a/app/tray/wintray/notifyicon.go
+++ b/app/tray/wintray/notifyicon.go
@@ -1,66 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-)
-
-// Contains information that the system needs to display notifications in the notification area.
-// Used by Shell_NotifyIcon.
-// https://msdn.microsoft.com/en-us/library/windows/desktop/bb773352(v=vs.85).aspx
-// https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159
-type notifyIconData struct {
-	Size                       uint32
-	Wnd                        windows.Handle
-	ID, Flags, CallbackMessage uint32
-	Icon                       windows.Handle
-	Tip                        [128]uint16
-	State, StateMask           uint32
-	Info                       [256]uint16
-	// Timeout, Version           uint32
-	Timeout uint32
-
-	InfoTitle   [64]uint16
-	InfoFlags   uint32
-	GuidItem    windows.GUID
-	BalloonIcon windows.Handle
-}
-
-func (nid *notifyIconData) add() error {
-	const NIM_ADD = 0x00000000
-	res, _, err := pShellNotifyIcon.Call(
-		uintptr(NIM_ADD),
-		uintptr(unsafe.Pointer(nid)),
-	)
-	if res == 0 {
-		return err
-	}
-	return nil
-}
-
-func (nid *notifyIconData) modify() error {
-	const NIM_MODIFY = 0x00000001
-	res, _, err := pShellNotifyIcon.Call(
-		uintptr(NIM_MODIFY),
-		uintptr(unsafe.Pointer(nid)),
-	)
-	if res == 0 {
-		return err
-	}
-	return nil
-}
-
-func (nid *notifyIconData) delete() error {
-	const NIM_DELETE = 0x00000002
-	res, _, err := pShellNotifyIcon.Call(
-		uintptr(NIM_DELETE),
-		uintptr(unsafe.Pointer(nid)),
-	)
-	if res == 0 {
-		return err
-	}
-	return nil
-}
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -1,485 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"crypto/md5"
-	"encoding/hex"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"sort"
-	"sync"
-	"unsafe"
-
-	"github.com/ollama/ollama/app/tray/commontray"
-	"golang.org/x/sys/windows"
-)
-
-// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
-
-// Contains information about loaded resources
-type winTray struct {
-	instance,
-	icon,
-	cursor,
-	window windows.Handle
-
-	loadedImages   map[string]windows.Handle
-	muLoadedImages sync.RWMutex
-
-	// menus keeps track of the submenus keyed by the menu item ID, plus 0
-	// which corresponds to the main popup menu.
-	menus    map[uint32]windows.Handle
-	muMenus  sync.RWMutex
-	menuOf   map[uint32]windows.Handle
-	muMenuOf sync.RWMutex
-	// menuItemIcons maintains the bitmap of each menu item (if applies). It's
-	// needed to show the icon correctly when showing a previously hidden menu
-	// item again.
-	// menuItemIcons   map[uint32]windows.Handle
-	// muMenuItemIcons sync.RWMutex
-	visibleItems   map[uint32][]uint32
-	muVisibleItems sync.RWMutex
-
-	nid   *notifyIconData
-	muNID sync.RWMutex
-	wcex  *wndClassEx
-
-	wmSystrayMessage,
-	wmTaskbarCreated uint32
-
-	pendingUpdate  bool
-	updateNotified bool // Only pop up the notification once - TODO consider daily nag?
-	// Callbacks
-	callbacks  commontray.Callbacks
-	normalIcon []byte
-	updateIcon []byte
-}
-
-var wt winTray
-
-func (t *winTray) GetCallbacks() commontray.Callbacks {
-	return t.callbacks
-}
-
-func InitTray(icon, updateIcon []byte) (*winTray, error) {
-	wt.callbacks.Quit = make(chan struct{})
-	wt.callbacks.Update = make(chan struct{})
-	wt.callbacks.ShowLogs = make(chan struct{})
-	wt.callbacks.DoFirstUse = make(chan struct{})
-	wt.normalIcon = icon
-	wt.updateIcon = updateIcon
-	if err := wt.initInstance(); err != nil {
-		return nil, fmt.Errorf("Unable to init instance: %w\n", err)
-	}
-
-	if err := wt.createMenu(); err != nil {
-		return nil, fmt.Errorf("Unable to create menu: %w\n", err)
-	}
-
-	iconFilePath, err := iconBytesToFilePath(wt.normalIcon)
-	if err != nil {
-		return nil, fmt.Errorf("Unable to write icon data to temp file: %w", err)
-	}
-	if err := wt.setIcon(iconFilePath); err != nil {
-		return nil, fmt.Errorf("Unable to set icon: %w", err)
-	}
-
-	return &wt, wt.initMenus()
-}
-
-func (t *winTray) initInstance() error {
-	const (
-		className  = "OllamaClass"
-		windowName = ""
-	)
-
-	t.wmSystrayMessage = WM_USER + 1
-	t.visibleItems = make(map[uint32][]uint32)
-	t.menus = make(map[uint32]windows.Handle)
-	t.menuOf = make(map[uint32]windows.Handle)
-
-	t.loadedImages = make(map[string]windows.Handle)
-
-	taskbarEventNamePtr, _ := windows.UTF16PtrFromString("TaskbarCreated")
-	// https://msdn.microsoft.com/en-us/library/windows/desktop/ms644947
-	res, _, err := pRegisterWindowMessage.Call(
-		uintptr(unsafe.Pointer(taskbarEventNamePtr)),
-	)
-	if res == 0 { // success 0xc000-0xfff
-		return fmt.Errorf("failed to register window: %w", err)
-	}
-	t.wmTaskbarCreated = uint32(res)
-
-	instanceHandle, _, err := pGetModuleHandle.Call(0)
-	if instanceHandle == 0 {
-		return err
-	}
-	t.instance = windows.Handle(instanceHandle)
-
-	// https://msdn.microsoft.com/en-us/library/windows/desktop/ms648072(v=vs.85).aspx
-	iconHandle, _, err := pLoadIcon.Call(0, uintptr(IDI_APPLICATION))
-	if iconHandle == 0 {
-		return err
-	}
-	t.icon = windows.Handle(iconHandle)
-
-	// https://msdn.microsoft.com/en-us/library/windows/desktop/ms648391(v=vs.85).aspx
-	cursorHandle, _, err := pLoadCursor.Call(0, uintptr(IDC_ARROW))
-	if cursorHandle == 0 {
-		return err
-	}
-	t.cursor = windows.Handle(cursorHandle)
-
-	classNamePtr, err := windows.UTF16PtrFromString(className)
-	if err != nil {
-		return err
-	}
-
-	windowNamePtr, err := windows.UTF16PtrFromString(windowName)
-	if err != nil {
-		return err
-	}
-
-	t.wcex = &wndClassEx{
-		Style:      CS_HREDRAW | CS_VREDRAW,
-		WndProc:    windows.NewCallback(t.wndProc),
-		Instance:   t.instance,
-		Icon:       t.icon,
-		Cursor:     t.cursor,
-		Background: windows.Handle(6), // (COLOR_WINDOW + 1)
-		ClassName:  classNamePtr,
-		IconSm:     t.icon,
-	}
-	if err := t.wcex.register(); err != nil {
-		return err
-	}
-
-	windowHandle, _, err := pCreateWindowEx.Call(
-		uintptr(0),
-		uintptr(unsafe.Pointer(classNamePtr)),
-		uintptr(unsafe.Pointer(windowNamePtr)),
-		uintptr(WS_OVERLAPPEDWINDOW),
-		uintptr(CW_USEDEFAULT),
-		uintptr(CW_USEDEFAULT),
-		uintptr(CW_USEDEFAULT),
-		uintptr(CW_USEDEFAULT),
-		uintptr(0),
-		uintptr(0),
-		uintptr(t.instance),
-		uintptr(0),
-	)
-	if windowHandle == 0 {
-		return err
-	}
-	t.window = windows.Handle(windowHandle)
-
-	pShowWindow.Call(uintptr(t.window), uintptr(SW_HIDE)) //nolint:errcheck
-
-	boolRet, _, err := pUpdateWindow.Call(uintptr(t.window))
-	if boolRet == 0 {
-		slog.Error(fmt.Sprintf("failed to update window: %s", err))
-	}
-
-	t.muNID.Lock()
-	defer t.muNID.Unlock()
-	t.nid = &notifyIconData{
-		Wnd:             windows.Handle(t.window),
-		ID:              100,
-		Flags:           NIF_MESSAGE,
-		CallbackMessage: t.wmSystrayMessage,
-	}
-	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
-
-	return t.nid.add()
-}
-
-func (t *winTray) createMenu() error {
-
-	menuHandle, _, err := pCreatePopupMenu.Call()
-	if menuHandle == 0 {
-		return err
-	}
-	t.menus[0] = windows.Handle(menuHandle)
-
-	// https://msdn.microsoft.com/en-us/library/windows/desktop/ms647575(v=vs.85).aspx
-	mi := struct {
-		Size, Mask, Style, Max uint32
-		Background             windows.Handle
-		ContextHelpID          uint32
-		MenuData               uintptr
-	}{
-		Mask: MIM_APPLYTOSUBMENUS,
-	}
-	mi.Size = uint32(unsafe.Sizeof(mi))
-
-	res, _, err := pSetMenuInfo.Call(
-		uintptr(t.menus[0]),
-		uintptr(unsafe.Pointer(&mi)),
-	)
-	if res == 0 {
-		return err
-	}
-	return nil
-}
-
-// Contains information about a menu item.
-// https://msdn.microsoft.com/en-us/library/windows/desktop/ms647578(v=vs.85).aspx
-type menuItemInfo struct {
-	Size, Mask, Type, State     uint32
-	ID                          uint32
-	SubMenu, Checked, Unchecked windows.Handle
-	ItemData                    uintptr
-	TypeData                    *uint16
-	Cch                         uint32
-	BMPItem                     windows.Handle
-}
-
-func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title string, disabled bool) error {
-	titlePtr, err := windows.UTF16PtrFromString(title)
-	if err != nil {
-		return err
-	}
-
-	mi := menuItemInfo{
-		Mask:     MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE,
-		Type:     MFT_STRING,
-		ID:       uint32(menuItemId),
-		TypeData: titlePtr,
-		Cch:      uint32(len(title)),
-	}
-	mi.Size = uint32(unsafe.Sizeof(mi))
-	if disabled {
-		mi.State |= MFS_DISABLED
-	}
-
-	var res uintptr
-	t.muMenus.RLock()
-	menu := t.menus[parentId]
-	t.muMenus.RUnlock()
-	if t.getVisibleItemIndex(parentId, menuItemId) != -1 {
-		// We set the menu item info based on the menuID
-		boolRet, _, err := pSetMenuItemInfo.Call(
-			uintptr(menu),
-			uintptr(menuItemId),
-			0,
-			uintptr(unsafe.Pointer(&mi)),
-		)
-		if boolRet == 0 {
-			return fmt.Errorf("failed to set menu item: %w", err)
-		}
-	}
-
-	if res == 0 {
-		// Menu item does not already exist, create it
-		t.muMenus.RLock()
-		submenu, exists := t.menus[menuItemId]
-		t.muMenus.RUnlock()
-		if exists {
-			mi.Mask |= MIIM_SUBMENU
-			mi.SubMenu = submenu
-		}
-		t.addToVisibleItems(parentId, menuItemId)
-		position := t.getVisibleItemIndex(parentId, menuItemId)
-		res, _, err = pInsertMenuItem.Call(
-			uintptr(menu),
-			uintptr(position),
-			1,
-			uintptr(unsafe.Pointer(&mi)),
-		)
-		if res == 0 {
-			t.delFromVisibleItems(parentId, menuItemId)
-			return err
-		}
-		t.muMenuOf.Lock()
-		t.menuOf[menuItemId] = menu
-		t.muMenuOf.Unlock()
-	}
-
-	return nil
-}
-
-func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
-
-	mi := menuItemInfo{
-		Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE,
-		Type: MFT_SEPARATOR,
-		ID:   uint32(menuItemId),
-	}
-
-	mi.Size = uint32(unsafe.Sizeof(mi))
-
-	t.addToVisibleItems(parentId, menuItemId)
-	position := t.getVisibleItemIndex(parentId, menuItemId)
-	t.muMenus.RLock()
-	menu := uintptr(t.menus[parentId])
-	t.muMenus.RUnlock()
-	res, _, err := pInsertMenuItem.Call(
-		menu,
-		uintptr(position),
-		1,
-		uintptr(unsafe.Pointer(&mi)),
-	)
-	if res == 0 {
-		return err
-	}
-
-	return nil
-}
-
-// func (t *winTray) hideMenuItem(menuItemId, parentId uint32) error {
-// 	const ERROR_SUCCESS syscall.Errno = 0
-
-// 	t.muMenus.RLock()
-// 	menu := uintptr(t.menus[parentId])
-// 	t.muMenus.RUnlock()
-// 	res, _, err := pRemoveMenu.Call(
-// 		menu,
-// 		uintptr(menuItemId),
-// 		MF_BYCOMMAND,
-// 	)
-// 	if res == 0 && err.(syscall.Errno) != ERROR_SUCCESS {
-// 		return err
-// 	}
-// 	t.delFromVisibleItems(parentId, menuItemId)
-
-// 	return nil
-// }
-
-func (t *winTray) showMenu() error {
-	p := point{}
-	boolRet, _, err := pGetCursorPos.Call(uintptr(unsafe.Pointer(&p)))
-	if boolRet == 0 {
-		return err
-	}
-	boolRet, _, err = pSetForegroundWindow.Call(uintptr(t.window))
-	if boolRet == 0 {
-		slog.Warn(fmt.Sprintf("failed to bring menu to foreground: %s", err))
-	}
-
-	boolRet, _, err = pTrackPopupMenu.Call(
-		uintptr(t.menus[0]),
-		TPM_BOTTOMALIGN|TPM_LEFTALIGN,
-		uintptr(p.X),
-		uintptr(p.Y),
-		0,
-		uintptr(t.window),
-		0,
-	)
-	if boolRet == 0 {
-		return err
-	}
-
-	return nil
-}
-
-func (t *winTray) delFromVisibleItems(parent, val uint32) {
-	t.muVisibleItems.Lock()
-	defer t.muVisibleItems.Unlock()
-	visibleItems := t.visibleItems[parent]
-	for i, itemval := range visibleItems {
-		if val == itemval {
-			t.visibleItems[parent] = append(visibleItems[:i], visibleItems[i+1:]...)
-			break
-		}
-	}
-}
-
-func (t *winTray) addToVisibleItems(parent, val uint32) {
-	t.muVisibleItems.Lock()
-	defer t.muVisibleItems.Unlock()
-	if visibleItems, exists := t.visibleItems[parent]; !exists {
-		t.visibleItems[parent] = []uint32{val}
-	} else {
-		newvisible := append(visibleItems, val)
-		sort.Slice(newvisible, func(i, j int) bool { return newvisible[i] < newvisible[j] })
-		t.visibleItems[parent] = newvisible
-	}
-}
-
-func (t *winTray) getVisibleItemIndex(parent, val uint32) int {
-	t.muVisibleItems.RLock()
-	defer t.muVisibleItems.RUnlock()
-	for i, itemval := range t.visibleItems[parent] {
-		if val == itemval {
-			return i
-		}
-	}
-	return -1
-}
-
-func iconBytesToFilePath(iconBytes []byte) (string, error) {
-	bh := md5.Sum(iconBytes)
-	dataHash := hex.EncodeToString(bh[:])
-	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
-
-	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
-			return "", err
-		}
-	}
-	return iconFilePath, nil
-}
-
-// Loads an image from file and shows it in tray.
-// Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx
-func (t *winTray) setIcon(src string) error {
-
-	h, err := t.loadIconFrom(src)
-	if err != nil {
-		return err
-	}
-
-	t.muNID.Lock()
-	defer t.muNID.Unlock()
-	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON
-	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
-
-	return t.nid.modify()
-}
-
-// Loads an image from file to be shown in tray or menu item.
-// LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx
-func (t *winTray) loadIconFrom(src string) (windows.Handle, error) {
-
-	// Save and reuse handles of loaded images
-	t.muLoadedImages.RLock()
-	h, ok := t.loadedImages[src]
-	t.muLoadedImages.RUnlock()
-	if !ok {
-		srcPtr, err := windows.UTF16PtrFromString(src)
-		if err != nil {
-			return 0, err
-		}
-		res, _, err := pLoadImage.Call(
-			0,
-			uintptr(unsafe.Pointer(srcPtr)),
-			IMAGE_ICON,
-			0,
-			0,
-			LR_LOADFROMFILE|LR_DEFAULTSIZE,
-		)
-		if res == 0 {
-			return 0, err
-		}
-		h = windows.Handle(res)
-		t.muLoadedImages.Lock()
-		t.loadedImages[src] = h
-		t.muLoadedImages.Unlock()
-	}
-	return h, nil
-}
-
-func (t *winTray) DisplayFirstUseNotification() error {
-	t.muNID.Lock()
-	defer t.muNID.Unlock()
-	copy(t.nid.InfoTitle[:], windows.StringToUTF16(firstTimeTitle))
-	copy(t.nid.Info[:], windows.StringToUTF16(firstTimeMessage))
-	t.nid.Flags |= NIF_INFO
-	t.nid.Size = uint32(unsafe.Sizeof(*wt.nid))
-
-	return t.nid.modify()
-}
--- a/app/tray/wintray/w32api.go
+++ b/app/tray/wintray/w32api.go
@@ -1,89 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"runtime"
-
-	"golang.org/x/sys/windows"
-)
-
-var (
-	k32 = windows.NewLazySystemDLL("Kernel32.dll")
-	u32 = windows.NewLazySystemDLL("User32.dll")
-	s32 = windows.NewLazySystemDLL("Shell32.dll")
-
-	pCreatePopupMenu       = u32.NewProc("CreatePopupMenu")
-	pCreateWindowEx        = u32.NewProc("CreateWindowExW")
-	pDefWindowProc         = u32.NewProc("DefWindowProcW")
-	pDestroyWindow         = u32.NewProc("DestroyWindow")
-	pDispatchMessage       = u32.NewProc("DispatchMessageW")
-	pGetCursorPos          = u32.NewProc("GetCursorPos")
-	pGetMessage            = u32.NewProc("GetMessageW")
-	pGetModuleHandle       = k32.NewProc("GetModuleHandleW")
-	pInsertMenuItem        = u32.NewProc("InsertMenuItemW")
-	pLoadCursor            = u32.NewProc("LoadCursorW")
-	pLoadIcon              = u32.NewProc("LoadIconW")
-	pLoadImage             = u32.NewProc("LoadImageW")
-	pPostMessage           = u32.NewProc("PostMessageW")
-	pPostQuitMessage       = u32.NewProc("PostQuitMessage")
-	pRegisterClass         = u32.NewProc("RegisterClassExW")
-	pRegisterWindowMessage = u32.NewProc("RegisterWindowMessageW")
-	pSetForegroundWindow   = u32.NewProc("SetForegroundWindow")
-	pSetMenuInfo           = u32.NewProc("SetMenuInfo")
-	pSetMenuItemInfo       = u32.NewProc("SetMenuItemInfoW")
-	pShellNotifyIcon       = s32.NewProc("Shell_NotifyIconW")
-	pShowWindow            = u32.NewProc("ShowWindow")
-	pTrackPopupMenu        = u32.NewProc("TrackPopupMenu")
-	pTranslateMessage      = u32.NewProc("TranslateMessage")
-	pUnregisterClass       = u32.NewProc("UnregisterClassW")
-	pUpdateWindow          = u32.NewProc("UpdateWindow")
-)
-
-const (
-	CS_HREDRAW          = 0x0002
-	CS_VREDRAW          = 0x0001
-	CW_USEDEFAULT       = 0x80000000
-	IDC_ARROW           = 32512 // Standard arrow
-	IDI_APPLICATION     = 32512
-	IMAGE_ICON          = 1          // Loads an icon
-	LR_DEFAULTSIZE      = 0x00000040 // Loads default-size icon for windows(SM_CXICON x SM_CYICON) if cx, cy are set to zero
-	LR_LOADFROMFILE     = 0x00000010 // Loads the stand-alone image from the file
-	MF_BYCOMMAND        = 0x00000000
-	MFS_DISABLED        = 0x00000003
-	MFT_SEPARATOR       = 0x00000800
-	MFT_STRING          = 0x00000000
-	MIIM_BITMAP         = 0x00000080
-	MIIM_FTYPE          = 0x00000100
-	MIIM_ID             = 0x00000002
-	MIIM_STATE          = 0x00000001
-	MIIM_STRING         = 0x00000040
-	MIIM_SUBMENU        = 0x00000004
-	MIM_APPLYTOSUBMENUS = 0x80000000
-	NIF_ICON            = 0x00000002
-	NIF_INFO            = 0x00000010
-	NIF_MESSAGE         = 0x00000001
-	SW_HIDE             = 0
-	TPM_BOTTOMALIGN     = 0x0020
-	TPM_LEFTALIGN       = 0x0000
-	WM_CLOSE            = 0x0010
-	WM_USER             = 0x0400
-	WS_CAPTION          = 0x00C00000
-	WS_MAXIMIZEBOX      = 0x00010000
-	WS_MINIMIZEBOX      = 0x00020000
-	WS_OVERLAPPED       = 0x00000000
-	WS_OVERLAPPEDWINDOW = WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU | WS_THICKFRAME | WS_MINIMIZEBOX | WS_MAXIMIZEBOX
-	WS_SYSMENU          = 0x00080000
-	WS_THICKFRAME       = 0x00040000
-)
-
-// Not sure if this is actually needed on windows
-func init() {
-	runtime.LockOSThread()
-}
-
-// The POINT structure defines the x- and y- coordinates of a point.
-// https://msdn.microsoft.com/en-us/library/windows/desktop/dd162805(v=vs.85).aspx
-type point struct {
-	X, Y int32
-}
--- a/app/tray/wintray/winclass.go
+++ b/app/tray/wintray/winclass.go
@@ -1,45 +0,0 @@
-//go:build windows
-
-package wintray
-
-import (
-	"unsafe"
-
-	"golang.org/x/sys/windows"
-)
-
-// Contains window class information.
-// It is used with the RegisterClassEx and GetClassInfoEx functions.
-// https://msdn.microsoft.com/en-us/library/ms633577.aspx
-type wndClassEx struct {
-	Size, Style                        uint32
-	WndProc                            uintptr
-	ClsExtra, WndExtra                 int32
-	Instance, Icon, Cursor, Background windows.Handle
-	MenuName, ClassName                *uint16
-	IconSm                             windows.Handle
-}
-
-// Registers a window class for subsequent use in calls to the CreateWindow or CreateWindowEx function.
-// https://msdn.microsoft.com/en-us/library/ms633587.aspx
-func (w *wndClassEx) register() error {
-	w.Size = uint32(unsafe.Sizeof(*w))
-	res, _, err := pRegisterClass.Call(uintptr(unsafe.Pointer(w)))
-	if res == 0 {
-		return err
-	}
-	return nil
-}
-
-// Unregisters a window class, freeing the memory required for the class.
-// https://msdn.microsoft.com/en-us/library/ms644899.aspx
-func (w *wndClassEx) unregister() error {
-	res, _, err := pUnregisterClass.Call(
-		uintptr(unsafe.Pointer(w.ClassName)),
-		uintptr(w.Instance),
-	)
-	if res == 0 {
-		return err
-	}
-	return nil
-}
--- a/macapp/tsconfig.json
+++ b/macapp/tsconfig.json
--- a/macapp/webpack.main.config.ts
+++ b/macapp/webpack.main.config.ts
--- a/macapp/webpack.plugins.ts
+++ b/macapp/webpack.plugins.ts
--- a/macapp/webpack.renderer.config.ts
+++ b/macapp/webpack.renderer.config.ts
--- a/macapp/webpack.rules.ts
+++ b/macapp/webpack.rules.ts
--- a/auth/auth.go
+++ b/auth/auth.go
@@ -1,61 +0,0 @@
-package auth
-
-import (
-	"bytes"
-	"context"
-	"crypto/rand"
-	"encoding/base64"
-	"fmt"
-	"io"
-	"log/slog"
-	"os"
-	"path/filepath"
-
-	"golang.org/x/crypto/ssh"
-)
-
-const defaultPrivateKey = "id_ed25519"
-
-func NewNonce(r io.Reader, length int) (string, error) {
-	nonce := make([]byte, length)
-	if _, err := io.ReadFull(r, nonce); err != nil {
-		return "", err
-	}
-
-	return base64.RawURLEncoding.EncodeToString(nonce), nil
-}
-
-func Sign(ctx context.Context, bts []byte) (string, error) {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-
-	keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
-
-	privateKeyFile, err := os.ReadFile(keyPath)
-	if err != nil {
-		slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
-		return "", err
-	}
-
-	privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
-	if err != nil {
-		return "", err
-	}
-
-	// get the pubkey, but remove the type
-	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
-	parts := bytes.Split(publicKey, []byte(" "))
-	if len(parts) < 2 {
-		return "", fmt.Errorf("malformed public key")
-	}
-
-	signedData, err := privateKey.Sign(rand.Reader, bts)
-	if err != nil {
-		return "", err
-	}
-
-	// signature is <pubkey>:<signature>
-	return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
-}
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -1,663 +0,0 @@
-package cmd
-
-import (
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"path/filepath"
-	"regexp"
-	"sort"
-	"strings"
-
-	"github.com/spf13/cobra"
-	"golang.org/x/exp/slices"
-
-	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/progress"
-	"github.com/ollama/ollama/readline"
-)
-
-type MultilineState int
-
-const (
-	MultilineNone MultilineState = iota
-	MultilinePrompt
-	MultilineSystem
-	MultilineTemplate
-)
-
-func loadModel(cmd *cobra.Command, opts *runOptions) error {
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return err
-	}
-
-	p := progress.NewProgress(os.Stderr)
-	defer p.StopAndClear()
-
-	spinner := progress.NewSpinner("")
-	p.Add("", spinner)
-
-	showReq := api.ShowRequest{Name: opts.Model}
-	showResp, err := client.Show(cmd.Context(), &showReq)
-	if err != nil {
-		return err
-	}
-	opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
-	opts.ParentModel = showResp.Details.ParentModel
-
-	if len(showResp.Messages) > 0 {
-		opts.Messages = append(opts.Messages, showResp.Messages...)
-	}
-
-	chatReq := &api.ChatRequest{
-		Model:    opts.Model,
-		Messages: []api.Message{},
-	}
-	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
-		p.StopAndClear()
-		if len(opts.Messages) > 0 {
-			for _, msg := range opts.Messages {
-				switch msg.Role {
-				case "user":
-					fmt.Printf(">>> %s\n", msg.Content)
-				case "assistant":
-					state := &displayResponseState{}
-					displayResponse(msg.Content, opts.WordWrap, state)
-					fmt.Println()
-					fmt.Println()
-				}
-			}
-		}
-		return nil
-	})
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func generateInteractive(cmd *cobra.Command, opts runOptions) error {
-	opts.Messages = make([]api.Message, 0)
-
-	err := loadModel(cmd, &opts)
-	if err != nil {
-		return err
-	}
-
-	usage := func() {
-		fmt.Fprintln(os.Stderr, "Available Commands:")
-		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
-		fmt.Fprintln(os.Stderr, "  /show           Show model information")
-		fmt.Fprintln(os.Stderr, "  /load <model>   Load a session or model")
-		fmt.Fprintln(os.Stderr, "  /save <model>   Save your current session")
-		fmt.Fprintln(os.Stderr, "  /bye            Exit")
-		fmt.Fprintln(os.Stderr, "  /?, /help       Help for a command")
-		fmt.Fprintln(os.Stderr, "  /? shortcuts    Help for keyboard shortcuts")
-		fmt.Fprintln(os.Stderr, "")
-		fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
-
-		if opts.MultiModal {
-			fmt.Fprintf(os.Stderr, "Use %s to include .jpg or .png images.\n", filepath.FromSlash("/path/to/file"))
-		}
-
-		fmt.Fprintln(os.Stderr, "")
-	}
-
-	usageSet := func() {
-		fmt.Fprintln(os.Stderr, "Available Commands:")
-		fmt.Fprintln(os.Stderr, "  /set parameter ...     Set a parameter")
-		fmt.Fprintln(os.Stderr, "  /set system <string>   Set system message")
-		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
-		fmt.Fprintln(os.Stderr, "  /set history           Enable history")
-		fmt.Fprintln(os.Stderr, "  /set nohistory         Disable history")
-		fmt.Fprintln(os.Stderr, "  /set wordwrap          Enable wordwrap")
-		fmt.Fprintln(os.Stderr, "  /set nowordwrap        Disable wordwrap")
-		fmt.Fprintln(os.Stderr, "  /set format json       Enable JSON mode")
-		fmt.Fprintln(os.Stderr, "  /set noformat          Disable formatting")
-		fmt.Fprintln(os.Stderr, "  /set verbose           Show LLM stats")
-		fmt.Fprintln(os.Stderr, "  /set quiet             Disable LLM stats")
-		fmt.Fprintln(os.Stderr, "")
-	}
-
-	usageShortcuts := func() {
-		fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
-		fmt.Fprintln(os.Stderr, "  Ctrl + a            Move to the beginning of the line (Home)")
-		fmt.Fprintln(os.Stderr, "  Ctrl + e            Move to the end of the line (End)")
-		fmt.Fprintln(os.Stderr, "   Alt + b            Move back (left) one word")
-		fmt.Fprintln(os.Stderr, "   Alt + f            Move forward (right) one word")
-		fmt.Fprintln(os.Stderr, "  Ctrl + k            Delete the sentence after the cursor")
-		fmt.Fprintln(os.Stderr, "  Ctrl + u            Delete the sentence before the cursor")
-		fmt.Fprintln(os.Stderr, "")
-		fmt.Fprintln(os.Stderr, "  Ctrl + l            Clear the screen")
-		fmt.Fprintln(os.Stderr, "  Ctrl + c            Stop the model from responding")
-		fmt.Fprintln(os.Stderr, "  Ctrl + d            Exit ollama (/bye)")
-		fmt.Fprintln(os.Stderr, "")
-	}
-
-	usageShow := func() {
-		fmt.Fprintln(os.Stderr, "Available Commands:")
-		fmt.Fprintln(os.Stderr, "  /show info         Show details for this model")
-		fmt.Fprintln(os.Stderr, "  /show license      Show model license")
-		fmt.Fprintln(os.Stderr, "  /show modelfile    Show Modelfile for this model")
-		fmt.Fprintln(os.Stderr, "  /show parameters   Show parameters for this model")
-		fmt.Fprintln(os.Stderr, "  /show system       Show system message")
-		fmt.Fprintln(os.Stderr, "  /show template     Show prompt template")
-		fmt.Fprintln(os.Stderr, "")
-	}
-
-	// only list out the most common parameters
-	usageParameters := func() {
-		fmt.Fprintln(os.Stderr, "Available Parameters:")
-		fmt.Fprintln(os.Stderr, "  /set parameter seed <int>             Random number seed")
-		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
-		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
-		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
-		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
-		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
-		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
-		fmt.Fprintln(os.Stderr, "  /set parameter repeat_last_n <int>    Set how far back to look for repetitions")
-		fmt.Fprintln(os.Stderr, "  /set parameter num_gpu <int>          The number of layers to send to the GPU")
-		fmt.Fprintln(os.Stderr, "  /set parameter stop \"<string>\", ...   Set the stop parameters")
-		fmt.Fprintln(os.Stderr, "")
-	}
-
-	scanner, err := readline.New(readline.Prompt{
-		Prompt:         ">>> ",
-		AltPrompt:      "... ",
-		Placeholder:    "Send a message (/? for help)",
-		AltPlaceholder: `Use """ to end multi-line input`,
-	})
-	if err != nil {
-		return err
-	}
-
-	fmt.Print(readline.StartBracketedPaste)
-	defer fmt.Printf(readline.EndBracketedPaste)
-
-	var sb strings.Builder
-	var multiline MultilineState
-
-	for {
-		line, err := scanner.Readline()
-		switch {
-		case errors.Is(err, io.EOF):
-			fmt.Println()
-			return nil
-		case errors.Is(err, readline.ErrInterrupt):
-			if line == "" {
-				fmt.Println("\nUse Ctrl + d or /bye to exit.")
-			}
-
-			scanner.Prompt.UseAlt = false
-			sb.Reset()
-
-			continue
-		case err != nil:
-			return err
-		}
-
-		switch {
-		case multiline != MultilineNone:
-			// check if there's a multiline terminating string
-			before, ok := strings.CutSuffix(line, `"""`)
-			sb.WriteString(before)
-			if !ok {
-				fmt.Fprintln(&sb)
-				continue
-			}
-
-			switch multiline {
-			case MultilineSystem:
-				opts.System = sb.String()
-				opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
-				fmt.Println("Set system message.")
-				sb.Reset()
-			case MultilineTemplate:
-				opts.Template = sb.String()
-				fmt.Println("Set prompt template.")
-				sb.Reset()
-			}
-
-			multiline = MultilineNone
-			scanner.Prompt.UseAlt = false
-		case strings.HasPrefix(line, `"""`):
-			line := strings.TrimPrefix(line, `"""`)
-			line, ok := strings.CutSuffix(line, `"""`)
-			sb.WriteString(line)
-			if !ok {
-				// no multiline terminating string; need more input
-				fmt.Fprintln(&sb)
-				multiline = MultilinePrompt
-				scanner.Prompt.UseAlt = true
-			}
-		case scanner.Pasting:
-			fmt.Fprintln(&sb, line)
-			continue
-		case strings.HasPrefix(line, "/list"):
-			args := strings.Fields(line)
-			if err := ListHandler(cmd, args[1:]); err != nil {
-				return err
-			}
-		case strings.HasPrefix(line, "/load"):
-			args := strings.Fields(line)
-			if len(args) != 2 {
-				fmt.Println("Usage:\n  /load <modelname>")
-				continue
-			}
-			opts.Model = args[1]
-			opts.Messages = []api.Message{}
-			fmt.Printf("Loading model '%s'\n", opts.Model)
-			if err := loadModel(cmd, &opts); err != nil {
-				return err
-			}
-			continue
-		case strings.HasPrefix(line, "/save"):
-			args := strings.Fields(line)
-			if len(args) != 2 {
-				fmt.Println("Usage:\n  /save <modelname>")
-				continue
-			}
-
-			client, err := api.ClientFromEnvironment()
-			if err != nil {
-				fmt.Println("error: couldn't connect to ollama server")
-				return err
-			}
-
-			req := &api.CreateRequest{
-				Name:      args[1],
-				Modelfile: buildModelfile(opts),
-			}
-			fn := func(resp api.ProgressResponse) error { return nil }
-			err = client.Create(cmd.Context(), req, fn)
-			if err != nil {
-				fmt.Println("error: couldn't save model")
-				return err
-			}
-			fmt.Printf("Created new model '%s'\n", args[1])
-			continue
-		case strings.HasPrefix(line, "/set"):
-			args := strings.Fields(line)
-			if len(args) > 1 {
-				switch args[1] {
-				case "history":
-					scanner.HistoryEnable()
-				case "nohistory":
-					scanner.HistoryDisable()
-				case "wordwrap":
-					opts.WordWrap = true
-					fmt.Println("Set 'wordwrap' mode.")
-				case "nowordwrap":
-					opts.WordWrap = false
-					fmt.Println("Set 'nowordwrap' mode.")
-				case "verbose":
-					cmd.Flags().Set("verbose", "true")
-					fmt.Println("Set 'verbose' mode.")
-				case "quiet":
-					cmd.Flags().Set("verbose", "false")
-					fmt.Println("Set 'quiet' mode.")
-				case "format":
-					if len(args) < 3 || args[2] != "json" {
-						fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
-					} else {
-						opts.Format = args[2]
-						fmt.Printf("Set format to '%s' mode.\n", args[2])
-					}
-				case "noformat":
-					opts.Format = ""
-					fmt.Println("Disabled format.")
-				case "parameter":
-					if len(args) < 4 {
-						usageParameters()
-						continue
-					}
-					params := args[3:]
-					fp, err := api.FormatParams(map[string][]string{args[2]: params})
-					if err != nil {
-						fmt.Printf("Couldn't set parameter: %q\n", err)
-						continue
-					}
-					fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
-					opts.Options[args[2]] = fp[args[2]]
-				case "system", "template":
-					if len(args) < 3 {
-						usageSet()
-						continue
-					}
-
-					if args[1] == "system" {
-						multiline = MultilineSystem
-					} else if args[1] == "template" {
-						multiline = MultilineTemplate
-					}
-
-					line := strings.Join(args[2:], " ")
-					line, ok := strings.CutPrefix(line, `"""`)
-					if !ok {
-						multiline = MultilineNone
-					} else {
-						// only cut suffix if the line is multiline
-						line, ok = strings.CutSuffix(line, `"""`)
-						if ok {
-							multiline = MultilineNone
-						}
-					}
-
-					sb.WriteString(line)
-					if multiline != MultilineNone {
-						scanner.Prompt.UseAlt = true
-						continue
-					}
-
-					if args[1] == "system" {
-						opts.System = sb.String() // for display in modelfile
-						newMessage := api.Message{Role: "system", Content: sb.String()}
-						// Check if the slice is not empty and the last message is from 'system'
-						if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
-							// Replace the last message
-							opts.Messages[len(opts.Messages)-1] = newMessage
-						} else {
-							opts.Messages = append(opts.Messages, newMessage)
-						}
-						fmt.Println("Set system message.")
-						sb.Reset()
-					} else if args[1] == "template" {
-						opts.Template = sb.String()
-						fmt.Println("Set prompt template.")
-						sb.Reset()
-					}
-
-					sb.Reset()
-					continue
-				default:
-					fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
-				}
-			} else {
-				usageSet()
-			}
-		case strings.HasPrefix(line, "/show"):
-			args := strings.Fields(line)
-			if len(args) > 1 {
-				client, err := api.ClientFromEnvironment()
-				if err != nil {
-					fmt.Println("error: couldn't connect to ollama server")
-					return err
-				}
-				req := &api.ShowRequest{
-					Name:     opts.Model,
-					System:   opts.System,
-					Template: opts.Template,
-					Options:  opts.Options,
-				}
-				resp, err := client.Show(cmd.Context(), req)
-				if err != nil {
-					fmt.Println("error: couldn't get model")
-					return err
-				}
-
-				switch args[1] {
-				case "info":
-					fmt.Println("Model details:")
-					if len(resp.Details.Families) > 0 {
-						fmt.Printf("Family              %s\n", strings.Join(resp.Details.Families, ", "))
-					} else if resp.Details.Family != "" {
-						fmt.Printf("Family              %s\n", resp.Details.Family)
-					}
-					fmt.Printf("Parameter Size      %s\n", resp.Details.ParameterSize)
-					fmt.Printf("Quantization Level  %s\n", resp.Details.QuantizationLevel)
-					fmt.Println("")
-				case "license":
-					if resp.License == "" {
-						fmt.Println("No license was specified for this model.")
-					} else {
-						fmt.Println(resp.License)
-					}
-				case "modelfile":
-					fmt.Println(resp.Modelfile)
-				case "parameters":
-					if resp.Parameters == "" {
-						fmt.Println("No parameters were specified for this model.")
-					} else {
-						if len(opts.Options) > 0 {
-							fmt.Println("User defined parameters:")
-							for k, v := range opts.Options {
-								fmt.Printf("%-*s %v\n", 30, k, v)
-							}
-							fmt.Println()
-						}
-						fmt.Println("Model defined parameters:")
-						fmt.Println(resp.Parameters)
-					}
-				case "system":
-					switch {
-					case opts.System != "":
-						fmt.Println(opts.System + "\n")
-					case resp.System != "":
-						fmt.Println(resp.System + "\n")
-					default:
-						fmt.Println("No system message was specified for this model.")
-					}
-				case "template":
-					switch {
-					case opts.Template != "":
-						fmt.Println(opts.Template + "\n")
-					case resp.Template != "":
-						fmt.Println(resp.Template)
-					default:
-						fmt.Println("No prompt template was specified for this model.")
-					}
-				default:
-					fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
-				}
-			} else {
-				usageShow()
-			}
-		case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
-			args := strings.Fields(line)
-			if len(args) > 1 {
-				switch args[1] {
-				case "set", "/set":
-					usageSet()
-				case "show", "/show":
-					usageShow()
-				case "shortcut", "shortcuts":
-					usageShortcuts()
-				}
-			} else {
-				usage()
-			}
-		case strings.HasPrefix(line, "/exit"), strings.HasPrefix(line, "/bye"):
-			return nil
-		case strings.HasPrefix(line, "/"):
-			args := strings.Fields(line)
-			isFile := false
-
-			if opts.MultiModal {
-				for _, f := range extractFileNames(line) {
-					if strings.HasPrefix(f, args[0]) {
-						isFile = true
-						break
-					}
-				}
-			}
-
-			if !isFile {
-				fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
-				continue
-			}
-
-			sb.WriteString(line)
-		default:
-			sb.WriteString(line)
-		}
-
-		if sb.Len() > 0 && multiline == MultilineNone {
-			newMessage := api.Message{Role: "user", Content: sb.String()}
-
-			if opts.MultiModal {
-				msg, images, err := extractFileData(sb.String())
-				if err != nil {
-					return err
-				}
-
-				// clear all previous images for better responses
-				if len(images) > 0 {
-					for i := range opts.Messages {
-						opts.Messages[i].Images = nil
-					}
-				}
-
-				newMessage.Content = msg
-				newMessage.Images = images
-			}
-
-			opts.Messages = append(opts.Messages, newMessage)
-
-			assistant, err := chat(cmd, opts)
-			if err != nil {
-				return err
-			}
-			if assistant != nil {
-				opts.Messages = append(opts.Messages, *assistant)
-			}
-
-			sb.Reset()
-		}
-	}
-}
-
-func buildModelfile(opts runOptions) string {
-	var mf strings.Builder
-	model := opts.ParentModel
-	if model == "" {
-		model = opts.Model
-	}
-	fmt.Fprintf(&mf, "FROM %s\n", model)
-	if opts.System != "" {
-		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
-	}
-
-	if opts.Template != "" {
-		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
-	}
-
-	keys := make([]string, 0)
-	for k := range opts.Options {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-	for _, k := range keys {
-		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
-	}
-	fmt.Fprintln(&mf)
-
-	for _, msg := range opts.Messages {
-		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
-	}
-
-	return mf.String()
-}
-
-func normalizeFilePath(fp string) string {
-	// Define a map of escaped characters and their replacements
-	replacements := map[string]string{
-		"\\ ":  " ",  // Escaped space
-		"\\(":  "(",  // Escaped left parenthesis
-		"\\)":  ")",  // Escaped right parenthesis
-		"\\[":  "[",  // Escaped left square bracket
-		"\\]":  "]",  // Escaped right square bracket
-		"\\{":  "{",  // Escaped left curly brace
-		"\\}":  "}",  // Escaped right curly brace
-		"\\$":  "$",  // Escaped dollar sign
-		"\\&":  "&",  // Escaped ampersand
-		"\\;":  ";",  // Escaped semicolon
-		"\\'":  "'",  // Escaped single quote
-		"\\\\": "\\", // Escaped backslash
-		"\\*":  "*",  // Escaped asterisk
-		"\\?":  "?",  // Escaped question mark
-	}
-
-	for escaped, actual := range replacements {
-		fp = strings.ReplaceAll(fp, escaped, actual)
-	}
-	return fp
-}
-
-func extractFileNames(input string) []string {
-	// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
-	// and followed by more characters and a file extension
-	// This will capture non filename strings, but we'll check for file existence to remove mismatches
-	regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
-	re := regexp.MustCompile(regexPattern)
-
-	return re.FindAllString(input, -1)
-}
-
-func extractFileData(input string) (string, []api.ImageData, error) {
-	filePaths := extractFileNames(input)
-	var imgs []api.ImageData
-
-	for _, fp := range filePaths {
-		nfp := normalizeFilePath(fp)
-		data, err := getImageData(nfp)
-		if err != nil {
-			if os.IsNotExist(err) {
-				continue
-			}
-			fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
-			return "", imgs, err
-		}
-		fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
-		input = strings.ReplaceAll(input, fp, "")
-		imgs = append(imgs, data)
-	}
-	return input, imgs, nil
-}
-
-func getImageData(filePath string) ([]byte, error) {
-	file, err := os.Open(filePath)
-	if err != nil {
-		return nil, err
-	}
-	defer file.Close()
-
-	buf := make([]byte, 512)
-	_, err = file.Read(buf)
-	if err != nil {
-		return nil, err
-	}
-
-	contentType := http.DetectContentType(buf)
-	allowedTypes := []string{"image/jpeg", "image/jpg", "image/png"}
-	if !slices.Contains(allowedTypes, contentType) {
-		return nil, fmt.Errorf("invalid image type: %s", contentType)
-	}
-
-	info, err := file.Stat()
-	if err != nil {
-		return nil, err
-	}
-
-	// Check if the file size exceeds 100MB
-	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
-	if info.Size() > maxSize {
-		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
-	}
-
-	buf = make([]byte, info.Size())
-	_, err = file.Seek(0, 0)
-	if err != nil {
-		return nil, err
-	}
-
-	_, err = io.ReadFull(file, buf)
-	if err != nil {
-		return nil, err
-	}
-
-	return buf, nil
-}
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -1,116 +0,0 @@
-package cmd
-
-import (
-	"bytes"
-	"testing"
-	"text/template"
-
-	"github.com/stretchr/testify/assert"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestExtractFilenames(t *testing.T) {
-	// Unix style paths
-	input := ` some preamble 
- ./relative\ path/one.png inbetween1 ./not a valid two.jpg inbetween2
-/unescaped space /three.jpeg inbetween3 /valid\ path/dir/four.png "./quoted with spaces/five.svg`
-	res := extractFileNames(input)
-	assert.Len(t, res, 5)
-	assert.Contains(t, res[0], "one.png")
-	assert.Contains(t, res[1], "two.jpg")
-	assert.Contains(t, res[2], "three.jpeg")
-	assert.Contains(t, res[3], "four.png")
-	assert.Contains(t, res[4], "five.svg")
-	assert.NotContains(t, res[4], '"')
-	assert.NotContains(t, res, "inbtween")
-
-	// Windows style paths
-	input = ` some preamble
- c:/users/jdoe/one.png inbetween1 c:/program files/someplace/two.jpg inbetween2 
- /absolute/nospace/three.jpeg inbetween3 /absolute/with space/four.png inbetween4
-./relative\ path/five.svg inbetween5 "./relative with/spaces/six.png inbetween6
-d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8 
- d:\program files\someplace\nine.png inbetween9 "E:\program files\someplace\ten.svg some ending
-`
-	res = extractFileNames(input)
-	assert.Len(t, res, 10)
-	assert.NotContains(t, res, "inbtween")
-	assert.Contains(t, res[0], "one.png")
-	assert.Contains(t, res[0], "c:")
-	assert.Contains(t, res[1], "two.jpg")
-	assert.Contains(t, res[1], "c:")
-	assert.Contains(t, res[2], "three.jpeg")
-	assert.Contains(t, res[3], "four.png")
-	assert.Contains(t, res[4], "five.svg")
-	assert.Contains(t, res[5], "six.png")
-	assert.Contains(t, res[6], "seven.svg")
-	assert.Contains(t, res[6], "d:")
-	assert.Contains(t, res[7], "eight.png")
-	assert.Contains(t, res[7], "c:")
-	assert.Contains(t, res[8], "nine.png")
-	assert.Contains(t, res[8], "d:")
-	assert.Contains(t, res[9], "ten.svg")
-	assert.Contains(t, res[9], "E:")
-}
-
-func TestModelfileBuilder(t *testing.T) {
-	opts := runOptions{
-		Model:    "hork",
-		System:   "You are part horse and part shark, but all hork. Do horklike things",
-		Template: "This is a template.",
-		Messages: []api.Message{
-			{Role: "user", Content: "Hey there hork!"},
-			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
-		},
-		Options: map[string]interface{}{},
-	}
-
-	opts.Options["temperature"] = 0.9
-	opts.Options["seed"] = 42
-	opts.Options["penalize_newline"] = false
-	opts.Options["stop"] = []string{"hi", "there"}
-
-	mf := buildModelfile(opts)
-	expectedModelfile := `FROM {{.Model}}
-SYSTEM """{{.System}}"""
-TEMPLATE """{{.Template}}"""
-PARAMETER penalize_newline false
-PARAMETER seed 42
-PARAMETER stop [hi there]
-PARAMETER temperature 0.9
-
-MESSAGE user """Hey there hork!"""
-MESSAGE assistant """Yes it is true, I am half horse, half shark."""
-`
-
-	tmpl, err := template.New("").Parse(expectedModelfile)
-	assert.Nil(t, err)
-
-	var buf bytes.Buffer
-	err = tmpl.Execute(&buf, opts)
-	assert.Nil(t, err)
-	assert.Equal(t, buf.String(), mf)
-
-	opts.ParentModel = "horseshark"
-	mf = buildModelfile(opts)
-	expectedModelfile = `FROM {{.ParentModel}}
-SYSTEM """{{.System}}"""
-TEMPLATE """{{.Template}}"""
-PARAMETER penalize_newline false
-PARAMETER seed 42
-PARAMETER stop [hi there]
-PARAMETER temperature 0.9
-
-MESSAGE user """Hey there hork!"""
-MESSAGE assistant """Yes it is true, I am half horse, half shark."""
-`
-
-	tmpl, err = template.New("").Parse(expectedModelfile)
-	assert.Nil(t, err)
-
-	var parentBuf bytes.Buffer
-	err = tmpl.Execute(&parentBuf, opts)
-	assert.Nil(t, err)
-	assert.Equal(t, parentBuf.String(), mf)
-}
--- a/cmd/spinner.go
+++ b/cmd/spinner.go
@@ -0,0 +1,44 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/jmorganca/ollama/progressbar"
+)
+
+type Spinner struct {
+	description string
+	*progressbar.ProgressBar
+}
+
+func NewSpinner(description string) *Spinner {
+	return &Spinner{
+		description: description,
+		ProgressBar: progressbar.NewOptions(-1,
+			progressbar.OptionSetWriter(os.Stderr),
+			progressbar.OptionThrottle(60*time.Millisecond),
+			progressbar.OptionSpinnerType(14),
+			progressbar.OptionSetRenderBlankState(true),
+			progressbar.OptionSetElapsedTime(false),
+			progressbar.OptionClearOnFinish(),
+			progressbar.OptionSetDescription(description),
+		),
+	}
+}
+
+func (s *Spinner) Spin(tick time.Duration) {
+	for range time.Tick(tick) {
+		if s.IsFinished() {
+			break
+		}
+
+		s.Add(1)
+	}
+}
+
+func (s *Spinner) Stop() {
+	s.Finish()
+	fmt.Println(s.description)
+}
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -1,30 +0,0 @@
-package cmd
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-func startApp(ctx context.Context, client *api.Client) error {
-	exe, err := os.Executable()
-	if err != nil {
-		return err
-	}
-	link, err := os.Readlink(exe)
-	if err != nil {
-		return err
-	}
-	if !strings.Contains(link, "Ollama.app") {
-		return fmt.Errorf("could not find ollama app")
-	}
-	path := strings.Split(link, "Ollama.app")
-	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
-		return err
-	}
-	return waitForServer(ctx, client)
-}
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -1,14 +0,0 @@
-//go:build !windows && !darwin
-
-package cmd
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/ollama/ollama/api"
-)
-
-func startApp(ctx context.Context, client *api.Client) error {
-	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
-}
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -1,58 +0,0 @@
-package cmd
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-	"syscall"
-
-	"github.com/ollama/ollama/api"
-)
-
-func startApp(ctx context.Context, client *api.Client) error {
-	// log.Printf("XXX Attempting to find and start ollama app")
-	AppName := "ollama app.exe"
-	exe, err := os.Executable()
-	if err != nil {
-		return err
-	}
-	appExe := filepath.Join(filepath.Dir(exe), AppName)
-	_, err = os.Stat(appExe)
-	if errors.Is(err, os.ErrNotExist) {
-		// Try the standard install location
-		localAppData := os.Getenv("LOCALAPPDATA")
-		appExe = filepath.Join(localAppData, "Ollama", AppName)
-		_, err := os.Stat(appExe)
-		if errors.Is(err, os.ErrNotExist) {
-			// Finally look in the path
-			appExe, err = exec.LookPath(AppName)
-			if err != nil {
-				return fmt.Errorf("could not locate ollama app")
-			}
-		}
-	}
-	// log.Printf("XXX attempting to start app %s", appExe)
-
-	cmd_path := "c:\\Windows\\system32\\cmd.exe"
-	cmd := exec.Command(cmd_path, "/c", appExe)
-	// TODO - these hide flags aren't working - still pops up a command window for some reason
-	cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true}
-
-	// TODO this didn't help either...
-	cmd.Stdin = strings.NewReader("")
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-
-	if err := cmd.Start(); err != nil {
-		return fmt.Errorf("unable to start ollama app %w", err)
-	}
-
-	if cmd.Process != nil {
-		defer cmd.Process.Release() //nolint:errcheck
-	}
-	return waitForServer(ctx, client)
-}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -1,331 +0,0 @@
-package convert
-
-import (
-	"bytes"
-	"cmp"
-	"encoding/binary"
-	"encoding/json"
-	"fmt"
-	"io"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"regexp"
-	"slices"
-
-	"github.com/mitchellh/mapstructure"
-	"google.golang.org/protobuf/proto"
-
-	"github.com/ollama/ollama/convert/sentencepiece"
-	"github.com/ollama/ollama/llm"
-)
-
-type Params struct {
-	Architectures    []string `json:"architectures"`
-	VocabSize        int      `json:"vocab_size"`
-	HiddenSize       int      `json:"hidden_size"`       // n_embd
-	HiddenLayers     int      `json:"num_hidden_layers"` // n_layer
-	ContextSize      int      `json:"max_position_embeddings"`
-	IntermediateSize int      `json:"intermediate_size"`
-	AttentionHeads   int      `json:"num_attention_heads"` // n_head
-	KeyValHeads      int      `json:"num_key_value_heads"`
-	NormEPS          float64  `json:"rms_norm_eps"`
-	RopeFreqBase     float64  `json:"rope_theta"`
-	BoSTokenID       int      `json:"bos_token_id"`
-	EoSTokenID       int      `json:"eos_token_id"`
-}
-
-type MetaData struct {
-	Type    string `mapstructure:"dtype"`
-	Shape   []int  `mapstructure:"shape"`
-	Offsets []int  `mapstructure:"data_offsets"`
-}
-
-func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
-	f, err := os.Open(fn)
-	if err != nil {
-		return []llm.Tensor{}, 0, err
-	}
-	defer f.Close()
-
-	var jsonSize uint64
-	binary.Read(f, binary.LittleEndian, &jsonSize)
-
-	buf := make([]byte, jsonSize)
-	_, err = io.ReadFull(f, buf)
-	if err != nil {
-		return []llm.Tensor{}, 0, err
-	}
-
-	d := json.NewDecoder(bytes.NewBuffer(buf))
-	d.UseNumber()
-	var parsed map[string]interface{}
-	if err = d.Decode(&parsed); err != nil {
-		return []llm.Tensor{}, 0, err
-	}
-
-	var keys []string
-	for k := range parsed {
-		keys = append(keys, k)
-	}
-
-	slices.Sort(keys)
-
-	slog.Info("converting layers")
-
-	var tensors []llm.Tensor
-	for _, k := range keys {
-		vals := parsed[k].(map[string]interface{})
-		var data MetaData
-		if err = mapstructure.Decode(vals, &data); err != nil {
-			return []llm.Tensor{}, 0, err
-		}
-
-		var size uint64
-		var kind uint32
-		switch len(data.Shape) {
-		case 0:
-			// metadata
-			continue
-		case 1:
-			// convert to float32
-			kind = 0
-			size = uint64(data.Shape[0] * 4)
-		case 2:
-			// convert to float16
-			kind = 1
-			size = uint64(data.Shape[0] * data.Shape[1] * 2)
-		}
-
-		ggufName, err := GetTensorName(k)
-		if err != nil {
-			slog.Error("%v", err)
-			return []llm.Tensor{}, 0, err
-		}
-
-		shape := []uint64{0, 0, 0, 0}
-		for i := range data.Shape {
-			shape[i] = uint64(data.Shape[i])
-		}
-
-		t := llm.Tensor{
-			Name:          ggufName,
-			Kind:          kind,
-			Offset:        offset,
-			Shape:         shape[:],
-			FileName:      fn,
-			OffsetPadding: 8 + jsonSize,
-			FileOffsets:   []uint64{uint64(data.Offsets[0]), uint64(data.Offsets[1])},
-		}
-		slog.Debug(fmt.Sprintf("%v", t))
-		tensors = append(tensors, t)
-		offset += size
-	}
-	return tensors, offset, nil
-}
-
-func GetSafeTensors(dirpath string) ([]llm.Tensor, error) {
-	var tensors []llm.Tensor
-	files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
-	if err != nil {
-		return []llm.Tensor{}, err
-	}
-
-	var offset uint64
-	for _, f := range files {
-		var t []llm.Tensor
-		var err error
-		t, offset, err = ReadSafeTensors(f, offset)
-		if err != nil {
-			slog.Error("%v", err)
-			return []llm.Tensor{}, err
-		}
-		tensors = append(tensors, t...)
-	}
-	return tensors, nil
-}
-
-func GetParams(dirpath string) (*Params, error) {
-	f, err := os.Open(filepath.Join(dirpath, "config.json"))
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var params Params
-
-	d := json.NewDecoder(f)
-	err = d.Decode(&params)
-	if err != nil {
-		return nil, err
-	}
-
-	return &params, nil
-}
-
-// Details on gguf's tokenizer can be found at:
-// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
-type Vocab struct {
-	Tokens []string
-	Scores []float32
-	Types  []int32
-}
-
-func LoadTokens(dirpath string) (*Vocab, error) {
-	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
-	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
-	if err != nil {
-		return nil, err
-	}
-
-	// To regenerate sentencepiece from the protobufs use:
-	// protoc -I=./ --go_out=./ sentencepiece_model.proto
-	modelProto := &sentencepiece.ModelProto{}
-	if err := proto.Unmarshal(in, modelProto); err != nil {
-		return nil, err
-	}
-
-	v := &Vocab{
-		Tokens: make([]string, 0),
-		Scores: make([]float32, 0),
-		Types:  make([]int32, 0),
-	}
-
-	pieces := modelProto.GetPieces()
-	for _, p := range pieces {
-		v.Tokens = append(v.Tokens, p.GetPiece())
-		v.Scores = append(v.Scores, p.GetScore())
-		t := p.GetType()
-		v.Types = append(v.Types, int32(t))
-	}
-
-	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
-
-	// add any additional tokens
-	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
-	if os.IsNotExist(err) {
-		return v, nil
-	} else if err != nil {
-		return nil, err
-	}
-
-	slog.Info("reading user defined tokens")
-
-	var extraTokenData map[string]int
-	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
-		return nil, err
-	}
-
-	type token struct {
-		key string
-		pos int
-	}
-
-	extraTokens := make([]token, 0)
-	for k, id := range extraTokenData {
-		extraTokens = append(extraTokens, token{k, id})
-	}
-
-	slices.SortFunc(extraTokens, func(a, b token) int {
-		return cmp.Compare(a.pos, b.pos)
-	})
-
-	numToks := len(v.Tokens)
-
-	for cnt, t := range extraTokens {
-		// the token id should match the specific index for the total number of tokens
-		if t.pos != cnt+numToks {
-			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
-		}
-		v.Tokens = append(v.Tokens, t.key)
-		v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
-	}
-	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
-
-	return v, nil
-}
-
-func GetTensorName(n string) (string, error) {
-	tMap := map[string]string{
-		"model.embed_tokens.weight":                           "token_embd.weight",
-		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
-		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
-		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
-		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
-		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
-		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
-		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
-		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
-		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
-		"lm_head.weight":    "output.weight",
-		"model.norm.weight": "output_norm.weight",
-	}
-
-	v, ok := tMap[n]
-	if ok {
-		return v, nil
-	}
-
-	// quick hack to rename the layers to gguf format
-	for k, v := range tMap {
-		re := regexp.MustCompile(k)
-		newName := re.ReplaceAllString(n, v)
-		if newName != n {
-			return newName, nil
-		}
-	}
-
-	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
-}
-
-func WriteGGUF(name string, tensors []llm.Tensor, params *Params, vocab *Vocab) (string, error) {
-	c := llm.ContainerGGUF{
-		ByteOrder: binary.LittleEndian,
-	}
-
-	m := llm.NewGGUFModel(&c)
-	m.Tensors = tensors
-	m.KV["general.architecture"] = "llama"
-	m.KV["general.name"] = name
-	m.KV["llama.context_length"] = uint32(params.ContextSize)
-	m.KV["llama.embedding_length"] = uint32(params.HiddenSize)
-	m.KV["llama.block_count"] = uint32(params.HiddenLayers)
-	m.KV["llama.feed_forward_length"] = uint32(params.IntermediateSize)
-	m.KV["llama.rope.dimension_count"] = uint32(128)
-	m.KV["llama.attention.head_count"] = uint32(params.AttentionHeads)
-	m.KV["llama.attention.head_count_kv"] = uint32(params.KeyValHeads)
-	m.KV["llama.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
-	m.KV["llama.rope.freq_base"] = float32(params.RopeFreqBase)
-	m.KV["general.file_type"] = uint32(1)
-	m.KV["tokenizer.ggml.model"] = "llama"
-
-	m.KV["tokenizer.ggml.tokens"] = vocab.Tokens
-	m.KV["tokenizer.ggml.scores"] = vocab.Scores
-	m.KV["tokenizer.ggml.token_type"] = vocab.Types
-
-	m.KV["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
-	m.KV["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
-	m.KV["tokenizer.ggml.unknown_token_id"] = uint32(0)
-	m.KV["tokenizer.ggml.add_bos_token"] = true
-	m.KV["tokenizer.ggml.add_eos_token"] = false
-
-	// llamacpp sets the chat template, however we don't need to set it since we pass it in through a layer
-	// m.KV["tokenizer.chat_template"] = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" // XXX removeme
-
-	c.V3.NumTensor = uint64(len(tensors))
-	c.V3.NumKV = uint64(len(m.KV))
-
-	f, err := os.CreateTemp("", "ollama-gguf")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	err = m.Encode(f)
-	if err != nil {
-		return "", err
-	}
-
-	return f.Name(), nil
-}
--- a/convert/sentencepiece/sentencepiece_model.pb.go
+++ b/convert/sentencepiece/sentencepiece_model.pb.go
--- a/convert/sentencepiece_model.proto
+++ b/convert/sentencepiece_model.proto
@@ -1,333 +0,0 @@
-// Copyright 2016 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.!
-
-syntax = "proto2";
-
-// TODO(taku): Needs to use LITE RUNTIME in OSS release.
-option optimize_for = LITE_RUNTIME;
-option go_package = "./sentencepiece";
-
-package sentencepiece;
-
-// TrainerSpec encodes a various parameters for SentencePiece training.
-// Next id: 55
-message TrainerSpec {
-  ///////////////////////////////////////////////////////////////////
-  // General parameters
-  //
-  // Input corpus files.
-  //  Trainer accepts the following two formats:
-  //  A) Monolingual: plain text, one sentence per line.
-  //  B) Bilingual:   TSV, source sentence <tab> target sentence
-  //  When bilingual data is passed, shared vocabulary model is built.
-  //  Note that the input file must be raw corpus, not a preprocessed corpus.
-  //  Trainer only loads the first `input_sentence_size` sentences specified
-  //  with this parameter.
-  repeated string input = 1;
-
-  // Input corpus format:
-  // "text": one-sentence-per-line text format (default)
-  // "tsv":  sentence <tab> freq
-  optional string input_format = 7;
-
-  // Output model file prefix.
-  // <model_prefix>.model and <model_prefix>.vocab are generated.
-  optional string model_prefix = 2;
-
-  // Model type. only have UNIGRAM now.
-  enum ModelType {
-    UNIGRAM = 1;  // Unigram language model with dynamic algorithm
-    BPE = 2;      // Byte Pair Encoding
-    WORD = 3;     // Delimitered by whitespace.
-    CHAR = 4;     // tokenizes into character sequence
-  }
-  optional ModelType model_type = 3 [default = UNIGRAM];
-
-  // Vocabulary size. 8k is the default size.
-  optional int32 vocab_size = 4 [default = 8000];
-
-  // List of the languages this model can accept.
-  // Since the model is language-agnostic, this field is used as a reference.
-  repeated string accept_language = 5;
-
-  // Size of self-test samples, which are encoded in the model file.
-  optional int32 self_test_sample_size = 6 [default = 0];
-
-  // Whether to use DP version of sentencepiece. Use it with TSV input format
-  // (requires precomputed word tab counts to work).
-  optional bool enable_differential_privacy = 50 [default = false];
-  // Set these parameters if you need DP version of sentencepiece.
-  // std of noise to add.
-  optional float differential_privacy_noise_level = 51 [default = 0.0];
-  // Clipping threshold to apply after adding noise. All the words with
-  // frequency less than this value are dropped.
-  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
-
-  ///////////////////////////////////////////////////////////////////
-  // Training parameters.
-  //
-  // Uses characters which cover the corpus with the ratio of `chars_coverage`.
-  // This parameter determines the set of basic Alphabet of sentence piece.
-  // 1.0 - `chars_coverage` characters are treated as UNK.
-  // See also required_chars field.
-  optional float character_coverage = 10 [default = 0.9995];
-
-  // Maximum size of sentences the trainer loads from `input` parameter.
-  // Trainer simply loads the `input` files in sequence.
-  // It is better to shuffle the input corpus randomly.
-  optional uint64 input_sentence_size = 11 [default = 0];
-  optional bool shuffle_input_sentence = 19 [default = true];
-
-  // Maximum size of sentences to make seed sentence pieces.
-  // Extended suffix array is constructed to extract frequent
-  // sub-strings from the corpus. This uses 20N working space,
-  // where N is the size of corpus.
-  optional int32 mining_sentence_size = 12 [deprecated = true];
-
-  // Maximum size of sentences to train sentence pieces.
-  optional int32 training_sentence_size = 13 [deprecated = true];
-
-  // The size of seed sentencepieces.
-  // `seed_sentencepiece_size` must be larger than `vocab_size`.
-  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
-
-  // In every EM sub-iterations, keeps top
-  // `shrinking_factor` * `current sentencepieces size` with respect to
-  // the loss of the sentence piece. This value should be smaller than 1.0.
-  optional float shrinking_factor = 15 [default = 0.75];
-
-  // The maximum sentence length in byte. The sentences with the length
-  // larger than `max_sentence_length` is simply ignored.
-  // Longer input tends to bring the following risks:
-  //  * Overflow during EM training (unigram language model only)
-  //  * Performance drop because of O(n log n) cost in BPE.
-  optional int32 max_sentence_length = 18 [default = 4192];
-
-  // Number of threads in the training.
-  optional int32 num_threads = 16 [default = 16];
-
-  // Number of EM sub iterations.
-  optional int32 num_sub_iterations = 17 [default = 2];
-
-  ///////////////////////////////////////////////////////////////////
-  // SentencePiece parameters which control the shapes of sentence piece.
-  //
-  // Maximum length of sentencepiece.
-  optional int32 max_sentencepiece_length = 20 [default = 16];
-
-  // Uses Unicode script to split sentence pieces.
-  // When `split_by_unicode_script` is true, we do not allow sentence piece to
-  // include multiple Unicode scripts, e.g. "F1" is not a valid piece.
-  // Exception: CJ characters (Hiragana/Katakana/Han) are all handled
-  // as one script type, since Japanese word can consist of multiple scripts.
-  // This exception is always applied regardless of the accept-language
-  // parameter.
-  optional bool split_by_unicode_script = 21 [default = true];
-
-  // When `split_by_number` is true, put a boundary between number and
-  // non-number transition. If we want to treat "F1" is one token, set this flag
-  // to be false.
-  optional bool split_by_number = 23 [default = true];
-
-  // Use a white space to split sentence pieces.
-  // When `split_by_whitespace` is false, we may have the piece containing
-  // a white space in the middle. e.g., "in_the".
-  optional bool split_by_whitespace = 22 [default = true];
-
-  // Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
-  // hello_. When `treat_whitespace_as_suffix` is true,
-  // NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
-  // of sentence.
-  optional bool treat_whitespace_as_suffix = 24 [default = false];
-
-  // Allows pieces that only contain whitespaces instead of appearing only as
-  // prefix or suffix of other pieces.
-  optional bool allow_whitespace_only_pieces = 26 [default = false];
-
-  // Split all digits (0-9) into separate pieces.
-  optional bool split_digits = 25 [default = false];
-
-  // Defines the pre-tokenization delimiter.
-  // When specified, no pieces crossing this delimiter is not included
-  // in the vocab. Then the delimiter string is virtually ignored
-  // during the training. This field can allows constraints on the vocabulary
-  // selection. Note that this field is available on unigram mode.
-  optional string pretokenization_delimiter = 53 [ default = ""];
-
-  ///////////////////////////////////////////////////////////////////
-  // Vocabulary management
-  //
-  // Defines control symbols used as an indicator to
-  // change the behavior of the decoder. <s> and </s> are pre-defined.
-  // We can use this field to encode various meta information,
-  // including language indicator in multilingual model.
-  // These symbols are not visible to users, but visible to
-  // the decoder. Note that when the input sentence contains control symbols,
-  // they are not treated as one token, but segmented into normal pieces.
-  // Control symbols must be inserted independently from the segmentation.
-  repeated string control_symbols = 30;
-
-  // Defines user defined symbols.
-  // These symbols are added with extremely high score
-  // so they are always treated as one unique symbol in any context.
-  // Typical usage of user_defined_symbols is placeholder for named entities.
-  repeated string user_defined_symbols = 31;
-
-  // Defines required characters. Each UTF8 character in this string is included
-  // in the character set regardless of character_coverage value. Unlike
-  // user_defined_symbols, these characters have scores based on the frequency
-  // on input sentences, and the model can form subwords using characters
-  // in this field.
-  optional string required_chars = 36;
-
-  // Decomposes unknown pieces into UTF-8 bytes.
-  optional bool byte_fallback = 35 [default = false];
-
-  // When creating the vocabulary file, defines whether or not to additionally
-  // output the score for each piece.
-  optional bool vocabulary_output_piece_score = 32 [default = true];
-
-  // `vocab_size` is treated as hard limit. Crash if
-  // the model can not produce the vocab of size `vocab_size`,
-  // When `hard_vocab_limit` is false, vocab_size is treated
-  // as soft limit. Note that when model_type=char,
-  // always assumes hard_vocab_limit = false.
-  optional bool hard_vocab_limit = 33 [default = true];
-
-  // use all symbols for vocab extraction. This flag is valid
-  // if model type is either CHAR or WORD
-  optional bool use_all_vocab = 34 [default = false];
-
-  ///////////////////////////////////////////////////////////////////
-  // Reserved special meta tokens.
-  // * -1 is not used.
-  // * unk_id must not be -1.
-  // Id must starts with 0 and be contigous.
-  optional int32 unk_id = 40 [default = 0];   // <unk>
-  optional int32 bos_id = 41 [default = 1];   // <s>
-  optional int32 eos_id = 42 [default = 2];   // </s>
-  optional int32 pad_id = 43 [default = -1];  // <pad> (padding)
-  optional string unk_piece = 45 [default = "<unk>"];
-  optional string bos_piece = 46 [default = "<s>"];
-  optional string eos_piece = 47 [default = "</s>"];
-  optional string pad_piece = 48 [default = "<pad>"];
-
-  // Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
-  // since this character can be useful both for user and
-  // developer. We can easily figure out that <unk> is emitted.
-  optional string unk_surface = 44 [default = " \xE2\x81\x87 "];
-
-  // Increase bit depth to allow unigram model training on large
-  // (>10M sentences) corpora. A Side-effect of enabling this flag
-  // is increased memory usage.
-  optional bool train_extremely_large_corpus = 49 [default = false];
-
- // Path to a seed sentencepieces file, with one tab-separated
-  // seed sentencepiece <tab> frequency per line.
-  optional string seed_sentencepieces_file = 54 [default = ""];
-
-  // Customized extensions: the range of field numbers
-  // are open to third-party extensions.
-  extensions 200 to max;
-}
-
-// NormalizerSpec encodes a various parameters for string normalizaiton
-message NormalizerSpec {
-  // name of normalization rule.
-  optional string name = 1;
-
-  // Pre-compiled normalization rule created by
-  // Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
-  // Usually this field is set by Builder::GetNormalizerSpec() method.
-  optional bytes precompiled_charsmap = 2;
-
-  // Adds dummy whitespace at the beginning of text in order to
-  // treat "world" in "world" and "hello world" in the same way.
-  optional bool add_dummy_prefix = 3 [default = true];
-
-  // Removes leading, trailing, and duplicate internal whitespace.
-  optional bool remove_extra_whitespaces = 4 [default = true];
-
-  // Replaces whitespace with meta symbol.
-  // This field must be true to train sentence piece model.
-  optional bool escape_whitespaces = 5 [default = true];
-
-  // Custom normalization rule file in TSV format.
-  // https://github.com/google/sentencepiece/blob/master/doc/normalization.md
-  // This field is only used in SentencePieceTrainer::Train() method, which
-  // compiles the rule into the binary rule stored in `precompiled_charsmap`.
-  optional string normalization_rule_tsv = 6;
-
-  // Customized extensions: the range of field numbers
-  // are open to third-party extensions.
-  extensions 200 to max;
-}
-
-// Proto to store samples for self-testing.
-message SelfTestData {
-  message Sample {
-    optional string input = 1;
-    optional string expected = 2;
-  }
-  repeated Sample samples = 1;
-
-  // Customized extensions: the range of field numbers
-  // are open to third-party extensions.
-  extensions 200 to max;
-}
-
-// ModelProto stores model parameters.
-// SentencePieceProcessor is supposed to be self-contained.
-// All settings/parameters which may change the behavior must be encoded
-// in ModelProto.
-message ModelProto {
-  message SentencePiece {
-    enum Type {
-      NORMAL = 1;        // normal symbol
-      UNKNOWN = 2;       // unknown symbol. only <unk> for now.
-      CONTROL = 3;       // control symbols. </s>, <s>, <2ja> etc.
-      USER_DEFINED = 4;  // user defined symbols.
-                         // Typical usage of USER_DEFINED symbol
-                         // is placeholder.
-      BYTE = 6;          // byte symbols. Used when `byte_fallback` is true.
-      UNUSED = 5;        // this piece is not used.
-    }
-    optional string piece = 1;  // piece must not be empty.
-    optional float score = 2;
-    optional Type type = 3 [default = NORMAL];
-
-    // Customized extensions: the range of field numbers
-    // are open to third-party extensions.
-    extensions 200 to max;
-  }
-
-  // Sentence pieces with scores.
-  repeated SentencePiece pieces = 1;
-
-  // Spec used to generate this model file.
-  optional TrainerSpec trainer_spec = 2;
-
-  // Spec for text normalization.
-  optional NormalizerSpec normalizer_spec = 3;
-
-  // Stores sample input and its expected segmentation to verify the model.
-  optional SelfTestData self_test_data = 4;
-
-  // Spec for text de-normalization.
-  optional NormalizerSpec denormalizer_spec = 5;
-
-  // Customized extensions: the range of field numbers
-  // are open to third-party extensions.
-  extensions 200 to max;
-}
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,21 +1,6 @@
 # Documentation

-### Getting Started
-* [Quickstart](../README.md#quickstart)
-* [Examples](../examples)
-* [Importing models](./import.md)
-* [Linux Documentation](./linux.md)
-* [Windows Documentation](./windows.md)
-* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
-
-### Reference
-
-* [API Reference](./api.md)
-* [Modelfile Reference](./modelfile.md)
-* [OpenAI Compatibility](./openai.md)
-
-### Resources
-
-* [Troubleshooting Guide](./troubleshooting.md)
-* [FAQ](./faq.md)
-* [Development guide](./development.md)
+- [Modelfile](./modelfile.md)
+- [How to develop Ollama](./development.md)
+- [API](./api.md)
+- [Tutorials](./tutorials.md)
--- a/docs/api.md
+++ b/docs/api.md
--- a/Show More
+++ b/Show More