Compare commits
12 Commits
dhiltgen/r...royh-opena
| Author | SHA1 | Date |
|---|---|---|
|  | 568416ba17 |  |
|  | 80cba42ab2 |  |
|  | 6477a7aca4 |  |
|  | 51214ddef5 |  |
|  | b950d749a9 |  |
|  | 3702ed7532 |  |
|  | 6266603b17 |  |
|  | 2644c4e682 |  |
|  | 04cde43b2a |  |
|  | 105e36765d |  |
|  | fa7be5aab4 |  |
|  | 02169f3e60 |  |
@@ -3,7 +3,7 @@ ollama
 app
 macapp
 dist
+llm/llama.cpp
 .env
 .cache
 test_data
-llama/build
12 .gitattributes vendored

@@ -1,11 +1 @@
-llama/**/*.cpp linguist-vendored
-llama/**/*.hpp linguist-vendored
-llama/**/*.h linguist-vendored
-llama/**/*.c linguist-vendored
-llama/**/*.cu linguist-vendored
-llama/**/*.cuh linguist-vendored
-llama/**/*.m linguist-vendored
-llama/**/*.metal linguist-vendored
-
-* text=auto
-*.go text eol=lf
+llm/ext_server/* linguist-vendored
445 .github/workflows/release.yaml vendored

@@ -48,8 +48,8 @@ jobs:
 with:
 name: dist-darwin
 path: |
-dist/Ollama-darwin.zip
-dist/ollama-darwin
+dist/*arwin*
+!dist/*-cov

 # Windows builds take a long time to both install the dependencies and build, so parallelize
 # CPU generation step
@@ -92,19 +92,18 @@ jobs:
 - run: go get ./...
 - run: |
 $gopath=(get-command go).source | split-path -parent
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
 $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$env:PATH"
-$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
-make -j $cores
-name: make
+go generate -x ./...
+name: go generate
 - uses: actions/upload-artifact@v4
 with:
 name: generate-windows-cpu
 path: |
-build/**/*
-build/**/*.a
+llm/build/**/bin/*
+llm/build/**/*.a
 dist/windows-amd64/**

 # ROCm generation step
@@ -158,33 +157,36 @@ jobs:
 - run: go get ./...
 - run: |
 $gopath=(get-command go).source | split-path -parent
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
 $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$env:PATH"
 $env:OLLAMA_SKIP_CPU_GENERATE="1"
 $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
-make -j $cores
-name: make
+go generate -x ./...
+name: go generate
+- name: 'gather rocm dependencies'
+run: |
+$HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+md "dist\deps\bin\rocblas\library"
+cp "${HIP_PATH}\bin\hipblas.dll" "dist\deps\bin\"
+cp "${HIP_PATH}\bin\rocblas.dll" "dist\deps\bin\"
+cp "${HIP_PATH}\bin\rocblas\library\*" "dist\deps\bin\rocblas\library\"
 - uses: actions/upload-artifact@v4
 with:
 name: generate-windows-rocm
 path: |
-build/**/*
+llm/build/**/bin/*
 dist/windows-amd64/**
+- uses: actions/upload-artifact@v4
+with:
+name: windows-rocm-deps
+path: dist/deps/*

 # CUDA generation step
 generate-windows-cuda:
 environment: release
 runs-on: windows
-strategy:
-matrix:
-cuda:
-- version: "11"
-url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
-- version: "12"
-url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
 env:
 KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
 steps:
@@ -218,11 +220,11 @@ jobs:
 with:
 go-version-file: go.mod
 cache: true
-- name: 'Install CUDA ${{ matrix.cuda.version }}'
+- name: 'Install CUDA'
 run: |
 $ErrorActionPreference = "Stop"
 write-host "downloading CUDA Installer"
-Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
 write-host "Installing CUDA"
 Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
 write-host "Completed CUDA"
@@ -235,175 +237,35 @@ jobs:
 - name: 'Verify CUDA'
 run: nvcc -V
 - run: go get ./...
-- name: make
+- name: go generate
 run: |
 $gopath=(get-command go).source | split-path -parent
 $cudabin=(get-command nvcc).source | split-path
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
 $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$cudabin;$env:PATH"
 $env:OLLAMA_SKIP_CPU_GENERATE="1"
-$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
-make -j $cores
+go generate -x ./...
+- name: 'gather cuda dependencies'
+run: |
+$NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
+md "dist\deps"
+cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
+cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
+cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
 - uses: actions/upload-artifact@v4
 with:
-name: generate-windows-cuda-${{ matrix.cuda.version }}
+name: generate-windows-cuda
 path: |
-build/**/*
+llm/build/**/bin/*
 dist/windows-amd64/**

-# windows arm64 generate, go build, and zip file (no installer)
-# Output of this build is aggregated into the final x86 build
-# for a unified windows installer
-windows-arm64:
-runs-on: windows-arm64
-environment: release
-env:
-KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-steps:
-# The current Windows arm64 beta image has effectively zero dev tools installed...
-- name: Install git and gzip
-run: |
-Set-ExecutionPolicy Bypass -Scope Process -Force
-[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
-iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-choco install -y --no-progress git gzip
-echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-# pacman is buggy on win arm64, so we avoid using it, but rely on the binary artifacts
-# we download the sfx (7zip bundle) which isn't fully set up, but the binaries we need to build work
-- name: Install msys2 x64
-run: |
-$url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-base-x86_64-20240727.sfx.exe"
-write-host "Downloading MSYS2"
-Invoke-WebRequest -Uri "$url" -outfile "${env:RUNNER_TEMP}\msys2.exe"
-write-host "Installing msys2"
-Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @(
-'-y', '-oC:\'
-) -NoNewWindow -Wait
-echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-# since pacman isn't reliable, we just download the tar file and extract directly
-- name: Downloading and extracting msys2 make tar file
-run: |
-$url="https://mirror.msys2.org/msys/x86_64/make-4.4.1-2-x86_64.pkg.tar.zst"
-write-host "Downloading make"
-Invoke-WebRequest -Uri "$url" -outfile c:\msys64\make.tar.zst
-cd c:\msys64; tar -xf make.tar.zst
-rm c:\msys64\make.tar.zst
-- name: Verify Make works properly
-run: |
-echo $env:PATH
-make --version
-- name: Install Visual Studio 2022
-run: |
-$components = @(
-"Microsoft.VisualStudio.Component.CoreEditor",
-"Microsoft.VisualStudio.Workload.CoreEditor",
-"Microsoft.VisualStudio.Component.Roslyn.Compiler",
-"Microsoft.Component.MSBuild",
-"Microsoft.VisualStudio.Component.TextTemplating",
-"Microsoft.VisualStudio.Component.Debugger.JustInTime",
-"Microsoft.VisualStudio.Component.VC.CoreIde",
-"Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
-"Microsoft.VisualStudio.Component.Windows11SDK.22621",
-"Microsoft.VisualStudio.Component.VC.Tools.ARM64EC",
-"Microsoft.VisualStudio.Component.VC.Tools.ARM64",
-"Microsoft.VisualStudio.Component.VC.ATL",
-"Microsoft.VisualStudio.Component.VC.ATL.ARM64",
-"Microsoft.VisualStudio.Component.Graphics",
-"Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
-"Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
-"Microsoft.VisualStudio.Component.Windows11Sdk.WindowsPerformanceToolkit",
-"Microsoft.VisualStudio.Component.CppBuildInsights",
-"Microsoft.VisualStudio.Component.VC.DiagnosticTools",
-"Microsoft.VisualStudio.ComponentGroup.WebToolsExtensions.CMake",
-"Microsoft.VisualStudio.Component.VC.CMake.Project",
-"Microsoft.VisualStudio.Component.VC.ASAN",
-"Microsoft.VisualStudio.Component.Vcpkg",
-"Microsoft.VisualStudio.Workload.NativeDesktop"
-)
-$config = @{
-"version" = "1.0"
-"components" = $components
-"extensions" = @()
-}
-$configPath = "${env:RUNNER_TEMP}\vsconfig"
-$config | ConvertTo-Json | Out-File -FilePath $configPath
-$bootstrapperFilePath = "${env:RUNNER_TEMP}\vs_community.exe"
-write-host "Downloading Visual Studio 2022"
-Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_community.exe" -outfile $bootstrapperFilePath
-$bootstrapperArgumentList = ('/c', $bootstrapperFilePath, '--config', $configPath, '--quiet', '--wait' )
-write-host "Installing Visual Studio 2022"
-$process = Start-Process -FilePath cmd.exe -ArgumentList $bootstrapperArgumentList -Wait -PassThru
-$exitCode = $process.ExitCode
-write-host $exitCode
-# pacman in mingw/msys2 is ~broken on windows arm right now - hangs consistently during attempts to install
-# so we'll use this alternative GCC binary
-- name: Install llvm-mingw GCC
-run: |
-$gcc_url="https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip"
-write-host "Downloading llvm-mingw"
-Invoke-WebRequest -Uri "${gcc_url}" -OutFile "${env:RUNNER_TEMP}\gcc.zip"
-write-host "Unpacking llvm-mingw"
-expand-archive -path "${env:RUNNER_TEMP}\gcc.zip" -destinationpath "c:\"
-mv c:\llvm-mingw-* c:\llvm-mingw
-echo "c:\llvm-mingw\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-- name: Verify GCC
-run: |
-echo $env:PATH
-gcc --version
-- uses: actions/checkout@v4
-- name: Set Version
-run: |
-$ver=${env:GITHUB_REF_NAME}.trim("v")
-echo VERSION=$ver | Out-File -FilePath ${env:GITHUB_ENV} -Encoding utf8 -Append
-- uses: 'google-github-actions/auth@v2'
-with:
-project_id: 'ollama'
-credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-- run: echo "${{ vars.OLLAMA_CERT }}" | Out-File -FilePath ollama_inc.crt -Encoding utf8
-- name: install Windows SDK 8.1 to get signtool
-run: |
-$ErrorActionPreference = "Stop"
-write-host "downloading SDK"
-Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
-Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-write-host "Win SDK 8.1 installed"
-gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
-- name: install signing plugin
-run: |
-$ErrorActionPreference = "Stop"
-write-host "downloading plugin"
-Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
-Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
-write-host "Installing plugin"
-& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
-write-host "plugin installed"
-- uses: actions/setup-go@v5
-with:
-go-version-file: go.mod
-cache: true
-- run: go get ./...
-- run: |
-$gopath=(get-command go).source | split-path -parent
-$gccpath=(get-command gcc).source | split-path -parent
-import-module 'C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\Program Files\Microsoft Visual Studio\2022\Community' -skipautomaticlocation
-$env:PATH="$gopath;$gccpath;$env:PATH"
-echo $env:PATH
-$env:ARCH="arm64"
-.\scripts\build_windows.ps1 buildOllama buildApp gatherDependencies distZip
-name: 'Windows Build'
 - uses: actions/upload-artifact@v4
 with:
-name: windows-arm64
-path: |
-dist/windows-arm64/**
-dist/windows-arm64-app.exe
-dist/ollama-windows-arm64.zip
+name: windows-cuda-deps
+path: dist/deps/*

-# Import the prior generation steps plus the full arm64 build, and build the final windows assets
+# Import the prior generation steps and build the final windows assets
 build-windows:
 environment: release
 runs-on: windows
@@ -411,7 +273,6 @@ jobs:
 - generate-windows-cuda
 - generate-windows-rocm
 - generate-windows-cpu
-- windows-arm64
 env:
 KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
 steps:
@@ -453,26 +314,24 @@ jobs:
 name: generate-windows-cpu
 - uses: actions/download-artifact@v4
 with:
-name: generate-windows-cuda-11
+name: generate-windows-cuda
 - uses: actions/download-artifact@v4
 with:
-name: generate-windows-cuda-12
+name: windows-cuda-deps
+- uses: actions/download-artifact@v4
+with:
+name: windows-rocm-deps
 - uses: actions/download-artifact@v4
 with:
 name: generate-windows-rocm
-- uses: actions/download-artifact@v4
-with:
-name: windows-arm64
-path: dist
-- run: dir build
+- run: dir llm/build
 - run: |
 $gopath=(get-command go).source | split-path -parent
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
 $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$env:PATH"
 $env:OLLAMA_SKIP_GENERATE="1"
-$env:ARCH="amd64"
 & .\scripts\build_windows.ps1
 - uses: actions/upload-artifact@v4
 with:
@@ -486,7 +345,9 @@ jobs:
 environment: release
 runs-on: linux
 env:
-PLATFORM: linux/amd64
+OLLAMA_SKIP_MANIFEST_CREATE: '1'
+BUILD_ARCH: amd64
+PUSH: '1'
 steps:
 - uses: actions/checkout@v4
 with:
@@ -494,8 +355,15 @@ jobs:
 - name: Set Version
 shell: bash
 run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
+- name: Login to Docker Hub
+uses: docker/login-action@v3
+with:
+username: ${{ vars.DOCKER_USER }}
+password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
 - run: |
 ./scripts/build_linux.sh
+./scripts/build_docker.sh
+mv dist/deps/* dist/
 - uses: actions/upload-artifact@v4
 with:
 name: dist-linux-amd64
@@ -509,7 +377,9 @@ jobs:
 environment: release
 runs-on: linux-arm64
 env:
-PLATFORM: linux/arm64
+OLLAMA_SKIP_MANIFEST_CREATE: '1'
+BUILD_ARCH: arm64
+PUSH: '1'
 steps:
 - uses: actions/checkout@v4
 with:
@@ -538,8 +408,14 @@ jobs:
 sudo usermod -aG docker $USER
 sudo apt-get install acl
 sudo setfacl --modify user:$USER:rw /var/run/docker.sock
+- name: Login to Docker Hub
+uses: docker/login-action@v3
+with:
+username: ${{ vars.DOCKER_USER }}
+password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
 - run: |
 ./scripts/build_linux.sh
+./scripts/build_docker.sh
 - uses: actions/upload-artifact@v4
 with:
 name: dist-linux-arm64
@@ -547,178 +423,6 @@ jobs:
 dist/*linux*
 !dist/*-cov

-# Container image build
-build-container-image:
-environment: release
-strategy:
-matrix:
-runner:
-- linux
-- linux-arm64
-runs-on: ${{ matrix.runner }}
-env:
-FINAL_IMAGE_REPO: ollama/ollama
-steps:
-- uses: actions/checkout@v4
-with:
-submodules: recursive
-- name: 'Install Docker'
-if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
-run: |
-sudo apt-get update
-sudo apt-get install -y ca-certificates curl
-sudo install -m 0755 -d /etc/apt/keyrings
-sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-sudo chmod a+r /etc/apt/keyrings/docker.asc
-echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-sudo apt-get update
-sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-sudo usermod -aG docker $USER
-sudo apt-get install acl
-sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-- name: Docker meta
-id: meta
-uses: docker/metadata-action@v5
-with:
-images: ${{ env.FINAL_IMAGE_REPO }}
-flavor: |
-latest=false
-tags: |
-type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-type=semver,pattern={{version}}
-- name: Set Version
-shell: bash
-run: |
-machine=$(uname -m)
-case ${machine} in
-x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-esac >>$GITHUB_ENV
-echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-- name: Set up Docker Buildx
-uses: docker/setup-buildx-action@v3
-- name: Login to Docker Hub
-uses: docker/login-action@v3
-with:
-username: ${{ vars.DOCKER_USER }}
-password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-- name: Build and push by digest
-id: build
-uses: docker/build-push-action@v6
-with:
-context: "."
-platforms: linux/${{ env.ARCH }}
-build-args: |
-GOFLAGS
-outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
-- name: Export digest
-run: |
-mkdir -p /tmp/digests
-digest="${{ steps.build.outputs.digest }}"
-touch "/tmp/digests/${digest#sha256:}"
-- name: Upload digest
-uses: actions/upload-artifact@v4
-with:
-name: digests-${{ env.PLATFORM_PAIR }}
-path: /tmp/digests/*
-if-no-files-found: error
-retention-days: 1
-merge:
-environment: release
-runs-on: linux
-needs:
-- build-container-image
-env:
-FINAL_IMAGE_REPO: ollama/ollama
-steps:
-- uses: actions/checkout@v4
-with:
-submodules: recursive
-- name: Download digests
-uses: actions/download-artifact@v4
-with:
-path: /tmp/digests
-pattern: digests-*
-merge-multiple: true
-- name: Set up Docker Buildx
-uses: docker/setup-buildx-action@v3
-- name: Docker meta
-id: meta
-uses: docker/metadata-action@v5
-with:
-images: ${{ env.FINAL_IMAGE_REPO }}
-flavor: |
-latest=false
-tags: |
-type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-type=semver,pattern={{version}}
-- name: Set Version
-shell: bash
-run: |
-machine=$(uname -m)
-case ${machine} in
-x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-esac >>$GITHUB_ENV
-echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-- name: Login to Docker Hub
-uses: docker/login-action@v3
-with:
-username: ${{ vars.DOCKER_USER }}
-password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-- name: Create manifest list and push
-working-directory: /tmp/digests
-run: |
-docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-$(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
-- name: Inspect image
-run: |
-docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
-build-container-image-rocm:
-environment: release
-runs-on: linux
-env:
-FINAL_IMAGE_REPO: ollama/ollama
-ARCH: amd64
-PLATFORM_PAIR: linux-amd64
-steps:
-- uses: actions/checkout@v4
-with:
-submodules: recursive
-- name: Docker meta
-id: meta
-uses: docker/metadata-action@v5
-with:
-images: ${{ env.FINAL_IMAGE_REPO }}
-flavor: |
-latest=false
-tags: |
-type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-type=semver,pattern={{version}}
-- name: Set Version
-shell: bash
-run: |
-echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-- name: Set up Docker Buildx
-uses: docker/setup-buildx-action@v3
-- name: Login to Docker Hub
-uses: docker/login-action@v3
-with:
-username: ${{ vars.DOCKER_USER }}
-password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-- name: Build and push by digest
-id: build
-uses: docker/build-push-action@v6
-with:
-context: "."
-target: runtime-rocm
-build-args: |
-GOFLAGS
-tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
-push: true
-
 # Aggregate all the assets and ship a release
 release:
 needs:
@@ -731,6 +435,8 @@ jobs:
 permissions:
 contents: write
 env:
+OLLAMA_SKIP_IMAGE_BUILD: '1'
+PUSH: '1'
 GH_TOKEN: ${{ github.token }}
 steps:
 - uses: actions/checkout@v4
@@ -739,6 +445,12 @@ jobs:
 run: |
 echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
 echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
+- name: Login to Docker Hub
+uses: docker/login-action@v3
+with:
+username: ${{ vars.DOCKER_USER }}
+password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+- run: ./scripts/build_docker.sh
 - name: Retrieve built artifact
 uses: actions/download-artifact@v4
 with:
@@ -747,8 +459,7 @@ jobs:
 merge-multiple: true
 - run: |
 ls -lh dist/
-(cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
-mv sha256sum.txt dist/
+(cd dist; sha256sum * > sha256sum.txt)
 cat dist/sha256sum.txt
 - name: Create or update Release
 run: |
198 .github/workflows/test.yaml vendored

@@ -21,7 +21,9 @@ jobs:
 changes:
 runs-on: ubuntu-latest
 outputs:
-RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
+GENERATE: ${{ steps.changes.outputs.GENERATE }}
+GENERATE_CUDA: ${{ steps.changes.outputs.GENERATE_CUDA }}
+GENERATE_ROCM: ${{ steps.changes.outputs.GENERATE_ROCM }}
 steps:
 - uses: actions/checkout@v4
 with:
@@ -36,12 +38,58 @@ jobs:
 }

 {
-echo RUNNERS=$(changed 'llama/**')
+echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
 } >>$GITHUB_OUTPUT

-runners-linux-cuda:
+generate:
 needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+if: ${{ needs.changes.outputs.GENERATE == 'True' }}
+strategy:
+matrix:
+os: [ubuntu-latest, macos-latest, windows-2019]
+arch: [amd64, arm64]
+exclude:
+- os: ubuntu-latest
+arch: arm64
+- os: windows-2019
+arch: arm64
+runs-on: ${{ matrix.os }}
+env:
+GOARCH: ${{ matrix.arch }}
+CGO_ENABLED: '1'
+steps:
+- uses: actions/checkout@v4
+- uses: actions/setup-go@v5
+with:
+go-version-file: go.mod
+cache: true
+- run: go get ./...
+- run: |
+$gopath=(get-command go).source | split-path -parent
+$gccpath=(get-command gcc).source | split-path -parent
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
+$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+$env:PATH="$gopath;$gccpath;$env:PATH"
+echo $env:PATH
+go generate -x ./...
+if: ${{ startsWith(matrix.os, 'windows-') }}
+name: 'Windows Go Generate'
+- run: go generate -x ./...
+if: ${{ ! startsWith(matrix.os, 'windows-') }}
+name: 'Unix Go Generate'
+- run: go build .
+- uses: actions/upload-artifact@v4
+with:
+name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
+path: |
+llm/build/**/bin/*
+llm/build/**/*.a
+generate-cuda:
+needs: [changes]
+if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
 strategy:
 matrix:
 cuda-version:
@@ -51,6 +99,8 @@ jobs:
 steps:
 - run: |
 apt-get update && apt-get install -y git build-essential curl
+curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+| tar -zx -C /usr --strip-components 1
 env:
 DEBIAN_FRONTEND: noninteractive
 - uses: actions/checkout@v4
@@ -61,11 +111,18 @@ jobs:
 - run: go get ./...
 - run: |
 git config --global --add safe.directory /__w/ollama/ollama
-cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-make -j $cores cuda_v11
-runners-linux-rocm:
+go generate -x ./...
+env:
+OLLAMA_SKIP_CPU_GENERATE: '1'
+- uses: actions/upload-artifact@v4
+with:
+name: cuda-${{ matrix.cuda-version }}-libraries
+path: |
+llm/build/**/bin/*
+dist/windows-amd64/**
+generate-rocm:
 needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
 strategy:
 matrix:
 rocm-version:
@@ -75,6 +132,8 @@ jobs:
 steps:
 - run: |
 apt-get update && apt-get install -y git build-essential curl rocm-libs
+curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+| tar -zx -C /usr --strip-components 1
 env:
 DEBIAN_FRONTEND: noninteractive
 - uses: actions/checkout@v4
@@ -85,13 +144,20 @@ jobs:
 - run: go get ./...
 - run: |
 git config --global --add safe.directory /__w/ollama/ollama
-cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-make -j $cores rocm
+go generate -x ./...
+env:
+OLLAMA_SKIP_CPU_GENERATE: '1'
+- uses: actions/upload-artifact@v4
+with:
+name: rocm-${{ matrix.rocm-version }}-libraries
+path: |
+llm/build/**/bin/*
+dist/windows-amd64/**

 # ROCm generation step
-runners-windows-rocm:
+generate-windows-rocm:
 needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
 runs-on: windows
 steps:
 - uses: actions/checkout@v4
@@ -113,21 +179,22 @@ jobs:
 - run: go get ./...
 - run: |
 $gopath=(get-command go).source | split-path -parent
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
+$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$env:PATH"
 $env:OLLAMA_SKIP_CPU_GENERATE="1"
 $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
-write-host $env:HIP_PATH
-make -C llama print-HIP_PATH print-HIP_LIB_DIR
-make -j $cores rocm
-name: make
+go generate -x ./...
+name: go generate
+env:
+OLLAMA_SKIP_CPU_GENERATE: '1'
+# TODO - do we need any artifacts?

 # CUDA generation step
-runners-windows-cuda:
+generate-windows-cuda:
 needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
 runs-on: windows
 steps:
 - uses: actions/checkout@v4
@@ -152,59 +219,19 @@ jobs:
 - name: 'Verify CUDA'
 run: nvcc -V
 - run: go get ./...
-- name: make
+- name: go generate
 run: |
 $gopath=(get-command go).source | split-path -parent
 $cudabin=(get-command nvcc).source | split-path
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+cd $env:GITHUB_WORKSPACE
 $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
 $env:PATH="$gopath;$cudabin;$env:PATH"
 $env:OLLAMA_SKIP_CPU_GENERATE="1"
-$cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
-make -j $cores cuda_v11
+go generate -x ./...
 env:
 OLLAMA_SKIP_CPU_GENERATE: '1'
+# TODO - do we need any artifacts?
-runners-cpu:
-needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-strategy:
-matrix:
-os: [ubuntu-latest, macos-latest, windows-2019]
-arch: [amd64, arm64]
-exclude:
-- os: ubuntu-latest
-arch: arm64
-- os: windows-2019
-arch: arm64
-runs-on: ${{ matrix.os }}
-env:
-GOARCH: ${{ matrix.arch }}
-ARCH: ${{ matrix.arch }}
-CGO_ENABLED: '1'
-steps:
-- uses: actions/checkout@v4
-- uses: actions/setup-go@v5
-with:
-go-version-file: go.mod
-cache: true
-- run: go get ./...
-- name: 'Build Windows Go Runners'
-if: ${{ startsWith(matrix.os, 'windows-') }}
-run: |
-$gopath=(get-command go).source | split-path -parent
-$gccpath=(get-command gcc).source | split-path -parent
-import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-$env:PATH="$gopath;$gccpath;$env:PATH"
-echo $env:PATH
-make -j 4
-- name: 'Build Unix Go Runners'
-if: ${{ ! startsWith(matrix.os, 'windows-') }}
-run: make -j 4
-- run: go build .

 lint:
 strategy:
@@ -236,9 +263,17 @@ jobs:
 arm64) echo ARCH=arm64 ;;
 esac >>$GITHUB_ENV
 shell: bash
+- run: |
+mkdir -p llm/build/linux/$ARCH/stub/bin
+touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
+if: ${{ startsWith(matrix.os, 'ubuntu-') }}
+- run: |
+mkdir -p llm/build/darwin/$ARCH/stub/bin
+touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
+if: ${{ startsWith(matrix.os, 'macos-') }}
 - uses: golangci/golangci-lint-action@v6
 with:
-args: --timeout 8m0s -v
+args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
 test:
 strategy:
 matrix:
@@ -253,6 +288,9 @@ jobs:
 env:
 GOARCH: ${{ matrix.arch }}
 CGO_ENABLED: '1'
+OLLAMA_CPU_TARGET: 'static'
+OLLAMA_SKIP_CPU_GENERATE: '1'
+OLLAMA_SKIP_METAL_GENERATE: '1'
 steps:
 - uses: actions/checkout@v4
 with:
@@ -263,21 +301,23 @@ jobs:
 cache: true
 - run: |
 case ${{ matrix.arch }} in
-amd64) echo ARCH=amd64 ;;
+amd64) echo ARCH=x86_64 ;;
 arm64) echo ARCH=arm64 ;;
 esac >>$GITHUB_ENV
 shell: bash
+- run: |
+mkdir -p llm/build/linux/$ARCH/stub/bin
+touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
+if: ${{ startsWith(matrix.os, 'ubuntu-') }}
+- run: |
+mkdir -p llm/build/darwin/$ARCH/stub/bin
+touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
+if: ${{ startsWith(matrix.os, 'macos-') }}
+shell: bash
+- run: go generate ./...
 - run: go build
 - run: go test -v ./...
+- uses: actions/upload-artifact@v4
-patches:
-needs: [changes]
-if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-runs-on: ubuntu-latest
-steps:
-- uses: actions/checkout@v4
 with:
-submodules: recursive
-- name: Verify patches carry all the changes
-run: |
-make apply-patches sync && git diff --compact-summary --exit-code llama
+name: ${{ matrix.os }}-binaries
+path: ollama
5 .gitignore vendored

@@ -5,14 +5,11 @@
 .swp
 dist
 ollama
+ggml-metal.metal
 .cache
 *.exe
 .idea
 test_data
 *.crt
 llm/build
-build/*/*/*
-!build/**/placeholder
-llama/build
 __debug_bin*
-llama/vendor
4 .gitmodules vendored Normal file

@@ -0,0 +1,4 @@
+[submodule "llama.cpp"]
+path = llm/llama.cpp
+url = https://github.com/ggerganov/llama.cpp.git
+shallow = true
@@ -7,35 +7,22 @@ linters:
 - bodyclose
 - containedctx
 - contextcheck
-- errcheck
 - exportloopref
-- gci
 - gocheckcompilerdirectives
-- gofmt
-- gofumpt
-- gosimple
-- govet
-- ineffassign
+# conditionally enable this on linux/macos
+# - gofmt
+# - goimports
 - intrange
-- makezero
 - misspell
 - nilerr
 - nolintlint
 - nosprintfhostport
-- staticcheck
-- tenv
+- testifylint
 - unconvert
 - unused
-- usestdlibvars
 - wastedassign
 - whitespace
-linters-settings:
-gci:
-sections: [standard, default, localmodule]
-staticcheck:
-checks:
-- all
-- -SA1019 # omit Deprecated check
+- usestdlibvars
 severity:
 default-severity: error
 rules:
@@ -1,37 +0,0 @@
-# Contributing to Ollama
-
-Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
-
-## Set up
-
-See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
-
-## Pull requests
-
-### Ideal issues
-
-* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
-* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
-* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
-
-### Issues that are harder to review
-
-* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
-* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
-* Documentation: small updates to fill in or correct missing documentation is helpful, however large documentation additions can be hard to maintain over time.
-
-### Issues that may not be accepted
-
-* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
-* Changes that add significant friction to the user experience
-* Changes that create a large future maintenance burden for maintainers and contributors
-
-### Best practices
-
-* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`) . In the description, leave a short 2-3 sentences that explain more about the change and its impact.
-* Tests: please add test coverage to changes where possible.
-* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
-
-## Need help?
-
-If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
263 Dockerfile

@@ -1,208 +1,131 @@
-# Note: once we have fully transitioned to the Go server, this will replace the old Dockerfile at the top of the tree
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
-ARG CUDA_VERSION_12=12.4.0
-ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
+# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
+ARG CUDA_VERSION=11.3.1
 ARG ROCM_VERSION=6.1.2

-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
+# Copy the minimal context we need to run the generate scripts
+FROM scratch AS llm-code
+COPY .git .git
+COPY .gitmodules .gitmodules
+COPY llm llm
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-dnf clean all && \
-dnf install -y \
-zsh \
-cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh

-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-dnf config-manager --set-enabled appstream && \
-dnf clean all && \
-dnf install -y \
-zsh \
-cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh

-FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
-ARG OLLAMA_FAST_BUILD
-RUN --mount=type=cache,target=/root/.ccache \
-if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-make -C llama -j $(expr $(nproc) / 2 ) ; \
-else \
-make -C llama -j 5 ; \
-fi
-
-FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
-ARG OLLAMA_FAST_BUILD
-RUN --mount=type=cache,target=/root/.ccache \
-make -C llama -j 8
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
+ARG CMAKE_VERSION
+COPY ./scripts/rh_linux_deps.sh /
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV LIBRARY_PATH /opt/amdgpu/lib64
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+ARG AMDGPU_TARGETS
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+RUN mkdir /tmp/scratch && \
+for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
+cp ${dep} /tmp/scratch/ || exit 1 ; \
+done && \
+(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
+mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
+(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )


-# Intermediate stages used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 centos:7 AS builder-amd64
+FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV CGO_ENABLED 1
-ENV GOARCH amd64
-WORKDIR /go/src/github.com/ollama/ollama
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+ARG OLLAMA_CUSTOM_CPU_DEFS

-FROM --platform=linux/amd64 builder-amd64 AS build-amd64
-COPY . .
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
-ARG GOFLAGS
 ARG CGO_CFLAGS
-ARG OLLAMA_SKIP_ROCM_GENERATE
-RUN --mount=type=cache,target=/root/.ccache \
-go build -trimpath -o dist/linux-amd64/bin/ollama .
-RUN cd dist/linux-$GOARCH && \
-tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-cd dist/linux-$GOARCH-rocm && \
-tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-fi
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate

-FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
+FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh

+FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate

+FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh


+# Intermediate stage used for ./scripts/build_linux.sh
+FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ENV GOARCH arm64
|
|
||||||
WORKDIR /go/src/github.com/ollama/ollama
|
|
||||||
|
|
||||||
FROM --platform=linux/arm64 builder-arm64 AS build-arm64
|
|
||||||
COPY . .
|
|
||||||
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
|
||||||
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
|
|
||||||
ARG GOFLAGS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
|
||||||
go build -trimpath -o dist/linux-arm64/bin/ollama .
|
|
||||||
RUN cd dist/linux-$GOARCH && \
|
|
||||||
tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
|
|
||||||
|
|
||||||
FROM --platform=linux/amd64 scratch AS dist-amd64
|
|
||||||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
|
|
||||||
FROM --platform=linux/arm64 scratch AS dist-arm64
|
|
||||||
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
|
|
||||||
FROM dist-$TARGETARCH AS dist
|
|
||||||
|
|
||||||
|
|
||||||
# Optimized container images do not cary nested payloads
|
|
||||||
FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
|
|
||||||
WORKDIR /go/src/github.com/ollama/ollama
|
WORKDIR /go/src/github.com/ollama/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN go build -trimpath .
|
||||||
go build -trimpath -o dist/linux-amd64/bin/ollama .
|
|
||||||
|
|
||||||
FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
|
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||||
|
ENV CGO_ENABLED 1
|
||||||
|
ARG GOLANG_VERSION
|
||||||
WORKDIR /go/src/github.com/ollama/ollama
|
WORKDIR /go/src/github.com/ollama/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN --mount=type=cache,target=/root/.ccache \
|
RUN go build -trimpath .
|
||||||
go build -trimpath -o dist/linux-arm64/bin/ollama .
|
|
||||||
|
|
||||||
# For amd64 container images, filter out cuda/rocm to minimize size
|
# Runtime stages
|
||||||
FROM runners-amd64 AS runners-cuda-amd64
|
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
|
||||||
RUN rm -rf \
|
RUN apt-get update && apt-get install -y ca-certificates
|
||||||
./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
|
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
||||||
./dist/linux-amd64/lib/ollama/runners/rocm*
|
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
|
||||||
|
RUN apt-get update && apt-get install -y ca-certificates
|
||||||
FROM runners-amd64 AS runners-rocm-amd64
|
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
||||||
RUN rm -rf \
|
|
||||||
./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
|
|
||||||
./dist/linux-amd64/lib/ollama/libcu*.so* \
|
|
||||||
./dist/linux-amd64/lib/ollama/runners/cuda*
|
|
||||||
|
|
||||||
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y ca-certificates && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
|
|
||||||
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
|
|
||||||
|
|
||||||
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y ca-certificates && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
|
|
||||||
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
|
|
||||||
|
|
||||||
# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
|
|
||||||
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
|
|
||||||
# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
|
|
||||||
# across releases
|
|
||||||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y ca-certificates && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
|
|
||||||
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
|
|
||||||
|
|
||||||
|
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
|
||||||
|
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
|
||||||
|
RUN update-pciids
|
||||||
|
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
||||||
EXPOSE 11434
|
EXPOSE 11434
|
||||||
ENV OLLAMA_HOST 0.0.0.0
|
ENV OLLAMA_HOST 0.0.0.0
|
||||||
|
|
||||||
|
4 Makefile

@@ -1,4 +0,0 @@
-GOALS := $(or $(MAKECMDGOALS),all)
-.PHONY: $(GOALS)
-$(GOALS):
-	$(MAKE) -C llama $@
103 README.md

@@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla

 ## Quickstart

-To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2):
+To run and chat with [Llama 3](https://ollama.com/library/llama3):

 ```
-ollama run llama3.2
+ollama run llama3
 ```

 ## Model library
@@ -49,14 +49,10 @@ Here are some example models that can be downloaded:

 | Model              | Parameters | Size  | Download                       |
 | ------------------ | ---------- | ----- | ------------------------------ |
-| Llama 3.2          | 3B         | 2.0GB | `ollama run llama3.2`          |
-| Llama 3.2          | 1B         | 1.3GB | `ollama run llama3.2:1b`       |
-| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
-| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
-| Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
+| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
+| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
 | Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
 | Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
 | Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
 | Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
@@ -68,8 +64,7 @@ Here are some example models that can be downloaded:
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |

-> [!NOTE]
-> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.

 ## Customize a model

@@ -101,16 +96,16 @@ See the [guide](docs/import.md) on importing models for more information.

 ### Customize a prompt

-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:

 ```
-ollama pull llama3.2
+ollama pull llama3
 ```

 Create a `Modelfile`:

 ```
-FROM llama3.2
+FROM llama3

 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -145,7 +140,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model

 ```
-ollama pull llama3.2
+ollama pull llama3
 ```

 > This command can also be used to update a local model. Only the diff will be pulled.
@@ -153,13 +148,13 @@ ollama pull llama3.2
 ### Remove a model

 ```
-ollama rm llama3.2
+ollama rm llama3
 ```

 ### Copy a model

 ```
-ollama cp llama3.2 my-model
+ollama cp llama3 my-model
 ```

 ### Multiline input
@@ -176,21 +171,21 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 ### Multimodal models

 ```
-ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```

 ### Pass the prompt as an argument

 ```
-$ ollama run llama3.2 "Summarize this file: $(cat README.md)"
+$ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```

 ### Show model information

 ```
-ollama show llama3.2
+ollama show llama3
 ```

 ### List models on your computer
@@ -199,18 +194,6 @@ ollama show llama3.2
 ollama list
 ```

-### List which models are currently loaded
-
-```
-ollama ps
-```
-
-### Stop a model which is currently running
-
-```
-ollama stop llama3.2
-```
-
 ### Start Ollama

 `ollama serve` is used when you want to start ollama without running the desktop application.
@@ -230,7 +213,7 @@ Next, start the server:
 Finally, in a separate shell, run a model:

 ```
-./ollama run llama3.2
+./ollama run llama3
 ```

 ## REST API
@@ -241,7 +224,7 @@ Ollama has a REST API for running and managing models.

 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt":"Why is the sky blue?"
 }'
 ```
@@ -250,7 +233,7 @@ curl http://localhost:11434/api/generate -d '{

 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "messages": [
     { "role": "user", "content": "why is the sky blue?" }
   ]
@@ -309,28 +292,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
-- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in discord )
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
-- [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
 - [AI Studio](https://github.com/MindWorkAI/AI-Studio)
-- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
-- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
-- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
-- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
-- [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
-- [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
-- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j
-- [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
-- [Claude Dev](https://github.com/saoudrizwan/claude-dev) - VSCode extension for multi-file/whole-repo coding
-- [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
-- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
-- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
-- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
-- [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
-- [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux)
-- [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
-- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)

 ### Terminal

@@ -354,12 +318,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
 - [gollama](https://github.com/sammcj/gollama)
-- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
-- [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe)
-- [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor
-
-### Apple Vision Pro
-- [Enchanted](https://github.com/AugustDev/enchanted)

 ### Database

@@ -369,28 +327,22 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Package managers

 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
-- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
-- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
-- [Flox](https://flox.dev/blog/ollama-part-one)

 ### Libraries

-- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
-- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
-- [crewAI](https://github.com/crewAIInc/crewAI)
+- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
 - [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
-- [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama)
+- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
-- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
-- [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
+- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
@@ -407,19 +359,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
-- [Gollm](https://docs.gollm.co/examples/ollama-example)
-- [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient)
-- [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun)
-- [Ollama PHP](https://github.com/ArdaGnsrn/ollama-php)
-- [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama)
-- [Ollama for Swift](https://github.com/mattt/ollama-swift)

 ### Mobile

 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
-- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
-- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)

 ### Extensions & Plugins

@@ -442,18 +386,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
-- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
-- [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama model)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
 - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
-- [vnc-lm](https://github.com/jk011ru/vnc-lm) (A containerized Discord bot with support for attachments and web links)
-- [LSP-AI](https://github.com/SilasMarvin/lsp-ai) (Open-source language server for AI-powered functionality)
-- [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator)
-- [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator)

 ### Supported backends
25 SECURITY.md

@@ -1,25 +0,0 @@
-# Security
-
-The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
-
-## Reporting a vulnerability
-
-If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
-
-Please include the following details in your report:
-- A description of the vulnerability
-- Steps to reproduce the issue
-- Your assessment of the potential impact
-- Any possible mitigations
-
-## Security best practices
-
-While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
-
-- Regularly updating to the latest version of Ollama
-- Securing access to hosted instances of Ollama
-- Monitoring systems for unusual activity
-
-## Contact
-
-For any other questions or concerns related to security, please contact us at hello@ollama.com
@@ -18,9 +18,9 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
+	"net"
 	"net/http"
 	"net/url"
 	"runtime"
@@ -63,8 +63,13 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
+	ollamaHost := envconfig.Host
+
 	return &Client{
-		base: envconfig.Host(),
+		base: &url.URL{
+			Scheme: ollamaHost.Scheme,
+			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
+		},
 		http: http.DefaultClient,
 	}, nil
 }
@@ -173,7 +178,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	}

 	if errorResponse.Error != "" {
-		return errors.New(errorResponse.Error)
+		return fmt.Errorf(errorResponse.Error)
 	}

 	if response.StatusCode >= http.StatusBadRequest {
@@ -298,7 +303,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }

-// ListRunning lists running models.
+// List running models.
 func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
 	var lr ProcessResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -333,7 +338,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
 	return &resp, nil
 }

-// Heartbeat checks if the server has started and is responsive; if yes, it
+// Hearbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
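The `ClientFromEnvironment` change above only affects how the client's base URL is derived from `OLLAMA_HOST`; call sites stay the same. As a minimal usage sketch (the error handling is illustrative, not part of the diff):

```go
package main

import (
	"context"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	// Respects OLLAMA_HOST; falls back to the default host and port.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Heartbeat returns nil once the server is up and responsive.
	if err := client.Heartbeat(context.Background()); err != nil {
		log.Fatal(err)
	}
}
```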
@@ -2,6 +2,8 @@ package api

 import (
 	"testing"
+
+	"github.com/ollama/ollama/envconfig"
 )

 func TestClientFromEnvironment(t *testing.T) {
@@ -31,6 +33,7 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
+			envconfig.LoadConfig()

 			client, err := ClientFromEnvironment()
 			if err != v.err {
108 api/types.go

@@ -101,34 +101,46 @@ type ChatRequest struct {
 	KeepAlive *Duration `json:"keep_alive,omitempty"`

 	// Tools is an optional list of tools the model has access to.
-	Tools `json:"tools,omitempty"`
+	Tools []Tool `json:"tools,omitempty"`

 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }

-type Tools []Tool
-
-func (t Tools) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
-func (t Tool) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
 	Role      string      `json:"role"`
-	Content   string      `json:"content"`
+	Content   string      `json:"content,omitempty"`
 	Images    []ImageData `json:"images,omitempty"`
 	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
 }

+type ToolCall struct {
+	Function struct {
+		Name      string         `json:"name"`
+		Arguments map[string]any `json:"arguments"`
+	} `json:"function"`
+}
+
+type Tool struct {
+	Type     string `json:"type"`
+	Function struct {
+		Name        string `json:"name"`
+		Description string `json:"description"`
+		Parameters  struct {
+			Type       string   `json:"type"`
+			Required   []string `json:"required"`
+			Properties map[string]struct {
+				Type        string   `json:"type"`
+				Description string   `json:"description"`
+				Enum        []string `json:"enum,omitempty"`
+			} `json:"properties"`
+		} `json:"parameters"`
+	} `json:"function"`
+}
+
 func (m *Message) UnmarshalJSON(b []byte) error {
 	type Alias Message
 	var a Alias
@@ -141,46 +153,6 @@ func (m *Message) UnmarshalJSON(b []byte) error {
 	return nil
 }

-type ToolCall struct {
-	Function ToolCallFunction `json:"function"`
-}
-
-type ToolCallFunction struct {
-	Name      string                    `json:"name"`
-	Arguments ToolCallFunctionArguments `json:"arguments"`
-}
-
-type ToolCallFunctionArguments map[string]any
-
-func (t *ToolCallFunctionArguments) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
-type Tool struct {
-	Type     string       `json:"type"`
-	Function ToolFunction `json:"function"`
-}
-
-type ToolFunction struct {
-	Name        string `json:"name"`
-	Description string `json:"description"`
-	Parameters  struct {
-		Type       string   `json:"type"`
-		Required   []string `json:"required"`
-		Properties map[string]struct {
-			Type        string   `json:"type"`
-			Description string   `json:"description"`
-			Enum        []string `json:"enum,omitempty"`
-		} `json:"properties"`
-	} `json:"parameters"`
-}
-
-func (t *ToolFunction) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
 // ChatResponse is the response returned by [Client.Chat]. Its fields are
 // similar to [GenerateResponse].
 type ChatResponse struct {
@@ -214,7 +186,6 @@ type Options struct {
 	NumPredict  int      `json:"num_predict,omitempty"`
 	TopK        int      `json:"top_k,omitempty"`
 	TopP        float32  `json:"top_p,omitempty"`
-	MinP        float32  `json:"min_p,omitempty"`
 	TFSZ        float32  `json:"tfs_z,omitempty"`
 	TypicalP    float32  `json:"typical_p,omitempty"`
 	RepeatLastN int      `json:"repeat_last_n,omitempty"`
@@ -231,6 +202,7 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
+	UseNUMA  bool `json:"numa,omitempty"`
 	NumCtx   int  `json:"num_ctx,omitempty"`
 	NumBatch int  `json:"num_batch,omitempty"`
 	NumGPU   int  `json:"num_gpu,omitempty"`
@@ -266,10 +238,6 @@ type EmbedRequest struct {
 type EmbedResponse struct {
 	Model      string      `json:"model"`
 	Embeddings [][]float32 `json:"embeddings"`
-
-	TotalDuration   time.Duration `json:"total_duration,omitempty"`
-	LoadDuration    time.Duration `json:"load_duration,omitempty"`
-	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
 }

 // EmbeddingRequest is the request passed to [Client.Embeddings].
@@ -296,17 +264,15 @@ type EmbeddingResponse struct {
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
 	Model     string `json:"model"`
+	Path      string `json:"path"`
 	Modelfile string `json:"modelfile"`
 	Stream    *bool  `json:"stream,omitempty"`
 	Quantize  string `json:"quantize,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`

-	// Deprecated: set the file content with Modelfile instead
-	Path string `json:"path"`
-
-	// Deprecated: use Quantize instead
+	// Quantization is deprecated, see Quantize
 	Quantization string `json:"quantization,omitempty"`
 }

@@ -314,7 +280,7 @@ type CreateRequest struct {
 type DeleteRequest struct {
 	Model string `json:"model"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

@@ -329,7 +295,7 @@ type ShowRequest struct {

 	Options map[string]interface{} `json:"options"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

@@ -361,7 +327,7 @@ type PullRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

@@ -382,7 +348,7 @@ type PushRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

@@ -439,6 +405,9 @@ type GenerateResponse struct {
 	// Response is the textual response itself.
 	Response string `json:"response"`

+	// ToolCalls is the list of tools the model wants to call
+	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
+
 	// Done specifies if the response is complete.
 	Done bool `json:"done"`

@@ -506,7 +475,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", key)
+			slog.Warn("invalid option provided", "option", opt.Name)
 			continue
 		}

@@ -616,6 +585,7 @@ func DefaultOptions() Options {
 			F16KV:    true,
 			UseMLock: false,
 			UseMMap:  nil,
+			UseNUMA:  false,
 		},
 	}
 }
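Despite the structural difference between the two branches (named helper types on one side, inline anonymous structs on the other), the JSON wire format for tools is the same. A sketch of how a tool definition in that format unmarshals into the `Tool` type from this diff; the weather tool itself is an illustrative placeholder, not something defined in the repository:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	// A "function" tool in the JSON shape the Tool struct above accepts.
	raw := `{
		"type": "function",
		"function": {
			"name": "get_current_weather",
			"description": "Get the current weather for a location",
			"parameters": {
				"type": "object",
				"required": ["location"],
				"properties": {
					"location": {"type": "string", "description": "The city name"}
				}
			}
		}
	}`

	var tool api.Tool
	if err := json.Unmarshal([]byte(raw), &tool); err != nil {
		panic(err)
	}
	fmt.Println(tool.Function.Name) // get_current_weather
}
```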
@@ -2,7 +2,7 @@ package api

 import (
 	"encoding/json"
-	"errors"
+	"fmt"
 	"math"
 	"testing"
 	"time"
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
 				"use_mmap": {"foo"},
 			},
 			exp: nil,
-			err: errors.New("invalid bool value [foo]"),
+			err: fmt.Errorf("invalid bool value [foo]"),
 		},
 	}
@@ -2,8 +2,8 @@

 package lifecycle

-import "errors"
+import "fmt"

 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
@@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
+
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
@@ -14,7 +14,7 @@ import (
 func InitLogging() {
 	level := slog.LevelInfo

-	if envconfig.Debug() {
+	if envconfig.Debug {
 		level = slog.LevelDebug
 	}

@@ -27,7 +27,7 @@ func InitLogging() {
 	// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
 		rotateLogs(AppLogFile)
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
@@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"

 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
 	// No log exists
 	rotateLogs(logFile)

-	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
+	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
 	assert.FileExists(t, logFile)
 	// First rotation
 	rotateLogs(logFile)
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
 	assert.NoFileExists(t, logFile)

 	for i := 2; i <= LogRotationCount+1; i++ {
-		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
+		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
 		assert.FileExists(t, logFile)
 		rotateLogs(logFile)
 		assert.NoFileExists(t, logFile)
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	}

 	rotateLogs(ServerLogFile)
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
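The permission changes in the last few hunks (`0o755` → `0755`, `0o644` → `0644`) are purely notational; both spellings denote the same octal literal in Go, as this small check illustrates:

```go
package main

import "fmt"

func main() {
	// The 0o prefix (Go 1.13+) only makes the octal base explicit; the
	// values are identical.
	fmt.Println(0o755 == 0755, 0o644 == 0644) // true true
}
```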
@@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
-	"strconv"
 	"strings"
 	"time"

@@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))

 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
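The timestamp hunk swaps `strconv.FormatInt` for `fmt.Sprintf`; both render the same decimal string, as this sketch shows, so the change is stylistic (the `strconv` form simply skips fmt's formatting machinery):

```go
package main

import (
	"fmt"
	"strconv"
	"time"
)

func main() {
	ts := time.Now().Unix()
	// Identical output; strconv.FormatInt is the more direct choice
	// when formatting a single integer.
	fmt.Println(strconv.FormatInt(ts, 10) == fmt.Sprintf("%d", ts)) // true
}
```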
@@ -4,9 +4,9 @@ package lifecycle

 import (
 	"context"
-	"errors"
+	"fmt"
 )

 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
@@ -2,7 +2,6 @@ package lifecycle

 import (
 	"context"
-	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@@ -65,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}

 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
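The `errors.New`/`fmt.Errorf` swaps across these lifecycle hunks do not change behavior for fixed messages; the two constructors produce equivalent errors when no formatting verbs or wrapping (`%w`) are involved, which is why linters prefer `errors.New` for constant strings:

```go
package main

import (
	"errors"
	"fmt"
)

func main() {
	errA := errors.New("installer process did not start")
	errB := fmt.Errorf("installer process did not start")
	// Same message; errors.New is preferred when there is nothing to format.
	fmt.Println(errA.Error() == errB.Error()) // true
}
```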
|||||||
@@ -28,8 +28,8 @@ AppPublisher={#MyAppPublisher}
AppPublisherURL={#MyAppURL}
AppSupportURL={#MyAppURL}
AppUpdatesURL={#MyAppURL}
-ArchitecturesAllowed=x64compatible arm64
+ArchitecturesAllowed=x64 arm64
-ArchitecturesInstallIn64BitMode=x64compatible arm64
+ArchitecturesInstallIn64BitMode=x64 arm64
DefaultDirName={localappdata}\Programs\{#MyAppName}
DefaultGroupName={#MyAppName}
DisableProgramGroupPage=yes
@@ -48,7 +48,6 @@ OutputDir=..\dist\
SetupLogging=yes
CloseApplications=yes
RestartApplications=no
-RestartIfNeededByRun=no

; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
WizardSmallImageFile=.\assets\setup.bmp
@@ -87,21 +86,21 @@ Name: "english"; MessagesFile: "compiler:Default.isl"
DialogFontSize=12

[Files]
-#if DirExists("..\dist\windows-amd64")
+Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: not IsArm64(); Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\ollama.exe"; DestDir: "{app}"; Check: not IsArm64(); Flags: ignoreversion 64bit
+Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
-Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: not IsArm64(); Flags: ignoreversion 64bit recursesubdirs
-#endif

-#if DirExists("..\dist\windows-arm64")
-Source: "..\dist\windows-arm64\vc_redist.arm64.exe"; DestDir: "{tmp}"; Check: IsArm64() and vc_redist_needed(); Flags: deleteafterinstall
-Source: "..\dist\windows-arm64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: IsArm64(); Flags: ignoreversion 64bit
-Source: "..\dist\windows-arm64\ollama.exe"; DestDir: "{app}"; Check: IsArm64(); Flags: ignoreversion 64bit
-Source: "..\dist\windows-arm64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: IsArm64(); Flags: ignoreversion 64bit recursesubdirs
-#endif

Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
+#if DirExists("..\dist\windows-amd64\cuda")
+Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
+#endif
+#if DirExists("..\dist\windows-amd64\oneapi")
+Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
+#endif
+#if DirExists("..\dist\windows-amd64\rocm")
+Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
+#endif


[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -109,9 +108,6 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"

[Run]
-#if DirExists("..\dist\windows-arm64")
-Filename: "{tmp}\vc_redist.arm64.exe"; Parameters: "/install /passive /norestart"; Check: IsArm64() and vc_redist_needed(); StatusMsg: "Installing VC++ Redistributables..."; Flags: waituntilterminated
-#endif
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden

[UninstallRun]
@@ -142,7 +138,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi


;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.2
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
;ClickFinish=%n

[Registry]
@@ -167,39 +163,3 @@ begin
  { Pos() returns 0 if not found }
  Result := Pos(';' + ExpandConstant(Param) + ';', ';' + OrigPath + ';') = 0;
end;

-{ --- VC Runtime libraries discovery code - Only install vc_redist if it isn't already installed ----- }
-const VCRTL_MIN_V1 = 14;
-const VCRTL_MIN_V2 = 40;
-const VCRTL_MIN_V3 = 33807;
-const VCRTL_MIN_V4 = 0;
-
-// check if the minimum required vc redist is installed (by looking the registry)
-function vc_redist_needed (): Boolean;
-var
-  sRegKey: string;
-  v1: Cardinal;
-  v2: Cardinal;
-  v3: Cardinal;
-  v4: Cardinal;
-begin
-  sRegKey := 'SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\arm64';
-  if (RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Major', v1) and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Minor', v2) and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Bld', v3) and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'RBld', v4)) then
-  begin
-    Log ('VC Redist version: ' + IntToStr (v1) +
-      '.' + IntToStr (v2) + '.' + IntToStr (v3) +
-      '.' + IntToStr (v4));
-    { Version info was found. Return true if later or equal to our
-      minimal required version RTL_MIN_Vx }
-    Result := not (
-      (v1 > VCRTL_MIN_V1) or ((v1 = VCRTL_MIN_V1) and
-      ((v2 > VCRTL_MIN_V2) or ((v2 = VCRTL_MIN_V2) and
-      ((v3 > VCRTL_MIN_V3) or ((v3 = VCRTL_MIN_V3) and
-      (v4 >= VCRTL_MIN_V4)))))));
-  end
-  else
-    Result := TRUE;
-end;
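The nested boolean in the removed `vc_redist_needed` block above is just a four-part version comparison: the redistributable is installed unless the registry already reports a version at or above 14.40.33807.0. A minimal Go sketch of the same check, for readers untangling the Pascal (the function and array layout here are illustrative, not code from either branch):

```go
// vcRedistUpToDate reports whether an installed (Major, Minor, Bld, RBld)
// version meets the minimum required version, mirroring the Pascal logic:
// compare the four parts lexicographically, most significant first.
func vcRedistUpToDate(v [4]int) bool {
	required := [4]int{14, 40, 33807, 0} // VCRTL_MIN_V1..V4 from the installer script
	for i := range v {
		if v[i] != required[i] {
			return v[i] > required[i]
		}
	}
	return true // exactly the minimum version
}
```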
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
write-host ""
write-host "Run your first model:"
write-host ""
-write-host "`tollama run llama3.2"
+write-host "`tollama run llama3"
write-host ""
@@ -3,11 +3,11 @@
package tray

import (
-	"errors"
+	"fmt"

	"github.com/ollama/ollama/app/tray/commontray"
)

func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
}
@@ -11,7 +11,9 @@ import (
	"golang.org/x/sys/windows"
)

-var quitOnce sync.Once
+var (
+	quitOnce sync.Once
+)

func (t *winTray) Run() {
	nativeLoop()
@@ -11,12 +11,12 @@ import (
)

const (
-	updateAvailableMenuID = 1
+	updatAvailableMenuID = 1
-	updateMenuID = updateAvailableMenuID + 1
+	updateMenuID = updatAvailableMenuID + 1
	separatorMenuID = updateMenuID + 1
	diagLogsMenuID = separatorMenuID + 1
	diagSeparatorMenuID = diagLogsMenuID + 1
	quitMenuID = diagSeparatorMenuID + 1
)

func (t *winTray) initMenus() error {
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
func (t *winTray) UpdateAvailable(ver string) error {
	if !t.updateNotified {
		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
			return fmt.Errorf("unable to create menu entries %w", err)
		}
		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
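Both sides of the const block above build the menu IDs by chaining each constant off the previous one, which keeps the sequence contiguous when an entry is inserted. The same sequence is often written with `iota`; a hedged sketch of that alternative, not code from either branch:

```go
const (
	updateAvailableMenuID = iota + 1 // 1
	updateMenuID                     // 2
	separatorMenuID                  // 3
	diagLogsMenuID                   // 4
	diagSeparatorMenuID              // 5
	quitMenuID                       // 6
)
```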
@@ -11,12 +11,10 @@ import (
	"path/filepath"
	"sort"
	"sync"
-	"syscall"
	"unsafe"

-	"golang.org/x/sys/windows"
-
	"github.com/ollama/ollama/app/tray/commontray"
+	"golang.org/x/sys/windows"
)

// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@@ -416,7 +414,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)

	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
			return "", err
		}
	}
@@ -434,12 +432,7 @@ func (t *winTray) setIcon(src string) error {
	t.muNID.Lock()
	defer t.muNID.Unlock()
	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON | NIF_TIP
+	t.nid.Flags |= NIF_ICON
-	if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
-		copy(t.nid.Tip[:], toolTipUTF16)
-	} else {
-		return err
-	}
	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))

	return t.nid.modify()
@@ -61,7 +61,6 @@ const (
	MIIM_SUBMENU = 0x00000004
	MIM_APPLYTOSUBMENUS = 0x80000000
	NIF_ICON = 0x00000002
-	NIF_TIP = 0x00000004
	NIF_INFO = 0x00000010
	NIF_MESSAGE = 0x00000001
	SW_HIDE = 0
@@ -5,7 +5,6 @@ import (
	"context"
	"crypto/rand"
	"encoding/base64"
-	"errors"
	"fmt"
	"io"
	"log/slog"
@@ -79,7 +78,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
	parts := bytes.Split(publicKey, []byte(" "))
	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
	}

	signedData, err := privateKey.Sign(rand.Reader, bts)
@@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
@@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
@@ -1,8 +0,0 @@
-package build
-
-import "embed"
-
-// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
-
-//go:embed darwin/amd64/*
-var EmbedFS embed.FS
@@ -1,8 +0,0 @@
-package build
-
-import "embed"
-
-// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
-
-//go:embed darwin/arm64/*
-var EmbedFS embed.FS
@@ -1,6 +0,0 @@
-package build
-
-import "embed"
-
-//go:embed linux/*
-var EmbedFS embed.FS
@@ -1,8 +0,0 @@
-//go:build !linux && !darwin
-
-package build
-
-import "embed"
-
-// unused on windows
-var EmbedFS embed.FS
@@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
@@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
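As the deleted placeholder text itself says, these stub files exist only because `//go:embed` fails at compile time when a pattern matches no files; checking in a placeholder keeps the embed working on a clean tree before any payloads are built. A minimal self-contained sketch of the pattern these files used:

```go
package build

import "embed"

// Embeds everything under linux/; a checked-in placeholder file inside
// that directory keeps this pattern from failing with "no matching files
// found" on a tree where no runner payloads have been generated yet.
//
//go:embed linux/*
var EmbedFS embed.FS
```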
397 cmd/cmd.go
@@ -2,7 +2,6 @@ package cmd

import (
	"archive/zip"
-	"bufio"
	"bytes"
	"context"
	"crypto/ed25519"
@@ -21,9 +20,8 @@ import (
	"path/filepath"
	"regexp"
	"runtime"
-	"strconv"
+	"slices"
	"strings"
-	"sync/atomic"
	"syscall"
	"time"

@@ -46,58 +44,28 @@ import (
	"github.com/ollama/ollama/version"
)

-var (
-	errModelNotFound = errors.New("no Modelfile or safetensors files found")
-	errModelfileNotFound = errors.New("specified Modelfile wasn't found")
-)
-
-func getModelfileName(cmd *cobra.Command) (string, error) {
-	fn, _ := cmd.Flags().GetString("file")
-
-	filename := fn
-	if filename == "" {
-		filename = "Modelfile"
-	}
-
-	absName, err := filepath.Abs(filename)
-	if err != nil {
-		return "", err
-	}
-
-	_, err = os.Stat(absName)
-	if err != nil {
-		return fn, err
-	}
-
-	return absName, nil
-}
-
func CreateHandler(cmd *cobra.Command, args []string) error {
+	filename, _ := cmd.Flags().GetString("file")
+	filename, err := filepath.Abs(filename)
+	if err != nil {
+		return err
+	}
+
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
	p := progress.NewProgress(os.Stderr)
	defer p.Stop()

-	var reader io.Reader
+	f, err := os.Open(filename)
+	if err != nil {
-	filename, err := getModelfileName(cmd)
-	if os.IsNotExist(err) {
-		if filename == "" {
-			reader = strings.NewReader("FROM .\n")
-		} else {
-			return errModelfileNotFound
-		}
-	} else if err != nil {
		return err
-	} else {
-		f, err := os.Open(filename)
-		if err != nil {
-			return err
-		}
-
-		reader = f
-		defer f.Close()
	}
+	defer f.Close()

-	modelfile, err := parser.ParseFile(reader)
+	modelfile, err := parser.ParseFile(f)
	if err != nil {
		return err
	}
@@ -110,12 +78,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
	status := "transferring model data"
	spinner := progress.NewSpinner(status)
	p.Add(status, spinner)
-	defer p.Stop()
-
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return err
-	}

	for i := range modelfile.Commands {
		switch modelfile.Commands[i].Name {
@@ -150,7 +112,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
			path = tempfile
		}

-		digest, err := createBlob(cmd, client, path, spinner)
+		digest, err := createBlob(cmd, client, path)
		if err != nil {
			return err
		}
@@ -240,12 +202,6 @@ func tempZipFiles(path string) (string, error) {
		// safetensors files might be unresolved git lfs references; skip if they are
		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
		files = append(files, st...)
-	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
-		// covers adapters.safetensors
-		files = append(files, st...)
-	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
-		// covers adapter_model.safetensors
-		files = append(files, st...)
	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
		// pytorch files might also be unresolved git lfs references; skip if they are
		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
@@ -255,7 +211,7 @@ func tempZipFiles(path string) (string, error) {
		// covers consolidated.x.pth, consolidated.pth
		files = append(files, pt...)
	} else {
-		return "", errModelNotFound
+		return "", errors.New("no safetensors or torch files found")
	}

	// add configuration files, json files are detected as text/plain
@@ -265,14 +221,6 @@ func tempZipFiles(path string) (string, error) {
	}
	files = append(files, js...)

-	// bert models require a nested config.json
-	// TODO(mxyng): merge this with the glob above
-	js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
-	if err != nil {
-		return "", err
-	}
-	files = append(files, js...)
-
	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
		// tokenizer.model might be a unresolved git lfs reference; error if it is
@@ -302,11 +250,6 @@ func tempZipFiles(path string) (string, error) {
			return "", err
		}

-		zfi.Name, err = filepath.Rel(path, file)
-		if err != nil {
-			return "", err
-		}
-
		zf, err := zipfile.CreateHeader(zfi)
		if err != nil {
			return "", err
@@ -320,20 +263,13 @@ func tempZipFiles(path string) (string, error) {
	return tempfile.Name(), nil
}

-func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
+func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
	bin, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer bin.Close()

-	// Get file info to retrieve the size
-	fileInfo, err := bin.Stat()
-	if err != nil {
-		return "", err
-	}
-	fileSize := fileInfo.Size()
-
	hash := sha256.New()
	if _, err := io.Copy(hash, bin); err != nil {
		return "", err
@@ -343,76 +279,13 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
		return "", err
	}

-	var pw progressWriter
-	status := "transferring model data 0%"
-	spinner.SetMessage(status)
-
-	done := make(chan struct{})
-	defer close(done)
-
-	go func() {
-		ticker := time.NewTicker(60 * time.Millisecond)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ticker.C:
-				spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
-			case <-done:
-				spinner.SetMessage("transferring model data 100%")
-				return
-			}
-		}
-	}()
-
	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-	if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
+	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
		return "", err
	}
	return digest, nil
}

-type progressWriter struct {
-	n atomic.Int64
-}
-
-func (w *progressWriter) Write(p []byte) (n int, err error) {
-	w.n.Add(int64(len(p)))
-	return len(p), nil
-}
-
-func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
-	p := progress.NewProgress(os.Stderr)
-	defer p.StopAndClear()
-
-	spinner := progress.NewSpinner("")
-	p.Add("", spinner)
-
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return err
-	}
-
-	req := &api.GenerateRequest{
-		Model: opts.Model,
-		KeepAlive: opts.KeepAlive,
-	}
-
-	return client.Generate(cmd.Context(), req, func(api.GenerateResponse) error { return nil })
-}
-
-func StopHandler(cmd *cobra.Command, args []string) error {
-	opts := &runOptions{
-		Model: args[0],
-		KeepAlive: &api.Duration{Duration: 0},
-	}
-	if err := loadOrUnloadModel(cmd, opts); err != nil {
-		if strings.Contains(err.Error(), "not found") {
-			return fmt.Errorf("couldn't find model \"%s\" to stop", args[0])
-		}
-	}
-	return nil
-}
-
func RunHandler(cmd *cobra.Command, args []string) error {
	interactive := true

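The `progressWriter` removed above is a small but reusable pattern: wrap the upload stream in `io.TeeReader` so every read also bumps an atomic byte counter, and let a ticker goroutine turn that counter into a percentage. A self-contained sketch of the same idea, with illustrative names and plain stderr output in place of the spinner:

```go
package main

import (
	"fmt"
	"io"
	"os"
	"sync/atomic"
	"time"
)

// countingWriter tallies bytes written through it; the atomic counter makes
// it safe to read from the reporting goroutine while the copy is running.
type countingWriter struct{ n atomic.Int64 }

func (w *countingWriter) Write(p []byte) (int, error) {
	w.n.Add(int64(len(p)))
	return len(p), nil
}

// copyWithProgress copies src to dst, periodically printing percent complete.
// total is the expected byte count and must be greater than zero.
func copyWithProgress(dst io.Writer, src io.Reader, total int64) error {
	var cw countingWriter
	done := make(chan struct{})
	defer close(done)

	go func() {
		ticker := time.NewTicker(60 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				fmt.Fprintf(os.Stderr, "\rtransferring %d%%", 100*cw.n.Load()/total)
			case <-done:
				fmt.Fprintln(os.Stderr, "\rtransferring 100%")
				return
			}
		}
	}()

	// TeeReader writes everything it reads from src into cw as a side effect.
	_, err := io.Copy(dst, io.TeeReader(src, &cw))
	return err
}
```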
@@ -487,26 +360,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
		return err
	}

-	opts.MultiModal = len(info.ProjectorInfo) != 0
+	opts.MultiModal = slices.Contains(info.Details.Families, "clip")
	opts.ParentModel = info.Details.ParentModel
+	opts.Messages = append(opts.Messages, info.Messages...)

	if interactive {
-		if err := loadOrUnloadModel(cmd, &opts); err != nil {
-			return err
-		}
-
-		for _, msg := range info.Messages {
-			switch msg.Role {
-			case "user":
-				fmt.Printf(">>> %s\n", msg.Content)
-			case "assistant":
-				state := &displayResponseState{}
-				displayResponse(msg.Content, opts.WordWrap, state)
-				fmt.Println()
-				fmt.Println()
-			}
-		}
-
		return generateInteractive(cmd, opts)
	}
	return generate(cmd, opts)
@@ -647,7 +505,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
	table.SetHeaderLine(false)
	table.SetBorder(false)
	table.SetNoWhiteSpace(true)
-	table.SetTablePadding("  ")
+	table.SetTablePadding("\t")
	table.AppendBulk(data)
	table.Render()

@@ -682,15 +540,7 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
				cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
				procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
			}
+			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
-			var until string
-			delta := time.Since(m.ExpiresAt)
-			if delta > 0 {
-				until = "Stopping..."
-			} else {
-				until = format.HumanTime(m.ExpiresAt, "Never")
-			}
-			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, until})
		}
	}

@@ -701,7 +551,7 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
	table.SetHeaderLine(false)
	table.SetBorder(false)
	table.SetNoWhiteSpace(true)
-	table.SetTablePadding("  ")
+	table.SetTablePadding("\t")
	table.AppendBulk(data)
	table.Render()

@@ -714,17 +564,6 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
		return err
	}

-	// Unload the model if it's running before deletion
-	opts := &runOptions{
-		Model: args[0],
-		KeepAlive: &api.Duration{Duration: 0},
-	}
-	if err := loadOrUnloadModel(cmd, opts); err != nil {
-		if !strings.Contains(err.Error(), "not found") {
-			return fmt.Errorf("unable to stop existing running model \"%s\": %s", args[0], err)
-		}
-	}
-
	for _, name := range args {
		req := api.DeleteRequest{Name: name}
		if err := client.Delete(cmd.Context(), &req); err != nil {
@@ -808,89 +647,122 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
		return nil
	}

-	return showInfo(resp, os.Stdout)
+	showInfo(resp)
+
+	return nil
}

-func showInfo(resp *api.ShowResponse, w io.Writer) error {
+func showInfo(resp *api.ShowResponse) {
-	tableRender := func(header string, rows func() [][]string) {
+	arch := resp.ModelInfo["general.architecture"].(string)
-		fmt.Fprintln(w, "  ", header)
-		table := tablewriter.NewWriter(w)
-		table.SetAlignment(tablewriter.ALIGN_LEFT)
-		table.SetBorder(false)
-		table.SetNoWhiteSpace(true)
-		table.SetTablePadding("  ")

-		switch header {
+	modelData := [][]string{
-		case "Template", "System", "License":
+		{"arch", arch},
-			table.SetColWidth(100)
+		{"parameters", resp.Details.ParameterSize},
-		}
+		{"quantization", resp.Details.QuantizationLevel},
+		{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
-		table.AppendBulk(rows())
+		{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
-		table.Render()
-		fmt.Fprintln(w)
	}

-	tableRender("Model", func() (rows [][]string) {
+	mainTableData := [][]string{
-		if resp.ModelInfo != nil {
+		{"Model"},
-			arch := resp.ModelInfo["general.architecture"].(string)
+		{renderSubTable(modelData, false)},
-			rows = append(rows, []string{"", "architecture", arch})
+	}
-			rows = append(rows, []string{"", "parameters", format.HumanNumber(uint64(resp.ModelInfo["general.parameter_count"].(float64)))})
-			rows = append(rows, []string{"", "context length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64), 'f', -1, 64)})
-			rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64), 'f', -1, 64)})
-		} else {
-			rows = append(rows, []string{"", "architecture", resp.Details.Family})
-			rows = append(rows, []string{"", "parameters", resp.Details.ParameterSize})
-		}
-		rows = append(rows, []string{"", "quantization", resp.Details.QuantizationLevel})
-		return
-	})

	if resp.ProjectorInfo != nil {
-		tableRender("Projector", func() (rows [][]string) {
+		projectorData := [][]string{
-			arch := resp.ProjectorInfo["general.architecture"].(string)
+			{"arch", "clip"},
-			rows = append(rows, []string{"", "architecture", arch})
+			{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
-			rows = append(rows, []string{"", "parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))})
+		}
-			rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.embedding_length", arch)].(float64), 'f', -1, 64)})
-			rows = append(rows, []string{"", "dimensions", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.projection_dim", arch)].(float64), 'f', -1, 64)})
+		if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
-			return
+			projectorData = append(projectorData, []string{"projector type", projectorType.(string)})
-		})
+		}
+
+		projectorData = append(projectorData,
+			[]string{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))},
+			[]string{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))},
+		)
+
+		mainTableData = append(mainTableData,
+			[]string{"Projector"},
+			[]string{renderSubTable(projectorData, false)},
+		)
	}

	if resp.Parameters != "" {
-		tableRender("Parameters", func() (rows [][]string) {
+		mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
-			scanner := bufio.NewScanner(strings.NewReader(resp.Parameters))
-			for scanner.Scan() {
-				if text := scanner.Text(); text != "" {
-					rows = append(rows, append([]string{""}, strings.Fields(text)...))
-				}
-			}
-			return
-		})
-	}
-
-	head := func(s string, n int) (rows [][]string) {
-		scanner := bufio.NewScanner(strings.NewReader(s))
-		for scanner.Scan() && (len(rows) < n || n < 0) {
-			if text := scanner.Text(); text != "" {
-				rows = append(rows, []string{"", strings.TrimSpace(text)})
-			}
-		}
-		return
	}

	if resp.System != "" {
-		tableRender("System", func() [][]string {
+		mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
-			return head(resp.System, 2)
-		})
	}

	if resp.License != "" {
-		tableRender("License", func() [][]string {
+		mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
-			return head(resp.License, 2)
-		})
	}

-	return nil
+	table := tablewriter.NewWriter(os.Stdout)
+	table.SetAutoWrapText(false)
+	table.SetBorder(false)
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+
+	for _, v := range mainTableData {
+		table.Append(v)
+	}
+
+	table.Render()
+}
+
+func renderSubTable(data [][]string, file bool) string {
+	var buf bytes.Buffer
+	table := tablewriter.NewWriter(&buf)
+	table.SetAutoWrapText(!file)
+	table.SetBorder(false)
+	table.SetNoWhiteSpace(true)
+	table.SetTablePadding("\t")
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+
+	for _, v := range data {
+		table.Append(v)
+	}
+
+	table.Render()
+
+	renderedTable := buf.String()
+	lines := strings.Split(renderedTable, "\n")
+	for i, line := range lines {
+		lines[i] = "\t" + line
+	}
+
+	return strings.Join(lines, "\n")
+}
+
+func twoLines(s string) [][]string {
+	lines := strings.Split(s, "\n")
+	res := [][]string{}
+
+	count := 0
+	for _, line := range lines {
+		line = strings.TrimSpace(line)
+		if line != "" {
+			count++
+			res = append(res, []string{line})
+			if count == 2 {
+				return res
+			}
+		}
+	}
+	return res
+}
+
+func formatParams(s string) string {
+	lines := strings.Split(s, "\n")
+	table := [][]string{}
+
+	for _, line := range lines {
+		table = append(table, strings.Fields(line))
+	}
+	return renderSubTable(table, false)
}

func CopyHandler(cmd *cobra.Command, args []string) error {
@@ -1199,12 +1071,12 @@ func generate(cmd *cobra.Command, opts runOptions) error {
	return nil
}

-func RunServer(_ *cobra.Command, _ []string) error {
+func RunServer(cmd *cobra.Command, _ []string) error {
	if err := initializeKeypair(); err != nil {
		return err
	}

-	ln, err := net.Listen("tcp", envconfig.Host().Host)
+	ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
	if err != nil {
		return err
	}
@@ -1273,7 +1145,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
			return err
		}
		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
		}
	}
	return nil
@@ -1350,7 +1222,7 @@ func NewCLI() *cobra.Command {
		RunE: CreateHandler,
	}

-	createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\"")
+	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")

	showCmd := &cobra.Command{
@@ -1380,15 +1252,6 @@ func NewCLI() *cobra.Command {
	runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
	runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
	runCmd.Flags().String("format", "", "Response format (e.g. json)")

-	stopCmd := &cobra.Command{
-		Use: "stop MODEL",
-		Short: "Stop a running model",
-		Args: cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE: StopHandler,
-	}
-
	serveCmd := &cobra.Command{
		Use: "serve",
		Aliases: []string{"start"},
@@ -1456,7 +1319,6 @@ func NewCLI() *cobra.Command {
		createCmd,
		showCmd,
		runCmd,
-		stopCmd,
		pullCmd,
		pushCmd,
		listCmd,
@@ -1479,12 +1341,10 @@ func NewCLI() *cobra.Command {
				envVars["OLLAMA_NUM_PARALLEL"],
				envVars["OLLAMA_NOPRUNE"],
				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
				envVars["OLLAMA_TMPDIR"],
				envVars["OLLAMA_FLASH_ATTENTION"],
				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
+				envVars["OLLAMA_MAX_VRAM"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
			})
		default:
			appendEnvDocs(cmd, envs)
@@ -1496,7 +1356,6 @@ func NewCLI() *cobra.Command {
		createCmd,
		showCmd,
		runCmd,
-		stopCmd,
		pullCmd,
		pushCmd,
		listCmd,
371 cmd/cmd_test.go
@@ -1,371 +0,0 @@
-package cmd
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/spf13/cobra"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestShowInfo(t *testing.T) {
-	t.Run("bare details", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-quantization FP16
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("bare model info", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			ModelInfo: map[string]any{
-				"general.architecture": "test",
-				"general.parameter_count": float64(7_000_000_000),
-				"test.context_length": float64(0),
-				"test.embedding_length": float64(0),
-			},
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-context length 0
-embedding length 0
-quantization FP16
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("parameters", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-			Parameters: `
-stop never
-stop gonna
-stop give
-stop you
-stop up
-temperature 99`,
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-quantization FP16
-
-Parameters
-stop never
-stop gonna
-stop give
-stop you
-stop up
-temperature 99
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("project info", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-			ProjectorInfo: map[string]any{
-				"general.architecture": "clip",
-				"general.parameter_count": float64(133_700_000),
-				"clip.vision.embedding_length": float64(0),
-				"clip.vision.projection_dim": float64(0),
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-quantization FP16
-
-Projector
-architecture clip
-parameters 133.70M
-embedding length 0
-dimensions 0
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("system", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-			System: `You are a pirate!
-Ahoy, matey!
-Weigh anchor!
-`,
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-quantization FP16
-
-System
-You are a pirate!
-Ahoy, matey!
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("license", func(t *testing.T) {
-		var b bytes.Buffer
-		license, err := os.ReadFile(filepath.Join("..", "LICENSE"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family: "test",
-				ParameterSize: "7B",
-				QuantizationLevel: "FP16",
-			},
-			License: string(license),
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-architecture test
-parameters 7B
-quantization FP16
-
-License
-MIT License
-Copyright (c) Ollama
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-}
-
-func TestDeleteHandler(t *testing.T) {
-	stopped := false
-	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.URL.Path == "/api/delete" && r.Method == http.MethodDelete {
-			var req api.DeleteRequest
-			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			if req.Name == "test-model" {
-				w.WriteHeader(http.StatusOK)
-			} else {
-				w.WriteHeader(http.StatusNotFound)
-			}
-			return
-		}
-		if r.URL.Path == "/api/generate" && r.Method == http.MethodPost {
-			var req api.GenerateRequest
-			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			if req.Model == "test-model" {
-				w.WriteHeader(http.StatusOK)
-				if err := json.NewEncoder(w).Encode(api.GenerateResponse{
-					Done: true,
-				}); err != nil {
-					http.Error(w, err.Error(), http.StatusInternalServerError)
-				}
-				stopped = true
-				return
-			} else {
-				w.WriteHeader(http.StatusNotFound)
-				if err := json.NewEncoder(w).Encode(api.GenerateResponse{
-					Done: false,
-				}); err != nil {
-					http.Error(w, err.Error(), http.StatusInternalServerError)
-				}
-			}
-		}
-	}))
-
-	t.Setenv("OLLAMA_HOST", mockServer.URL)
-	t.Cleanup(mockServer.Close)
-
-	cmd := &cobra.Command{}
-	cmd.SetContext(context.TODO())
-	if err := DeleteHandler(cmd, []string{"test-model"}); err != nil {
-		t.Fatalf("DeleteHandler failed: %v", err)
-	}
-	if !stopped {
-		t.Fatal("Model was not stopped before deletion")
-	}
-
-	err := DeleteHandler(cmd, []string{"test-model-not-found"})
-	if err == nil || !strings.Contains(err.Error(), "unable to stop existing running model \"test-model-not-found\"") {
-		t.Fatalf("DeleteHandler failed: expected error about stopping non-existent model, got %v", err)
-	}
-}
-
-func TestGetModelfileName(t *testing.T) {
-	tests := []struct {
-		name string
-		modelfileName string
-		fileExists bool
-		expectedName string
-		expectedErr error
-	}{
-		{
-			name: "no modelfile specified, no modelfile exists",
-			modelfileName: "",
-			fileExists: false,
-			expectedName: "",
-			expectedErr: os.ErrNotExist,
-		},
-		{
-			name: "no modelfile specified, modelfile exists",
-			modelfileName: "",
-			fileExists: true,
-			expectedName: "Modelfile",
-			expectedErr: nil,
-		},
-		{
-			name: "modelfile specified, no modelfile exists",
-			modelfileName: "crazyfile",
-			fileExists: false,
-			expectedName: "crazyfile",
-			expectedErr: os.ErrNotExist,
-		},
-		{
-			name: "modelfile specified, modelfile exists",
-			modelfileName: "anotherfile",
-			fileExists: true,
-			expectedName: "anotherfile",
-			expectedErr: nil,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			cmd := &cobra.Command{
-				Use: "fakecmd",
-			}
-			cmd.Flags().String("file", "", "path to modelfile")
-
-			var expectedFilename string
-
-			if tt.fileExists {
-				tempDir, err := os.MkdirTemp("", "modelfiledir")
-				defer os.RemoveAll(tempDir)
-				if err != nil {
-					t.Fatalf("temp modelfile dir creation failed: %v", err)
-				}
-				var fn string
-				if tt.modelfileName != "" {
-					fn = tt.modelfileName
-				} else {
-					fn = "Modelfile"
-				}
-
-				tempFile, err := os.CreateTemp(tempDir, fn)
-				if err != nil {
-					t.Fatalf("temp modelfile creation failed: %v", err)
-				}
-
-				expectedFilename = tempFile.Name()
-				err = cmd.Flags().Set("file", expectedFilename)
-				if err != nil {
-					t.Fatalf("couldn't set file flag: %v", err)
-				}
-			} else {
-				if tt.modelfileName != "" {
-					expectedFilename = tt.modelfileName
-					err := cmd.Flags().Set("file", tt.modelfileName)
-					if err != nil {
-						t.Fatalf("couldn't set file flag: %v", err)
-					}
-				}
-			}
-
-			actualFilename, actualErr := getModelfileName(cmd)
-
-			if actualFilename != expectedFilename {
-				t.Errorf("expected filename: '%s' actual filename: '%s'", expectedFilename, actualFilename)
-			}
-
-			if tt.expectedErr != os.ErrNotExist {
-				if actualErr != tt.expectedErr {
-					t.Errorf("expected err: %v actual err: %v", tt.expectedErr, actualErr)
-				}
-			} else {
-				if !os.IsNotExist(actualErr) {
-					t.Errorf("expected err: %v actual err: %v", tt.expectedErr, actualErr)
-				}
-			}
-		})
-	}
-}
@@ -1,7 +1,6 @@
package cmd

import (
-	"cmp"
	"errors"
	"fmt"
	"io"
@@ -10,14 +9,14 @@ import (
	"path/filepath"
	"regexp"
	"slices"
+	"sort"
	"strings"

	"github.com/spf13/cobra"
-	"golang.org/x/exp/maps"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/progress"
	"github.com/ollama/ollama/readline"
	"github.com/ollama/ollama/types/errtypes"
)
@@ -30,7 +29,46 @@ const (
	MultilineSystem
)

+func loadModel(cmd *cobra.Command, opts *runOptions) error {
+	p := progress.NewProgress(os.Stderr)
+	defer p.StopAndClear()
+
+	spinner := progress.NewSpinner("")
+	p.Add("", spinner)
+
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
+	chatReq := &api.ChatRequest{
+		Model: opts.Model,
+		KeepAlive: opts.KeepAlive,
+	}
+
+	return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
+		p.StopAndClear()
+		for _, msg := range opts.Messages {
+			switch msg.Role {
+			case "user":
+				fmt.Printf(">>> %s\n", msg.Content)
+			case "assistant":
+				state := &displayResponseState{}
+				displayResponse(msg.Content, opts.WordWrap, state)
+				fmt.Println()
+				fmt.Println()
+			}
+		}
+		return nil
+	})
+}
+
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
+	err := loadModel(cmd, &opts)
+	if err != nil {
+		return err
+	}
+
	usage := func() {
		fmt.Fprintln(os.Stderr, "Available Commands:")
		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
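Both preload helpers in this comparison (`loadModel` here, `loadOrUnloadModel` on the other branch) lean on the same server behavior: a chat or generate request with no prompt simply loads the model, and the request's `KeepAlive` controls how long it stays resident, so a zero duration doubles as an unload. A hedged sketch of that trick against the public API, mirroring the generate-based variant rather than either branch verbatim:

```go
// unload asks the server to evict a model by issuing an empty generate
// request whose KeepAlive is zero; the same request with a positive
// KeepAlive would instead warm the model up.
func unload(ctx context.Context, client *api.Client, model string) error {
	req := &api.GenerateRequest{
		Model:     model,
		KeepAlive: &api.Duration{Duration: 0},
	}
	return client.Generate(ctx, req, func(api.GenerateResponse) error { return nil })
}
```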
@@ -100,7 +138,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
	fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>     Max number of tokens to predict")
	fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>           Pick from top k num of tokens")
	fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>         Pick token based on sum of probabilities")
-	fmt.Fprintln(os.Stderr, "  /set parameter min_p <float>         Pick token based on top token probability * min_p")
	fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>         Set the context size")
	fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>   Set creativity level")
	fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
@@ -120,7 +157,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
		return err
	}

-	if envconfig.NoHistory() {
+	if envconfig.NoHistory {
		scanner.HistoryDisable()
	}

@@ -196,7 +233,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
			opts.Model = args[1]
			opts.Messages = []api.Message{}
			fmt.Printf("Loading model '%s'\n", opts.Model)
-			if err := loadOrUnloadModel(cmd, &opts); err != nil {
+			if err := loadModel(cmd, &opts); err != nil {
				return err
			}
			continue
@@ -338,9 +375,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
				return err
			}
			req := &api.ShowRequest{
				Name: opts.Model,
				System: opts.System,
				Options: opts.Options,
			}
			resp, err := client.Show(cmd.Context(), req)
			if err != nil {
@@ -350,7 +387,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {

			switch args[1] {
			case "info":
-				_ = showInfo(resp, os.Stderr)
+				showInfo(resp)
			case "license":
				if resp.License == "" {
					fmt.Println("No license was specified for this model.")
@@ -442,6 +479,13 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
				return err
			}

+			// clear all previous images for better responses
+			if len(images) > 0 {
+				for i := range opts.Messages {
+					opts.Messages[i].Images = nil
+				}
+			}
+
			newMessage.Content = msg
			newMessage.Images = images
		}
@@ -462,54 +506,56 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
	}
}

func buildModelfile(opts runOptions) string {
-	var f parser.File
+	var mf strings.Builder
-	f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
+	model := opts.ParentModel
+	if model == "" {
+		model = opts.Model
+	}
+	fmt.Fprintf(&mf, "FROM %s\n", model)
	if opts.System != "" {
-		f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
	}

-	keys := maps.Keys(opts.Options)
+	keys := make([]string, 0)
-	slices.Sort(keys)
+	for k := range opts.Options {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
	for _, k := range keys {
-		v := opts.Options[k]
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
-		var cmds []parser.Command
-		switch t := v.(type) {
-		case []string:
-			for _, s := range t {
-				cmds = append(cmds, parser.Command{Name: k, Args: s})
-			}
-		default:
-			cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
-		}
-
-		f.Commands = append(f.Commands, cmds...)
	}
+	fmt.Fprintln(&mf)

	for _, msg := range opts.Messages {
-		f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
	}

-	return f.String()
+	return mf.String()
}

func normalizeFilePath(fp string) string {
-	return strings.NewReplacer(
+	// Define a map of escaped characters and their replacements
-		"\\ ", " ", // Escaped space
+	replacements := map[string]string{
-		"\\(", "(", // Escaped left parenthesis
+		"\\ ": " ", // Escaped space
-		"\\)", ")", // Escaped right parenthesis
+		"\\(": "(", // Escaped left parenthesis
-		"\\[", "[", // Escaped left square bracket
+		"\\)": ")", // Escaped right parenthesis
-		"\\]", "]", // Escaped right square bracket
+		"\\[": "[", // Escaped left square bracket
-		"\\{", "{", // Escaped left curly brace
+		"\\]": "]", // Escaped right square bracket
-		"\\}", "}", // Escaped right curly brace
+		"\\{": "{", // Escaped left curly brace
-		"\\$", "$", // Escaped dollar sign
+		"\\}": "}", // Escaped right curly brace
-		"\\&", "&", // Escaped ampersand
+		"\\$": "$", // Escaped dollar sign
-		"\\;", ";", // Escaped semicolon
+		"\\&": "&", // Escaped ampersand
-		"\\'", "'", // Escaped single quote
+		"\\;": ";", // Escaped semicolon
-		"\\\\", "\\", // Escaped backslash
+		"\\'": "'", // Escaped single quote
-		"\\*", "*", // Escaped asterisk
+		"\\\\": "\\", // Escaped backslash
-		"\\?", "?", // Escaped question mark
+		"\\*": "*", // Escaped asterisk
-	).Replace(fp)
+		"\\?": "?", // Escaped question mark
+	}
+
+	for escaped, actual := range replacements {
+		fp = strings.ReplaceAll(fp, escaped, actual)
+	}
+	return fp
}

func extractFileNames(input string) []string {
@@ -529,9 +575,10 @@ func extractFileData(input string) (string, []api.ImageData, error) {
|
|||||||
for _, fp := range filePaths {
|
for _, fp := range filePaths {
|
||||||
nfp := normalizeFilePath(fp)
|
nfp := normalizeFilePath(fp)
|
||||||
data, err := getImageData(nfp)
|
data, err := getImageData(nfp)
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
if err != nil {
|
||||||
continue
|
if os.IsNotExist(err) {
|
||||||
} else if err != nil {
|
continue
|
||||||
|
}
|
||||||
fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
|
fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
|
||||||
return "", imgs, err
|
return "", imgs, err
|
||||||
}
|
}
|
||||||
@@ -539,7 +586,7 @@ func extractFileData(input string) (string, []api.ImageData, error) {
|
|||||||
input = strings.ReplaceAll(input, fp, "")
|
input = strings.ReplaceAll(input, fp, "")
|
||||||
imgs = append(imgs, data)
|
imgs = append(imgs, data)
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(input), imgs, nil
|
return input, imgs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getImageData(filePath string) ([]byte, error) {
|
func getImageData(filePath string) ([]byte, error) {
|
||||||
@@ -569,7 +616,7 @@ func getImageData(filePath string) ([]byte, error) {
|
|||||||
// Check if the file size exceeds 100MB
|
// Check if the file size exceeds 100MB
|
||||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||||
if info.Size() > maxSize {
|
if info.Size() > maxSize {
|
||||||
return nil, errors.New("file size exceeds maximum limit (100MB)")
|
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
||||||
}
|
}
|
||||||
|
|
||||||
buf = make([]byte, info.Size())
|
buf = make([]byte, info.Size())
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
 package cmd

 import (
+	"bytes"
 	"testing"
+	"text/template"

-	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
 )
@@ -55,53 +57,58 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8

 func TestModelfileBuilder(t *testing.T) {
 	opts := runOptions{
 		Model:    "hork",
 		System:   "You are part horse and part shark, but all hork. Do horklike things",
 		Messages: []api.Message{
 			{Role: "user", Content: "Hey there hork!"},
 			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
 		},
-		Options: map[string]any{
-			"temperature":      0.9,
-			"seed":             42,
-			"penalize_newline": false,
-			"stop":             []string{"hi", "there"},
-		},
+		Options: map[string]interface{}{},
 	}

-	t.Run("model", func(t *testing.T) {
-		expect := `FROM hork
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	opts.Options["temperature"] = 0.9
+	opts.Options["seed"] = 42
+	opts.Options["penalize_newline"] = false
+	opts.Options["stop"] = []string{"hi", "there"}
+
+	mf := buildModelfile(opts)
+	expectedModelfile := `FROM {{.Model}}
+SYSTEM """{{.System}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
-PARAMETER stop there
+PARAMETER stop [hi there]
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `

-		actual := buildModelfile(opts)
-		if diff := cmp.Diff(expect, actual); diff != "" {
-			t.Errorf("mismatch (-want +got):\n%s", diff)
-		}
-	})
+	tmpl, err := template.New("").Parse(expectedModelfile)
+	require.NoError(t, err)

-	t.Run("parent model", func(t *testing.T) {
-		opts.ParentModel = "horseshark"
-		expect := `FROM horseshark
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	var buf bytes.Buffer
+	err = tmpl.Execute(&buf, opts)
+	require.NoError(t, err)
+	assert.Equal(t, buf.String(), mf)
+
+	opts.ParentModel = "horseshark"
+	mf = buildModelfile(opts)
+	expectedModelfile = `FROM {{.ParentModel}}
+SYSTEM """{{.System}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
-PARAMETER stop there
+PARAMETER stop [hi there]
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
-		if diff := cmp.Diff(expect, actual); diff != "" {
-			t.Errorf("mismatch (-want +got):\n%s", diff)
-		}
-	})
+	tmpl, err = template.New("").Parse(expectedModelfile)
+	require.NoError(t, err)
+
+	var parentBuf bytes.Buffer
+	err = tmpl.Execute(&parentBuf, opts)
+	require.NoError(t, err)
+	assert.Equal(t, parentBuf.String(), mf)
 }
@@ -2,7 +2,7 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
@@ -4,11 +4,11 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"

 	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		// Finally look in the path
 		appExe, err = exec.LookPath(AppName)
 		if err != nil {
-			return errors.New("could not locate ollama app")
+			return fmt.Errorf("could not locate ollama app")
 		}
 	}
 }
@@ -1,232 +1,200 @@
 package convert

 import (
+	"cmp"
+	"encoding/binary"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
-	"io/fs"
 	"log/slog"
+	"os"
+	"path/filepath"
+	"slices"
 	"strings"

-	"github.com/ollama/ollama/fileutils"
+	"google.golang.org/protobuf/proto"
+
+	"github.com/ollama/ollama/convert/sentencepiece"
+	"github.com/ollama/ollama/llm"
 )

-type ModelParameters struct {
-	Architectures []string `json:"architectures"`
-	VocabSize     uint32   `json:"vocab_size"`
-}
-
-type AdapterParameters struct {
-	Alpha          uint32 `json:"lora_alpha"`
-	LoraLayers     uint32 `json:"lora_layers"`
-	LoraParameters struct {
-		Rank  uint32  `json:"rank"`
-		Alpha float32 `json:"alpha"`
-		Scale float32 `json:"scale"`
-	} `json:"lora_parameters"`
-}
-
-func (ModelParameters) KV(t *Tokenizer) fileutils.KV {
-	kv := fileutils.KV{
-		"general.file_type":            uint32(1),
-		"general.quantization_version": uint32(2),
-		"tokenizer.ggml.pre":           t.Pre,
-		"tokenizer.ggml.model":         t.Vocabulary.Model,
-		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
-		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
-		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
-	}
-
-	if len(t.Merges) > 0 {
-		kv["tokenizer.ggml.merges"] = t.Merges
-	}
-
-	if t.Template != "" {
-		kv["tokenizer.chat_template"] = t.Template
-	}
-
-	for _, sv := range t.SpecialVocabulary {
-		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
-		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
-	}
-
-	return kv
-}
-
-func (p AdapterParameters) KV() fileutils.KV {
-	var alpha float32
-	if p.LoraParameters.Alpha == 0 {
-		alpha = float32(p.Alpha)
-	} else {
-		alpha = p.LoraParameters.Alpha
-	}
-
-	kv := fileutils.KV{
-		"adapter.lora.alpha": alpha,
-		"adapter.type":       "lora",
-		"general.file_type":  uint32(1),
-		"general.type":       "adapter",
-		"general.version":    "v0.2",
-	}
-
-	return kv
-}
-
-func (ModelParameters) specialTokenTypes() []string {
-	return []string{
-		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
-	}
-}
-
-func (ModelParameters) writeFile(ws io.WriteSeeker, kv fileutils.KV, ts []fileutils.Tensor) error {
-	return fileutils.WriteGGUF(ws, kv, ts)
-}
-
-func (AdapterParameters) writeFile(ws io.WriteSeeker, kv fileutils.KV, ts []fileutils.Tensor) error {
-	return fileutils.WriteGGUF(ws, kv, ts)
-}
-
-type ModelConverter interface {
-	// KV maps parameters to LLM key-values
-	KV(*Tokenizer) fileutils.KV
-	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []fileutils.Tensor
-	// Replacements returns a list of string pairs to replace in tensor names.
-	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
-	Replacements() []string
-
-	// specialTokenTypes returns any special token types the model uses
-	specialTokenTypes() []string
-	// writeFile writes the model to the provided io.WriteSeeker
-	writeFile(io.WriteSeeker, fileutils.KV, []fileutils.Tensor) error
-}
-
-type moreParser interface {
-	parseMore(fs.FS) error
-}
-
-type AdapterConverter interface {
-	// KV maps parameters to LLM key-values
-	KV(fileutils.KV) fileutils.KV
-	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-	Tensors([]Tensor) []fileutils.Tensor
-	// Replacements returns a list of string pairs to replace in tensor names.
-	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
-	Replacements() []string
-
-	writeFile(io.WriteSeeker, fileutils.KV, []fileutils.Tensor) error
-}
-
-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV fileutils.KV) error {
-	bts, err := fs.ReadFile(fsys, "adapter_config.json")
-	if err != nil {
-		return err
-	}
-
-	var p AdapterParameters
-	if err := json.Unmarshal(bts, &p); err != nil {
-		return err
-	}
-
-	arch, ok := baseKV["general.architecture"]
-	if !ok {
-		return errors.New("architecture not set for the base model")
-	}
-
-	var conv AdapterConverter
-	switch arch {
-	case "llama":
-		conv = &llamaAdapter{}
-	case "gemma2":
-		conv = &gemma2Adapter{}
-	default:
-		return errors.New("unsupported architecture")
-	}
-
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
-	if err != nil {
-		return err
-	}
-
-	if err := json.Unmarshal(bts, conv); err != nil {
-		return err
-	}
-
-	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
-}
-
-// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
-// and files it finds in the input path.
-// Supported input model formats include safetensors.
-// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
-	bts, err := fs.ReadFile(fsys, "config.json")
-	if err != nil {
-		return err
-	}
-
-	var p ModelParameters
-	if err := json.Unmarshal(bts, &p); err != nil {
-		return err
-	}
-
-	if len(p.Architectures) < 1 {
-		return errors.New("unknown architecture")
-	}
-
-	var conv ModelConverter
-	switch p.Architectures[0] {
-	case "LlamaForCausalLM", "MistralForCausalLM":
-		conv = &llamaModel{}
-	case "MixtralForCausalLM":
-		conv = &mixtralModel{}
-	case "GemmaForCausalLM":
-		conv = &gemmaModel{}
-	case "Gemma2ForCausalLM":
-		conv = &gemma2Model{}
-	case "Phi3ForCausalLM":
-		conv = &phi3Model{}
-	case "BertModel":
-		conv = &bertModel{}
-	default:
-		return errors.New("unsupported architecture")
-	}
-
-	if err := json.Unmarshal(bts, conv); err != nil {
-		return err
-	}
-
-	if t, ok := conv.(moreParser); ok {
-		if err := t.parseMore(fsys); err != nil {
-			return err
-		}
-	}
-
-	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
-	if err != nil {
-		return err
-	}
-
-	vocabSize := int(p.VocabSize)
-	switch {
-	case vocabSize > len(t.Vocabulary.Tokens):
-		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
-		for i := range vocabSize - len(t.Vocabulary.Tokens) {
-			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
-			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
-			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
-		}
-	case vocabSize < len(t.Vocabulary.Tokens):
-		return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
-	default:
-		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
-	}
-
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
-	if err != nil {
-		return err
-	}
-
-	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
+const (
+	_ int32 = iota
+	tokenTypeNormal
+	tokenTypeUnknown
+	tokenTypeControl
+	tokenTypeUserDefined
+	tokenTypeUnused
+	tokenTypeByte
+)
+
+type Params struct {
+	Architectures     []string `json:"architectures"`
+	VocabSize         int      `json:"vocab_size"`
+	HiddenSize        int      `json:"hidden_size"`       // n_embd
+	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
+	ContextSize       int      `json:"max_position_embeddings"`
+	IntermediateSize  int      `json:"intermediate_size"`
+	AttentionHeads    int      `json:"num_attention_heads"` // n_head
+	KeyValHeads       int      `json:"num_key_value_heads"`
+	NormEPS           float64  `json:"rms_norm_eps"`
+	BoSTokenID        int      `json:"bos_token_id"`
+	EoSTokenID        int      `json:"eos_token_id"`
+	HeadDimension     int      `json:"head_dim"`
+	PaddingTokenID    int      `json:"pad_token_id"`
+	RopeFrequencyBase float64  `json:"rope_theta"`
+
+	Experts     int `json:"num_local_experts"`
+	ExpertsUsed int `json:"num_experts_per_tok"`
+
+	PreTokenizer string
+
+	ByteOrder
+}
+
+type ByteOrder interface {
+	binary.ByteOrder
+	binary.AppendByteOrder
+}
+
+type ModelArch interface {
+	GetTensors() error
+	LoadVocab() error
+	WriteGGUF(io.WriteSeeker) error
+}
+
+type ModelFormat interface {
+	GetLayerName(string) (string, error)
+	GetTensors(string, *Params) ([]llm.Tensor, error)
+	GetParams(string) (*Params, error)
+	GetModelArch(string, string, *Params) (ModelArch, error)
+}
+
+type ModelData struct {
+	Path    string
+	Name    string
+	Params  *Params
+	Vocab   *Vocab
+	Tensors []llm.Tensor
+	Format  ModelFormat
+}
+
+func GetModelFormat(dirname string) (ModelFormat, error) {
+	files, err := filepath.Glob(filepath.Join(dirname, "*"))
+	if err != nil {
+		return nil, err
+	}
+
+	for _, fn := range files {
+		if strings.HasSuffix(fn, ".safetensors") {
+			return &SafetensorFormat{}, nil
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
+			slog.Debug("model is torch")
+			return &TorchFormat{}, nil
+		}
+	}
+
+	return nil, fmt.Errorf("couldn't determine model format")
+}
+
+// Details on gguf's tokenizer can be found at:
+// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
+type Vocab struct {
+	Tokens []string
+	Scores []float32
+	Types  []int32
+	Merges []string
+}
+
+func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
+	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
+	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
+	if err != nil {
+		return nil, err
+	}
+
+	// To regenerate sentencepiece from the protobufs use:
+	// protoc -I=./ --go_out=./ sentencepiece_model.proto
+	modelProto := &sentencepiece.ModelProto{}
+	if err := proto.Unmarshal(in, modelProto); err != nil {
+		return nil, err
+	}
+
+	v := &Vocab{
+		Tokens: make([]string, 0),
+		Scores: make([]float32, 0),
+		Types:  make([]int32, 0),
+	}
+
+	pieces := modelProto.GetPieces()
+	for _, p := range pieces {
+		v.Tokens = append(v.Tokens, p.GetPiece())
+		v.Scores = append(v.Scores, p.GetScore())
+		t := p.GetType()
+		switch t {
+		case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
+		case sentencepiece.ModelProto_SentencePiece_CONTROL:
+		case sentencepiece.ModelProto_SentencePiece_UNUSED:
+		case sentencepiece.ModelProto_SentencePiece_BYTE:
+		default:
+			t = sentencepiece.ModelProto_SentencePiece_NORMAL
+		}
+		v.Types = append(v.Types, int32(t))
+	}
+
+	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
+
+	// add any additional tokens
+	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
+	if os.IsNotExist(err) {
+		return v, nil
+	} else if err != nil {
+		return nil, err
+	}
+
+	slog.Info("reading user defined tokens")
+
+	var extraTokenData map[string]int
+	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
+		return nil, err
+	}
+
+	type token struct {
+		key string
+		pos int
+	}
+
+	extraTokens := make([]token, 0)
+	for k, id := range extraTokenData {
+		extraTokens = append(extraTokens, token{k, id})
+	}
+
+	slices.SortFunc(extraTokens, func(a, b token) int {
+		return cmp.Compare(a.pos, b.pos)
+	})
+
+	numToks := len(v.Tokens)
+
+	for cnt, t := range extraTokens {
+		// the token id should match the specific index for the total number of tokens
+		if t.pos != cnt+numToks {
+			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
+		}
+		v.Tokens = append(v.Tokens, t.key)
+		v.Scores = append(v.Scores, -1000.0)
+		v.Types = append(v.Types, tokenTypeUserDefined)
+	}
+	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
+
+	if params.VocabSize > len(v.Tokens) {
+		missingTokens := params.VocabSize - len(v.Tokens)
+		slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
+		for cnt := range missingTokens {
+			v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
+			v.Scores = append(v.Scores, -1)
+			v.Types = append(v.Types, tokenTypeUserDefined)
+		}
+	}
+
+	return v, nil
 }
@@ -1,174 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/json"
-	"io/fs"
-	"path/filepath"
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/fileutils"
-)
-
-type bertModel struct {
-	ModelParameters
-	NLayers               uint32  `json:"n_layers"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	NLayer                uint32  `json:"n_layer"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	NCtx                  uint32  `json:"n_ctx"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	NEmbd                 uint32  `json:"n_embd"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NInner                uint32  `json:"n_inner"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NHead                 uint32  `json:"n_head"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	LayerNormEPS          float32 `json:"layer_norm_eps"`
-	LayerNormEpsilon      float32 `json:"layer_norm_epsilon"`
-	NormEpsilon           float32 `json:"norm_epsilon"`
-
-	PoolingType uint32
-}
-
-var (
-	_ ModelConverter = (*bertModel)(nil)
-	_ moreParser     = (*bertModel)(nil)
-)
-
-func (p *bertModel) parseMore(fsys fs.FS) error {
-	bts, err := fs.ReadFile(fsys, "modules.json")
-	if err != nil {
-		return err
-	}
-
-	var modules []struct {
-		Type string `json:"type"`
-		Path string `json:"path"`
-	}
-
-	if err := json.Unmarshal(bts, &modules); err != nil {
-		return err
-	}
-
-	var pooling string
-	for _, m := range modules {
-		if m.Type == "sentence_transformers.models.Pooling" {
-			pooling = m.Path
-			break
-		}
-	}
-
-	if pooling != "" {
-		bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
-		if err != nil {
-			return err
-		}
-
-		var pc struct {
-			PoolingModeCLSToken   bool `json:"pooling_mode_cls_token"`
-			PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
-		}
-
-		if err := json.Unmarshal(bts, &pc); err != nil {
-			return err
-		}
-
-		if pc.PoolingModeMeanTokens {
-			p.PoolingType = 1
-		} else if pc.PoolingModeCLSToken {
-			p.PoolingType = 2
-		}
-	}
-
-	return nil
-}
-
-func (p *bertModel) KV(t *Tokenizer) fileutils.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "bert"
-	kv["bert.attention.causal"] = false
-	kv["bert.pooling_type"] = p.PoolingType
-
-	kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
-
-	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
-		kv["bert.context_length"] = contextLength
-	}
-
-	if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
-		kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
-	}
-
-	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
-		kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
-	}
-
-	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
-		kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-	}
-
-	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
-		kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
-	}
-
-	kv["tokenizer.ggml.model"] = "bert"
-	kv["tokenizer.ggml.token_type_count"] = uint32(2)
-
-	// convert to phantom space tokens
-	for i, e := range t.Tokens {
-		if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
-			// noop
-		} else if strings.HasPrefix(e, "##") {
-			t.Tokens[i] = e[2:]
-		} else {
-			t.Tokens[i] = "\u2581" + e
-		}
-	}
-
-	kv["tokenizer.ggml.tokens"] = t.Tokens
-
-	return kv
-}
-
-func (p *bertModel) Tensors(ts []Tensor) []fileutils.Tensor {
-	var out []fileutils.Tensor
-	for _, t := range ts {
-		if slices.Contains([]string{
-			"embeddings.position_ids",
-			"pooler.dense.weight",
-			"pooler.dense.bias",
-		}, t.Name()) {
-			continue
-		}
-
-		out = append(out, fileutils.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (bertModel) Replacements() []string {
-	return []string{
-		"encoder.layer", "blk",
-		"encoder.layers", "blk",
-		"embeddings.word_embeddings", "token_embd",
-		"embeddings.token_type_embeddings", "token_types",
-		"embeddings.LayerNorm", "token_embd_norm",
-		"embeddings.position_embeddings", "position_embd",
-		"attention.self.query", "attn_q",
-		"attention.self.key", "attn_k",
-		"attention.self.value", "attn_v",
-		"attention.output.dense", "attn_output",
-		"attention.output.LayerNorm", "attn_output_norm",
-		"intermediate.dense", "ffn_up",
-		"output.dense", "ffn_down",
-		"output.LayerNorm", "layer_output_norm",
-	}
-}
@@ -1,100 +0,0 @@
-package convert
-
-import (
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/fileutils"
-)
-
-type gemmaModel struct {
-	ModelParameters
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	HeadDim               uint32  `json:"head_dim"`
-}
-
-var _ ModelConverter = (*gemmaModel)(nil)
-
-func (p *gemmaModel) KV(t *Tokenizer) fileutils.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "gemma"
-	kv["gemma.context_length"] = p.MaxPositionEmbeddings
-	kv["gemma.embedding_length"] = p.HiddenSize
-	kv["gemma.block_count"] = p.HiddenLayers
-	kv["gemma.feed_forward_length"] = p.IntermediateSize
-	kv["gemma.attention.head_count"] = p.NumAttentionHeads
-	kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
-	kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["gemma.attention.key_length"] = p.HeadDim
-	kv["gemma.attention.value_length"] = p.HeadDim
-	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
-	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
-	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
-	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
-	return kv
-}
-
-func (p *gemmaModel) Tensors(ts []Tensor) []fileutils.Tensor {
-	var out []fileutils.Tensor
-	for _, t := range ts {
-		if strings.HasSuffix(t.Name(), "_norm.weight") {
-			t.SetRepacker(p.addOne)
-		}
-
-		out = append(out, fileutils.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *gemmaModel) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "ffn_norm",
-	}
-}
-
-func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
-	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
-	ones := tensor.Ones(tensor.Float32, int(shape[0]))
-
-	n, err := n.Add(ones)
-	if err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 0)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
@@ -1,53 +0,0 @@
-package convert
-
-import (
-	"github.com/ollama/ollama/fileutils"
-)
-
-type gemma2Model struct {
-	gemmaModel
-	SlidingWindow         uint32  `json:"sliding_window"`
-	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
-	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
-}
-
-func (p *gemma2Model) KV(t *Tokenizer) fileutils.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "gemma2"
-	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
-	kv["gemma2.embedding_length"] = p.HiddenSize
-	kv["gemma2.block_count"] = p.HiddenLayers
-	kv["gemma2.feed_forward_length"] = p.IntermediateSize
-	kv["gemma2.attention.head_count"] = p.NumAttentionHeads
-	kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
-	kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["gemma2.attention.key_length"] = p.HeadDim
-	kv["gemma2.attention.value_length"] = p.HeadDim
-	kv["gemma2.attention.sliding_window"] = p.SlidingWindow
-	kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
-	kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
-	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
-	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
-	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
-	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
-	return kv
-}
-
-func (p *gemma2Model) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "post_attention_norm",
-		"pre_feedforward_layernorm", "ffn_norm",
-		"post_feedforward_layernorm", "post_ffw_norm",
-	}
-}
@@ -1,91 +0,0 @@
-package convert
-
-import (
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/fileutils"
-)
-
-type gemma2Adapter struct {
-	AdapterParameters
-}
-
-var _ AdapterConverter = (*gemma2Adapter)(nil)
-
-func (p *gemma2Adapter) KV(baseKV fileutils.KV) fileutils.KV {
-	kv := p.AdapterParameters.KV()
-	kv["general.architecture"] = "gemma2"
-	return kv
-}
-
-func (p *gemma2Adapter) Tensors(ts []Tensor) []fileutils.Tensor {
-	var out []fileutils.Tensor
-	for _, t := range ts {
-		shape := t.Shape()
-		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
-			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
-			shape[0], shape[1] = shape[1], shape[0]
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, fileutils.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *gemma2Adapter) Replacements() []string {
-	return []string{
-		"base_model.model.", "",
-		"model.layers", "blk",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"lora_A.weight", "weight.lora_a",
-		"lora_B.weight", "weight.lora_b",
-		"lora_a", "weight.lora_a",
-		"lora_b", "weight.lora_b",
-	}
-}
-
-func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	if err := n.T(1, 0); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
@@ -1,213 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"fmt"
-	"math"
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/fileutils"
-)
-
-type llamaModel struct {
-	ModelParameters
-	NLayers               uint32  `json:"n_layers"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	NLayer                uint32  `json:"n_layer"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	NCtx                  uint32  `json:"n_ctx"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	NEmbd                 uint32  `json:"n_embd"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NInner                uint32  `json:"n_inner"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NHead                 uint32  `json:"n_head"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RopeTheta             float32 `json:"rope_theta"`
-	RopeScaling           struct {
-		Type                            string  `json:"type"`
-		RopeType                        string  `json:"rope_type"`
-		Factor                          float32 `json:"factor"`
-		LowFrequencyFactor              float32 `json:"low_freq_factor"`
-		HighFrequencyFactor             float32 `json:"high_freq_factor"`
-		OriginalMaxPositionalEmbeddings uint32  `json:"original_max_positional_embeddings"`
-
-		factors ropeFactor
-	} `json:"rope_scaling"`
-	RMSNormEPS       float32 `json:"rms_norm_eps"`
-	LayerNormEPS     float32 `json:"layer_norm_eps"`
-	LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
-	NormEpsilon      float32 `json:"norm_epsilon"`
-	HeadDim          uint32  `json:"head_dim"`
-}
-
-var _ ModelConverter = (*llamaModel)(nil)
-
-func (p *llamaModel) KV(t *Tokenizer) fileutils.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "llama"
-	kv["llama.vocab_size"] = p.VocabSize
-
-	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
-
-	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
-		kv["llama.context_length"] = contextLength
-	}
-
-	if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
-		kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
-	}
-
-	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
-		kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
-	}
-
-	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
-		kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-		kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
-	}
-
-	if p.RopeTheta > 0 {
-		kv["llama.rope.freq_base"] = p.RopeTheta
-	}
-
-	if p.RopeScaling.Type == "linear" {
-		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
-		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
-	} else if p.RopeScaling.RopeType == "llama3" {
-		dim := p.HiddenSize / p.NumAttentionHeads
-		for i := uint32(0); i < dim; i += 2 {
-			factor := cmp.Or(p.RopeScaling.Factor, 8.0)
-			factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
-			factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
-
-			original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
-			lambdaLow := float32(original) / factorLow
-			lambdaHigh := float32(original) / factorHigh
-
-			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
-			if lambda < float64(lambdaHigh) {
-				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
-			} else if lambda > float64(lambdaLow) {
-				p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
-			} else {
-				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
-				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
-			}
-		}
-	}
-
-	if p.NumKeyValueHeads > 0 {
-		kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
-	}
-
-	if p.RMSNormEPS > 0 {
-		kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	}
-
-	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
-		kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
-	}
-
-	if p.HeadDim > 0 {
-		kv["llama.attention.key_length"] = p.HeadDim
-		kv["llama.attention.value_length"] = p.HeadDim
-	}
-
-	return kv
-}
-
-func (p *llamaModel) Tensors(ts []Tensor) []fileutils.Tensor {
-	var out []fileutils.Tensor
-
-	if p.RopeScaling.factors != nil {
-		out = append(out, fileutils.Tensor{
-			Name:     "rope_freqs.weight",
-			Kind:     0,
-			Shape:    []uint64{uint64(len(p.RopeScaling.factors))},
-			WriterTo: p.RopeScaling.factors,
-		})
-	}
-
-	for _, t := range ts {
-		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
-			strings.HasSuffix(t.Name(), "attn_k.weight") {
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, fileutils.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *llamaModel) Replacements() []string {
-	return []string{
-		"lm_head", "output",
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "ffn_norm",
-	}
-}
-
-func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	var dims []int
-	for _, dim := range shape {
-		dims = append(dims, int(dim))
-	}
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	} else {
-		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
-	}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
@@ -1,169 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/fileutils"
-)
-
-type llamaAdapter struct {
-	AdapterParameters
-	NumAttentionHeads uint32 `json:"num_attention_heads"`
-	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
-}
-
-var _ AdapterConverter = (*llamaAdapter)(nil)
-
-func (p *llamaAdapter) KV(baseKV fileutils.KV) fileutils.KV {
-	kv := p.AdapterParameters.KV()
-	kv["general.architecture"] = "llama"
-	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
-	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
-
-	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
-
-	return kv
-}
-
-func (p *llamaAdapter) Tensors(ts []Tensor) []fileutils.Tensor {
-	var out []fileutils.Tensor
-	for _, t := range ts {
-		shape := t.Shape()
-		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
-			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
-			shape[0], shape[1] = shape[1], shape[0]
-			t.SetRepacker(p.repackAndTranspose)
-		} else {
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, fileutils.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    shape,
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *llamaAdapter) Replacements() []string {
-	return []string{
-		"base_model.model.", "",
-		"model.layers", "blk",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"lora_A.weight", "weight.lora_a",
-		"lora_B.weight", "weight.lora_b",
-		"lora_a", "weight.lora_a",
-		"lora_b", "weight.lora_b",
-	}
-}
-
-func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	} else {
-		return data, nil
-	}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
-
-func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	}
-
-	if heads > 0 {
-		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-			return nil, err
-		}
-
-		if err := n.T(0, 2, 1, 3); err != nil {
-			return nil, err
-		}
-
-		if err := n.Reshape(dims...); err != nil {
-			return nil, err
-		}
-
-		if err := n.Transpose(); err != nil {
-			return nil, err
-		}
-	}
-
-	if err := n.T(1, 0); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
@@ -1,94 +0,0 @@
package convert

import (
	"fmt"
	"io"
	"slices"
	"strings"

	"github.com/ollama/ollama/fileutils"
)

type mixtralModel struct {
	llamaModel
	NumLocalExperts    uint32 `json:"num_local_experts"`
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

func (p *mixtralModel) KV(t *Tokenizer) fileutils.KV {
	kv := p.llamaModel.KV(t)

	if p.NumLocalExperts > 0 {
		kv["llama.expert_count"] = p.NumLocalExperts
	}

	if p.NumExpertsPerToken > 0 {
		kv["llama.expert_used_count"] = p.NumExpertsPerToken
	}

	return kv
}

func (p *mixtralModel) Tensors(ts []Tensor) []fileutils.Tensor {
	oldnew := []string{
		"model.layers", "blk",
		"w1", "ffn_gate_exps",
		"w2", "ffn_down_exps",
		"w3", "ffn_up_exps",
	}

	for i := range p.NumLocalExperts {
		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
	}

	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
	namer := strings.NewReplacer(oldnew...)
	experts := make(map[string]experts)

	// merge experts into a single tensor while removing them from ts
	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
			return false
		}

		name := namer.Replace(t.Name())
		experts[name] = append(experts[name], t)
		return true
	})

	var out []fileutils.Tensor
	for n, e := range experts {
		// TODO(mxyng): sanity check experts
		out = append(out, fileutils.Tensor{
			Name:     n,
			Kind:     e[0].Kind(),
			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
			WriterTo: e,
		})
	}

	return append(out, p.llamaModel.Tensors(ts)...)
}

func (p *mixtralModel) Replacements() []string {
	return append(
		p.llamaModel.Replacements(),
		"block_sparse_moe.gate", "ffn_gate_inp",
	)
}

type experts []Tensor

func (e experts) WriteTo(w io.Writer) (int64, error) {
	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
	for _, t := range e {
		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
		// this accomplishes the same thing by writing each expert tensor in sequence
		if _, err := t.WriteTo(w); err != nil {
			return 0, err
		}
	}

	return 0, nil
}
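Because tensor data is written as one flat row-major stream, writing each expert back-to-back (as experts.WriteTo does above) produces the same bytes as stacking them along a new leading axis of shape [len(e), ...]. A toy illustration with two hypothetical 2x2 experts:

package main

import "fmt"

func main() {
	// two hypothetical experts, each flattened row-major from shape [2, 2]
	e0 := []float32{1, 2, 3, 4}
	e1 := []float32{5, 6, 7, 8}

	// "stacking" along a new axis 0 gives shape [2, 2, 2]; the flat buffer
	// is simply the concatenation, which is what sequential WriteTo produces
	stacked := append(append([]float32{}, e0...), e1...)
	fmt.Println(stacked) // [1 2 3 4 5 6 7 8]
}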
@@ -1,123 +0,0 @@
package convert

import (
	"cmp"
	"encoding/binary"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/ollama/ollama/fileutils"
)

type phi3Model struct {
	ModelParameters
	NumHiddenLayers   uint32  `json:"num_hidden_layers"`
	NLayers           uint32  `json:"n_layers"`
	HiddenSize        uint32  `json:"hidden_size"`
	NEmbd             uint32  `json:"n_embd"`
	IntermediateSize  uint32  `json:"intermediate_size"`
	NumAttentionHeads uint32  `json:"num_attention_heads"`
	NHead             uint32  `json:"n_head"`
	NumKeyValueHeads  uint32  `json:"num_key_value_heads"`
	NHeadKV           uint32  `json:"n_head_kv"`
	RopeTheta         float32 `json:"rope_theta"`
	RopeScaling       struct {
		Type        string     `json:"type"`
		LongFactor  ropeFactor `json:"long_factor"`
		ShortFactor ropeFactor `json:"short_factor"`
	} `json:"rope_scaling"`
	RMSNormEPS                    float32 `json:"rms_norm_eps"`
	NPositions                    uint32  `json:"n_positions"`
	MaxPositionEmbeddings         uint32  `json:"max_position_embeddings"`
	OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
	SlidingWindow                 uint32  `json:"sliding_window"`
}

var _ ModelConverter = (*phi3Model)(nil)

func (p *phi3Model) KV(t *Tokenizer) fileutils.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "phi3"
	kv["phi3.context_length"] = p.MaxPositionEmbeddings
	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
	kv["phi3.feed_forward_length"] = p.IntermediateSize
	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
	kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
	kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
	kv["phi3.rope.freq_base"] = p.RopeTheta
	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
	kv["phi3.attention.sliding_window"] = p.SlidingWindow

	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)

	switch p.RopeScaling.Type {
	case "":
		// no scaling
	case "su", "longrope":
		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
	case "yarn":
		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
	default:
		panic("unknown rope scaling type")
	}

	return kv
}

func (p *phi3Model) Tensors(ts []Tensor) []fileutils.Tensor {
	var addRopeFactors sync.Once

	out := make([]fileutils.Tensor, 0, len(ts)+2)
	for _, t := range ts {
		if strings.HasPrefix(t.Name(), "blk.0.") {
			addRopeFactors.Do(func() {
				out = append(out, fileutils.Tensor{
					Name:     "rope_factors_long.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
					WriterTo: p.RopeScaling.LongFactor,
				}, fileutils.Tensor{
					Name:     "rope_factors_short.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
					WriterTo: p.RopeScaling.ShortFactor,
				})
			})
		}

		out = append(out, fileutils.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *phi3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.qkv_proj", "attn_qkv",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_up_proj", "ffn_up",
		"post_attention_layernorm", "ffn_norm",
	}
}

type ropeFactor []float32

func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	err := binary.Write(w, binary.LittleEndian, r)
	return 0, err
}
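To make the attn_factor arithmetic concrete, here are the same formulas evaluated on hypothetical Phi-3-mini-128k-style numbers (131072 max positions, 4096 original); the values are illustrative, not taken from a real conversion:

package main

import (
	"fmt"
	"math"
)

func main() {
	maxPos, origPos := 131072.0, 4096.0
	scale := maxPos / origPos // 32

	// "su"/"longrope": sqrt(1 + ln(scale)/ln(orig_ctx)), floored at 1.0
	su := math.Max(math.Sqrt(1+math.Log(scale)/math.Log(origPos)), 1.0)

	// "yarn": 0.1*ln(scale) + 1.0, floored at 1.0
	yarn := math.Max(0.1*math.Log(scale)+1.0, 1.0)

	fmt.Printf("su=%.4f yarn=%.4f\n", su, yarn) // su=1.1902 yarn=1.3466
}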
@@ -1,44 +1,48 @@
+//go:build slow
+
 package convert
 
 import (
-	"bytes"
-	"crypto/sha256"
-	"encoding/binary"
-	"encoding/hex"
-	"encoding/json"
-	"flag"
-	"fmt"
-	"io"
-	"io/fs"
-	"log/slog"
-	"math"
 	"os"
 	"path/filepath"
-	"slices"
-	"strings"
 	"testing"
 
-	"golang.org/x/exp/maps"
-
-	"github.com/ollama/ollama/fileutils"
+	"github.com/ollama/ollama/llm"
 )
 
-type tensorData struct {
-	Offsets []int  `json:"data_offsets"`
-	Type    string `json:"dtype"`
-	Shape   []int  `json:"shape"`
-}
-
-func convertFull(t *testing.T, fsys fs.FS) (*os.File, fileutils.KV, *fileutils.Tensors) {
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
 	t.Helper()
 
+	mf, err := GetModelFormat(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	params, err := mf.GetParams(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	arch, err := mf.GetModelArch("", p, params)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.LoadVocab(); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.GetTensors(); err != nil {
+		t.Fatal(err)
+	}
+
 	f, err := os.CreateTemp(t.TempDir(), "f16")
 	if err != nil {
 		t.Fatal(err)
 	}
	defer f.Close()
 
-	if err := ConvertModel(fsys, f); err != nil {
+	if err := arch.WriteGGUF(f); err != nil {
 		t.Fatal(err)
 	}
 
@@ -46,431 +50,54 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, fileutils.KV, *fileutils.T
 	if err != nil {
 		t.Fatal(err)
 	}
-	t.Cleanup(func() { r.Close() })
+	defer r.Close()
 
-	m, _, err := fileutils.DecodeGGML(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	if _, err := r.Seek(0, io.SeekStart); err != nil {
-		t.Fatal(err)
-	}
-
-	return r, m.KV(), m.Tensors()
+	return m.KV(), m.Tensors()
 }
 
-func generateResultsJSON(t *testing.T, f *os.File, kv fileutils.KV, tensors *fileutils.Tensors) map[string]string {
-	actual := make(map[string]string)
-	for k, v := range kv {
-		if s, ok := v.(json.Marshaler); !ok {
-			actual[k] = fmt.Sprintf("%v", v)
-		} else {
-			bts, err := json.Marshal(s)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
-		}
-	}
-
-	for _, tensor := range tensors.Items {
-		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
-		if _, err := io.Copy(sha256sum, sr); err != nil {
-			t.Fatal(err)
-		}
-
-		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
-	}
-
-	return actual
-}
-
-func TestMain(m *testing.M) {
-	var level slog.Level
-	flag.TextVar(&level, "level", slog.LevelInfo, "log level")
-	flag.Parse()
-	slog.SetLogLoggerLevel(level)
-	os.Exit(m.Run())
-}
-
-func TestConvertModel(t *testing.T) {
-	cases := []string{
-		"Meta-Llama-3-8B-Instruct",
-		"Meta-Llama-3.1-8B-Instruct",
-		"Mistral-7B-Instruct-v0.2",
-		"Mixtral-8x7B-Instruct-v0.1",
-		"gemma-2b-it",
-		"gemma-2-2b-it",
-		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
-		"Phi-3-mini-128k-instruct",
-		"all-MiniLM-L6-v2",
-		"gemma-2-9b-it",
-	}
-
-	for i := range cases {
-		tt := cases[i]
-		t.Run(tt, func(t *testing.T) {
-			t.Parallel()
-
-			p := filepath.Join("testdata", tt)
-			if testing.Short() {
-				t.Skip("skipping in short mode")
-			} else if _, err := os.Stat(p); err != nil {
+func TestConvertFull(t *testing.T) {
+	cases := []struct {
+		path    string
+		arch    string
+		tensors int
+		layers  int
+	}{
+		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
+		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
+		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
+		{"gemma-2b-it", "gemma", 164, 20},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.path, func(t *testing.T) {
+			p := filepath.Join("testdata", tt.path)
+			if _, err := os.Stat(p); err != nil {
 				t.Skipf("%s not found", p)
 			}
 
-			f, kv, tensors := convertFull(t, os.DirFS(p))
-			actual := generateResultsJSON(t, f, kv, tensors)
-
-			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			var expect map[string]string
-			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
-				t.Fatal(err)
-			}
-
-			keys := maps.Keys(expect)
-			slices.Sort(keys)
-			for _, k := range keys {
-				if v, ok := actual[k]; !ok {
-					t.Errorf("missing %s", k)
-				} else if v != expect[k] {
-					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
-				}
-			}
+			kv, tensors := convertFull(t, p)
+
+			if kv.Architecture() != tt.arch {
+				t.Fatalf("expected llama, got %s", kv.Architecture())
+			}
+
+			if kv.FileType().String() != "F16" {
+				t.Fatalf("expected F16, got %s", kv.FileType())
+			}
+
+			if len(tensors) != tt.tensors {
+				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
+			}
+
+			layers := tensors.Layers()
+			if len(layers) != tt.layers {
+				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
+			}
 		})
 	}
 }
 
-func TestConvertInvalidTensorNames(t *testing.T) {
-	f, err := os.CreateTemp(t.TempDir(), "testmodel")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	tempDir := t.TempDir()
-
-	td := map[string]*tensorData{}
-	offset := 4096
-
-	td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "F32",
-		Shape:   []int{4096, 4096},
-	}
-	td["blk.0.attn_q.weight"] = &tensorData{
-		Offsets: []int{offset, offset * 2},
-		Type:    "F32",
-		Shape:   []int{4096, 4096},
-	}
-	generateSafetensorTestData(t, tempDir, td)
-
-	err = ConvertModel(os.DirFS(tempDir), f)
-	if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") {
-		t.Errorf("expected error but didn't get one")
-	}
-}
-
-func TestConvertInvalidDatatype(t *testing.T) {
-	f, err := os.CreateTemp(t.TempDir(), "testmodel")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	tempDir := t.TempDir()
-
-	td := map[string]*tensorData{}
-	offset := 4096 * 14336
-
-	td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "I8",
-		Shape:   []int{4096, 14336},
-	}
-	td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
-		Offsets: []int{offset, offset},
-		Type:    "U8",
-		Shape:   []int{},
-	}
-	generateSafetensorTestData(t, tempDir, td)
-
-	err = ConvertModel(os.DirFS(tempDir), f)
-	if err == nil || err.Error() != "unsupported safetensors model" {
-		t.Errorf("expected error but didn't get one")
-	}
-}
-
-func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) {
-	data, err := json.Marshal(tensorData)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	var buf bytes.Buffer
-
-	l := int64(len(data))
-	err = binary.Write(&buf, binary.LittleEndian, l)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	_, err = buf.Write(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer fdata.Close()
-
-	_, err = fdata.Write(buf.Bytes())
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	configData := `
-{
-  "architectures": [
-    "LlamaForCausalLM"
-  ]
-}
-`
-
-	f, err := os.Create(filepath.Join(tempDir, "config.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(configData)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	tokenizerData := `
-{
-}
-`
-
-	f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(tokenizerData)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestConvertAdapter(t *testing.T) {
-	type AdapterCase struct {
-		Name     string
-		BaseKV   map[string]any
-		Expected map[string]string
-	}
-
-	cases := []AdapterCase{
-		{
-			Name: "discollama",
-			BaseKV: map[string]any{
-				"general.architecture":          "llama",
-				"llama.attention.head_count":    uint32(32),
-				"llama.attention.head_count_kv": uint32(8),
-			},
-			Expected: map[string]string{
-				"general.architecture":          "llama",
-				"general.file_type":             "1",
-				"general.parameter_count":       "106496",
-				"general.type":                  "adapter",
-				"general.version":               "v0.2",
-				"adapter.lora.alpha":            "16",
-				"adapter.type":                  "lora",
-				"llama.attention.head_count":    "32",
-				"llama.attention.head_count_kv": "8",
-				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
-			},
-		},
-	}
-
-	for _, c := range cases {
-		t.Run(c.Name, func(t *testing.T) {
-			t.Parallel()
-
-			f, err := os.CreateTemp(t.TempDir(), "f16")
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer f.Close()
-
-			tempDir := t.TempDir()
-			generateLoraTestData(t, tempDir)
-
-			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
-				t.Fatal(err)
-			}
-
-			r, err := os.Open(f.Name())
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer r.Close()
-
-			m, _, err := fileutils.DecodeGGML(r, math.MaxInt)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if _, err := r.Seek(0, io.SeekStart); err != nil {
-				t.Fatal(err)
-			}
-
-			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
-
-			keys := maps.Keys(c.Expected)
-			slices.Sort(keys)
-			for _, k := range keys {
-				if v, ok := actual[k]; !ok {
-					t.Errorf("missing %s", k)
-				} else if v != c.Expected[k] {
-					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
-				}
-			}
-		})
-	}
-}
-
-func generateLoraTestData(t *testing.T, tempDir string) {
-	offset := 4096 * 8 * 4
-
-	td := map[string]*tensorData{"__metadata__": nil}
-	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "F32",
-		Shape:   []int{4096, 8},
-	}
-	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
-		Offsets: []int{offset, offset * 2},
-		Type:    "F32",
-		Shape:   []int{8, 4096},
-	}
-	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
-		Offsets: []int{offset * 2, offset * 3},
-		Type:    "F32",
-		Shape:   []int{4096, 8},
-	}
-	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
-		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
-		Type:    "F32",
-		Shape:   []int{8, 1024},
-	}
-
-	data, err := json.Marshal(td)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	var buf bytes.Buffer
-
-	l := int64(len(data))
-	err = binary.Write(&buf, binary.LittleEndian, l)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	_, err = buf.Write(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// write some data for the tensors
-
-	ones := make([]float32, 4096*8)
-	for i := range ones {
-		ones[i] = float32(1)
-	}
-
-	for range 3 {
-		err = binary.Write(&buf, binary.LittleEndian, ones)
-		if err != nil {
-			t.Fatal(err)
-		}
-	}
-
-	ones = make([]float32, 1024*8)
-	for i := range ones {
-		ones[i] = float32(1)
-	}
-
-	err = binary.Write(&buf, binary.LittleEndian, ones)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer fdata.Close()
-
-	_, err = fdata.Write(buf.Bytes())
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	configData := `
-{
-    "adapter_path": "adapters-test",
-    "batch_size": 8,
-    "config": "config-tiny.json",
-    "data": "../discollama-completion",
-    "grad_checkpoint": null,
-    "iters": 1000,
-    "learning_rate": 1e-05,
-    "lora_layers": 1,
-    "lora_parameters": {
-        "rank": 8,
-        "alpha": 16,
-        "dropout": 0.0,
-        "scale": 2.0
-    },
-    "lr_schedule": null,
-    "max_seq_length": 2048,
-    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
-    "resume_adapter_file": null,
-    "save_every": 100,
-    "seed": 0,
-    "steps_per_eval": 200,
-    "steps_per_report": 10,
-    "test": false,
-    "test_batches": 500,
-    "train": true,
-    "use_dora": false,
-    "val_batches": 25
-}
-`
-	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(configData)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
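The adapter_config.json above is MLX trainer output; of it, the expected KV in the adapter test only reflects the lora_parameters block (its alpha of 16 shows up as adapter.lora.alpha). A minimal sketch of decoding just those fields, with a hypothetical struct name:

package main

import (
	"encoding/json"
	"fmt"
)

// adapterParams models only the fields of interest; the rest of the MLX
// config is ignored when unmarshalling.
type adapterParams struct {
	LoraParameters struct {
		Rank  int     `json:"rank"`
		Alpha float32 `json:"alpha"`
	} `json:"lora_parameters"`
}

func main() {
	raw := `{"lora_parameters": {"rank": 8, "alpha": 16, "dropout": 0.0, "scale": 2.0}}`

	var p adapterParams
	if err := json.Unmarshal([]byte(raw), &p); err != nil {
		panic(err)
	}
	fmt.Println(p.LoraParameters.Rank, p.LoraParameters.Alpha) // 8 16
}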
@@ -1,58 +0,0 @@
package convert

import (
	"archive/zip"
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
)

type ZipReader struct {
	r *zip.Reader
	p string

	// limit is the maximum size of a file that can be read directly
	// from the zip archive. Files larger than this size will be extracted
	// to p and reopened from disk
	limit int64
}

func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
	return &ZipReader{r, p, limit}
}

func (z *ZipReader) Open(name string) (fs.File, error) {
	r, err := z.r.Open(name)
	if err != nil {
		return nil, err
	}
	defer r.Close()

	if fi, err := r.Stat(); err != nil {
		return nil, err
	} else if fi.Size() < z.limit {
		return r, nil
	}

	if !filepath.IsLocal(name) {
		return nil, zip.ErrInsecurePath
	}

	n := filepath.Join(z.p, name)
	if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
		w, err := os.Create(n)
		if err != nil {
			return nil, err
		}
		defer w.Close()

		if _, err := io.Copy(w, r); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, err
	}

	return os.Open(n)
}
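A usage sketch for this reader, assuming it runs inside the convert package; the archive name and the 32 MiB spill limit are hypothetical, not values used elsewhere:

package convert

import (
	"archive/zip"
	"bytes"
	"fmt"
	"os"
)

// exampleOpenFromZip shows the intended flow: entries under the limit are
// served straight from the archive, larger ones are extracted under p and
// reopened from disk.
func exampleOpenFromZip() error {
	b, err := os.ReadFile("model.zip") // hypothetical archive
	if err != nil {
		return err
	}

	zr, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		return err
	}

	fsys := NewZipReader(zr, os.TempDir(), 32*1024*1024)

	f, err := fsys.Open("config.json")
	if err != nil {
		return err
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		return err
	}

	fmt.Println(fi.Name(), fi.Size())
	return nil
}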
102 convert/gemma.go Normal file
@@ -0,0 +1,102 @@
package convert

import (
	"fmt"
	"io"
	"log/slog"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type GemmaModel struct {
	ModelData
}

func addOnes(data []float32, vectorSize int) ([]float32, error) {
	n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
	ones := tensor.Ones(tensor.Float32, vectorSize)

	n, err := n.Add(ones)
	if err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 0)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}

func (m *GemmaModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
	for _, l := range t {
		if strings.HasSuffix(l.Name, "norm.weight") {
			wt := l.WriterTo.(safetensorWriterTo)
			wt.repacker = m.Repack
			l.WriterTo = wt
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *GemmaModel) LoadVocab() error {
	v, err := LoadSentencePieceTokens(m.Path, m.Params)
	if err != nil {
		return err
	}
	m.Vocab = v
	return nil
}

func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
	return addOnes(data, int(shape[0]))
}

func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
	kv := llm.KV{
		"general.architecture":                   "gemma",
		"general.name":                           m.Name,
		"gemma.context_length":                   uint32(m.Params.ContextSize),
		"gemma.embedding_length":                 uint32(m.Params.HiddenSize),
		"gemma.block_count":                      uint32(m.Params.HiddenLayers),
		"gemma.feed_forward_length":              uint32(m.Params.IntermediateSize),
		"gemma.attention.head_count":             uint32(m.Params.AttentionHeads),
		"gemma.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
		"gemma.attention.key_length":             uint32(m.Params.HeadDimension),
		"gemma.attention.value_length":           uint32(m.Params.HeadDimension),
		"general.file_type":                      uint32(1),
		"tokenizer.ggml.model":                   "llama",

		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.scores":     m.Vocab.Scores,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(3),
		"tokenizer.ggml.add_bos_token":    true,
		"tokenizer.ggml.add_eos_token":    false,
	}

	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
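addOnes exists because Gemma checkpoints store norm weights offset by one relative to what the GGML graph multiplies by, so conversion folds the +1 in. The tensor-library round trip above is equivalent to this plain loop (addOnesPlain is a hypothetical name):

// a plain-Go equivalent of addOnes, without the tensor dependency
func addOnesPlain(data []float32) []float32 {
	out := make([]float32, len(data))
	for i, v := range data {
		out[i] = v + 1 // fold Gemma's (1 + w) norm offset into the stored weight
	}
	return out
}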
159 convert/llama.go Normal file
@@ -0,0 +1,159 @@
package convert

import (
	"cmp"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type LlamaModel struct {
	ModelData
}

func (m *LlamaModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
			switch m.Format.(type) {
			case *TorchFormat:
				wt := l.WriterTo.(torchWriterTo)
				wt.repacker = m.Repack
				l.WriterTo = wt
			case *SafetensorFormat:
				wt := l.WriterTo.(safetensorWriterTo)
				wt.repacker = m.Repack
				l.WriterTo = wt
			}
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *LlamaModel) LoadVocab() (err error) {
	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
	if errors.Is(err, os.ErrNotExist) {
		return nil
	} else if err != nil {
		return err
	}

	m.Vocab = &Vocab{}
	for _, t := range ts {
		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
		m.Vocab.Types = append(m.Vocab.Types, t.Type())
	}

	m.Vocab.Merges = merges
	m.Params.PreTokenizer = pre
	return nil
}

func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
	kv := llm.KV{
		"general.architecture":                   "llama",
		"general.name":                           m.Name,
		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
		"llama.context_length":                   uint32(m.Params.ContextSize),
		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
		"llama.block_count":                      uint32(m.Params.HiddenLayers),
		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
		"general.file_type":                      uint32(1),
		"tokenizer.ggml.model":                   "gpt2",

		"tokenizer.ggml.pre":        m.Params.PreTokenizer,
		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
	}

	if len(m.Vocab.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
	} else {
		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
	}

	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
	return llamaRepack(name, m.Params, data, shape)
}

func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
	var dims []int
	for _, dim := range shape {
		if dim != 0 {
			dims = append(dims, int(dim))
		}
	}

	var heads int
	switch {
	case strings.HasSuffix(name, "attn_q.weight"):
		heads = params.AttentionHeads
	case strings.HasSuffix(name, "attn_k.weight"):
		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
	default:
		return nil, fmt.Errorf("unknown tensor name: %s", name)
	}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
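llama.rope.dimension_count above is just the per-head dimension, hidden_size divided by attention heads; with hypothetical Llama-7B-style parameters:

package main

import "fmt"

func main() {
	// hypothetical Llama-7B-style parameters
	hiddenSize, attentionHeads := 4096, 32
	fmt.Println(hiddenSize / attentionHeads) // 128 -> llama.rope.dimension_count
}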
79 convert/mistral.go Normal file
@@ -0,0 +1,79 @@
package convert

import (
	"io"
	"regexp"

	"github.com/ollama/ollama/llm"
)

type MistralModel struct {
	ModelData
}

func (m *MistralModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
			wt := l.WriterTo.(safetensorWriterTo)
			wt.repacker = m.Repack
			l.WriterTo = wt
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *MistralModel) LoadVocab() error {
	v, err := LoadSentencePieceTokens(m.Path, m.Params)
	if err != nil {
		return err
	}
	m.Vocab = v
	return nil
}

func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
	kv := llm.KV{
		"general.architecture":                   "llama",
		"general.name":                           m.Name,
		"llama.context_length":                   uint32(m.Params.ContextSize),
		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
		"llama.block_count":                      uint32(m.Params.HiddenLayers),
		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
		"general.file_type":                      uint32(1),
		"tokenizer.ggml.model":                   "llama",

		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.scores":     m.Vocab.Scores,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.add_bos_token":    true,
		"tokenizer.ggml.add_eos_token":    false,
		"tokenizer.ggml.unknown_token_id": uint32(0),
	}

	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
	return llamaRepack(name, m.Params, data, shape)
}
87 convert/mixtral.go Normal file
@@ -0,0 +1,87 @@
package convert

import (
	"io"
	"regexp"

	"github.com/ollama/ollama/llm"
)

type MixtralModel struct {
	ModelData
}

func (m *MixtralModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
			wt := l.WriterTo.(safetensorWriterTo)
			wt.repacker = m.Repack
			l.WriterTo = wt
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *MixtralModel) LoadVocab() error {
	v, err := LoadSentencePieceTokens(m.Path, m.Params)
	if err != nil {
		return err
	}
	m.Vocab = v
	return nil
}

func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
	kv := llm.KV{
		"general.architecture":          "llama",
		"general.name":                  m.Name,
		"llama.block_count":             uint32(m.Params.HiddenLayers),
		"llama.context_length":          uint32(m.Params.ContextSize),
		"llama.embedding_length":        uint32(m.Params.HiddenSize),
		"llama.feed_forward_length":     uint32(m.Params.IntermediateSize),
		"llama.attention.head_count":    uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),

		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),

		"llama.expert_count":      uint32(m.Params.Experts),
		"llama.expert_used_count": uint32(m.Params.ExpertsUsed),

		"llama.vocab_size":           uint32(len(m.Vocab.Tokens)),
		"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),

		"general.file_type":    uint32(1),
		"tokenizer.ggml.model": "llama",

		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.scores":     m.Vocab.Scores,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
		"tokenizer.ggml.add_bos_token":    true,
		"tokenizer.ggml.add_eos_token":    false,
	}

	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}

func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
	return llamaRepack(name, m.Params, data, shape)
}
@@ -1,86 +0,0 @@
package convert

import (
	"errors"
	"io"
	"io/fs"
	"strings"
)

type Tensor interface {
	Name() string
	Shape() []uint64
	Kind() uint32
	SetRepacker(repacker)
	WriteTo(io.Writer) (int64, error)
}

type tensorBase struct {
	name  string
	shape []uint64
	repacker
}

func (t tensorBase) Name() string {
	return t.name
}

func (t tensorBase) Shape() []uint64 {
	return t.shape
}

const (
	tensorKindF32 uint32 = iota
	tensorKindF16
)

func (t tensorBase) Kind() uint32 {
	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
		t.name == "token_types.weight" {
		// these tensors are always F32
		return 0
	}

	switch len(t.shape) {
	case 0:
		panic("invalid tensor shape")
	case 1:
		return tensorKindF32
	default:
		return tensorKindF16
	}
}

func (t *tensorBase) SetRepacker(fn repacker) {
	t.repacker = fn
}

type repacker func(string, []float32, []uint64) ([]float32, error)

func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
	patterns := []struct {
		Pattern string
		Func    func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
	}{
		{"model-*-of-*.safetensors", parseSafetensors},
		{"model.safetensors", parseSafetensors},
		{"adapters.safetensors", parseSafetensors},
		{"adapter_model.safetensors", parseSafetensors},
		{"pytorch_model-*-of-*.bin", parseTorch},
		{"pytorch_model.bin", parseTorch},
		{"consolidated.*.pth", parseTorch},
	}

	for _, pattern := range patterns {
		matches, err := fs.Glob(fsys, pattern.Pattern)
		if err != nil {
			return nil, err
		}

		if len(matches) > 0 {
			return pattern.Func(fsys, replacer, matches...)
		}
	}

	return nil, errors.New("unknown tensor format")
}
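A standalone sketch of the Kind() policy above (the zero-shape panic is omitted; kindOf is a hypothetical name): 1-D tensors such as norms stay F32, matrices become F16, with a couple of forced-F32 exceptions.

package main

import (
	"fmt"
	"strings"
)

// kindOf mirrors tensorBase.Kind()
func kindOf(name string, shape []uint64) uint32 {
	if strings.HasSuffix(name, ".ffn_gate_inp.weight") || name == "token_types.weight" {
		return 0 // always F32
	}
	if len(shape) == 1 {
		return 0 // F32
	}
	return 1 // F16
}

func main() {
	fmt.Println(kindOf("blk.0.attn_norm.weight", []uint64{4096}))       // 0
	fmt.Println(kindOf("blk.0.attn_q.weight", []uint64{4096, 4096}))    // 1
	fmt.Println(kindOf("blk.3.ffn_gate_inp.weight", []uint64{8, 4096})) // 0
}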
@@ -1,163 +0,0 @@
package convert

import (
	"bytes"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"slices"
	"strings"

	"github.com/d4l3k/go-bfloat16"
	"github.com/x448/float16"
	"golang.org/x/exp/maps"
)

type safetensorMetadata struct {
	Type    string   `json:"dtype"`
	Shape   []uint64 `json:"shape"`
	Offsets []int64  `json:"data_offsets"`
}

func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
	var ts []Tensor
	for _, p := range ps {
		f, err := fsys.Open(p)
		if err != nil {
			return nil, err
		}
		defer f.Close()

		var n int64
		if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
			return nil, err
		}

		b := bytes.NewBuffer(make([]byte, 0, n))
		if _, err = io.CopyN(b, f, n); err != nil {
			return nil, err
		}

		var headers map[string]safetensorMetadata
		if err := json.NewDecoder(b).Decode(&headers); err != nil {
			return nil, err
		}

		keys := maps.Keys(headers)
		slices.Sort(keys)

		names := make(map[string]struct{}, len(keys))

		for _, key := range keys {
			if value := headers[key]; value.Type != "" {
				// bitsandbytes quantized models are unsupported
				if len(value.Shape) == 0 {
					return nil, errors.New("unsupported safetensors model")
				}
				ggufName := replacer.Replace(key)
				if _, ok := names[ggufName]; ok {
					return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName)
				}
				names[ggufName] = struct{}{}
				ts = append(ts, safetensor{
					fs:     fsys,
					path:   p,
					dtype:  value.Type,
					offset: safetensorsPad(n, value.Offsets[0]),
					size:   safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
					tensorBase: &tensorBase{
						name:  ggufName,
						shape: value.Shape,
					},
				})
			}
		}
	}

	return ts, nil
}

// safetensorsPad returns the absolute file offset for a tensor, given the
// JSON header length n and a data offset from the header
func safetensorsPad(n, offset int64) int64 {
	return 8 + n + offset
}

type safetensor struct {
	fs     fs.FS
	path   string
	dtype  string
	offset int64
	size   int64
	*tensorBase
}

func (st safetensor) WriteTo(w io.Writer) (int64, error) {
	f, err := st.fs.Open(st.path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	if seeker, ok := f.(io.Seeker); ok {
		if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
			return 0, err
		}
	} else {
		if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
			return 0, err
		}
	}

	var f32s []float32
	switch st.dtype {
	case "F32":
		f32s = make([]float32, st.size/4)
		if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
			return 0, err
		}
	case "F16":
		u16s := make([]uint16, st.size/2)
		if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
			return 0, err
		}

		f32s = make([]float32, len(u16s))
		for i := range u16s {
			f32s[i] = float16.Frombits(u16s[i]).Float32()
		}

	case "BF16":
		u8s := make([]uint8, st.size)
		if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
			return 0, err
		}

		f32s = bfloat16.DecodeFloat32(u8s)
	default:
		return 0, fmt.Errorf("unknown data type: %s", st.dtype)
	}

	if st.repacker != nil {
		f32s, err = st.repacker(st.Name(), f32s, st.Shape())
		if err != nil {
			return 0, err
		}
	}

	switch st.Kind() {
	case tensorKindF32:
		return 0, binary.Write(w, binary.LittleEndian, f32s)
	case tensorKindF16:
		f16s := make([]uint16, len(f32s))
		for i := range f32s {
			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
		}

		return 0, binary.Write(w, binary.LittleEndian, f16s)
	default:
		return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
	}
}
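safetensorsPad reflects the file layout: an 8-byte little-endian header size n, then n bytes of JSON, then raw tensor data, so a tensor whose header lists data_offsets [s0, s1] occupies absolute bytes [8+n+s0, 8+n+s1). A worked toy example with hypothetical numbers:

package main

import "fmt"

func pad(n, offset int64) int64 {
	return 8 + n + offset // 8-byte size prefix + JSON header + data offset
}

func main() {
	var n int64 = 1000            // hypothetical JSON header length
	s0, s1 := int64(0), int64(64) // hypothetical data_offsets for one tensor

	fmt.Println(pad(n, s0), pad(n, s1)) // 1008 1072: the tensor's absolute byte range
}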
@@ -1,48 +0,0 @@
package convert

import (
	"io"
	"io/fs"
	"strings"

	"github.com/nlpodyssey/gopickle/pytorch"
	"github.com/nlpodyssey/gopickle/types"
)

func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
	var ts []Tensor
	for _, p := range ps {
		pt, err := pytorch.Load(p)
		if err != nil {
			return nil, err
		}

		for _, k := range pt.(*types.Dict).Keys() {
			t := pt.(*types.Dict).MustGet(k)

			var shape []uint64
			for _, dim := range t.(*pytorch.Tensor).Size {
				shape = append(shape, uint64(dim))
			}

			ts = append(ts, torch{
				storage: t.(*pytorch.Tensor).Source,
				tensorBase: &tensorBase{
					name:  replacer.Replace(k.(string)),
					shape: shape,
				},
			})
		}
	}

	return ts, nil
}

type torch struct {
	storage pytorch.StorageInterface
	*tensorBase
}

func (pt torch) WriteTo(w io.Writer) (int64, error) {
	return 0, nil
}
309
convert/safetensors.go
Normal file
309
convert/safetensors.go
Normal file
@@ -0,0 +1,309 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/d4l3k/go-bfloat16"
|
||||||
|
"github.com/x448/float16"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type safetensorWriterTo struct {
|
||||||
|
t *llm.Tensor
|
||||||
|
|
||||||
|
params *Params
|
||||||
|
bo ByteOrder
|
||||||
|
|
||||||
|
filename string
|
||||||
|
dtype string
|
||||||
|
|
||||||
|
offset, size int64
|
||||||
|
repacker func(string, []float32, []uint64) ([]float32, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type safetensorMetadata struct {
|
||||||
|
Type string `json:"dtype"`
|
||||||
|
Shape []uint64 `json:"shape"`
|
||||||
|
Offsets []int64 `json:"data_offsets"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type SafetensorFormat struct{}
|
||||||
|
|
||||||
|
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
||||||
|
var tensors []llm.Tensor
|
||||||
|
matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var offset uint64
|
||||||
|
for _, f := range matches {
|
||||||
|
var t []llm.Tensor
|
||||||
|
var err error
|
||||||
|
t, offset, err = m.readTensors(f, offset, params)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tensors = append(tensors, t...)
|
||||||
|
}
|
||||||
|
return tensors, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
|
||||||
|
f, err := os.Open(fn)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||||
|
if _, err = io.CopyN(b, f, n); err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var headers map[string]safetensorMetadata
|
||||||
|
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var keys []string
|
||||||
|
for key := range headers {
|
||||||
|
if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.Sort(keys)
|
||||||
|
|
||||||
|
var tensors []llm.Tensor
|
||||||
|
for _, key := range keys {
|
||||||
|
value := headers[key]
|
||||||
|
|
||||||
|
var kind uint32
|
||||||
|
switch len(value.Shape) {
|
||||||
|
case 0:
|
||||||
|
// valuedata
|
||||||
|
continue
|
||||||
|
case 2:
|
||||||
|
kind = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
name, err := m.GetLayerName(key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
shape := make([]uint64, len(value.Shape))
|
||||||
|
copy(shape, value.Shape)
|
||||||
|
|
||||||
|
pad := func(s int64) int64 {
|
||||||
|
return 8 + n + s
|
||||||
|
}
|
||||||
|
|
||||||
|
t := llm.Tensor{
|
||||||
|
Name: name,
|
||||||
|
Kind: kind,
|
||||||
|
Offset: offset,
|
||||||
|
Shape: shape,
|
||||||
|
}
|
||||||
|
|
||||||
|
t.WriterTo = safetensorWriterTo{
|
||||||
|
t: &t,
|
||||||
|
params: params,
|
||||||
|
bo: params.ByteOrder,
|
||||||
|
filename: fn,
|
||||||
|
dtype: value.Type,
|
||||||
|
offset: pad(value.Offsets[0]),
|
||||||
|
size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += t.Size()
|
||||||
|
tensors = append(tensors, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tensors, offset, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
|
||||||
|
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var params Params
|
||||||
|
|
||||||
|
if err := json.NewDecoder(f).Decode(¶ms); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
params.ByteOrder = binary.LittleEndian
|
||||||
|
return ¶ms, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
|
||||||
|
directMap := map[string]string{
|
||||||
|
"model.embed_tokens.weight": "token_embd.weight",
|
||||||
|
"lm_head.weight": "output.weight",
|
||||||
|
"model.norm.weight": "output_norm.weight",
|
||||||
|
}
|
||||||
|
|
||||||
|
tMap := map[string]string{
|
||||||
|
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
||||||
|
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
||||||
|
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
||||||
|
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
||||||
|
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
||||||
|
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
||||||
|
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
||||||
|
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
||||||
|
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
||||||
|
"model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
|
||||||
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
|
||||||
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
|
||||||
|
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
|
||||||
|
}
|
||||||
|
|
||||||
|
v, ok := directMap[n]
|
||||||
|
if ok {
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// quick hack to rename the layers to gguf format
|
||||||
|
for k, v := range tMap {
|
||||||
|
re := regexp.MustCompile(k)
|
||||||
|
newName := re.ReplaceAllString(n, v)
|
||||||
|
if newName != n {
|
||||||
|
return newName, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
	f, err := os.Open(r.filename)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
		return 0, err
	}

	var f32s []float32
	switch r.dtype {
	case "F32":
		f32s = make([]float32, r.size/4)
		if err = binary.Read(f, r.bo, f32s); err != nil {
			return 0, err
		}
	case "F16":
		u16s := make([]uint16, r.size/2)
		if err = binary.Read(f, r.bo, u16s); err != nil {
			return 0, err
		}

		for _, b := range u16s {
			f32s = append(f32s, float16.Frombits(b).Float32())
		}

	case "BF16":
		u8s := make([]uint8, r.size)
		if err = binary.Read(f, r.bo, u8s); err != nil {
			return 0, err
		}

		f32s = bfloat16.DecodeFloat32(u8s)
	default:
		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
	}

	if r.repacker != nil {
		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
		if err != nil {
			return 0, err
		}
	}

	switch r.t.Kind {
	case 0:
		return 0, binary.Write(w, r.bo, f32s)
	case 1:
		f16s := make([]uint16, len(f32s))
		for i := range f32s {
			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
		}

		return 0, binary.Write(w, r.bo, f16s)
	default:
		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
	}
}
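Aside: WriteTo always widens tensor data to float32 before optionally re-narrowing it to F16 for storage. For BF16 the widening is a plain 16-bit shift, since bfloat16 is just the top half of an IEEE-754 float32. A standard-library-only sketch of that conversion (independent of the bfloat16 package imported above):

package main

import (
	"fmt"
	"math"
)

// bf16ToFloat32 widens a bfloat16 value (the high 16 bits of an
// IEEE-754 float32) back to float32 by shifting it into the upper half.
func bf16ToFloat32(b uint16) float32 {
	return math.Float32frombits(uint32(b) << 16)
}

func main() {
	fmt.Println(bf16ToFloat32(0x3F80)) // 1 (sign 0, exponent 127, mantissa 0)
	fmt.Println(bf16ToFloat32(0xC040)) // -3
}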
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
	switch len(params.Architectures) {
	case 0:
		return nil, fmt.Errorf("No architecture specified to convert")
	case 1:
		switch params.Architectures[0] {
		case "LlamaForCausalLM":
			return &LlamaModel{
				ModelData{
					Name:   name,
					Path:   dirPath,
					Params: params,
					Format: m,
				},
			}, nil
		case "MistralForCausalLM":
			return &MistralModel{
				ModelData{
					Name:   name,
					Path:   dirPath,
					Params: params,
					Format: m,
				},
			}, nil
		case "MixtralForCausalLM":
			return &MixtralModel{
				ModelData{
					Name:   name,
					Path:   dirPath,
					Params: params,
					Format: m,
				},
			}, nil
		case "GemmaForCausalLM":
			return &GemmaModel{
				ModelData{
					Name:   name,
					Path:   dirPath,
					Params: params,
					Format: m,
				},
			}, nil
		default:
			return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
		}
	}

	return nil, fmt.Errorf("Unknown error")
}
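Aside: GetModelArch is a two-level switch on the `architectures` field of the upstream config. The same dispatch can be written table-driven, which keeps each architecture registration to one line; a hypothetical sketch (the types and names here are illustrative, not the ones in this package):

package main

import "fmt"

type modelArch interface{ Convert() error }

type llamaModel struct{ name, path string }

func (m *llamaModel) Convert() error { return nil }

// arches maps a HuggingFace architecture string to a constructor.
var arches = map[string]func(name, path string) modelArch{
	"LlamaForCausalLM": func(name, path string) modelArch { return &llamaModel{name, path} },
}

func getModelArch(arch, name, path string) (modelArch, error) {
	ctor, ok := arches[arch]
	if !ok {
		return nil, fmt.Errorf("models based on '%s' are not yet supported", arch)
	}
	return ctor(name, path), nil
}

func main() {
	m, err := getModelArch("LlamaForCausalLM", "llama3", "/tmp/model")
	fmt.Println(m, err)
}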
313 convert/testdata/Meta-Llama-3-8B-Instruct.json vendored
@@ -1,313 +0,0 @@
{
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "8192",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "500000",
  "llama.vocab_size": "128256",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "tokenizer.ggml.model": "gpt2",
  "tokenizer.ggml.pre": "llama-bpe",
  "tokenizer.ggml.bos_token_id": "128000",
  "tokenizer.ggml.eos_token_id": "128009",
  "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
  "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
  "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
  "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
  "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
  "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
  "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
  "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
  "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
  "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
  "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
  "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
  "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
  "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
  "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
  "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
  "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
  "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
  "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
  "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
  "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
  "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
  "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
  "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
  "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
  "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
  "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
  "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
  "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
  "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
  "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
  "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
  "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
  "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
  "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
  "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
  "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
  "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
  "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
  "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
  "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
  "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
  "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
  "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
  "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
  "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
  "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
  "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
  "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
  "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
  "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
  "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
  "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
  "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
  "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
  "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
  "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
  "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
  "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
  "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
  "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
  "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
  "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
  "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
  "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
  "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
  "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
  "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
  "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
  "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
  "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
  "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
  "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
  "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
  "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
  "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
  "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
  "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
  "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
  "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
  "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
  "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
  "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
  "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
  "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
  "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
  "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
  "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
  "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
  "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
  "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
  "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
  "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
  "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
  "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
  "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
  "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
  "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
  "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
  "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
  "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
  "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
  "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
  "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
  "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
  "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
  "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
  "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
  "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
  "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
  "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
  "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
  "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
  "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
  "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
  "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
  "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
  "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
  "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
  "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
  "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
  "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
  "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
  "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
  "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
  "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
  "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
  "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
  "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
  "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
  "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
  "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
  "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
  "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
  "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
  "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
  "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
  "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
  "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
  "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
  "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
  "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
  "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
  "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
  "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
  "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
  "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
  "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
  "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
  "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
  "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
  "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
  "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
  "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
  "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
  "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
  "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
  "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
  "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
  "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
  "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
  "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
  "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
  "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
  "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
  "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
  "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
  "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
  "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
  "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
  "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
  "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
  "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
  "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
  "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
  "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
  "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
  "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
  "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
  "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
  "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
  "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
  "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
  "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
  "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
  "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
  "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
  "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
  "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
  "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
  "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
  "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
  "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
  "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
  "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
  "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
  "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
  "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
  "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
  "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
  "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
  "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
  "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
  "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
  "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
  "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
  "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
  "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
  "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
  "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
  "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
  "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
  "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
  "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
  "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
  "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
  "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
  "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
  "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
  "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
  "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
  "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
  "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
  "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
  "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
  "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
  "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
  "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
  "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
  "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
  "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
  "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
  "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
  "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
  "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
  "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
  "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
  "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
  "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
  "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
  "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
  "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
  "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
  "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
  "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
  "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
  "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
  "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
  "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
  "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
  "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
  "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
  "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
  "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
  "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
  "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
  "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
  "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
  "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
  "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
  "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
  "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
  "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
  "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
  "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
  "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
  "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
  "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
  "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
  "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
  "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
  "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
  "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
  "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
  "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
  "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
  "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
  "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
  "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
  "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
  "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
  "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
  "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
  "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
  "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
  "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
  "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
  "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
  "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
  "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
  "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
  "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
  "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
  "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
}
@@ -1,3 +0,0 @@
{
  "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
}
313 convert/testdata/Mistral-7B-Instruct-v0.2.json vendored
@@ -1,313 +0,0 @@
{
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.rope.dimension_count": "128",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
  "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
  "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
  "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
  "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
  "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
  "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
  "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
  "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
  "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
  "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
  "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
  "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
  "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
  "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
  "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
  "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
  "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
  "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
  "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
  "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
  "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
  "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
  "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
  "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
  "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
  "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
  "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
  "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
  "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
  "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
  "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
  "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
  "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
  "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
  "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
  "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
  "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
  "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
  "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
  "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
  "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
  "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
  "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
  "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
  "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
  "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
  "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
  "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
  "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
  "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
  "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
  "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
  "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
  "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
  "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
  "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
  "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
  "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
  "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
  "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
  "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
  "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
  "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
  "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
  "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
  "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
  "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
  "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
  "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
  "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
  "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
  "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
  "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
  "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
  "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
  "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
  "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
  "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
  "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
  "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
  "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
  "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
  "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
  "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
  "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
  "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
  "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
  "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
  "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
  "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
  "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
  "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
  "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
  "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
  "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
  "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
  "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
  "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
  "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
  "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
  "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
  "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
  "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
  "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
  "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
  "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
  "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
  "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
  "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
  "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
  "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
  "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
  "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
  "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
  "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
  "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
  "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
  "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
  "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
  "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
  "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
  "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
  "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
  "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
  "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
  "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
  "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
  "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
  "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
  "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
  "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
  "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
  "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
  "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
  "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
  "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
  "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
  "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
  "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
  "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
  "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
  "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
  "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
  "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
  "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
  "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
  "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
  "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
  "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
  "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
  "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
  "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
  "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
  "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
  "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
  "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
  "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
  "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
  "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
  "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
  "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
  "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
  "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
  "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
  "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
  "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
  "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
  "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
  "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
  "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
  "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
  "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
  "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
  "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
  "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
  "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
  "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
  "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
  "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
  "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
  "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
  "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
  "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
  "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
  "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
  "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
  "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
  "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
  "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
  "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
  "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
  "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
  "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
  "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
  "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
  "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
  "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
  "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
  "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
  "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
  "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
  "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
  "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
  "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
  "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
  "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
  "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
  "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
  "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
  "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
  "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
  "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
  "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
  "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
  "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
  "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
  "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
  "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
  "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
  "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
  "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
  "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
  "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
  "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
  "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
  "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
  "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
  "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
  "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
  "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
  "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
  "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
  "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
  "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
  "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
  "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
  "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
  "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
  "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
  "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
  "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
  "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
  "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
  "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
  "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
  "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
  "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
  "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
  "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
|
||||||
"blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
|
|
||||||
"blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
|
|
||||||
"blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
|
|
||||||
"blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
|
|
||||||
"blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
|
|
||||||
"blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
|
|
||||||
"blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
|
|
||||||
"blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
|
|
||||||
"blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
|
|
||||||
"blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
|
|
||||||
"blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
|
|
||||||
"blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
|
|
||||||
"blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
|
|
||||||
"blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
|
|
||||||
"blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
|
|
||||||
"blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
|
|
||||||
"blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
|
|
||||||
"blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
|
|
||||||
"blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
|
|
||||||
"blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
|
|
||||||
"blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
|
|
||||||
"blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
|
|
||||||
"blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
|
|
||||||
"blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
|
|
||||||
"blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
|
|
||||||
"blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
|
|
||||||
"blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
|
|
||||||
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
|
|
||||||
"blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
|
|
||||||
"blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
|
|
||||||
"blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
|
|
||||||
"blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
|
|
||||||
"blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
|
|
||||||
"blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
|
|
||||||
"blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
|
|
||||||
"blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
|
|
||||||
"blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
|
|
||||||
"blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
|
|
||||||
"blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
|
|
||||||
"output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
|
|
||||||
"output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
|
|
||||||
}
|
|
||||||
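The expected values in these testdata files are 64-character hex strings, which suggests they are sha256 digests of each converted tensor's data (and of the tokenizer metadata arrays). A minimal Go sketch of how such a digest could be produced for comparison against these fixtures; the tensorDigest helper and its payload are hypothetical illustrations, not code from the repository:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// tensorDigest returns the hex-encoded sha256 of a tensor's raw bytes,
// the same shape as the expected values stored in these testdata files.
func tensorDigest(data []byte) string {
	sum := sha256.Sum256(data)        // 32-byte digest
	return hex.EncodeToString(sum[:]) // 64 hex characters
}

func main() {
	// Hypothetical payload; a real test would hash the converted tensor data.
	fmt.Println(tensorDigest([]byte("example tensor bytes")))
}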
348 convert/testdata/Mixtral-8x7B-Instruct-v0.1.json vendored
@@ -1,348 +0,0 @@
{
  "general.architecture": "llama",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "llama.block_count": "32",
  "llama.context_length": "32768",
  "llama.embedding_length": "4096",
  "llama.feed_forward_length": "14336",
  "llama.rope.dimension_count": "128",
  "llama.rope.freq_base": "1e+06",
  "llama.attention.head_count": "32",
  "llama.attention.head_count_kv": "8",
  "llama.attention.layer_norm_rms_epsilon": "1e-05",
  "llama.expert_count": "8",
  "llama.expert_used_count": "2",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.add_bos_token": "true",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "2",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
  "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
  "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
  "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
  "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
  "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
  "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
  "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
  "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
  "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
  "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
  "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
  "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
  "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
  "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
  "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
  "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
  "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
  "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
  "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
  "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
  "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
  "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
  "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
  "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
  "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
  "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
  "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
  "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
  "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
  "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
  "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
  "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
  "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
  "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
  "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
  "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
  "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
  "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
  "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
  "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
  "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
  "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
  "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
  "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
  "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
  "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
  "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
  "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
  "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
  "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
  "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
  "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
  "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
  "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
  "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
  "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
  "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
  "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
  "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
  "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
  "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
  "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
  "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
  "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
  "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
  "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
  "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
  "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
  "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
  "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
  "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
  "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
  "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
  "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
  "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
  "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
  "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
  "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
  "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
  "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
  "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
  "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
  "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
  "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
  "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
  "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
  "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
  "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
  "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
  "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
  "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
  "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
  "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
  "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
  "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
  "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
  "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
  "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
  "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
  "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
  "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
  "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
  "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
  "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
  "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
  "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
  "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
  "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
  "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
  "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
  "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
  "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
  "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
  "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
  "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
  "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
  "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
  "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
  "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
  "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
  "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
  "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
  "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
  "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
  "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
  "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
  "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
  "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
  "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
  "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
  "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
  "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
  "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
  "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
  "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
  "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
  "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
  "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
  "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
  "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
  "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
  "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
  "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
  "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
  "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
  "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
  "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
  "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
  "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
  "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
  "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
  "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
  "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
  "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
  "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
  "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
  "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
  "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
  "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
  "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
  "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
  "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
  "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
  "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
  "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
  "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
  "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
  "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
  "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
  "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
  "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
  "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
  "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
  "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
  "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
  "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
  "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
  "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
  "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
  "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
  "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
  "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
  "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
  "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
  "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
  "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
  "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
  "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
  "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
  "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
  "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
  "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
  "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
  "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
  "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
  "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
  "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
  "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
  "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
  "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
  "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
  "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
  "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
  "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
  "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
  "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
  "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
  "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
  "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
  "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
  "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
  "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
  "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
  "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
  "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
  "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
  "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
  "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
  "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
  "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
  "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
  "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
  "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
  "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
  "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
  "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
  "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
  "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
  "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
  "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
  "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
  "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
  "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
  "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
  "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
  "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
  "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
  "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
  "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
  "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
  "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
  "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
  "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
  "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
  "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
  "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
  "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
  "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
  "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
  "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
  "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
  "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
  "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
  "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
  "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
  "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
  "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
  "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
  "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
  "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
  "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
  "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
  "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
  "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
  "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
  "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
  "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
  "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
  "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
  "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
  "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
  "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
  "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
  "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
  "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
  "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
  "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
  "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
  "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
  "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
  "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
  "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
  "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
  "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
  "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
  "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
  "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
  "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
  "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
  "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
  "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
  "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
  "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
  "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
  "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
  "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
  "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
  "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
  "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
  "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
  "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
  "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
  "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
  "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
  "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
  "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
  "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
  "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
  "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
  "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
  "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
  "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
  "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
  "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
  "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
  "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
  "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
  "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
  "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
  "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
  "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
  "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
  "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
}
225 convert/testdata/Phi-3-mini-128k-instruct.json vendored
@@ -1,225 +0,0 @@
{
  "general.architecture": "phi3",
  "general.file_type": "1",
  "general.quantization_version": "2",
  "phi3.block_count": "32",
  "phi3.context_length": "131072",
  "phi3.embedding_length": "3072",
  "phi3.feed_forward_length": "8192",
  "phi3.rope.scaling.original_context_length": "4096",
  "phi3.rope.dimension_count": "96",
  "phi3.rope.freq_base": "10000",
  "phi3.rope.scaling.attn_factor": "1.1902381",
  "phi3.attention.head_count": "32",
  "phi3.attention.head_count_kv": "32",
  "phi3.attention.layer_norm_rms_epsilon": "1e-05",
  "phi3.attention.sliding_window": "262144",
  "tokenizer.ggml.model": "llama",
  "tokenizer.ggml.pre": "default",
  "tokenizer.ggml.add_bos_token": "false",
  "tokenizer.ggml.add_eos_token": "false",
  "tokenizer.ggml.bos_token_id": "1",
  "tokenizer.ggml.eos_token_id": "32000",
  "tokenizer.ggml.unknown_token_id": "0",
  "tokenizer.ggml.padding_token_id": "32000",
  "tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
  "tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
  "tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
  "blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
  "blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
  "blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
  "blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
  "blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
  "blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
  "blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
  "blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
  "blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
  "blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
  "blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
  "blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
  "blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
  "blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
  "blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
  "blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
  "blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
  "blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
  "blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
  "blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
  "blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
  "blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
  "blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
  "blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
  "blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
  "blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
  "blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
  "blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
  "blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
  "blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
  "blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
  "blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
  "blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
  "blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
  "blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
  "blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
  "blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
  "blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
  "blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
  "blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
  "blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
  "blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
  "blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
  "blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
  "blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
  "blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
  "blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
  "blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
  "blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
  "blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
  "blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
  "blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
  "blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
  "blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
  "blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
  "blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
  "blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
  "blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
  "blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
  "blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
  "blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
  "blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
  "blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
  "blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
  "blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
  "blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
  "blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
  "blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
  "blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
  "blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
  "blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
  "blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
  "blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
  "blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
  "blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
  "blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
  "blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
  "blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
  "blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
  "blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
  "blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
  "blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
  "blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
  "blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
  "blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
  "blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
  "blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
  "blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
  "blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
  "blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
  "blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
  "blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
  "blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
  "blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
  "blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
  "blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
  "blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
  "blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
  "blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
  "blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
  "blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
  "blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
  "blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
  "blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
  "blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
  "blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
  "blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
  "blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
  "blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
  "blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
  "blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
  "blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
  "blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
  "blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
  "blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
  "blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
  "blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
  "blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
  "blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
  "blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
  "blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
  "blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
  "blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
  "blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
  "blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
  "blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
  "blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
  "blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
  "blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
  "blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
  "blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
  "blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
  "blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
  "blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
  "blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
  "blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
  "blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
  "blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
  "blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
  "blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
  "blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
  "blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
  "blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
  "blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
  "blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
  "blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
  "blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
|
|
||||||
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
|
|
||||||
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
|
|
||||||
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
|
|
||||||
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
|
|
||||||
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
|
|
||||||
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
|
|
||||||
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
|
|
||||||
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
|
|
||||||
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
|
|
||||||
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
|
|
||||||
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
|
|
||||||
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
|
|
||||||
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
|
|
||||||
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
|
|
||||||
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
|
|
||||||
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
|
|
||||||
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
|
|
||||||
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
|
|
||||||
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
|
|
||||||
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
|
|
||||||
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
|
|
||||||
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
|
|
||||||
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
|
|
||||||
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
|
|
||||||
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
|
|
||||||
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
|
|
||||||
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
|
|
||||||
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
|
|
||||||
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
|
|
||||||
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
|
|
||||||
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
|
|
||||||
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
|
|
||||||
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
|
|
||||||
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
|
|
||||||
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
|
|
||||||
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
|
|
||||||
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
|
|
||||||
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
|
|
||||||
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
|
|
||||||
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
|
|
||||||
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
|
|
||||||
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
|
|
||||||
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
|
|
||||||
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
|
|
||||||
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
|
|
||||||
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
|
|
||||||
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
|
|
||||||
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
|
|
||||||
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
|
|
||||||
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
|
|
||||||
}
|
|
||||||
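Each of these testdata manifests maps GGUF metadata keys to their literal values and tensor names to SHA-256 digests of the converted tensor data, so the conversion tests can validate a freshly converted model against known-good output without checking the weights themselves into the repository. A minimal sketch of that kind of digest check in Go follows; the manifest path, the `tensors` map, and its contents are illustrative stand-ins, not the repository's actual test helpers:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
)

func main() {
	// Hypothetical converted tensor payloads keyed by GGUF tensor name;
	// in a real test these bytes would come from the converter output.
	tensors := map[string][]byte{
		"token_embd.weight": {0x01, 0x02, 0x03},
	}

	raw, err := os.ReadFile("convert/testdata/gemma-2b-it.json")
	if err != nil {
		panic(err)
	}
	var manifest map[string]string
	if err := json.Unmarshal(raw, &manifest); err != nil {
		panic(err)
	}

	for name, want := range manifest {
		payload, ok := tensors[name]
		if !ok {
			// Metadata keys such as "general.architecture" hold literal
			// values rather than digests, and there is no payload to
			// hash; skip anything that is not a converted tensor.
			continue
		}
		sum := sha256.Sum256(payload)
		if got := hex.EncodeToString(sum[:]); got != want {
			fmt.Printf("%s: digest %s, want %s\n", name, got, want)
		}
	}
}
```

Pinning digests rather than raw tensors keeps the fixtures small while still catching any byte-level drift in the converter.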
124 convert/testdata/all-MiniLM-L6-v2.json vendored
@@ -1,124 +0,0 @@
{
"general.architecture": "bert",
"general.file_type": "1",
"general.quantization_version": "2",
"bert.attention.causal": "false",
"bert.attention.head_count": "12",
"bert.attention.layer_norm_epsilon": "1e-12",
"bert.block_count": "6",
"bert.context_length": "512",
"bert.embedding_length": "384",
"bert.feed_forward_length": "1536",
"bert.pooling_type": "1",
"tokenizer.ggml.model": "bert",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "100",
"tokenizer.ggml.cls_token_id": "101",
"tokenizer.ggml.seperator_token_id": "102",
"tokenizer.ggml.mask_token_id": "103",
"tokenizer.ggml.token_type_count": "2",
"tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
"tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
"tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
"token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
"position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
"token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
"token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
"token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
"blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
"blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
"blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
"blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
"blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
"blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
"blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
"blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
"blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
"blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
"blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
"blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
"blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
"blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
"blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
"blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
"blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
"blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
"blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
"blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
"blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
"blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
"blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
"blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
"blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
"blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
"blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
"blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
"blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
"blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
"blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
"blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
"blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
"blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
"blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
"blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
"blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
"blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
"blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
"blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
"blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
"blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
"blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
"blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
"blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
"blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
"blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
"blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
"blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
"blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
"blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
"blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
"blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
"blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
"blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
"blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
"blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
"blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
"blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
"blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
"blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
"blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
"blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
"blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
"blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
"blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
"blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
"blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
"blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
"blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
"blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
"blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
"blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
"blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
"blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
"blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
"blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
"blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
"blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
"blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
"blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
"blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
"blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
"blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
"blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
"blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
"blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
"blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
"blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
"blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
"blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
"blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
"blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
}
312 convert/testdata/gemma-2-2b-it.json vendored
@@ -1,312 +0,0 @@
{
"general.architecture": "gemma2",
"general.file_type": "1",
"general.quantization_version": "2",
"gemma2.block_count": "26",
"gemma2.context_length": "8192",
"gemma2.embedding_length": "2304",
"gemma2.feed_forward_length": "9216",
"gemma2.attention.head_count": "8",
"gemma2.attention.head_count_kv": "4",
"gemma2.attention.key_length": "256",
"gemma2.attention.value_length": "256",
"gemma2.attention.layer_norm_rms_epsilon": "1e-06",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "2",
"tokenizer.ggml.eos_token_id": "1",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "3",
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
"tokenizer.ggml.token_type": "8d40143b3477df77beea4139420335ede458bf5e14102f01b0170197b55da8d8",
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
"token_embd.weight": "64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05",
"blk.0.attn_k.weight": "d8b4437c5edb3cddf6af9987038e1bb2b191c4f0fce0e160d2abace717f5d5d7",
"blk.0.attn_norm.weight": "1eb73e3f7aa8e502f6ca31cd19efbb8e4fd9a89692e13e48ac8205545a7fa7e8",
"blk.0.attn_output.weight": "39e7b78e57d356a22dd89ce1c4d7163b970712ba756545e1703f97866cd2192e",
"blk.0.attn_q.weight": "795058e23b6109febd9d55c89e1eebe6af0714ec8c56fd86a160876a6135ffe8",
"blk.0.attn_v.weight": "0cd6e583d1887c020472e961bbb113fe5a0d23ae2f1c2c876fc366cdb7692b52",
"blk.0.ffn_down.weight": "51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b",
"blk.0.ffn_gate.weight": "9811a29b8ad48432925897ab21dfcb13c5cbd372aeccbbefca9b7866883b4ce3",
"blk.0.ffn_norm.weight": "92cbf4652ef503c1de5b10f2be00b3fcf00100980cb3baa8f3013a8d8bf3d851",
"blk.0.ffn_up.weight": "af87de21746879483ed1b374cdd76b19ba11ca2b6dbb1beba98efdf3be3e8077",
"blk.0.post_attention_norm.weight": "32e135f1f258ffe407018899e39af1725d59d66d60022b9a21575ba160e0357a",
"blk.0.post_ffw_norm.weight": "ba286f5ac11b07fbc986173708c66f1920427be5a6d108af38fa0a837c1c8eb6",
"blk.1.attn_k.weight": "51584435552051f7fade76beca582b3f7190cf7fc07adcf527c2774d4b1c3901",
"blk.1.attn_norm.weight": "6833104c7fbf35a7e799ae56c262b97fffa14789642aee14381b25acd21ed80a",
"blk.1.attn_output.weight": "14c39481369087bf292ac9a3ab2ef166f9fe376a9f90c246653213ef264febdc",
"blk.1.attn_q.weight": "443f64ae2229f857c69d6bebb7800b685786cb77884c3ae19d4286aeed081325",
"blk.1.attn_v.weight": "0df482de2038f1e4c8a7733ac0ddb69ad90759dab5968b942af0155588de4c4a",
"blk.1.ffn_down.weight": "66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf",
"blk.1.ffn_gate.weight": "a7151bff00a545fa18b2c92dcd2a14572ccf9beb957a6c494f1374e8ebe174c9",
"blk.1.ffn_norm.weight": "e197d71ea11b5276bc0167d2663b88089b3ff42b47ba91e85f6c5d95f6306435",
"blk.1.ffn_up.weight": "57c182e0b14cccd1350d388f0c616991702e74281db54637451b70f4ccc24f9b",
"blk.1.post_attention_norm.weight": "3c56f837168d784c2d8bac247c130bdca6610c095c8da4558c536ccad7605609",
"blk.1.post_ffw_norm.weight": "d2a51d320fd01069dd7ccaa7082f16a7faeb671885607d7900b10a89c354d0fa",
"blk.2.attn_k.weight": "bc103c818192de7ce36caaf89dc117be4df13fb902e0bd9a23c64edace5df9b6",
"blk.2.attn_norm.weight": "0f2503aa126083a5d6ac72481be1ef66c6014705b573682b35bd864e4749a3d5",
"blk.2.attn_output.weight": "05fcd4a1226e482f91803a266f72caca887a93e63c2d2ba5611ab3c68d38743a",
"blk.2.attn_q.weight": "6a10b5c2fd423d1e4c4fd60fa8c154a0159b6b2501ea79cae2ef19f45a674e5e",
"blk.2.attn_v.weight": "3cf891945a1f8ae7cc908a5c6b729ff5b70f4436c5ffdbf245cc0ed4cc19cd1b",
"blk.2.ffn_down.weight": "ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e",
"blk.2.ffn_gate.weight": "3a3518729f1b8b64a82b8792f33987db5418fdb094be0263c68f146a5c38de54",
"blk.2.ffn_norm.weight": "754ede678b725de41a34b82f0edf7688b5c065be7c0d46df6f7ad9430d986884",
"blk.2.ffn_up.weight": "ffdcb88439f5828ffbd9fc844b03ff91637b790b9838097258cc3ae75935720c",
"blk.2.post_attention_norm.weight": "4b3f53b7ba26e8c36b2dfda3b7e5fc4b1065257cefdea235fc7df9af130ac2fd",
"blk.2.post_ffw_norm.weight": "e550369e26b8485e2b54ad34b34bc98af5494287dcc513c2c39cf1eaa5b89d07",
"blk.3.attn_k.weight": "89f24ea450e37d9e95757651a83205c085d81b354ee9489dd6310a391d8409f3",
"blk.3.attn_norm.weight": "24e2ea662b7cb822b4ca5cd61bc17f2709f406d990ec3b4a0dac1cc112db45cf",
"blk.3.attn_output.weight": "ac4dad69473c6e3fac56669212cadd8c34ecc5973d945972e974d94805334967",
"blk.3.attn_q.weight": "b6a9c9a7d4722b9096631c65de62228dfddca6e26edfe6af7fce01e116ef0f4c",
"blk.3.attn_v.weight": "f272a960a40093942309bc342a379984cbacec2d7bc64428db3f64e6b1887ed4",
"blk.3.ffn_down.weight": "c0188ba50d8228805982029c277fc0e87aa57473b8363037c648f6d006ff828a",
"blk.3.ffn_gate.weight": "a04aec1561ee6c0fbb18c3db49dc62fb533619cf697fd548cbf2279761aaec3b",
"blk.3.ffn_norm.weight": "bc053837d44087ec05eb5d9458357b2a5be787789b19cdbbdc694b57697f99a6",
"blk.3.ffn_up.weight": "b3ce8b274f20796d3b1a7c08ba27a919066f9de89a782faa544c4a8d6bea1382",
"blk.3.post_attention_norm.weight": "9c922dee7a7df5667289e2788e60170238239cee2dfdbbd9e435763f9f416718",
"blk.3.post_ffw_norm.weight": "b682544ac953ad2e0b49027ed8916f2e9d1aba5d1587bb4127ac703570c7a03a",
"blk.4.attn_k.weight": "143b0cbb4b787b95c2b6212374410e32173ccef2adb914908a2f89a7916de512",
"blk.4.attn_norm.weight": "5668f60491b780273745192662d02c9a92a4f692b29d16aa0bbc7413fec4f85b",
"blk.4.attn_output.weight": "b9f2bdb68be1e0cf66dd19f8fa2afb105910ad2ef394864cb32cea8f8944e0d5",
"blk.4.attn_q.weight": "ddcf1343dafbc2dfcd0b8741225af22fe4b54b2becce29240bd01c34265d126c",
"blk.4.attn_v.weight": "6dc7074366e7ed52d9f48c594dcc85bef738e096276cb99d28228c89eecc5b9c",
"blk.4.ffn_down.weight": "30334ffc59ce343cf2a1b973174acb7722823463adc07e19a99bd0f404bc9906",
"blk.4.ffn_gate.weight": "890f7c8af208d63b28db52c4b8c16c2288a382d87ff5a6a6d6b0a5b3bf27e6cd",
"blk.4.ffn_norm.weight": "ff0316cc7847221eb86a90c1ab441d4ee61553d410c66414a7755021b3b12448",
"blk.4.ffn_up.weight": "6af97d113f91564c636734f215e25ee602d48eb045458f300b3ec7582be0f41d",
"blk.4.post_attention_norm.weight": "69438f231e105e68216b078bdeb35a7cdc8b12c4e2845e18ecf4c8d361d6a321",
"blk.4.post_ffw_norm.weight": "0fd535da78bcf2b32c95b05b2b83dc49817393765be90d8cc1ed3d56f47b68ec",
"blk.5.attn_k.weight": "0166eb3c6d20dcf3d3c169e94caa8dee057535bb525e29f698fb6f8844f18a6c",
"blk.5.attn_norm.weight": "a7808f27f164023d5cde2be00fc23cac6c71aa0ddeb60bc23e12411b80087672",
"blk.5.attn_output.weight": "8b65b2027a0842b68c5308f91d6a31de9599d794157d77df8418b19f9e0d9334",
"blk.5.attn_q.weight": "966bc626ef2c2394d872087a41c126bb1b67d1d5f6de920204ef5e5b16c34003",
"blk.5.attn_v.weight": "9a362aef3f4437fbf0ef6e1ba785f3329c3db2960f93fe36547d2795e9c254ea",
"blk.5.ffn_down.weight": "63e53541d34197720c06f297aa8142ac6b6eec002c7987b296f26e8b1400f931",
"blk.5.ffn_gate.weight": "d9591fdd32f783e0fc26e20d5d587ee8971ac8ae2e4c818c6eac1c125c7c7f37",
"blk.5.ffn_norm.weight": "677334cc60ecce3a7f4ab3acda15d359353d7358872f614ad8914e3780e9fc6e",
"blk.5.ffn_up.weight": "a63764110e1c655ffbd55af0669b2dfe4cc29d0e198d33a8e5426461b08a85f7",
"blk.5.post_attention_norm.weight": "c55499f859b2c0a7f5cabceaae47309a5ad38bc29d0f4a8db81f1357023162a9",
"blk.5.post_ffw_norm.weight": "82752754665f842418f3e302cb5f43d1e0504dcd124c4b8ddb77018b2c793837",
"blk.6.attn_k.weight": "e20a5f0d6c807273c8d491439566b428497ac02097cf0aa55e33748c28e14be6",
"blk.6.attn_norm.weight": "2c6ba42fd3c73d72073ced03a32dd28d70a89ed9bbbc8fea1ba03a7ade951e6c",
"blk.6.attn_output.weight": "4de7c5c2f4a133a266e17ed8c14c52959466b54cc7ab9e19f789a33b4850f284",
"blk.6.attn_q.weight": "56462d921800e6b8cd2213fef04c4ff16d728905cb2f4c58e966d0a053a3b0ae",
"blk.6.attn_v.weight": "b758dcbff769d6240c2245ede1dbc62c4170a67c77458e866312589220fe29af",
"blk.6.ffn_down.weight": "582247fb3c2bf687cbe9413fe18d18ad47bef4b65df7d78905e10335c6134764",
"blk.6.ffn_gate.weight": "3035444d5286aefb7a6d04e55bc27e1fac7cf895cd5be02319a431b8e047b4ae",
"blk.6.ffn_norm.weight": "e582d24c66e01b96faa20ce6adfda3d8583b11e809bff89969927398175e369a",
"blk.6.ffn_up.weight": "6f4b7bbfedeacf61a4866ae0616c4ba6c9e856662e8f00ae6aaec7f52c53e7b4",
"blk.6.post_attention_norm.weight": "8fe51b50bd677d21586aecab0b565c4bf9fa68ad50bfe366f45e8fea3c657ca8",
"blk.6.post_ffw_norm.weight": "81ba3cb4c2bf5c546b86855b7a885d3fafededc67eb3a35cd3598b03c9e26e65",
"blk.7.attn_k.weight": "2e044179cdcae0946708c86bfea7aa0391e1f7e2a09b33fca035d384cc3ca758",
"blk.7.attn_norm.weight": "94b48c546b046803c60e75a3acb17a356b710735989938021b565f68df9b4985",
"blk.7.attn_output.weight": "65709b4ad7a581f4d75793d39d4032a359f6bcc0c3835205242a0b99e5b66824",
"blk.7.attn_q.weight": "8ded993c95d1f7caf201ceb6fa035cd6ed6d351b50b999fa9355dfee9486cb5b",
"blk.7.attn_v.weight": "c92d5e2d2d48397542bc03bea25bf39154075e66c5bb1ead85188505aa04ae91",
"blk.7.ffn_down.weight": "e8ba8fb57208805ef1dc23cd7c86e9a2d1fb7c52c3940d292cd5bb2eb24b3fac",
"blk.7.ffn_gate.weight": "f0f06d6a2e06c5ac252083bc61d05c814e6289d3f4e4a87d2f06918254c02c36",
"blk.7.ffn_norm.weight": "ebf8ef775f72624148e09d68a4332187a7a5020c521fe0623da1cd3485ad33e0",
"blk.7.ffn_up.weight": "a554adc4fc7122c247c77670e169916ba1794c787b5be30a2b36705138f1f746",
"blk.7.post_attention_norm.weight": "3aa6bc21d85c3a0c12b964e82b12feaedfdd13130c3cd2229228e24e0967ebdf",
"blk.7.post_ffw_norm.weight": "508bc7b19ee8ff08f0007c890133a462fc57c7e72b16ee8f6dd64def264ef876",
"blk.8.attn_k.weight": "363c8e74056642fe9e7c2f3f9769d57319cd3fa0a6022810189ab8d894322885",
"blk.8.attn_norm.weight": "685b49a1f1acb169f4df0bdd8e3de6943f3033cebad14b898a72000595610d92",
"blk.8.attn_output.weight": "7bde571e4efef1c6a6143f0526721dfb59e0a0ea0e1a3616a322b2eb937efa48",
"blk.8.attn_q.weight": "fc993dbc1074c28a0e1d85e5ab2f4ea6a9c6c1affe7ee56027000a275daed9b6",
"blk.8.attn_v.weight": "281e8791d3aef9b3864f1cb054da0ae0c2fef4ce0a58b1bad8bc136b2fa0f62b",
"blk.8.ffn_down.weight": "b1164a2578a7f87ed99c2bbc76c5dfbbbc6a1a803605391acc3f320fc989ffd7",
"blk.8.ffn_gate.weight": "6b39a3b3aaaa79aee61416b54d62160b9258042650e61c6b47bc77c2dd17daf3",
"blk.8.ffn_norm.weight": "17ea1362c72da27f12bc936500492035bdef3fd8f940cb12b57f37d42ba8ecb1",
"blk.8.ffn_up.weight": "bc3a7c47afc440d2bdf8fbe9ddf2c9220467472c60c8b4ded8c0f181470ec96c",
"blk.8.post_attention_norm.weight": "5c506204e00411ef9c8b4134d40eedcc19fffe68dd0af7d7cc49dcabf2dfac7e",
"blk.8.post_ffw_norm.weight": "002faec235c3678864e2901eed275ce4e9dc229164a91c9cd4c965142ba62305",
"blk.9.attn_k.weight": "0bab39d8c237f1b6d0010db40467142625a9e6f2e0e4c49a56c12b41e4e0b1fa",
"blk.9.attn_norm.weight": "de5f38e873b17f07aa7598831b89cc1cae2c9bc3eb2e042ee9af059d2563e84e",
"blk.9.attn_output.weight": "8a8184702c25a62df9ff309c0c7badc8587208523b2be3e8fa90ce7080573e6f",
"blk.9.attn_q.weight": "7c961b2431b09ddf95377acd07201cb91bf13d9cd3ae0f2c25c7d6a0358d9f50",
"blk.9.attn_v.weight": "e22d240cb4743067033e659cbf210ebe2ebbab3e1dea6ccbe5eaa982382ca038",
"blk.9.ffn_down.weight": "a426f81210f03d6ad53277416e1fdcdf37d8065e4817613edaf6c67a343426be",
"blk.9.ffn_gate.weight": "a82eba825cb77b8e64f85ff99ede2fc71bc9b01751eeb17e9e6c246ee12ea62e",
"blk.9.ffn_norm.weight": "1a97f9b1302a3a326d534c5c3fed2db6db0ae45fd0edd381a3e4fc1c75d81030",
"blk.9.ffn_up.weight": "5f20bac2bbf03bb42adb92fbf99561651e1edda57e0b61935ac7f6c08c0ed7cb",
"blk.9.post_attention_norm.weight": "9f9866d13988e1946b1e1c80d9374a92a6e3be33748f8eaed3e126d1e1a4c796",
"blk.9.post_ffw_norm.weight": "a6896dbf698db4dbbe5dbf12417d4fd80e9cad0c539c858892ec0aa5b046bb58",
"blk.10.attn_k.weight": "ca8446e5d21ecd4e6a70dca8d321be480be4fba94d70cba065205436feb44270",
"blk.10.attn_norm.weight": "4f41fe290e8f21f63b82151b6cce94bf7318d121468816b0c58af0ff7c1658ab",
"blk.10.attn_output.weight": "c626d2e9681c5c941bbde43dddfae1a8d4986bf2be4470857bc8e8bd7f869044",
"blk.10.attn_q.weight": "1e61b210a13a429977325cf15d781ab77d604cfa862f4270329cbd94237d5835",
"blk.10.attn_v.weight": "8ff8d3e3f058ec3b35ada1057f2ed59c06494d0e0be6a8dc3ff9edf9f0e1a115",
"blk.10.ffn_down.weight": "bcebc04219f8081a5f483e58103c0ddbbbc631a0a54fd6dd9d55778e041f70ee",
"blk.10.ffn_gate.weight": "7a23a1e620ef871384ddf9611ccdcfb893fbf013cc203ac8e72f745420f1eea0",
"blk.10.ffn_norm.weight": "e3a375e43c349a1c6c66c22328e513cc1af3137fe839e43dc8e9be2f65914fd7",
"blk.10.ffn_up.weight": "5d182e7c94369194fca5f19cbbe668a999911e57f3d363bc7fb6088428700cb9",
"blk.10.post_attention_norm.weight": "b841c6308296e8984f3c5f549c6e3a242f4b3e19141e1f54cc08de9c46759c09",
"blk.10.post_ffw_norm.weight": "9d66fa05b5c940208f634f5053d809094c99a2a10a1d1e8847c8281fbd99fb49",
"blk.11.attn_k.weight": "14adf24ebb2bb17b336ca81cec3e690fd854782f4440ca6c66cc1d7e7bf1c850",
"blk.11.attn_norm.weight": "2d2213f311f50414702b5b34f22aafb9d9a0b6787243e7578562583dc40ad195",
"blk.11.attn_output.weight": "de1f14cc2a7fff00cf11b229f0576999205f17b9536e97abc9d6de3cc79a7884",
"blk.11.attn_q.weight": "2bcc5c147524003109ece0be08b89ac8b25baa71416ffa76573c6c052ffc6eea",
"blk.11.attn_v.weight": "2e6ab8573070c22dc1e0d7aebe4d52123226dacf7822dcce06fadbb38fb036a4",
"blk.11.ffn_down.weight": "1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3",
"blk.11.ffn_gate.weight": "e756e8081bd0a16aea4a9ef5076ad102113524f7a3d50a3a77aaa7f7938b63e8",
"blk.11.ffn_norm.weight": "6913887267be227cf9d1991a3dd8db2e7e74bb9b5fbdfcb9ac954fd7d7b95b3b",
"blk.11.ffn_up.weight": "619a3ac0609ebdf42c3fb2b6e4b1db48df79e6dd8418d7ab8f1bbff13d8a6a50",
"blk.11.post_attention_norm.weight": "e4b4ba92cef7b6a78407e8ab1b0307d47dac6c3df7b6817e28038317ff662d7e",
"blk.11.post_ffw_norm.weight": "40aceeec58cb855f0c158c9cc217168fcd5d0e735567d587217b1d78df17bc5f",
"blk.12.attn_k.weight": "c54c5a4d4892522022d1aa2204cfc624f0b4042caa536e678967316293fe5cb1",
"blk.12.attn_norm.weight": "7cd2ef58298569ffdf244d9b390f3917245276c8206e5780af5f96d8c0bbb446",
"blk.12.attn_output.weight": "85495ef9cc8b3deb21f741bde463ff6493acae2be51f02ecdeef952cbdec3375",
"blk.12.attn_q.weight": "d19383f83fd119bfb8c0280c9515705c11d8e7d502019fcf8f49efeef0d106d0",
"blk.12.attn_v.weight": "869ac669ba49531d9128892a0e27cef15de508ff40cdf80cc1681dde50d09204",
"blk.12.ffn_down.weight": "578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c",
"blk.12.ffn_gate.weight": "e29a0186bc6c4a0720246306e922d3a83f777dadcf4ac80bad468287031cc8b5",
"blk.12.ffn_norm.weight": "e1ee95c6584b5cb57fcf1db8ce2bcc03aff91eb389238c094a61c00dde93d1f2",
"blk.12.ffn_up.weight": "2a826f06d7cdfb3edc6ae250ff44363ef77a2a9cdf96313e23a331b99ebfa17d",
"blk.12.post_attention_norm.weight": "4bafc7699b948d5cbc0d3e09b418b06c6abc4651a61ada9609d9a2f21c7e5607",
"blk.12.post_ffw_norm.weight": "bbb8c34a7176bb1a49f9fe2bacca0bd26b673d52c0835b2e90fa11f2962f077f",
"blk.13.attn_k.weight": "ffeefccfe8255d1b694382012ff4134eee5fec9d9491c8d0ff0a13832d1a37e8",
"blk.13.attn_norm.weight": "35713726529e3887c4135a88e86e8a4d7270ba5b9f2d1ab462622fbf40a7cdce",
"blk.13.attn_output.weight": "0d60b7c5cd71190a9ef4b873b0f516be15447c32d83914db2794b14592b0b460",
"blk.13.attn_q.weight": "8296069e65bef794cefc61257fc65789b3cb22955e30f3df129205e5041b2222",
"blk.13.attn_v.weight": "ca0f4ab9d16a748fc643a5c0c7a19826a811bf2a4e7316a8c935d4bf0ce8abc6",
"blk.13.ffn_down.weight": "d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb",
"blk.13.ffn_gate.weight": "8108e517a82e08a3aefbbd267bfa50a1668f92a76273280ce8a6bc1f6dd61521",
"blk.13.ffn_norm.weight": "5fcb6132d2134bf1f835b904a99820fa501dbc57d2224129f7098bf3cabc1d36",
"blk.13.ffn_up.weight": "6d744b7cd390a3cae3aa350dd379b81246acd056a2259996b6aaadece8465ccc",
"blk.13.post_attention_norm.weight": "e08b14698912509790e9575b8676971fbb0a4d82d719367e3756c0d0c4ab8cc0",
"blk.13.post_ffw_norm.weight": "2b196e4450fc5f1e7367b2cf7fe33a15fe919fbcdd861d11002346f16e980535",
"blk.14.attn_k.weight": "120e5f48d7268dfd9ab5f4bc9cc57a7cec63ea9635f56b80d435eb22936e9483",
"blk.14.attn_norm.weight": "146367bcce4db72cc894419a2e0145a6f533507dd68e4739c10ee480308c401f",
"blk.14.attn_output.weight": "720fa0165e756876c5cb6ad9e2780dd910390933f3f8849e5add5da04266650b",
"blk.14.attn_q.weight": "f5183466f56219ca1aca52d8b82c2d966a4198fea40fdd6b39f4d8b06ca2a6dd",
"blk.14.attn_v.weight": "24f8ea3d5512cd37c43c8329cb0da0c90d1895aef763ac2dcee3fe5157ec50a2",
"blk.14.ffn_down.weight": "e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67",
"blk.14.ffn_gate.weight": "6d0d6a74bfe9692e8f8eedff0fc34fc4fa1c8687794f35f2e2b033ab2d7510b8",
"blk.14.ffn_norm.weight": "f7036c1a9a71e046c9d2af16e9218fda5dbb0f7241ab44747abed1f0f9d602ca",
"blk.14.ffn_up.weight": "7d69ea1424007ffc9c12247dd0308c616e93ac02a59ec341cfa48f92d6ce3b10",
"blk.14.post_attention_norm.weight": "65b9712834d9445d4236bec362f3fb795c20d60c541b3dc6dbb7914d9b493e41",
"blk.14.post_ffw_norm.weight": "9c6a8da2e4e437d5cfdf3b9097e9f8b64bf07946a048badec20f4d374613f38f",
"blk.15.attn_k.weight": "864bc618303a0e4ee67fb1d5e751de61e936cd51e96669dd86f8cd08f2305045",
"blk.15.attn_norm.weight": "f9f4187da6eeadc2fc5921d8fe669741697d16c13d71e4aaeb73b82f50dc577e",
"blk.15.attn_output.weight": "ce2419a0b097036b2a31f2f4ad731d5814bcc2ef4c511786e24471e5eefd273b",
"blk.15.attn_q.weight": "9539db5a970d11ebe99722d1e13fcd635e250033630811efe583d2f97778e4a9",
"blk.15.attn_v.weight": "1c834b48ccd88adaeabb7d8bcb6be0bcd6d5ac1354ce88fc28f19a1a96b81ab3",
"blk.15.ffn_down.weight": "bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42",
"blk.15.ffn_gate.weight": "1b14529d57056b79037f6cb5008132e62cc35992353b38dda59572274623103b",
"blk.15.ffn_norm.weight": "9af77458de9ee55c66f93865759f9c2c398557f94f3fa8fa6af30543d7339cde",
"blk.15.ffn_up.weight": "41d524a26b61a9595816b4fd53cf57ef50a702e4ef32933ff6136dca9136a267",
"blk.15.post_attention_norm.weight": "c60a03cd0e63a7db5c80015e58e9b97ba2208caa19f66a6fef5c4447eca900ce",
"blk.15.post_ffw_norm.weight": "34f7f9f96769215bbc3d17084df091864aef96a6645b7d0b3b7d9bd92f1a4b0b",
"blk.16.attn_k.weight": "7e27240d9f3a8c6cf0f4a980113d43234f514eadc3e3e1792b86efb29ffb1a6d",
"blk.16.attn_norm.weight": "af798acc0899282a30448edec48223b3e8efda177090273e612d8eca5e377301",
"blk.16.attn_output.weight": "79df39a3709d3d53e84146291e0944a7a653d06705293d9ccb5648dceadb432c",
"blk.16.attn_q.weight": "db58a1c3b83ad294804e5fd7321005719e200659173466df5a52a182b80b7165",
"blk.16.attn_v.weight": "2af6d48cbaeb225b5c1a704f76abd89c8ab1521417695b112b4dcc2cbd39b74d",
"blk.16.ffn_down.weight": "fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c",
"blk.16.ffn_gate.weight": "7a80bcbc42464bd55df4814a6edbd7b5c153e0428323bbe49de55e2d2add33e7",
"blk.16.ffn_norm.weight": "2041685ee926d30f3f2ae4ec35b5688f1cd834167a6359a7d4057eac804c58b2",
"blk.16.ffn_up.weight": "8da4b718973ac1d43b928829bc45e062fd101984d6c98dd825bd7c5d08ebfbe3",
"blk.16.post_attention_norm.weight": "975c48fe680a6167438a106140a8872eee7765191f152d80e3b8ddf47693e095",
"blk.16.post_ffw_norm.weight": "4de2d4d483acfe4fc77860ea929025df2f4e15c10729413f36a18c94eaa6d689",
"blk.17.attn_k.weight": "f937e61f0af8c4cd98ee742648eb60e02e579683e21d421071295a3b70aebaad",
"blk.17.attn_norm.weight": "c3270583ed28b7e423f5b170c59113234f258169b93a867d9274f4c10b7cb115",
"blk.17.attn_output.weight": "b8c1150e81e685e539a5dcf2c19047a24eba2b281fabe166674b1d71ef4612ea",
"blk.17.attn_q.weight": "c255100ae2011e7dc7e3bf3bc3ccd96d859fbb98581cae993d7b82c1ba8e8b39",
"blk.17.attn_v.weight": "5830bb0a555984c6485348067f70b5d22ae337c011aa9248dac2ff4c95944551",
"blk.17.ffn_down.weight": "8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0",
"blk.17.ffn_gate.weight": "1b52876739712831c272911533da206f407b46034a1a4ae8a88c1f96b6bd5747",
"blk.17.ffn_norm.weight": "d0e16ba5e87c91b545334e022058c7d03849665c3b1a6298771b656531366b66",
"blk.17.ffn_up.weight": "4dd6211d01dbebbe21052708eddc242b082a58b5f18ed16479e17987c1d3432e",
"blk.17.post_attention_norm.weight": "6f49c775c7417dade77ba8268a0f8441c1e5ec28b5d7e4dc5ed07a04d04600c8",
"blk.17.post_ffw_norm.weight": "b91a0bb2e6679e9c9be06ad323adae441d00a3d673efb19d7c4954be2aa84b27",
"blk.18.attn_k.weight": "22b565ace1b4da8b33865a58625be1d90beea9891f29686a69fa9cf7c93217db",
"blk.18.attn_norm.weight": "3e0160d7063c8753de65d2356a66648e47d921efdc5c917efb8209892120f8db",
"blk.18.attn_output.weight": "e3180f0bb4ca90b31e9b08158db38e332de62dfbaefe34aa94cc316409331e09",
"blk.18.attn_q.weight": "f3a5a83614c3ba7ea41cdd5b1b0819a241ee2a951a381ce4a9e001d3f700ed8f",
"blk.18.attn_v.weight": "f3350a5984fb951fc738adcf78147e6d812ff1c576670c460cafc99c253c1654",
"blk.18.ffn_down.weight": "9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9",
"blk.18.ffn_gate.weight": "ebaec8b4b578a2e8d815baac12f1675c208f80c68074d5a18288a2e1a60680ee",
"blk.18.ffn_norm.weight": "33e7687c53a242f2f8dc7093a491c97b18d4a5a8c14d183f02bd586a770f05aa",
"blk.18.ffn_up.weight": "78a1816662378ce56cc870e705174492781897b3afd2d4d97a51f10f2f2987c1",
"blk.18.post_attention_norm.weight": "a58dde3f12df3e94cbc27d87c8ea86f89af8a388a506446ff6758f05399b05fc",
"blk.18.post_ffw_norm.weight": "cebf90cc143577d483cca27b032dfd82031ee59bdf17c0e2cf60a0a3ad5bf996",
"blk.19.attn_k.weight": "4683375d0599ac9e2232196aae1e90af13a14cae26e865465de5c8e257bb2055",
"blk.19.attn_norm.weight": "f3eba936bfb1814bbcb0a1d62739eb66daac839df8c9c836fe0e94860df88525",
"blk.19.attn_output.weight": "51c0f01d38a9dcfe9bdbc4643576fab164c1d9e4b7168b7695c0ee55e6965667",
"blk.19.attn_q.weight": "28d15b69b8416f2e7ddc88fe381cb1e2ef2ad705fb1c268139ba96498cc74848",
"blk.19.attn_v.weight": "6860f1cd720638e63a981fa2c0b4db900129826bcb9823c9ddf9fb8b1b9f3383",
"blk.19.ffn_down.weight": "bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f",
"blk.19.ffn_gate.weight": "54d03ef69ba373fc410fbca8f1e34a565d58e4296d9a035ff7e48340b9c848e7",
"blk.19.ffn_norm.weight": "9178fc796a340ee6e8128ca74c0cb6203d1adbed6927af4e5ac7863da57affc7",
"blk.19.ffn_up.weight": "a77bd708026c6e83ad5c79c223278e74621bcf74a9641c7818d96b595daaad20",
"blk.19.post_attention_norm.weight": "ae94aa26f4c411bf9496a6fd4a6df64ee589ee1ae9a04b531d45acc95721e582",
"blk.19.post_ffw_norm.weight": "9ad210700edeef12133bdcff04bf1c7f62b49f6f4a9ba483c7cdc59857c24a5c",
"blk.20.attn_k.weight": "e35bce1e9f4a7a09ef34721f57ea38cfca68c272f52d923fe50af8308f66cfaa",
"blk.20.attn_norm.weight": "644800f6926fd34f233795c4dec1151a295d2138ca8cac33e3e48167d26f8b41",
"blk.20.attn_output.weight": "8d3758cd236471741e1ad66c0710cb79077dc8c7a3a292d35bc551c0c5abe627",
"blk.20.attn_q.weight": "c333b1f0f6f956b5d73891df10b1a0321e55fc31c40d623a24e1f52caa6a998b",
"blk.20.attn_v.weight": "8562b418d0c4868a050fb19fa3fcaf50a8cf1c669f537d666c80c7b3a04714e1",
"blk.20.ffn_down.weight": "97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e",
"blk.20.ffn_gate.weight": "5c61151d86f28415c73c73d90ec088c646cbe5c1640197caf58eb501ba7db293",
"blk.20.ffn_norm.weight": "24bbe0a701afd4bbeea65b3edde712b3cbb2281043bbc43dbf250582453116ed",
"blk.20.ffn_up.weight": "e170cf68e249566aa99eb6f6b265679bf9a5a6b76830ba24e7e130c2515910c4",
"blk.20.post_attention_norm.weight": "e092d751cfe20dbf2d348358f3b38397bd83e4ed94d6bbaa6bbaddcd902b2ac4",
"blk.20.post_ffw_norm.weight": "219a18a47dcba76e669e4322223a5a9227bd3db1de3fbd3d3cfb22e54a783c5a",
"blk.21.attn_k.weight": "c3a095ebddb42c63824f1c98da65263dc88e4d790a26aa1632840b44f5cc7cb1",
"blk.21.attn_norm.weight": "ef8bbaded5fbc45ad9cf3985ae02174524e7090fe6362811124f942ef643bec7",
"blk.21.attn_output.weight": "668f018aba72baac6252aa3ad58569ddd55ab751a0dd8d7bcc9fb9b6efb4bf53",
"blk.21.attn_q.weight": "e759c65663089f3bbbd51847934c185e680c82f1249065d5d487da638e519e6d",
"blk.21.attn_v.weight": "2ff57762686cf9ba1f5a6be76503454b97556ce67f4ac98254bd0562231197ba",
"blk.21.ffn_down.weight": "3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb",
"blk.21.ffn_gate.weight": "338022d882f4b6619e8054a6fb909696fa3eef3013cf69b65c3cacdfc5b9e42c",
"blk.21.ffn_norm.weight": "1e77660c23a3f9653ee721a863d1960f773d87437cabc4dc0a6e17ee3d4e5e44",
"blk.21.ffn_up.weight": "7d31b20fbc2e6eba8f350f170069dc36f0cb12f68fbc4206ec5022a74085ebcb",
"blk.21.post_attention_norm.weight": "9638bae8d8bdcd7ed68da282979cd84a07c41ff9cabcaea94ebc846a1803db23",
"blk.21.post_ffw_norm.weight": "d622ef11115fe0cbe04b727d5a3b6371e7f39bf08c8d5eb9bc6da52e3f3cfb9d",
"blk.22.attn_k.weight": "5c321cb29deffbe57de200dd206a62005f1e80acb86c4fd2349dd44c8d3594fd",
"blk.22.attn_norm.weight": "198d949705d7170a331d75889d8c7500c3635254dac2cc6aa4dc35d556584536",
"blk.22.attn_output.weight": "19805cd5d7025b457e5d41d70db8b3fd63c2dd0e4a94d3ef1704d50ef4e749e8",
"blk.22.attn_q.weight": "177836cd583fc87405975ddc21ebfebdaa090a0363799664c72caa3da851ae2c",
"blk.22.attn_v.weight": "fea255692483e30d0108f9e4e250eb3ed7dbda8d83f499b06519b8c223ae6096",
"blk.22.ffn_down.weight": "00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb",
"blk.22.ffn_gate.weight": "b0591065b91281b2fbd8a9567f3568d40479f680e1f0a29e27ae213f37642489",
"blk.22.ffn_norm.weight": "96b5c5d0737c2ceb8fc869f54adb9e5f46e28cb7b177c40f49fa926b923c00f8",
"blk.22.ffn_up.weight": "81f472185b24344ab0594ea8246cc6e200e0dc1cab4943e74fbe4ca19d5a9701",
"blk.22.post_attention_norm.weight": "27fa9aa6260aa3071e0391e1a1d49322dcb6e8072315b8a9b7064087108dbd06",
"blk.22.post_ffw_norm.weight": "f37e1dcd7f643d9545675ffe9dc527a11eba86eb204989c2f44f636b266d896a",
"blk.23.attn_k.weight": "5d82f36658a56c3f94d0bb2d61f65509c966fa6568f81812e0d3e338b380ef8c",
"blk.23.attn_norm.weight": "b7983f88d9cad88bc88a528923e6da592ad20e699965b223ebc10840fe1f4fec",
"blk.23.attn_output.weight": "59f97f80f430d71606aab0158a195aed29ccd3405e6c0a5c41c809be8eb01898",
"blk.23.attn_q.weight": "53ac4789fe958919cc02ea4222bcd64c0ea1b4baa54304bff46635bdf42f7490",
"blk.23.attn_v.weight": "ec8abe09b9e84dbb52c7a068094657c6d3c62fe551ba8d7c3a3f23da622e9756",
"blk.23.ffn_down.weight": "3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992",
"blk.23.ffn_gate.weight": "366aa2486d911ba81eb519119e13807deacf7e9908bc1975a2a63e00d6b10124",
"blk.23.ffn_norm.weight": "6d1d4a4af34bb7dc090ac87d6457d398c3e0fb68bd2e2b60b099dc318b6cfac3",
"blk.23.ffn_up.weight": "53f76692e253f5d2420b3f200c731b9f3b7a83e379920b4a067c729b4674aa4d",
"blk.23.post_attention_norm.weight": "7c952fa0efa76b3f048c8c4c9e8dcb5e3724d231327eda6423a34d3f3d3367de",
"blk.23.post_ffw_norm.weight": "7ab188cfe61f0a91b40309a0ab6bfa99f19d0ff2a37b6ac10e5f0c7f44eb5270",
"blk.24.attn_k.weight": "225798792f9bfdd10eff0505ebe61e0aad0209c17b431f6044ee7968ffe8c198",
"blk.24.attn_norm.weight": "635e3c1ebf5219bbebfc40ef164bc32d2b726ef595a94da64ac524ae878e2915",
"blk.24.attn_output.weight": "482f5bb2db8d9ed22b253d9a3296333b239efe698e5992e5d77e7e12dc2a5cf5",
"blk.24.attn_q.weight": "43805bbccddb65d58fffc4be9b5c374d4e1df1395ec1e1ffb4bcff03e98d5adb",
"blk.24.attn_v.weight": "fa741af54b4a3b1775d32f59134756090c5df2e7345a12a2d8db94fe289667a7",
"blk.24.ffn_down.weight": "83c6351e3162626b276f524a57836144625c2556dbe321b57cbd8fd486a68fab",
"blk.24.ffn_gate.weight": "fbe66be0d84d12cea5176cc7eaef64382ffc7324cd9d6266a3342dc43442f2ac",
"blk.24.ffn_norm.weight": "77c1445a8639ad24938bdf0280233eea2362d47391421833dfa72ec756dfc1e8",
"blk.24.ffn_up.weight": "78235ac729ee23c1cf1ae543751e3af32776d8808cee6e529c2a625a1f027654",
"blk.24.post_attention_norm.weight": "161f71b6d07628d43e4ae51a4c9088ec6ca2db123a17986a14505d83fdd04dad",
"blk.24.post_ffw_norm.weight": "cf1ba692aa683368b02ac413e69b2521b98c69a5274eacbb54165b53bf38a8b2",
"blk.25.attn_k.weight": "057a56bd8c8d2b41608d1f71faa3052902152ddf85e47669ad950c1c3e77c33f",
"blk.25.attn_norm.weight": "b7179fe02c334da556ddcf6c1b502245639a728c4cbba8b552d8e1df4565ee9d",
"blk.25.attn_output.weight": "4fed8b05b08a0ff75ffd022701bbeb52f17b23d09332a1ddcba737244bd0d3b0",
"blk.25.attn_q.weight": "c52e99f5d38bf7538d6106a0bbf38ac6dc6296bca9a3f849afa384ea67b4af01",
"blk.25.attn_v.weight": "c49c23d8e1cfa6a8eb971eb69942204890c6d7d830dc8774c84b108a80598912",
"blk.25.ffn_down.weight": "c08d4dc8412b19fdc870c164b83c341b236ec6fe7bb4a9bcfe0dc100faa20286",
"blk.25.ffn_gate.weight": "1a4cb3f36735d59181721471452807903006539e5e1b5ceb4f72d1d7ae134127",
"blk.25.ffn_norm.weight": "8fd6bd0dcec5198761525a36992a57c9ec5e9da60a22092839a84ae8c4e87f26",
"blk.25.ffn_up.weight": "3a00f39bdd5f31dc5e3b281d2002e1ac4f2475d49a0ac1d7720a25b377dcd04a",
"blk.25.post_attention_norm.weight": "e5f31a648612c859b6d21c9ee426e87a86cb1973dfdd86276c767371d9cef5ad",
"blk.25.post_ffw_norm.weight": "553c3bd774922c99c2384380a142d019881d30dbf0fe3bf9430dabfb3f6cbd33",
"output_norm.weight": "49445c4585ab0a8135717a0bdb1cda4a062a030177d0119561d91542aec5744b"
}
6 convert/testdata/gemma-2-9b-it.json vendored
@@ -1,6 +0,0 @@
{
"general.architecture": "gemma2",
"gemma2.attention.sliding_window": "4096",
"gemma2.attn_logit_softcapping": "50",
"gemma2.final_logit_softcapping": "30"
}
188 convert/testdata/gemma-2b-it.json vendored
@@ -1,188 +0,0 @@
{
"general.architecture": "gemma",
"general.file_type": "1",
"general.quantization_version": "2",
"gemma.block_count": "18",
"gemma.context_length": "8192",
"gemma.embedding_length": "2048",
"gemma.feed_forward_length": "16384",
"gemma.attention.head_count": "8",
"gemma.attention.head_count_kv": "1",
"gemma.attention.key_length": "256",
"gemma.attention.value_length": "256",
"gemma.attention.layer_norm_rms_epsilon": "1e-06",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "2",
"tokenizer.ggml.eos_token_id": "1",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "3",
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
"tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
"token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
"blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
"blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
"blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
"blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
"blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
"blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
"blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
"blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
"blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
"blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
"blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
"blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
"blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
"blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
"blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
"blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
"blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
"blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
"blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
"blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
"blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
"blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
"blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
"blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
"blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
"blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
"blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
"blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
"blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
"blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
"blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
"blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
"blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
"blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
"blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
"blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
"blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
"blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
"blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
"blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
"blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
"blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
"blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
"blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
"blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
"blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
"blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
"blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
"blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
"blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
"blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
"blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
"blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
"blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
"blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
"blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
"blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
"blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
"blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
"blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
|
|
||||||
"blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
|
|
||||||
"blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
|
|
||||||
"blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
|
|
||||||
"blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
|
|
||||||
"blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
|
|
||||||
"blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
|
|
||||||
"blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
|
|
||||||
"blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
|
|
||||||
"blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
|
|
||||||
"blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
|
|
||||||
"blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
|
|
||||||
"blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
|
|
||||||
"blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
|
|
||||||
"blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
|
|
||||||
"blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
|
|
||||||
"blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
|
|
||||||
"blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
|
|
||||||
"blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
|
|
||||||
"blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
|
|
||||||
"blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
|
|
||||||
"blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
|
|
||||||
"blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
|
|
||||||
"blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
|
|
||||||
"blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
|
|
||||||
"blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
|
|
||||||
"blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
|
|
||||||
"blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
|
|
||||||
"blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
|
|
||||||
"blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
|
|
||||||
"blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
|
|
||||||
"blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
|
|
||||||
"blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
|
|
||||||
"blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
|
|
||||||
"blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
|
|
||||||
"blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
|
|
||||||
"blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
|
|
||||||
"blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
|
|
||||||
"blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
|
|
||||||
"blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
|
|
||||||
"blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
|
|
||||||
"blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
|
|
||||||
"blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
|
|
||||||
"blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
|
|
||||||
"blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
|
|
||||||
"blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
|
|
||||||
"blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
|
|
||||||
"blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
|
|
||||||
"blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
|
|
||||||
"blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
|
|
||||||
"blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
|
|
||||||
"blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
|
|
||||||
"blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
|
|
||||||
"blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
|
|
||||||
"blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
|
|
||||||
"blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
|
|
||||||
"blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
|
|
||||||
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
|
|
||||||
"blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
|
|
||||||
"blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
|
|
||||||
"blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
|
|
||||||
"blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
|
|
||||||
"blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
|
|
||||||
"blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
|
|
||||||
"blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
|
|
||||||
"blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
|
|
||||||
"blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
|
|
||||||
"blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
|
|
||||||
"blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
|
|
||||||
"blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
|
|
||||||
"blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
|
|
||||||
"blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
|
|
||||||
"blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
|
|
||||||
"blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
|
|
||||||
"blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
|
|
||||||
"blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
|
|
||||||
"blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
|
|
||||||
"blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
|
|
||||||
"blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
|
|
||||||
"blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
|
|
||||||
"blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
|
|
||||||
"blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
|
|
||||||
"blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
|
|
||||||
"blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
|
|
||||||
"blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
|
|
||||||
"blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
|
|
||||||
"blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
|
|
||||||
"blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
|
|
||||||
"blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
|
|
||||||
"blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
|
|
||||||
"blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
|
|
||||||
"blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
|
|
||||||
"blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
|
|
||||||
"blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
|
|
||||||
"blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
|
|
||||||
"blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
|
|
||||||
"blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
|
|
||||||
"blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
|
|
||||||
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
|
|
||||||
"blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
|
|
||||||
"blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
|
|
||||||
"blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
|
|
||||||
"blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
|
|
||||||
"output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
|
|
||||||
}
|
|
||||||
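The block above is the tail of a deleted testdata fixture: one hex-encoded SHA-256 digest per metadata key and per tensor of a converted Gemma model. For orientation, a digest of that shape is just the hash of the tensor's raw bytes; the sketch below is ours, not the repository's verification code, and the sample payload is made up.

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// digest returns the hex-encoded SHA-256 of a tensor's raw bytes, the
// same shape of value recorded per tensor in the testdata above.
func digest(raw []byte) string {
	sum := sha256.Sum256(raw)
	return hex.EncodeToString(sum[:])
}

func main() {
	// Hypothetical payload; real digests come from the converted weights.
	fmt.Println(digest([]byte{0x00, 0x01, 0x02, 0x03}))
}
```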

@@ -1,12 +1,10 @@
 package convert
 
 import (
+	"cmp"
 	"crypto/sha256"
-	"encoding/hex"
 	"encoding/json"
-	"errors"
 	"fmt"
-	"io/fs"
 	"log/slog"
 	"os"
 	"slices"
@@ -14,152 +12,10 @@ import (
 	"golang.org/x/exp/maps"
 )
 
-const (
-	_ int32 = iota
-	tokenTypeNormal
-	tokenTypeUnknown
-	tokenTypeControl
-	tokenTypeUserDefined
-	tokenTypeUnused
-	tokenTypeByte
-)
-
 type Tokenizer struct {
-	*Vocabulary
-	SpecialVocabulary []*SpecialVocabulary
-	Merges            []string
+	Version     string         `json:"version"`
+	AddedTokens []Token        `json:"added_tokens"`
+	Model       TokenizerModel `json:"model"`
 
-	Pre      string
-	Template string
-}
-
-func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
-	v, err := parseVocabulary(fsys)
-	if err != nil {
-		return nil, err
-	}
-
-	t := &Tokenizer{
-		Vocabulary: v,
-		Pre:        "default",
-	}
-
-	addedTokens := make(map[string]token)
-	if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
-	} else if err != nil {
-		return nil, err
-	} else {
-		defer f.Close()
-
-		var tt tokenizer
-		if err := json.NewDecoder(f).Decode(&tt); err != nil {
-			return nil, err
-		}
-
-		for _, t := range tt.AddedTokens {
-			addedTokens[t.Content] = t
-		}
-
-		t.Merges = tt.Model.Merges
-
-		sha256sum := sha256.New()
-		for _, pt := range tt.PreTokenizer.PreTokenizers {
-			switch pt.Type {
-			case "Split":
-				if pt.Pattern.Regex != "" {
-					// create a checksum of all Split pretokenizers which should be sufficient
-					// to identify the pretokenizer
-					sha256sum.Write([]byte(pt.Pattern.Regex))
-				}
-			}
-		}
-
-		switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
-		case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
-			t.Pre = "llama-bpe"
-		case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
-			t.Pre = "deepseek-llm"
-		case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
-			t.Pre = "deepseek-coder"
-		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
-			// noop, empty pretokenizer
-		default:
-			slog.Warn("unknown pretokenizer, using default", "digest", digest)
-		}
-	}
-
-	if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
-	} else if err != nil {
-		return nil, err
-	} else {
-		defer f.Close()
-
-		var p map[string]json.RawMessage
-		if err := json.NewDecoder(f).Decode(&p); err != nil {
-			return nil, err
-		}
-
-		if template, ok := p["chat_template"]; ok {
-			var s []struct {
-				Name     string `json:"name"`
-				Template string `json:"template"`
-			}
-			if err := json.Unmarshal(template, &t.Template); err == nil {
-				// noop
-			} else if err := json.Unmarshal(template, &s); err == nil {
-				for _, e := range s {
-					if e.Name == "default" {
-						t.Template = e.Template
-						break
-					}
-				}
-			} else {
-				return nil, fmt.Errorf("invalid chat_template: %w", err)
-			}
-		}
-
-		for _, st := range specialTokenTypes {
-			sv := SpecialVocabulary{Type: st}
-			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
-				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
-					return nil, err
-				}
-			}
-
-			if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
-				var content string
-				if err := json.Unmarshal(bts, &content); err != nil {
-					var mm map[string]any
-					if err := json.Unmarshal(bts, &mm); err != nil {
-						continue
-					}
-
-					content, ok = mm["content"].(string)
-					if !ok {
-						continue
-					}
-				}
-
-				sv.Content = content
-			}
-
-			if id, ok := addedTokens[sv.Content]; ok {
-				sv.ID = id.ID
-				t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
-			}
-		}
-	}
-
-	return t, nil
-}
-
-type tokenizer struct {
-	AddedTokens []token `json:"added_tokens"`
-	Model       struct {
-		Type   string         `json:"type"`
-		Vocab  map[string]int `json:"vocab"`
-		Merges []string       `json:"merges"`
-	} `json:"model"`
-
 	PreTokenizer struct {
 		PreTokenizers []struct {
@@ -171,108 +27,80 @@ type tokenizer struct {
 	} `json:"pre_tokenizer"`
 }
 
-type token struct {
+type TokenizerModel struct {
+	Type   string         `json:"type"`
+	Vocab  map[string]int `json:"vocab"`
+	Merges []string       `json:"merges"`
+	Tokens []Token
+}
+
+type Token struct {
 	ID          int    `json:"id"`
 	Content     string `json:"content"`
 	Special     bool   `json:"special"`
 	UserDefined bool
 }
 
-type Vocabulary struct {
-	Model  string
-	Tokens []string
-	Scores []float32
-	Types  []int32
+func (t *Token) Type() int32 {
+	switch {
+	case t.Special:
+		return tokenTypeControl
+	case t.UserDefined:
+		return tokenTypeUserDefined
+	default:
+		return tokenTypeNormal
+	}
 }
 
-func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
-	f, err := fsys.Open("tokenizer.json")
+func (t *Tokenizer) maxID() int {
+	return max(
+		slices.Max(maps.Values(t.Model.Vocab)),
+		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
+			return cmp.Compare(a.ID, b.ID)
+		}).ID,
+	)
+}
+
+func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
+	f, err := os.Open(dirpath)
 	if err != nil {
-		return nil, err
+		panic(err)
 	}
 	defer f.Close()
 
-	var t tokenizer
+	var t Tokenizer
 	if err := json.NewDecoder(f).Decode(&t); err != nil {
-		return nil, err
+		return "", nil, nil, err
 	}
 
-	tokens := make(map[int]token, len(t.Model.Vocab))
+	tokens = make([]Token, t.maxID()+1)
 	for k, v := range t.Model.Vocab {
-		tokens[v] = token{
-			ID:      v,
-			Content: k,
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
+	}
+
+	for _, v := range t.AddedTokens {
+		v.UserDefined = true
+		tokens[v.ID] = v
+	}
+
+	sha256sum := sha256.New()
+	for _, pt := range t.PreTokenizer.PreTokenizers {
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
+			sha256sum.Write([]byte(pt.Pattern.Regex))
 		}
 	}
 
-	for _, token := range t.AddedTokens {
-		token.UserDefined = true
-		tokens[token.ID] = token
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
+		pre = "llama-bpe"
+	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
+		pre = "deepseek-llm"
+	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
+		pre = "deepseek-coder"
	default:
+		slog.Warn("unknown pretokenizer, using default", "digest", digest)
+		pre = "default"
 	}
 
-	keys := maps.Keys(tokens)
-	slices.Sort(keys)
-
-	v := Vocabulary{Model: "gpt2"}
-	for _, k := range keys {
-		token := tokens[k]
-		v.Tokens = append(v.Tokens, token.Content)
-		v.Scores = append(v.Scores, float32(token.ID))
-
-		switch {
-		case token.Special:
-			v.Types = append(v.Types, tokenTypeControl)
-		case token.UserDefined:
-			v.Types = append(v.Types, tokenTypeUserDefined)
-		default:
-			v.Types = append(v.Types, tokenTypeNormal)
-		}
-	}
-
-	return &v, nil
-}
-
-func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
-	patterns := []struct {
-		Pattern string
-		Func    func(fs.FS) (*Vocabulary, error)
-	}{
-		{"tokenizer.model", parseSentencePiece},
-		{"tokenizer.json", parseVocabularyFromTokenizer},
-	}
-
-	for _, pattern := range patterns {
-		if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
-			continue
-		} else if err != nil {
-			return nil, err
-		}
-
-		return pattern.Func(fsys)
-	}
-
-	return nil, errors.New("unknown tokenizer format")
-}
-
-type SpecialVocabulary struct {
-	Type     string
-	ID       int
-	Content  string
-	AddToken bool
-}
-
-func (sv SpecialVocabulary) Key() string {
-	switch t := sv.Type; t {
-	case "bos", "eos", "cls", "mask":
-		return t
-	case "unk":
-		return "unknown"
-	case "sep":
-		//nolint:misspell // this is an upstream typo
-		return "seperator"
-	case "pad":
-		return "padding"
-	}
-
-	panic("unknown special vocabulary type")
+	return pre, tokens, t.Model.Merges, nil
 }
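One detail worth calling out in the diff above: both versions identify the pretokenizer by hashing the regexes of the tokenizer's Split pretokenizers and matching the digest against a known table (`hex.EncodeToString` on one side, `fmt.Sprintf("%x", ...)` on the other; the same bytes either way). Below is a standalone sketch of that fingerprinting, with the digest table copied from the diff and a helper name of our own choosing:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// pretokenizerName fingerprints a tokenizer by hashing its Split
// pretokenizer regexes, mirroring the digest switch in the diff above.
func pretokenizerName(regexes []string) string {
	h := sha256.New()
	for _, re := range regexes {
		h.Write([]byte(re))
	}
	switch digest := hex.EncodeToString(h.Sum(nil)); digest {
	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
		return "llama-bpe"
	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
		return "deepseek-llm"
	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
		return "deepseek-coder"
	default:
		return "default"
	}
}

func main() {
	// No Split pretokenizers hashes to the empty digest, which both
	// versions treat as the default pretokenizer.
	fmt.Println(pretokenizerName(nil)) // -> default
}
```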

@@ -1,113 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io/fs"
-	"os"
-	"slices"
-
-	"google.golang.org/protobuf/proto"
-
-	"github.com/ollama/ollama/convert/sentencepiece"
-)
-
-func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
-	ast, err := parseAdditionalSpecialTokens(fsys)
-	if err != nil {
-		return nil, err
-	}
-
-	bts, err := fs.ReadFile(fsys, "tokenizer.model")
-	if err != nil {
-		return nil, err
-	}
-
-	var spm sentencepiece.ModelProto
-	if err := proto.Unmarshal(bts, &spm); err != nil {
-		return nil, err
-	}
-
-	v := Vocabulary{Model: "llama"}
-	for _, piece := range spm.GetPieces() {
-		v.Tokens = append(v.Tokens, piece.GetPiece())
-		v.Scores = append(v.Scores, piece.GetScore())
-
-		switch t := piece.GetType(); t {
-		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
-			sentencepiece.ModelProto_SentencePiece_CONTROL,
-			sentencepiece.ModelProto_SentencePiece_UNUSED,
-			sentencepiece.ModelProto_SentencePiece_BYTE:
-			v.Types = append(v.Types, int32(t))
-		default:
-			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
-			if slices.Contains(ast, piece.GetPiece()) {
-				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
-			}
-
-			v.Types = append(v.Types, tt)
-		}
-	}
-
-	f, err := fsys.Open("added_tokens.json")
-	if errors.Is(err, os.ErrNotExist) {
-		return &v, nil
-	} else if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var atm map[string]int
-	if err := json.NewDecoder(f).Decode(&atm); err != nil {
-		return nil, err
-	}
-
-	type t struct {
-		id      int
-		content string
-	}
-
-	var ts []t
-	for content, id := range atm {
-		ts = append(ts, t{id, content})
-	}
-
-	slices.SortFunc(ts, func(i, j t) int {
-		return cmp.Compare(i.id, j.id)
-	})
-
-	n := len(v.Tokens)
-	for i, t := range ts {
-		if t.id != i+n {
-			return nil, fmt.Errorf("invalid token id: %d", t.id)
-		}
-
-		v.Tokens = append(v.Tokens, t.content)
-		v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, tokenTypeUserDefined)
-	}
-
-	return &v, nil
-}
-
-func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
-	f, err := fsys.Open("special_tokens_map.json")
-	if errors.Is(err, os.ErrNotExist) {
-		return nil, nil
-	} else if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var m struct {
-		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
-	}
-
-	if err := json.NewDecoder(f).Decode(&m); err != nil {
-		return nil, err
-	}
-
-	return m.AdditionalSpecialTokens, nil
-}

@@ -1,208 +0,0 @@
-package convert
-
-import (
-	"io"
-	"io/fs"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-)
-
-func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
-	t.Helper()
-
-	for k, v := range files {
-		if err := func() error {
-			f, err := os.Create(filepath.Join(dir, k))
-			if err != nil {
-				return err
-			}
-			defer f.Close()
-
-			if _, err := io.Copy(f, v); err != nil {
-				return err
-			}
-
-			return nil
-		}(); err != nil {
-			t.Fatalf("unexpected error: %v", err)
-		}
-	}
-
-	return os.DirFS(dir)
-}
-
-func TestParseTokenizer(t *testing.T) {
-	cases := []struct {
-		name              string
-		fsys              fs.FS
-		specialTokenTypes []string
-		want              *Tokenizer
-	}{
-		{
-			name: "string chat template",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"chat_template": "<default template>"
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{Model: "gpt2"},
-				Pre:        "default",
-				Template:   "<default template>",
-			},
-		},
-		{
-			name: "list chat template",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"chat_template": [
-						{
-							"name": "default",
-							"template": "<default template>"
-						},
-						{
-							"name": "tools",
-							"template": "<tools template>"
-						}
-					]
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{Model: "gpt2"},
-				Pre:        "default",
-				Template:   "<default template>",
-			},
-		},
-		{
-			name: "added tokens",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 999,
-							"content": "<unused999>",
-							"special": false
-						}
-					]
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<unused999>"},
-					Scores: []float32{999},
-					Types:  []int32{4},
-				},
-				Pre: "default",
-			},
-		},
-		{
-			name: "added tokens overlap vocab",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 0,
-							"content": "<pad>",
-							"special": true
-						}
-					],
-					"model": {
-						"vocab": {
-							"<pad>": 0
-						}
-					}
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<pad>"},
-					Scores: []float32{0},
-					Types:  []int32{3},
-				},
-				Pre: "default",
-			},
-		},
-		{
-			name: "special token types",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 0,
-							"content": "<pad>",
-							"special": true
-						},
-						{
-							"id": 1,
-							"content": "<eos>",
-							"special": true
-						},
-						{
-							"id": 2,
-							"content": "<bos>",
-							"special": true
-						},
-						{
-							"id": 3,
-							"content": "<unk>",
-							"special": true
-						}
-					],
-					"model": {
-						"vocab": {
-							"<pad>": 0,
-							"<eos>": 1,
-							"<bos>": 2,
-							"<unk>": 3
-						}
-					}
-				}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"add_bos_token": true,
-					"add_eos_token": false,
-					"bos_token": "<bos>",
-					"eos_token": "<eos>",
-					"pad_token": "<pad>",
-					"unk_token": "<unk>"
-				}`),
-			}),
-			specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
-					Scores: []float32{0, 1, 2, 3},
-					Types:  []int32{3, 3, 3, 3},
-				},
-				SpecialVocabulary: []*SpecialVocabulary{
-					{Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
-					{Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
-					{Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
-					{Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
-				},
-				Pre: "default",
-			},
-		},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.name, func(t *testing.T) {
-			tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
-			if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-
-			if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
-				t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
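The deleted tests above build fixtures by writing each reader into a temp directory and wrapping it with `os.DirFS`. Since `parseTokenizer` only consumes an `fs.FS`, an in-memory `testing/fstest.MapFS` should work just as well; here is a sketch of the first case rewritten that way (assuming nothing in the code under test needs real files on disk):

```go
package convert

import (
	"testing"
	"testing/fstest"
)

// TestParseTokenizerMapFS serves the same fixtures from memory instead
// of a temp directory; parseTokenizer only needs an fs.FS.
func TestParseTokenizerMapFS(t *testing.T) {
	fsys := fstest.MapFS{
		"tokenizer.json":        {Data: []byte(`{}`)},
		"tokenizer_config.json": {Data: []byte(`{"chat_template": "<default template>"}`)},
	}

	tok, err := parseTokenizer(fsys, nil)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if tok.Template != "<default template>" {
		t.Errorf("got template %q", tok.Template)
	}
}
```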

convert/torch.go (new file, 287 lines)
@@ -0,0 +1,287 @@
+package convert
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/nlpodyssey/gopickle/pytorch"
+	"github.com/nlpodyssey/gopickle/types"
+	"github.com/x448/float16"
+
+	"github.com/ollama/ollama/llm"
+)
+
+type torchWriterTo struct {
+	t *llm.Tensor
+
+	params *Params
+	bo     ByteOrder
+
+	storage  pytorch.StorageInterface
+	repacker func(string, []float32, []uint64) ([]float32, error)
+}
+
+type TorchFormat struct{}
+
+func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
+	slog.Debug("getting torch tensors")
+
+	var files []string
+	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	}
+
+	var offset uint64
+	var tensors []llm.Tensor
+	for _, fn := range files {
+		m, err := pytorch.Load(fn)
+		if err != nil {
+			slog.Error(fmt.Sprintf("error unpickling: %q", err))
+			return []llm.Tensor{}, err
+		}
+
+		for _, k := range m.(*types.Dict).Keys() {
+			if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
+				continue
+			}
+
+			t, _ := m.(*types.Dict).Get(k)
+			tshape := t.(*pytorch.Tensor).Size
+
+			var size uint64
+			var kind uint32
+			switch len(tshape) {
+			case 0:
+				continue
+			case 1:
+				// convert to float32
+				kind = 0
+				size = uint64(tshape[0] * 4)
+			case 2:
+				// convert to float16
+				kind = 1
+				size = uint64(tshape[0] * tshape[1] * 2)
+			}
+
+			ggufName, err := tf.GetLayerName(k.(string))
+			if err != nil {
+				slog.Error(err.Error())
+				return nil, err
+			}
+			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
+
+			shape := []uint64{0, 0, 0, 0}
+			for i := range tshape {
+				shape[i] = uint64(tshape[i])
+			}
+
+			tensor := llm.Tensor{
+				Name:   ggufName,
+				Kind:   kind,
+				Offset: offset, // calculate the offset
+				Shape:  shape,
+			}
+
+			tensor.WriterTo = torchWriterTo{
+				t:       &tensor,
+				params:  params,
+				bo:      params.ByteOrder,
+				storage: t.(*pytorch.Tensor).Source,
+			}
+
+			tensors = append(tensors, tensor)
+			offset += size
+		}
+	}
+
+	return tensors, nil
+}
+
+func getAltParams(dirpath string) (*Params, error) {
+	f, err := os.Open(filepath.Join(dirpath, "params.json"))
+	if err != nil {
+		slog.Error("no params.json")
+		return nil, err
+	}
+	defer f.Close()
+
+	type TorchParams struct {
+		HiddenSize     int     `json:"dim"`
+		AttentionHeads int     `json:"n_heads"`
+		KeyValHeads    int     `json:"n_kv_heads"`
+		HiddenLayers   int     `json:"n_layers"`
+		RopeTheta      float64 `json:"rope_theta"`
+		NormEPS        float64 `json:"norm_eps"`
+	}
+
+	var tparams TorchParams
+
+	d := json.NewDecoder(f)
+	err = d.Decode(&tparams)
+	if err != nil {
+		return nil, err
+	}
+
+	params := &Params{
+		Architectures:  []string{"LlamaForCausalLM"},
+		HiddenSize:     tparams.HiddenSize,
+		AttentionHeads: tparams.AttentionHeads,
+		KeyValHeads:    tparams.KeyValHeads,
+		HiddenLayers:   tparams.HiddenLayers,
+		NormEPS:        tparams.NormEPS,
+	}
+
+	switch {
+	case tparams.RopeTheta == 1000000:
+		// Codellama
+		params.ContextSize = 16384
+	case tparams.NormEPS == 1e-06:
+		// llama2
+		slog.Debug("Found llama2 - setting context size to 4096")
+		params.ContextSize = 4096
+	default:
+		params.ContextSize = 2048
+	}
+
+	params.ByteOrder = binary.LittleEndian
+	return params, nil
+}
+
+func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
+	f, err := os.Open(filepath.Join(dirpath, "config.json"))
+	if err != nil {
+		if os.IsNotExist(err) {
+			// try params.json instead
+			return getAltParams(dirpath)
+		} else {
+			return nil, err
+		}
+	}
+
+	var params Params
+	d := json.NewDecoder(f)
+	err = d.Decode(&params)
+	if err != nil {
+		return nil, err
+	}
+
+	params.ByteOrder = binary.LittleEndian
+	return &params, nil
+}
+
+func (m *TorchFormat) GetLayerName(n string) (string, error) {
+	directMap := map[string]string{
+		"tok_embeddings.weight":     "token_embd.weight",
+		"output.weight":             "output.weight",
+		"norm.weight":               "output_norm.weight",
+		"rope.freqs":                "rope_freqs.weight",
+		"model.embed_tokens.weight": "token_embd.weight",
+		"lm_head.weight":            "output.weight",
+		"model.norm.weight":         "output_norm.weight",
+	}
+
+	lMap := map[string]string{
+		"layers.(\\d+).attention_norm.weight":                 "blk.$1.attn_norm.weight",
+		"layers.(\\d+).attention_output_norm.weight":          "blk.$1.attn_norm.weight",
+		"layers.(\\d+).feed_forward.w2.weight":                "blk.$1.ffn_down.weight",
+		"layers.(\\d+).feed_forward.w1.weight":                "blk.$1.ffn_gate.weight",
+		"layers.(\\d+).feed_forward.w3.weight":                "blk.$1.ffn_up.weight",
+		"layers.(\\d+).ffn_norm.weight":                       "blk.$1.ffn_norm.weight",
+		"layers.(\\d+).attention.wk.weight":                   "blk.$1.attn_k.weight",
+		"layers.(\\d+).attention.wo.weight":                   "blk.$1.attn_output.weight",
+		"layers.(\\d+).attention.wq.weight":                   "blk.$1.attn_q.weight",
+		"layers.(\\d+).attention.wv.weight":                   "blk.$1.attn_v.weight",
+		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
+		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
+		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
+		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
+		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
+		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
+		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
+		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
+		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
+	}
+
+	v, ok := directMap[n]
+	if ok {
+		return v, nil
+	}
+
+	// quick hack to rename the layers to gguf format
+	for k, v := range lMap {
+		re := regexp.MustCompile(k)
+		newName := re.ReplaceAllString(n, v)
+		if newName != n {
+			return newName, nil
+		}
+	}
+
+	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
+}
+
+func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
+	var f32s []float32
+	switch s := r.storage.(type) {
+	case *pytorch.FloatStorage:
+		f32s = s.Data
+	case *pytorch.HalfStorage:
+		f32s = s.Data
+	case *pytorch.BFloat16Storage:
+		f32s = s.Data
+	default:
+		return 0, fmt.Errorf("unknown data type: %T", s)
+	}
+
+	if r.repacker != nil {
+		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	switch r.t.Kind {
+	case 0:
+		return 0, binary.Write(w, r.bo, f32s)
+	case 1:
+		f16s := make([]uint16, len(f32s))
+		for i := range f32s {
+			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
+		}
+
+		return 0, binary.Write(w, r.bo, f16s)
+	default:
+		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
+	}
+}
+
+func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
+	switch len(params.Architectures) {
+	case 0:
+		return nil, fmt.Errorf("No architecture specified to convert")
+	case 1:
+		switch params.Architectures[0] {
+		case "LlamaForCausalLM":
+			return &LlamaModel{
+				ModelData{
+					Name:   name,
+					Path:   dirPath,
+					Params: params,
+					Format: m,
+				},
+			}, nil
+		default:
+			return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
+		}
+	}
+
+	return nil, fmt.Errorf("Unknown error")
+}
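`GetLayerName` above renames torch tensor names to GGUF names via a direct map plus regex rewrite rules that capture the layer index. Below is a quick standalone illustration of one rewrite rule from that table (the input names are samples, not taken from a real checkpoint):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// One of the rewrite rules from GetLayerName above: capture the
	// layer index and splice it into the GGUF-style name via $1.
	re := regexp.MustCompile(`model.layers.(\d+).self_attn.q_proj.weight`)

	for _, name := range []string{
		"model.layers.0.self_attn.q_proj.weight",
		"model.layers.17.self_attn.q_proj.weight",
	} {
		fmt.Println(re.ReplaceAllString(name, "blk.$1.attn_q.weight"))
		// -> blk.0.attn_q.weight, blk.17.attn_q.weight
	}
}
```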

@@ -1,3 +0,0 @@
-# `discover`
-
-This package is responsible for discovering information about the system and the capabilities to run LLM. This includes GPU and CPU discovery so the optimal runner can be chosen for a given model. The ollama scheduler relies on up-to-date available memory information, so this package provides the ability to refresh free memory as efficiently as possible.

@@ -1,37 +0,0 @@
-package discover
-
-import (
-	"os"
-	"path/filepath"
-	"runtime"
-	"strings"
-
-	"golang.org/x/sys/cpu"
-)
-
-func GetCPUCapability() CPUCapability {
-	if cpu.X86.HasAVX2 {
-		return CPUCapabilityAVX2
-	}
-	if cpu.X86.HasAVX {
-		return CPUCapabilityAVX
-	}
-	// else LCD
-	return CPUCapabilityNone
-}
-
-func IsNUMA() bool {
-	if runtime.GOOS != "linux" {
-		// numa support in llama.cpp is linux only
-		return false
-	}
-	ids := map[string]interface{}{}
-	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
-	for _, packageId := range packageIds {
-		id, err := os.ReadFile(packageId)
-		if err == nil {
-			ids[strings.TrimSpace(string(id))] = struct{}{}
-		}
-	}
-	return len(ids) > 1
-}
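The deleted `GetCPUCapability` above probes for AVX2 first, then AVX, via `golang.org/x/sys/cpu`, and otherwise falls back to the lowest common denominator. Here is a minimal standalone version of the same probe (assuming the `golang.org/x/sys/cpu` module is available):

```go
package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// Same probe order as the deleted GetCPUCapability: prefer AVX2,
	// fall back to AVX, else the lowest common denominator build.
	switch {
	case cpu.X86.HasAVX2:
		fmt.Println("avx2")
	case cpu.X86.HasAVX:
		fmt.Println("avx")
	default:
		fmt.Println("no vector extensions")
	}
}
```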

@@ -1,64 +0,0 @@
-//go:build linux || windows
-
-package discover
-
-import (
-	"log/slog"
-	"os"
-	"regexp"
-	"runtime"
-	"strconv"
-	"strings"
-)
-
-// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
-// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
-var CudaTegra string = os.Getenv("JETSON_JETPACK")
-
-func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
-	ids := []string{}
-	for _, info := range gpuInfo {
-		if info.Library != "cuda" {
-			// TODO shouldn't happen if things are wired correctly...
-			slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library)
-			continue
-		}
-		ids = append(ids, info.ID)
-	}
-	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
-}
-
-func cudaVariant(gpuInfo CudaGPUInfo) string {
-	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
-		if CudaTegra != "" {
-			ver := strings.Split(CudaTegra, ".")
-			if len(ver) > 0 {
-				return "jetpack" + ver[0]
-			}
-		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
-			r := regexp.MustCompile(` R(\d+) `)
-			m := r.FindSubmatch(data)
-			if len(m) != 2 {
-				slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
-			} else {
-				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
-					// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
-					// https://developer.nvidia.com/embedded/jetpack-archive
-					switch l4t {
-					case 35:
-						return "jetpack5"
-					case 36:
-						return "jetpack6"
-					default:
-						slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
-					}
-				}
-			}
-		}
-	}
-
-	if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
-		return "v11"
-	}
-	return "v12"
-}
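The tail of the deleted `cudaVariant` above encodes the CUDA build selection: Jetson boards map L4T releases R35 and R36 to jetpack5 and jetpack6, and everything else gets the v11 runner when the GPU's compute major is below 6 or the driver predates 12.1, otherwise v12. A small worked check of that last rule (the sample version numbers are illustrative):

```go
package main

import "fmt"

// variantFor mirrors the final check in cudaVariant above: an old compute
// capability or a pre-12.1 driver selects the CUDA 11 runner build.
func variantFor(computeMajor, driverMajor, driverMinor int) string {
	if computeMajor < 6 || driverMajor < 12 || (driverMajor == 12 && driverMinor == 0) {
		return "v11"
	}
	return "v12"
}

func main() {
	fmt.Println(variantFor(7, 12, 4)) // v12
	fmt.Println(variantFor(5, 12, 4)) // v11: compute major too old
	fmt.Println(variantFor(8, 11, 8)) // v11: driver predates CUDA 12.1
}
```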

@@ -1,186 +0,0 @@
-package discover
-
-import (
-	"bufio"
-	"fmt"
-	"os"
-	"reflect"
-	"regexp"
-	"strings"
-
-	"github.com/ollama/ollama/format"
-)
-
-var CudartGlobs = []string{
-	"/usr/local/cuda/lib64/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
-	"/usr/lib/wsl/lib/libcudart.so*",
-	"/usr/lib/wsl/drivers/*/libcudart.so*",
-	"/opt/cuda/lib64/libcudart.so*",
-	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
-	"/usr/local/cuda/lib*/libcudart.so*",
-	"/usr/lib*/libcudart.so*",
-	"/usr/local/lib*/libcudart.so*",
-}
-
-var NvmlGlobs = []string{}
-
-var NvcudaGlobs = []string{
-	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
-	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
-	"/usr/lib/*-linux-gnu/libcuda.so*",
-	"/usr/lib/wsl/lib/libcuda.so*",
-	"/usr/lib/wsl/drivers/*/libcuda.so*",
-	"/opt/cuda/lib*/libcuda.so*",
-	"/usr/local/cuda/lib*/libcuda.so*",
-	"/usr/lib*/libcuda.so*",
-	"/usr/local/lib*/libcuda.so*",
-}
-
-var OneapiGlobs = []string{
-	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
-	"/usr/lib*/libze_intel_gpu.so*",
-}
-
-var (
-	CudartMgmtName = "libcudart.so*"
-	NvcudaMgmtName = "libcuda.so*"
-	NvmlMgmtName   = "" // not currently wired on linux
-	OneapiMgmtName = "libze_intel_gpu.so*"
-)
-
-func GetCPUMem() (memInfo, error) {
-	var mem memInfo
-	var total, available, free, buffers, cached, freeSwap uint64
-	f, err := os.Open("/proc/meminfo")
-	if err != nil {
-		return mem, err
-	}
-	defer f.Close()
-	s := bufio.NewScanner(f)
-	for s.Scan() {
-		line := s.Text()
-		switch {
-		case strings.HasPrefix(line, "MemTotal:"):
-			_, err = fmt.Sscanf(line, "MemTotal:%d", &total)
-		case strings.HasPrefix(line, "MemAvailable:"):
-			_, err = fmt.Sscanf(line, "MemAvailable:%d", &available)
-		case strings.HasPrefix(line, "MemFree:"):
-			_, err = fmt.Sscanf(line, "MemFree:%d", &free)
-		case strings.HasPrefix(line, "Buffers:"):
-			_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
-		case strings.HasPrefix(line, "Cached:"):
-			_, err = fmt.Sscanf(line, "Cached:%d", &cached)
-		case strings.HasPrefix(line, "SwapFree:"):
-			_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
-		default:
-			continue
-		}
-		if err != nil {
-			return mem, err
-		}
-	}
-	mem.TotalMemory = total * format.KibiByte
-	mem.FreeSwap = freeSwap * format.KibiByte
-	if available > 0 {
-		mem.FreeMemory = available * format.KibiByte
-	} else {
-		mem.FreeMemory = (free + buffers + cached) * format.KibiByte
-	}
-	return mem, nil
-}
-
-const CpuInfoFilename = "/proc/cpuinfo"
-
-type linuxCpuInfo struct {
-	ID         string `cpuinfo:"processor"`
-	VendorID   string `cpuinfo:"vendor_id"`
-	ModelName  string `cpuinfo:"model name"`
-	PhysicalID string `cpuinfo:"physical id"`
-	Siblings   string `cpuinfo:"siblings"`
-	CoreID     string `cpuinfo:"core id"`
-}
-
-func GetCPUDetails() ([]CPU, error) {
-	file, err := os.Open(CpuInfoFilename)
-	if err != nil {
-		return nil, err
-	}
-	reColumns := regexp.MustCompile("\t+: ")
-	scanner := bufio.NewScanner(file)
-	cpuInfos := []linuxCpuInfo{}
-	cpu := &linuxCpuInfo{}
-	for scanner.Scan() {
-		line := scanner.Text()
-		if sl := reColumns.Split(line, 2); len(sl) > 1 {
-			t := reflect.TypeOf(cpu).Elem()
-			s := reflect.ValueOf(cpu).Elem()
-			for i := range t.NumField() {
-				field := t.Field(i)
-				tag := field.Tag.Get("cpuinfo")
-				if tag == sl[0] {
-					s.FieldByName(field.Name).SetString(sl[1])
-					break
-				}
-			}
-		} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
-			cpuInfos = append(cpuInfos, *cpu)
-			cpu = &linuxCpuInfo{}
-		}
-	}
-
-	// Process the sockets/cores/threads
-	socketByID := map[string]*CPU{}
-	coreBySocket := map[string]map[string]struct{}{}
-	threadsByCoreBySocket := map[string]map[string]int{}
-	for _, c := range cpuInfos {
-		if _, found := socketByID[c.PhysicalID]; !found {
-			socketByID[c.PhysicalID] = &CPU{
-				ID:        c.PhysicalID,
-				VendorID:  c.VendorID,
-				ModelName: c.ModelName,
-			}
-			coreBySocket[c.PhysicalID] = map[string]struct{}{}
-			threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
-		}
-		if c.CoreID != "" {
-			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID] = struct{}{}
-			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID]++
-		} else {
-			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID] = struct{}{}
-			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID]++
-		}
-	}
-
-	// Tally up the values from the tracking maps
-	for id, s := range socketByID {
-		s.CoreCount = len(coreBySocket[id])
-		s.ThreadCount = 0
-		for _, tc := range threadsByCoreBySocket[id] {
-			s.ThreadCount += tc
-		}
-
-		// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
-		efficiencyCoreCount := 0
-		for _, threads := range threadsByCoreBySocket[id] {
-			if threads == 1 {
-				efficiencyCoreCount++
-			}
-		}
-		if efficiencyCoreCount == s.CoreCount {
-			// 1:1 mapping means they're not actually efficiency cores, but regular cores
-			s.EfficiencyCoreCount = 0
-		} else {
-			s.EfficiencyCoreCount = efficiencyCoreCount
-		}
-	}
-
-	result := []CPU{}
-	for _, c := range socketByID {
-		result = append(result, *c)
-	}
-	return result, nil
-}
@@ -1,60 +0,0 @@
package discover

import (
	"runtime"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestBasicGetGPUInfo(t *testing.T) {
	info := GetGPUInfo()
	assert.NotEmpty(t, len(info))
	assert.Contains(t, "cuda rocm cpu metal", info[0].Library)
	if info[0].Library != "cpu" {
		assert.Greater(t, info[0].TotalMemory, uint64(0))
		assert.Greater(t, info[0].FreeMemory, uint64(0))
	}
}

func TestCPUMemInfo(t *testing.T) {
	info, err := GetCPUMem()
	require.NoError(t, err)
	switch runtime.GOOS {
	case "darwin":
		t.Skip("CPU memory not populated on darwin")
	case "linux", "windows":
		assert.Greater(t, info.TotalMemory, uint64(0))
		assert.Greater(t, info.FreeMemory, uint64(0))
	default:
		return
	}
}

func TestByLibrary(t *testing.T) {
	type testCase struct {
		input  []GpuInfo
		expect int
	}

	testCases := map[string]*testCase{
		"empty":                    {input: []GpuInfo{}, expect: 0},
		"cpu":                      {input: []GpuInfo{{Library: "cpu"}}, expect: 1},
		"cpu + GPU":                {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}}, expect: 2},
		"cpu + 2 GPU no variant":   {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}, {Library: "cuda"}}, expect: 2},
		"cpu + 2 GPU same variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v11"}}, expect: 2},
		"cpu + 2 GPU diff variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v12"}}, expect: 3},
	}

	for k, v := range testCases {
		t.Run(k, func(t *testing.T) {
			resp := (GpuInfoList)(v.input).ByLibrary()
			if len(resp) != v.expect {
				t.Fatalf("expected length %d, got %d => %+v", v.expect, len(resp), resp)
			}
		})
	}
}

// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
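These cases pin down the `ByLibrary` contract: devices group by library, and by variant within a library, with the CPU always in its own bucket. A hedged usage sketch, assuming it runs inside the same package with `fmt` imported (the loop and field access are inferred from the test, not taken from the diff):

```go
// group detected devices so each bucket can be served by one runner build
gpus := GpuInfoList{
	{Library: "cpu"},
	{Library: "cuda", Variant: "v11"},
	{Library: "cuda", Variant: "v12"},
}
for _, bucket := range gpus.ByLibrary() {
	// with the inputs above: three buckets of one device each
	fmt.Println(bucket[0].Library, bucket[0].Variant, len(bucket))
}
```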
@@ -1,234 +0,0 @@
package discover

import (
	"fmt"
	"log/slog"
	"syscall"
	"unsafe"
)

type MEMORYSTATUSEX struct {
	length               uint32
	MemoryLoad           uint32
	TotalPhys            uint64
	AvailPhys            uint64
	TotalPageFile        uint64
	AvailPageFile        uint64
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}

var (
	k32                              = syscall.NewLazyDLL("kernel32.dll")
	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
)

var CudartGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

var NvmlGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

var NvcudaGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

var OneapiGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

var (
	CudartMgmtName = "cudart64_*.dll"
	NvcudaMgmtName = "nvcuda.dll"
	NvmlMgmtName   = "nvml.dll"
	OneapiMgmtName = "ze_intel_gpu64.dll"
)

func GetCPUMem() (memInfo, error) {
	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
	r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
	if r1 == 0 {
		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
	}
	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
}

type LOGICAL_PROCESSOR_RELATIONSHIP uint32

const (
	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
	RelationNumaNode
	RelationCache
	RelationProcessorPackage
	RelationGroup
	RelationProcessorDie
	RelationNumaNodeEx
	RelationProcessorModule
)

const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff

type GROUP_AFFINITY struct {
	Mask     uintptr // KAFFINITY
	Group    uint16
	Reserved [3]uint16
}

type PROCESSOR_RELATIONSHIP struct {
	Flags           byte
	EfficiencyClass byte
	Reserved        [20]byte
	GroupCount      uint16
	GroupMask       [1]GROUP_AFFINITY // len GroupCount
}

// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP

type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
	Size         uint32
	U            [1]byte // Union len Size
	// PROCESSOR_RELATIONSHIP
	// NUMA_NODE_RELATIONSHIP
	// CACHE_RELATIONSHIP
	// GROUP_RELATIONSHIP
}

func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
	if group == nil || target == nil {
		return false
	}
	return group.Mask&target.Mask != 0
}

type winPackage struct {
	groups              []*GROUP_AFFINITY
	coreCount           int // performance cores = coreCount - efficiencyCoreCount
	efficiencyCoreCount int
	threadCount         int
}

func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
	for _, group := range pkg.groups {
		if group.IsMember(target) {
			return true
		}
	}
	return false
}

func getLogicalProcessorInformationEx() ([]byte, error) {
	buf := make([]byte, 1)
	bufSize := len(buf)
	ret, _, err := GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret != 0 {
		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
	}

	buf = make([]byte, bufSize)
	ret, _, err = GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret == 0 {
		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
	}
	return buf, nil
}

func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
	var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
	// Find all the packages first
	packages := []*winPackage{}
	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
		if slpi.Relationship != RelationProcessorPackage {
			continue
		}
		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
		pkg := &winPackage{}
		ga0 := unsafe.Pointer(&pr.GroupMask[0])
		for j := range pr.GroupCount {
			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
			pkg.groups = append(pkg.groups, gm)
		}
		packages = append(packages, pkg)
	}

	slog.Info("packages", "count", len(packages))

	// To identify efficiency cores we have to compare the relative values
	// Larger values are "less efficient" (aka, more performant)
	var maxEfficiencyClass byte
	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
		if slpi.Relationship != RelationProcessorCore {
			continue
		}
		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
		if pr.EfficiencyClass > maxEfficiencyClass {
			maxEfficiencyClass = pr.EfficiencyClass
		}
	}
	if maxEfficiencyClass > 0 {
		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
	}

	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
		if slpi.Relationship != RelationProcessorCore {
			continue
		}
		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
		ga0 := unsafe.Pointer(&pr.GroupMask[0])
		for j := range pr.GroupCount {
			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
			for _, pkg := range packages {
				if pkg.IsMember(gm) {
					pkg.coreCount++
					if pr.Flags == 0 {
						pkg.threadCount++
					} else {
						pkg.threadCount += 2
					}
					if pr.EfficiencyClass < maxEfficiencyClass {
						pkg.efficiencyCoreCount++
					}
				}
			}
		}
	}

	// Summarize the results
	for i, pkg := range packages {
		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
	}

	return packages
}

func GetCPUDetails() ([]CPU, error) {
	buf, err := getLogicalProcessorInformationEx()
	if err != nil {
		return nil, err
	}
	packages := processSystemLogicalProcessorInforationList(buf)
	cpus := make([]CPU, len(packages))

	for i, pkg := range packages {
		cpus[i].CoreCount = pkg.coreCount
		cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
		cpus[i].ThreadCount = pkg.threadCount
	}
	return cpus, nil
}
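On both platforms the resulting `CPU` records (core, efficiency-core, and thread counts per socket) can drive thread-pool sizing. A hypothetical in-package consumer, not part of the diff (assumes `runtime` is imported):

```go
// defaultThreads is a hypothetical helper: size the worker pool to one
// thread per performance core, falling back to the logical CPU count when
// detection fails.
func defaultThreads() int {
	cpus, err := GetCPUDetails()
	if err != nil {
		return runtime.NumCPU()
	}
	n := 0
	for _, c := range cpus {
		n += c.CoreCount - c.EfficiencyCoreCount
	}
	if n < 1 {
		n = 1
	}
	return n
}
```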
File diff suppressed because one or more lines are too long
340
docs/api.md
@@ -40,7 +40,6 @@ Generate a response for a given prompt with a provided model. This is a streamin
 
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
-- `suffix`: the text after the model response
 - `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
 
 Advanced parameters (optional):
@@ -58,8 +57,7 @@ Advanced parameters (optional):
 
 Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
 
-> [!IMPORTANT]
-> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
 
 ### Examples
 
@@ -69,7 +67,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt": "Why is the sky blue?"
 }'
 ```
@@ -80,7 +78,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "response": "The",
   "done": false
@@ -102,7 +100,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "",
   "done": true,
@@ -124,7 +122,7 @@ A response can be received in one reply when streaming is off.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt": "Why is the sky blue?",
   "stream": false
 }'
@@ -136,7 +134,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
   "done": true,
@@ -150,51 +148,15 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 
-#### Request (with suffix)
-
-##### Request
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "codellama:code",
-  "prompt": "def compute_gcd(a, b):",
-  "suffix": "    return result",
-  "options": {
-    "temperature": 0
-  },
-  "stream": false
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "codellama:code",
-  "created_at": "2024-07-22T20:47:51.147561Z",
-  "response": "\n  if a == 0:\n    return b\n  else:\n    return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n  result = (a * b) / compute_gcd(a, b)\n",
-  "done": true,
-  "done_reason": "stop",
-  "context": [...],
-  "total_duration": 1162761250,
-  "load_duration": 6683708,
-  "prompt_eval_count": 17,
-  "prompt_eval_duration": 201222000,
-  "eval_count": 63,
-  "eval_duration": 953997000
-}
-```
-
 #### Request (JSON mode)
 
-> [!IMPORTANT]
 > When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
 
 ##### Request
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt": "What color is the sky at different times of the day? Respond using JSON",
   "format": "json",
   "stream": false
@@ -205,7 +167,7 @@ curl http://localhost:11434/api/generate -d '{
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-11-09T21:07:55.186497Z",
   "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
   "done": true,
@@ -327,7 +289,7 @@ If you want to set custom options for the model at runtime rather than in the Mo
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt": "Why is the sky blue?",
   "stream": false,
   "options": {
@@ -336,7 +298,6 @@ curl http://localhost:11434/api/generate -d '{
     "num_predict": 100,
     "top_k": 20,
     "top_p": 0.9,
-    "min_p": 0.0,
     "tfs_z": 0.5,
     "typical_p": 0.7,
     "repeat_last_n": 33,
@@ -368,7 +329,7 @@ curl http://localhost:11434/api/generate -d '{
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "The sky is blue because it is the color of the sky.",
   "done": true,
@@ -390,7 +351,7 @@ If an empty prompt is provided, the model will be loaded into memory.
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2"
+  "model": "llama3"
 }'
 ```
 
@@ -400,40 +361,13 @@ A single JSON object is returned:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-12-18T19:52:07.071755Z",
   "response": "",
   "done": true
 }
 ```
 
-#### Unload a model
-
-If an empty prompt is provided and the `keep_alive` parameter is set to `0`, a model will be unloaded from memory.
-
-##### Request
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
-  "keep_alive": 0
-}'
-```
-
-##### Response
-
-A single JSON object is returned:
-
-```json
-{
-  "model": "llama3.2",
-  "created_at": "2024-09-12T03:54:03.516566Z",
-  "response": "",
-  "done": true,
-  "done_reason": "unload"
-}
-```
-
 ## Generate a chat completion
 
 ```shell
@@ -446,14 +380,12 @@ Generate the next message in a chat with a provided model. This is a streaming e
 
 - `model`: (required) the [model name](#model-names)
 - `messages`: the messages of the chat, this can be used to keep a chat memory
-- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
 
 The `message` object has the following fields:
 
-- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
+- `role`: the role of the message, either `system`, `user` or `assistant`
 - `content`: the content of the message
 - `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
-- `tool_calls` (optional): a list of tools the model wants to use
 
 Advanced parameters (optional):
@@ -472,7 +404,7 @@ Send a chat message with a streaming response.
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "messages": [
     {
       "role": "user",
@@ -488,7 +420,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "message": {
     "role": "assistant",
@@ -503,7 +435,7 @@ Final response:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "done": true,
   "total_duration": 4883583458,
@@ -521,7 +453,7 @@ Final response:
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "messages": [
     {
       "role": "user",
@@ -536,7 +468,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "registry.ollama.ai/library/llama3:latest",
   "created_at": "2023-12-12T14:13:43.416799Z",
   "message": {
     "role": "assistant",
@@ -560,7 +492,7 @@ Send a chat message with a conversation history. You can use this same approach
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "messages": [
     {
       "role": "user",
@@ -584,7 +516,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "message": {
     "role": "assistant",
@@ -598,7 +530,7 @@ Final response:
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "llama3",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "done": true,
   "total_duration": 8113331500,
@@ -614,7 +546,7 @@ Final response:
 
 ##### Request
 
-Send a chat message with images. The images should be provided as an array, with the individual images encoded in Base64.
+Send a chat message with a conversation history.
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
@@ -656,7 +588,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```shell
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "messages": [
     {
       "role": "user",
@@ -674,7 +606,7 @@ curl http://localhost:11434/api/chat -d '{
 
 ```json
 {
-  "model": "llama3.2",
+  "model": "registry.ollama.ai/library/llama3:latest",
   "created_at": "2023-12-12T14:13:43.416799Z",
   "message": {
     "role": "assistant",
@@ -690,137 +622,6 @@ curl http://localhost:11434/api/chat -d '{
 }
 ```
 
-#### Chat request (with tools)
-
-##### Request
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the weather today in Paris?"
-    }
-  ],
-  "stream": false,
-  "tools": [
-    {
-      "type": "function",
-      "function": {
-        "name": "get_current_weather",
-        "description": "Get the current weather for a location",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "location": {
-              "type": "string",
-              "description": "The location to get the weather for, e.g. San Francisco, CA"
-            },
-            "format": {
-              "type": "string",
-              "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
-              "enum": ["celsius", "fahrenheit"]
-            }
-          },
-          "required": ["location", "format"]
-        }
-      }
-    }
-  ]
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "llama3.2",
-  "created_at": "2024-07-22T20:33:28.123648Z",
-  "message": {
-    "role": "assistant",
-    "content": "",
-    "tool_calls": [
-      {
-        "function": {
-          "name": "get_current_weather",
-          "arguments": {
-            "format": "celsius",
-            "location": "Paris, FR"
-          }
-        }
-      }
-    ]
-  },
-  "done_reason": "stop",
-  "done": true,
-  "total_duration": 885095291,
-  "load_duration": 3753500,
-  "prompt_eval_count": 122,
-  "prompt_eval_duration": 328493000,
-  "eval_count": 33,
-  "eval_duration": 552222000
-}
-```
-
-#### Load a model
-
-If the messages array is empty, the model will be loaded into memory.
-
-##### Request
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
-  "messages": []
-}'
-```
-
-##### Response
-
-```json
-{
-  "model": "llama3.2",
-  "created_at":"2024-09-12T21:17:29.110811Z",
-  "message": {
-    "role": "assistant",
-    "content": ""
-  },
-  "done_reason": "load",
-  "done": true
-}
-```
-
-#### Unload a model
-
-If the messages array is empty and the `keep_alive` parameter is set to `0`, a model will be unloaded from memory.
-
-##### Request
-
-```
-curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
-  "messages": [],
-  "keep_alive": 0
-}'
-```
-
-##### Response
-
-A single JSON object is returned:
-
-```json
-{
-  "model": "llama3.2",
-  "created_at":"2024-09-12T21:33:17.547535Z",
-  "message": {
-    "role": "assistant",
-    "content": ""
-  },
-  "done_reason": "unload",
-  "done": true
-}
-```
-
 ## Create a Model
 
 ```shell
@@ -989,7 +790,7 @@ Show information about a model including details, modelfile, template, parameter
 
 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama3.2"
+  "name": "llama3"
 }'
 ```
 
@@ -1050,7 +851,7 @@ Copy a model. Creates a model with another name from an existing model.
 
 ```shell
 curl http://localhost:11434/api/copy -d '{
-  "source": "llama3.2",
+  "source": "llama3",
   "destination": "llama3-backup"
 }'
 ```
@@ -1105,7 +906,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 
 ```shell
 curl http://localhost:11434/api/pull -d '{
-  "name": "llama3.2"
+  "name": "llama3"
 }'
 ```
 
@@ -1225,7 +1026,7 @@ If `stream` is set to `false`, then the response is a single JSON object:
 ## Generate Embeddings
 
 ```shell
-POST /api/embed
+POST /api/embeddings
 ```
 
 Generate embeddings from a model
@@ -1233,11 +1034,10 @@ Generate embeddings from a model
 ### Parameters
 
 - `model`: name of model to generate embeddings from
-- `input`: text or list of text to generate embeddings for
+- `prompt`: text to generate embeddings for
 
 Advanced parameters:
 
-- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 
@@ -1246,9 +1046,9 @@ Advanced parameters:
 #### Request
 
 ```shell
-curl http://localhost:11434/api/embed -d '{
+curl http://localhost:11434/api/embeddings -d '{
   "model": "all-minilm",
-  "input": "Why is the sky blue?"
+  "prompt": "Here is an article about llamas..."
 }'
 ```
 
@@ -1256,38 +1056,10 @@ curl http://localhost:11434/api/embed -d '{
 
 ```json
 {
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ]],
-  "total_duration": 14143917,
-  "load_duration": 1019500,
-  "prompt_eval_count": 8
-}
-```
-
-#### Request (Multiple input)
-
-```shell
-curl http://localhost:11434/api/embed -d '{
-  "model": "all-minilm",
-  "input": ["Why is the sky blue?", "Why is the grass green?"]
-}'
-```
-
-#### Response
-
-```json
-{
-  "model": "all-minilm",
-  "embeddings": [[
-    0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
-    0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
-  ],[
-    -0.0098027075, 0.06042469, 0.025257962, -0.006364387, 0.07272725,
-    0.017194884, 0.09032035, -0.051705178, 0.09951512, 0.09072481
-  ]]
+  "embedding": [
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
+  ]
 }
 ```
 
@@ -1334,45 +1106,3 @@ A single JSON object will be returned.
 ]
 }
 ```
-
-## Generate Embedding
-
-> Note: this endpoint has been superseded by `/api/embed`
-
-```shell
-POST /api/embeddings
-```
-
-Generate embeddings from a model
-
-### Parameters
-
-- `model`: name of model to generate embeddings from
-- `prompt`: text to generate embeddings for
-
-Advanced parameters:
-
-- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
-- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
-
-### Examples
-
-#### Request
-
-```shell
-curl http://localhost:11434/api/embeddings -d '{
-  "model": "all-minilm",
-  "prompt": "Here is an article about llamas..."
-}'
-```
-
-#### Response
-
-```json
-{
-  "embedding": [
-    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
-    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
-  ]
-}
-```
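All of the endpoints above are plain JSON over HTTP, so the curl examples translate directly to any language. A minimal non-streaming Go client for `/api/generate`, matching the request and response fields shown above (a sketch, not an official client; error handling kept deliberately terse):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`
}

type generateResponse struct {
	Response string `json:"response"`
	Done     bool   `json:"done"`
}

func main() {
	body, _ := json.Marshal(generateRequest{Model: "llama3", Prompt: "Why is the sky blue?", Stream: false})
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// with "stream": false the server returns a single JSON object
	var out generateResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response)
}
```

With `"stream": true` (the default) the body would instead be a sequence of JSON objects, one per line, decoded in a loop until `done` is true.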
docs/development.md

@@ -2,13 +2,15 @@
 
 Install required tools:
 
+- cmake version 3.24 or higher
 - go version 1.22 or higher
 - gcc version 11.4.0 or higher
 
 ### MacOS
 
-[Download Go](https://go.dev/dl/)
+```bash
+brew install go cmake gcc
+```
 
 Optionally enable debugging and more verbose logging:
 
@@ -20,10 +22,10 @@ export CGO_CFLAGS="-g"
 export OLLAMA_DEBUG=1
 ```
 
-Get the required libraries and build the native LLM code: (Adjust the job count based on your number of processors for a faster build)
+Get the required libraries and build the native LLM code:
 
 ```bash
-make -j 5
+go generate ./...
 ```
 
 Then build ollama:
@@ -38,17 +40,13 @@ Now you can run `ollama`:
 ./ollama
 ```
 
-#### Xcode 15 warnings
-
-If you are using Xcode newer than version 14, you may see a warning during `go build` about `ld: warning: ignoring duplicate libraries: '-lobjc'` due to Golang issue https://github.com/golang/go/issues/67799 which can be safely ignored. You can suppress the warning with `export CGO_LDFLAGS="-Wl,-no_warn_duplicate_libraries"`
-
 ### Linux
 
 #### Linux CUDA (NVIDIA)
 
 _Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
 
-Install `make`, `gcc` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
+Install `cmake` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
 development and runtime packages.
 
 Typically the build scripts will auto-detect CUDA, however, if your Linux distro
@@ -57,10 +55,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
 
-Then generate dependencies: (Adjust the job count based on your number of processors for a faster build)
+Then generate dependencies:
 
 ```
-make -j 5
+go generate ./...
 ```
 
 Then build the binary:
@@ -73,7 +71,7 @@ go build .
 
 _Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
 
-Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `make`, `gcc`, and `golang`.
+Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `cmake` and `golang`.
 
 Typically the build scripts will auto-detect ROCm, however, if your Linux distro
 or installation approach uses unusual paths, you can specify the location by
@@ -82,10 +80,8 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
 
-Then generate dependencies: (Adjust the job count based on your number of processors for a faster build)
-
 ```
-make -j 5
+go generate ./...
 ```
 
 Then build the binary:
@@ -98,75 +94,57 @@ ROCm requires elevated privileges to access the GPU at runtime. On most distros
 
 #### Advanced CPU Settings
 
-By default, running `make` will compile a few different variations
+By default, running `go generate ./...` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
-load.
+load. If you would like to build a CPU-based build customized for your
+processor, you can set `OLLAMA_CUSTOM_CPU_DEFS` to the llama.cpp flags you would
+like to use. For example, to compile an optimized binary for an Intel i9-9880H,
+you might use:
 
-Custom CPU settings are not currently supported in the new Go server build but will be added back after we complete the transition.
+```
+OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./...
+go build .
+```
 
 #### Containerized Linux Build
 
-If you have Docker available, you can build linux binaries with `OLLAMA_NEW_RUNNERS=1 ./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
+If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
 
 ### Windows
 
-The following tools are required as a minimal development environment to build CPU inference support.
+Note: The Windows build for Ollama is still under development.
+
+First, install required tools:
 
+- MSVC toolchain - C/C++ and cmake as minimal requirements
 - Go version 1.22 or higher
-  - https://go.dev/dl/
-- Git
-  - https://git-scm.com/download/win
-- GCC and Make. There are multiple options on how to go about installing these tools on Windows. We have verified the following, but others may work as well:
+- MinGW (pick one variant) with GCC.
+  - [MinGW-w64](https://www.mingw-w64.org/)
   - [MSYS2](https://www.msys2.org/)
-  - After installing, from an MSYS2 terminal, run `pacman -S mingw-w64-ucrt-x86_64-gcc make` to install the required tools
-- Assuming you used the default install prefix for msys2 above, add `c:\msys64\ucrt64\bin` and `c:\msys64\usr\bin` to your environment variable `PATH` where you will perform the build steps below (e.g. system-wide, account-level, powershell, cmd, etc.)
+- The `ThreadJob` Powershell module: `Install-Module -Name ThreadJob -Scope CurrentUser`
 
 Then, build the `ollama` binary:
 
 ```powershell
 $env:CGO_ENABLED="1"
-make -j 8
+go generate ./...
 go build .
 ```
 
-#### GPU Support
-
-The GPU tools require the Microsoft native build tools. To build either CUDA or ROCm, you must first install MSVC via Visual Studio:
-
-- Make sure to select `Desktop development with C++` as a Workload during the Visual Studio install
-- You must complete the Visual Studio install and run it once **BEFORE** installing CUDA or ROCm for the tools to properly register
-- Add the location of the **64 bit (x64)** compiler (`cl.exe`) to your `PATH`
-- Note: the default Developer Shell may configure the 32 bit (x86) compiler which will lead to build failures. Ollama requires a 64 bit toolchain.
-
 #### Windows CUDA (NVIDIA)
 
-In addition to the common Windows development tools and MSVC described above:
+In addition to the common Windows development tools described above, install CUDA after installing MSVC.
 
 - [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
 
 #### Windows ROCm (AMD Radeon)
 
-In addition to the common Windows development tools and MSVC described above:
+In addition to the common Windows development tools described above, install AMD's HIP package after installing MSVC.
 
 - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
+- [Strawberry Perl](https://strawberryperl.com/)
 
-#### Windows arm64
-
-The default `Developer PowerShell for VS 2022` may default to x86 which is not what you want. To ensure you get an arm64 development environment, start a plain PowerShell terminal and run:
-
-```powershell
-import-module 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\Tools\\Microsoft.VisualStudio.DevShell.dll'
-Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community' -skipautomaticlocation
-```
-
-You can confirm with `write-host $env:VSCMD_ARG_TGT_ARCH`
-
-Follow the instructions at https://www.msys2.org/wiki/arm64/ to set up an arm64 msys2 environment. Ollama requires gcc and mingw32-make to compile, which is not currently available on Windows arm64, but a gcc compatibility adapter is available via `mingw-w64-clang-aarch64-gcc-compat`. At a minimum you will need to install the following:
-
-```
-pacman -S mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-gcc-compat mingw-w64-clang-aarch64-make make
-```
-
-You will need to ensure your PATH includes go, cmake, gcc and clang mingw32-make to build ollama from source. (typically `C:\msys64\clangarm64\bin\`)
+Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
docs/docker.md

@@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114
 Now you can run a model:
 
 ```
-docker exec -it ollama ollama run llama3.2
+docker exec -it ollama ollama run llama3
 ```
 
 ### Try different models
31
docs/faq.md
@@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter:
 
 ```shell
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
   "prompt": "Why is the sky blue?",
   "options": {
     "num_ctx": 4096
@@ -111,10 +111,7 @@ On Windows, Ollama inherits your user and system environment variables.
 
 ## How do I use Ollama behind a proxy?
 
-Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
-
-> [!NOTE]
-> Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server.
+Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variable, ensure it is set where `ollama serve` can access the value. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
 
 ### How do I use Ollama behind a proxy in Docker?
 
@@ -194,8 +191,6 @@ Refer to the section [above](#how-do-i-configure-ollama-server) for how to set e
 
 If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory.
 
-> Note: on Linux using the standard installer, the `ollama` user needs read and write access to the specified directory. To assign the directory to the `ollama` user run `sudo chown -R ollama:ollama <directory>`.
-
 Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
 
 ## How can I use Ollama in Visual Studio Code?
@@ -232,18 +227,14 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
 
 To preload a model using the CLI, use the command:
 ```shell
-ollama run llama3.2 ""
+ollama run llama3 ""
 ```
 
 ## How do I keep a model loaded in memory or make it unload immediately?
 
-By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command:
-
-```shell
-ollama stop llama3.2
-```
-
-If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to:
+By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you are making numerous requests to the LLM. You may, however, want to free up the memory before the 5 minutes have elapsed, or keep the model loaded indefinitely. Use the `keep_alive` parameter with either the `/api/generate` or `/api/chat` API endpoint to control how long the model is left in memory.
+
+The `keep_alive` parameter can be set to:
 * a duration string (such as "10m" or "24h")
 * a number in seconds (such as 3600)
 * any negative number which will keep the model loaded in memory (e.g. -1 or "-1m")
@@ -251,17 +242,17 @@ If you're using the API, use the `keep_alive` parameter with the `/api/generate`
 
 For example, to preload a model and leave it in memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": -1}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": -1}'
 ```
 
 To unload the model and free up memory use:
 ```shell
-curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": 0}'
+curl http://localhost:11434/api/generate -d '{"model": "llama3", "keep_alive": 0}'
 ```
 
-Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
+Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable.
 
-The `keep_alive` API parameter with the `/api/generate` and `/api/chat` API endpoints will override the `OLLAMA_KEEP_ALIVE` setting.
+If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoint.
 
 ## How do I manage the maximum number of requests the Ollama server can queue?
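For reference, the three accepted `keep_alive` forms can also be exercised programmatically. A small Go sketch mirroring the curl examples above (illustrative only; the payloads are the only part taken from the docs):

```go
package main

import (
	"log"
	"net/http"
	"strings"
)

func main() {
	payloads := []string{
		`{"model": "llama3", "keep_alive": -1}`,    // negative number: keep loaded indefinitely
		`{"model": "llama3", "keep_alive": "10m"}`, // duration string
		`{"model": "llama3", "keep_alive": 0}`,     // zero: unload immediately
	}
	for _, p := range payloads {
		resp, err := http.Post("http://localhost:11434/api/generate", "application/json", strings.NewReader(p))
		if err != nil {
			log.Fatal(err)
		}
		resp.Body.Close()
	}
}
```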
@@ -282,7 +273,3 @@ The following server settings may be used to adjust how Ollama handles concurren
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512
 
 Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6.2 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM.
 
-## How does Ollama load models on multiple GPUs?
-
-Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance, as it reduces the amount of data transferring across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
17
docs/gpu.md
@@ -10,7 +10,7 @@ Check your compute compatibility to see if your card is supported:

| 9.0 | NVIDIA | `H100` |
| 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060` |
| | NVIDIA Professional | `L4` `L40` `RTX 6000` |
-| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` `RTX 3050 Ti` `RTX 3050` |
+| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` |
| | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` |
| 8.0 | NVIDIA | `A100` `A30` |
| 7.5 | GeForce GTX/RTX | `GTX 1650 Ti` `TITAN RTX` `RTX 2080 Ti` `RTX 2080` `RTX 2070` `RTX 2060` |
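On Linux, one way to check a card's compute capability locally; the `compute_cap` query field assumes a reasonably recent NVIDIA driver:

```shell
nvidia-smi --query-gpu=name,compute_cap --format=csv
```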
@@ -46,24 +46,13 @@ sudo modprobe nvidia_uvm`

## AMD Radeon

Ollama supports the following AMD GPUs:

-### Linux Support

| Family | Cards and accelerators |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900 XT` `6800 XT` `6800` `Vega 64` `Vega 56` |
| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` `V420` `V340` `V320` `Vega II Duo` `Vega II` `VII` `SSG` |
| AMD Instinct | `MI300X` `MI300A` `MI300` `MI250X` `MI250` `MI210` `MI200` `MI100` `MI60` `MI50` |

-### Windows Support
+### Overrides
-With ROCm v6.1, the following GPUs are supported on Windows.
-
-| Family | Cards and accelerators |
-| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| AMD Radeon RX | `7900 XTX` `7900 XT` `7900 GRE` `7800 XT` `7700 XT` `7600 XT` `7600` `6950 XT` `6900 XTX` `6900 XT` `6800 XT` `6800` |
-| AMD Radeon PRO | `W7900` `W7800` `W7700` `W7600` `W7500` `W6900X` `W6800X Duo` `W6800X` `W6800` `V620` |

-### Overrides on Linux

Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
some cases you can force the system to try to use a similar LLVM target that is
close. For example, the Radeon RX 5400 is `gfx1034` (also known as 10.3.4)
@@ -74,7 +63,7 @@ would set `HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the
server. If you have an unsupported AMD GPU you can experiment using the list of
supported types below.

-At this time, the known supported GPU types on linux are the following LLVM Targets.
+At this time, the known supported GPU types are the following LLVM Targets.
This table shows some example GPUs that map to these LLVM targets:
| **LLVM Target** | **An Example GPU** |
|-----------------|---------------------|
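Applied to the RX 5400 example above, a minimal sketch when launching the server manually:

```shell
# gfx1034 (10.3.4) is unsupported, so force the closest supported target, 10.3.0
HSA_OVERRIDE_GFX_VERSION="10.3.0" ollama serve
```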
Binary file removed (image, 150 KiB; not shown)
Binary file removed (image, 80 KiB; not shown)
docs/import.md (188 changes)
@@ -1,129 +1,42 @@

-# Importing a model
+# Import

-## Table of Contents
+GGUF models and select Safetensors models can be imported directly into Ollama.

-* [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
+## Import GGUF
-* [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
-* [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
-* [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)

-## Importing a fine tuned adapter from Safetensors weights
+A binary GGUF file can be imported directly into Ollama through a Modelfile.

-First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:

-```dockerfile
-FROM <base model name>
-ADAPTER /path/to/safetensors/adapter/directory
-```

-Make sure that you use the same base model in the `FROM` command as you used to create the adapter, otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.

-Now run `ollama create` from the directory where the `Modelfile` was created:

-```bash
-ollama create my-model
-```

-Lastly, test the model:

-```bash
-ollama run my-model
-```

-Ollama supports importing adapters based on several different model architectures including:

-* Llama (including Llama 2, Llama 3, and Llama 3.1);
-* Mistral (including Mistral 1, Mistral 2, and Mixtral); and
-* Gemma (including Gemma 1 and Gemma 2)

-You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:

-* Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
-* [Unsloth](https://github.com/unslothai/unsloth)
-* [MLX](https://github.com/ml-explore/mlx)

-## Importing a model from Safetensors weights

-First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:

-```dockerfile
-FROM /path/to/safetensors/directory
-```

-If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.

-Now run the `ollama create` command from the directory where you created the `Modelfile`:

-```shell
-ollama create my-model
-```

-Lastly, test the model:

-```shell
-ollama run my-model
-```

-Ollama supports importing models for several different architectures including:

-* Llama (including Llama 2, Llama 3, and Llama 3.1);
-* Mistral (including Mistral 1, Mistral 2, and Mixtral);
-* Gemma (including Gemma 1 and Gemma 2); and
-* Phi3

-This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.

-## Importing a GGUF based model or adapter

-If you have a GGUF based model or adapter, it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:

-* converting a Safetensors model with the `convert_hf_to_gguf.py` script from Llama.cpp (see the conversion sketch below);
-* converting a Safetensors adapter with the `convert_lora_to_gguf.py` script from Llama.cpp; or
-* downloading a model or adapter from a place such as HuggingFace
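A minimal conversion sketch using the llama.cpp scripts named above; the `--outfile` flag reflects current llama.cpp usage, and the paths are placeholders:

```shell
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
pip install -r requirements.txt

# convert a Safetensors model directory into a single GGUF file
python convert_hf_to_gguf.py /path/to/safetensors/model --outfile model.gguf
```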
-To import a GGUF model, create a `Modelfile` containing:

```dockerfile
FROM /path/to/file.gguf
```

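Then build and test the model from the directory containing the `Modelfile` (`my-model` is a placeholder name):

```shell
ollama create my-model
ollama run my-model
```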
-For a GGUF adapter, create the `Modelfile` with:
+## Import Safetensors

+If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:

+- LlamaForCausalLM
+- MistralForCausalLM
+- GemmaForCausalLM

```dockerfile
-FROM <model name>
+FROM /path/to/safetensors/directory
-ADAPTER /path/to/file.gguf
```

-When importing a GGUF adapter, it's important to use the same base model that the adapter was created with. You can use:
+For architectures not directly convertible by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).

-* a model from Ollama
+## Automatic Quantization
-* a GGUF file
-* a Safetensors based model

-Once you have created your `Modelfile`, use the `ollama create` command to build the model.
+> [!NOTE]
+> Automatic quantization requires v0.1.35 or higher.

-```shell
+Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
-ollama create my-model
-```

-## Quantizing a Model

-Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.

-Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.

-First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.

```dockerfile
FROM /path/to/my/gemma/f16/model
```

-Use `ollama create` to then create the quantized model.

```shell
-$ ollama create --quantize q4_K_M mymodel
+$ ollama create -q Q4_K_M mymodel
transferring model data
quantizing F16 model to Q4_K_M
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
@@ -134,53 +47,42 @@ success

### Supported Quantizations

-- `q4_0`
+- `Q4_0`
-- `q4_1`
+- `Q4_1`
-- `q5_0`
+- `Q5_0`
-- `q5_1`
+- `Q5_1`
-- `q8_0`
+- `Q8_0`

#### K-means Quantizations

-- `q3_K_S`
+- `Q3_K_S`
-- `q3_K_M`
+- `Q3_K_M`
-- `q3_K_L`
+- `Q3_K_L`
-- `q4_K_S`
+- `Q4_K_S`
-- `q4_K_M`
+- `Q4_K_M`
-- `q5_K_S`
+- `Q5_K_S`
-- `q5_K_M`
+- `Q5_K_M`
-- `q6_K`
+- `Q6_K`

+## Template Detection

-## Sharing your model on ollama.com
+> [!NOTE]
+> Template detection requires v0.1.42 or higher.

-You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
+Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.

-First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
+```dockerfile
+FROM /path/to/my/gemma/model
-<img src="images/signup.png" alt="Sign-Up" width="40%">

-The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.

-Now that you have created an account and are signed-in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.

-Follow the directions on the page to determine where your Ollama Public Key is located.

-<img src="images/ollama-keys.png" alt="Ollama Keys" width="80%">

-Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.

-To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
-your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).

-```shell
-ollama cp mymodel myuser/mymodel
-ollama push myuser/mymodel
```

-Once your model has been pushed, other users can pull and run it by using the command:

```shell
-ollama run myuser/mymodel
+$ ollama create mymodel
+transferring model data
+using autodetected template gemma-instruct
+creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
+creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
+writing manifest
+success
```

+Defining a template in the Modelfile will disable this feature, which may be useful if you want to use a different template than the autodetected one.
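A sketch of overriding the autodetected template, written as a shell heredoc; the path is a placeholder and the template shown is illustrative, using only documented template variables:

```shell
cat > Modelfile <<'EOF'
FROM /path/to/my/gemma/model
TEMPLATE """{{ if .System }}{{ .System }}
{{ end }}{{ .Prompt }}"""
EOF
ollama create mymodel
```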
docs/linux.md (111 changes)
@@ -1,59 +1,40 @@

-# Linux
+# Ollama on Linux

## Install

-To install Ollama, run the following command:
+Install Ollama by running this one-liner:

-```shell
+>

+```bash
curl -fsSL https://ollama.com/install.sh | sh
```

+## AMD Radeon GPU support

+While AMD has contributed the `amdgpu` driver upstream to the official linux
+kernel source, the version is older and may not support all ROCm features. We
+recommend you install the latest driver from
+https://www.amd.com/en/support/linux-drivers for best support of your Radeon
+GPU.

## Manual install

-Download and extract the package:
+### Download the `ollama` binary

-```shell
+Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
-curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
-sudo tar -C /usr -xzf ollama-linux-amd64.tgz
-```

-Start Ollama:
+```bash
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
-```shell
+sudo chmod +x /usr/bin/ollama
-ollama serve
-```

-In another terminal, verify that Ollama is running:

-```shell
-ollama -v
-```

-### AMD GPU install

-If you have an AMD GPU, also download and extract the additional ROCm package:

-```shell
-curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz
-sudo tar -C /usr -xzf ollama-linux-amd64-rocm.tgz
-```

-### ARM64 install

-Download and extract the ARM64-specific package:

-```shell
-curl -L https://ollama.com/download/ollama-linux-arm64.tgz -o ollama-linux-arm64.tgz
-sudo tar -C /usr -xzf ollama-linux-arm64.tgz
```

### Adding Ollama as a startup service (recommended)

-Create a user and group for Ollama:
+Create a user for Ollama:

-```shell
+```bash
-sudo useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama
+sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
-sudo usermod -a -G ollama $(whoami)
```

Create a service file in `/etc/systemd/system/ollama.service`:
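For reference, a complete unit file sketched as a shell heredoc, consistent with the fragments shown in the next hunk; the `[Unit]` entries are typical values not shown in this diff, and the `ExecStart` path assumes the binary was installed to `/usr/bin/ollama` as above:

```shell
sudo tee /etc/systemd/system/ollama.service >/dev/null <<'EOF'
[Unit]
Description=Ollama Service
After=network-online.target

[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="PATH=$PATH"

[Install]
WantedBy=default.target
EOF
```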
@@ -69,7 +50,6 @@ User=ollama

Group=ollama
Restart=always
RestartSec=3
-Environment="PATH=$PATH"

[Install]
WantedBy=default.target
@@ -77,54 +57,47 @@ WantedBy=default.target

Then start the service:

-```shell
+```bash
sudo systemctl daemon-reload
sudo systemctl enable ollama
```

-### Install CUDA drivers (optional)
+### Install CUDA drivers (optional – for Nvidia GPUs)

[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.

Verify that the drivers are installed by running the following command, which should print details about your GPU:

-```shell
+```bash
nvidia-smi
```

-### Install AMD ROCm drivers (optional)
+### Install ROCm (optional - for Radeon GPUs)
+[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)

-[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) ROCm v6.
+Make sure to install ROCm v6

### Start Ollama

-Start Ollama and verify it is running:
+Start Ollama using `systemd`:

-```shell
+```bash
sudo systemctl start ollama
-sudo systemctl status ollama
```

-> [!NOTE]
+## Update
-> While AMD has contributed the `amdgpu` driver upstream to the official linux
-> kernel source, the version is older and may not support all ROCm features. We
-> recommend you install the latest driver from
-> https://www.amd.com/en/support/linux-drivers for best support of your Radeon
-> GPU.

-## Updating
+Update Ollama by running the install script again:

-Update Ollama by running the install script again:
+```bash

-```shell
curl -fsSL https://ollama.com/install.sh | sh
```

-Or by re-downloading Ollama:
+Or by downloading the ollama binary:

-```shell
+```bash
-curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
+sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
-sudo tar -C /usr -xzf ollama-linux-amd64.tgz
+sudo chmod +x /usr/bin/ollama
```

## Installing specific versions

@@ -133,15 +106,15 @@ Use `OLLAMA_VERSION` environment variable with the install script to install a specific version

For example:

-```shell
+```
-curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.3.9 sh
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
```

## Viewing logs

To view logs of Ollama running as a startup service, run:

-```shell
+```bash
journalctl -e -u ollama
```

@@ -149,7 +122,7 @@ journalctl -e -u ollama

Remove the ollama service:

-```shell
+```bash
sudo systemctl stop ollama
sudo systemctl disable ollama
sudo rm /etc/systemd/system/ollama.service
@@ -157,13 +130,13 @@ sudo rm /etc/systemd/system/ollama.service

Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`):

-```shell
+```bash
sudo rm $(which ollama)
```

Remove the downloaded models and Ollama service user and group:

-```shell
+```bash
sudo rm -r /usr/share/ollama
sudo userdel ollama
sudo groupdel ollama
docs/modelfile.md
@@ -1,7 +1,6 @@

# Ollama Model File

-> [!NOTE]
+> Note: `Modelfile` syntax is in development
-> `Modelfile` syntax is in development

A model file is the blueprint to create and share models with Ollama.

@@ -11,9 +10,8 @@ A model file is the blueprint to create and share models with Ollama.
- [Examples](#examples)
- [Instructions](#instructions)
- [FROM (Required)](#from-required)
-- [Build from existing model](#build-from-existing-model)
+- [Build from llama3](#build-from-llama3)
-- [Build from a Safetensors model](#build-from-a-safetensors-model)
+- [Build from a bin file](#build-from-a-bin-file)
-- [Build from a GGUF file](#build-from-a-gguf-file)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template)
@@ -50,7 +48,7 @@ INSTRUCTION arguments
An example of a `Modelfile` creating a mario blueprint:

```modelfile
-FROM llama3.2
+FROM llama3
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
@@ -72,10 +70,10 @@ More examples are available in the [examples directory](../examples).
To view the Modelfile of a given model, use the `ollama show --modelfile` command.

```bash
-> ollama show --modelfile llama3.2
+> ollama show --modelfile llama3
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
-# FROM llama3.2:latest
+# FROM llama3:latest
FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>

@@ -100,39 +98,22 @@ The `FROM` instruction defines the base model to use when creating a model.
FROM <model name>:<tag>
```

-#### Build from existing model
+#### Build from llama3

```modelfile
-FROM llama3.2
+FROM llama3
```

A list of available base models:
<https://github.com/ollama/ollama#model-library>
-Additional models can be found at:
-<https://ollama.com/library>

-#### Build from a Safetensors model
+#### Build from a `bin` file

```modelfile
-FROM <model directory>
+FROM ./ollama-model.bin
```

-The model directory should contain the Safetensors weights for a supported architecture.
+This bin file location should be specified as an absolute path or relative to the `Modelfile` location.

-Currently supported model architectures:
-* Llama (including Llama 2, Llama 3, and Llama 3.1)
-* Mistral (including Mistral 1, Mistral 2, and Mixtral)
-* Gemma (including Gemma 1 and Gemma 2)
-* Phi3

-#### Build from a GGUF file

-```modelfile
-FROM ./ollama-model.gguf
-```

-The GGUF file location should be specified as an absolute path or relative to the `Modelfile` location.

### PARAMETER

@@ -159,7 +140,6 @@ PARAMETER <parameter> <parametervalue>
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
-| min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |

### TEMPLATE

@@ -192,23 +172,10 @@ SYSTEM """<system message>"""

### ADAPTER

-The `ADAPTER` instruction specifies a fine tuned LoRA adapter that should apply to the base model. The value of the adapter should be an absolute path or a path relative to the Modelfile. The base model should be specified with a `FROM` instruction. If the base model is not the same as the base model that the adapter was tuned from, the behaviour will be erratic.
+The `ADAPTER` instruction is an optional instruction that specifies any LoRA adapter that should apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile, and the file must be in a GGML file format. The adapter should be tuned from the base model, otherwise the behaviour is undefined.

-#### Safetensor adapter

```modelfile
-ADAPTER <path to safetensor adapter>
+ADAPTER ./ollama-lora.bin
-```

-Currently supported Safetensor adapters:
-* Llama (including Llama 2, Llama 3, and Llama 3.1)
-* Mistral (including Mistral 1, Mistral 2, and Mixtral)
-* Gemma (including Gemma 1 and Gemma 2)

-#### GGUF adapter

-```modelfile
-ADAPTER ./ollama-lora.gguf
```

### LICENSE
docs/openai.md (192 changes)
@@ -25,38 +25,12 @@ chat_completion = client.chat.completions.create(

'content': 'Say this is a test',
}
],
-model='llama3.2',
+model='llama3',
-)

-response = client.chat.completions.create(
-model="llava",
-messages=[
-{
-"role": "user",
-"content": [
-{"type": "text", "text": "What's in this image?"},
-{
-"type": "image_url",
"image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZ
M6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
-},
-],
-}
-],
-max_tokens=300,
)

completion = client.completions.create(
-model="llama3.2",
+model="llama3",
-prompt="Say this is a test",
+prompt="Say this is a test"
-)

-list_completion = client.models.list()

-model = client.models.retrieve("llama3.2")

-embeddings = client.embeddings.create(
-model="all-minilm",
-input=["why is the sky blue?", "why is the grass green?"],
)
```

@@ -73,48 +47,23 @@ const openai = new OpenAI({
})

const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }],
-model: 'llama3.2',
+model: 'llama3',
-})

-const response = await openai.chat.completions.create({
-model: "llava",
-messages: [
-{
-role: "user",
-content: [
-{ type: "text", text: "What's in this image?" },
-{
-type: "image_url",
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6
pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
-},
-],
-},
-],
})

const completion = await openai.completions.create({
-model: "llama3.2",
+model: "llama3",
prompt: "Say this is a test.",
})

-const listCompletion = await openai.models.list()

-const model = await openai.models.retrieve("llama3.2")

-const embedding = await openai.embeddings.create({
-model: "all-minilm",
-input: ["why is the sky blue?", "why is the grass green?"],
-})
```

### `curl`

-``` shell
+```
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
-"model": "llama3.2",
+"model": "llama3",
"messages": [
{
"role": "system",
@@ -127,47 +76,12 @@ curl http://localhost:11434/v1/chat/completions \
]
}'

-curl http://localhost:11434/v1/chat/completions \
--H "Content-Type: application/json" \
--d '{
-"model": "llava",
-"messages": [
-{
-"role": "user",
-"content": [
-{
-"type": "text",
-"text": "What'\''s in this image?"
-},
-{
-"type": "image_url",
-"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSET
sEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
-}
-}
-]
-}
-],
-"max_tokens": 300
-}'

curl http://localhost:11434/v1/completions \
-H "Content-Type: application/json" \
-d '{
-"model": "llama3.2",
+"model": "llama3",
"prompt": "Say this is a test"
}'

-curl http://localhost:11434/v1/models

-curl http://localhost:11434/v1/models/llama3.2

-curl http://localhost:11434/v1/embeddings \
--H "Content-Type: application/json" \
--d '{
-"model": "all-minilm",
-"input": ["why is the sky blue?", "why is the grass green?"]
-}'
```

## Endpoints

@@ -180,8 +94,8 @@ curl http://localhost:11434/v1/embeddings \
- [x] Streaming
- [x] JSON mode
- [x] Reproducible outputs
-- [x] Vision
+- [ ] Vision
-- [x] Tools (streaming support coming soon)
+- [ ] Function calling
- [ ] Logprobs

#### Supported request fields

@@ -189,10 +103,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] `model`
- [x] `messages`
- [x] Text `content`
-- [x] Image `content`
+- [ ] Array of `content` parts
-- [x] Base64 encoded image
-- [ ] Image URL
-- [x] Array of `content` parts
- [x] `frequency_penalty`
- [x] `presence_penalty`
- [x] `response_format`
@@ -202,9 +113,9 @@ curl http://localhost:11434/v1/embeddings \
- [x] `temperature`
- [x] `top_p`
- [x] `max_tokens`
-- [x] `tools`
-- [ ] `tool_choice`
- [ ] `logit_bias`
+- [ ] `tools`
+- [ ] `tool_choice`
- [ ] `user`
- [ ] `n`

@@ -241,40 +152,46 @@ curl http://localhost:11434/v1/embeddings \

- `prompt` currently only accepts a string

-### `/v1/models`
+### `/v1/completions`

-#### Notes
+#### Supported features

-- `created` corresponds to when the model was last modified
+- [x] Completions
-- `owned_by` corresponds to the ollama username, defaulting to `"library"`
+- [x] Streaming
+- [x] JSON mode
-### `/v1/models/{model}`
+- [x] Reproducible outputs
+- [ ] Logprobs

-#### Notes

-- `created` corresponds to when the model was last modified
-- `owned_by` corresponds to the ollama username, defaulting to `"library"`

-### `/v1/embeddings`

#### Supported request fields

- [x] `model`
-- [x] `input`
+- [x] `prompt`
-- [x] string
+- [x] `frequency_penalty`
-- [x] array of strings
+- [x] `presence_penalty`
-- [ ] array of tokens
+- [x] `seed`
-- [ ] array of token arrays
+- [x] `stop`
-- [ ] `encoding format`
+- [x] `stream`
-- [ ] `dimensions`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_tokens`
+- [ ] `best_of`
+- [ ] `echo`
+- [ ] `suffix`
+- [ ] `logit_bias`
- [ ] `user`
+- [ ] `n`

+#### Notes

+- `prompt` currently only accepts a string
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached

## Models

Before using a model, pull it locally with `ollama pull`:

```shell
-ollama pull llama3.2
+ollama pull llama3
```

### Default model names

@@ -282,7 +199,7 @@ ollama pull llama3.2
For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name:

```
-ollama cp llama3.2 gpt-3.5-turbo
+ollama cp llama3 gpt-3.5-turbo
```

Afterwards, this new model name can be specified in the `model` field:
@@ -300,28 +217,3 @@ curl http://localhost:11434/v1/chat/completions \
]
}'
```
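For illustration, the full request whose tail appears in the hunk above might look like this (reconstructed; it is not shown in the diff):

```shell
curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": "Hello!"
            }
        ]
    }'
```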

-### Setting the context size

-The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:

-```modelfile
-FROM <some model>
-PARAMETER num_ctx <context size>
-```

-Use the `ollama create mymodel` command to create a new model with the updated context size. Call the API with the updated model name:

-```shell
-curl http://localhost:11434/v1/chat/completions \
--H "Content-Type: application/json" \
--d '{
-"model": "mymodel",
-"messages": [
-{
-"role": "user",
-"content": "Hello!"
-}
-]
-}'
-```
docs/template.md (167 changes)
@@ -1,167 +0,0 @@

# Template

Ollama provides a powerful templating engine, backed by Go's built-in `text/template` package, to construct prompts for your large language model. This feature is a valuable tool for getting the most out of your models.

## Basic Template Structure

A basic Go template consists of three main parts:

* **Layout**: The overall structure of the template.
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.

Here's an example of a simple chat template:

```gotmpl
{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- end }}
```

In this example, we have:

* A basic messages structure (layout)
* Three variables: `Messages`, `Role`, and `Content` (variables)
* An action that iterates over the list of messages (`range .Messages`) and displays each item (functions)

## Adding templates to your model

By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.

Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.

To add templates to your model, add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3:

```dockerfile
FROM llama3.2

TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>
{{- end }}
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>

{{ .Content }}<|eot_id|>
{{- end }}<|start_header_id|>assistant<|end_header_id|>

"""
```

## Variables

`System` (string): system prompt

`Prompt` (string): user prompt

`Response` (string): assistant response

`Suffix` (string): text inserted after the assistant's response

`Messages` (list): list of messages

`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`

`Messages[].Content` (string): message content

`Messages[].ToolCalls` (list): list of tools the model wants to call

`Messages[].ToolCalls[].Function` (object): function to call

`Messages[].ToolCalls[].Function.Name` (string): function name

`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value

`Tools` (list): list of tools the model can access

`Tools[].Type` (string): schema type. `type` is always `function`

`Tools[].Function` (object): function definition

`Tools[].Function.Name` (string): function name

`Tools[].Function.Description` (string): function description

`Tools[].Function.Parameters` (object): function parameters

`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`

`Tools[].Function.Parameters.Required` (list): list of required properties

`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition

`Tools[].Function.Parameters.Properties[].Type` (string): property type

`Tools[].Function.Parameters.Properties[].Description` (string): property description

`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
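
For illustration only (this sketch is not the template of any particular model), a template might reference a few of these variables like so:

```gotmpl
{{- if .System }}system: {{ .System }}
{{- end }}
{{- range .Messages }}
{{ .Role }}: {{ .Content }}
{{- if .ToolCalls }} (requested {{ len .ToolCalls }} tool calls)
{{- end }}
{{- end }}
```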

## Tips and Best Practices

Keep the following tips and best practices in mind when working with Go templates (the sketch after this list demonstrates all three):

* **Be mindful of dot**: Control flow structures like `range` and `with` change the value of `.`
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
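
A minimal sketch tying the three tips together; the layout is illustrative only, not tied to any particular model:

```gotmpl
{{- /* `-` on each side of an action trims the adjacent whitespace */ -}}
{{- range .Messages }}
{{- /* inside range, `.` is the current message, so `$.System` reaches back to the root */ }}
{{ .Role }}{{ if $.System }} (system prompt set){{ end }}: {{ .Content }}
{{- end }}
```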

## Examples

### Example Messages

#### ChatML

ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.

```gotmpl
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
```

### Example Tools

Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can be a powerful tool for retrieving real-time data or performing complex tasks.

#### Mistral

Mistral v0.3 and Mixtral 8x22B support tool calling.

```gotmpl
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}

{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}</s>
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}
```

### Example Fill-in-Middle

Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.

#### CodeLlama

CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.

```gotmpl
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
```

> [!NOTE]
> CodeLlama 34B and 70B code completion models, as well as all instruct and Python fine-tuned models, do not support fill-in-middle.

#### Codestral

Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.

```gotmpl
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
```

docs/troubleshooting.md
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
On **Linux** systems with systemd, the logs can be found with this command:

```shell
journalctl -u ollama --no-pager
journalctl -u ollama
```

When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
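
A minimal sketch of retrieving them, assuming the container was started with `--name ollama` (use `docker ps` to find the name on your system):

```shell
docker logs ollama
```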
@@ -91,17 +91,6 @@ If none of those resolve the problem, gather additional information and file an
- Check dmesg for any errors `sudo dmesg | grep -i nvrm` and `sudo dmesg | grep -i nvidia`

## AMD GPU Discovery

On Linux, AMD GPU access typically requires `video` and/or `render` group membership to access the `/dev/kfd` device. If permissions are not set up correctly, Ollama will detect this and report an error in the server log.

When running in a container, on some Linux distributions and container runtimes the ollama process may be unable to access the GPU. Use `ls -ld /dev/kfd /dev/dri /dev/dri/*` on the host system to determine the group assignments on your system, and pass additional `--group-add ...` arguments to the container so it can access the required devices, as sketched below.
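
A minimal sketch, assuming `ls -ld` on the host reported the `render` group with id `110` (the id varies by system):

```shell
# Pass the AMD devices through and add the host's render group id
docker run -d --device /dev/kfd --device /dev/dri --group-add 110 \
    -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
```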

If you are experiencing problems getting Ollama to correctly discover or use your GPU for inference, the following may help isolate the failure.

- `AMD_LOG_LEVEL=3`: enables info-level logging in the AMD HIP/ROCm libraries, which can surface more detailed error codes for troubleshooting
- `OLLAMA_DEBUG=1`: reports additional information during GPU discovery
- Check dmesg for any errors from the amdgpu or kfd drivers: `sudo dmesg | grep -i amdgpu` and `sudo dmesg | grep -i kfd`

## Windows Terminal Errors

Older versions of Windows 10 (e.g., 21H1) are known to have a bug where the standard terminal program does not display control characters correctly. This can result in long strings like `←[?25h←[?25l` being displayed, sometimes accompanied by the error `The parameter is incorrect`. To resolve this problem, please update to Windows 10 22H1 or newer.

Some files were not shown because too many files have changed in this diff.