Compare commits
159 Commits
v0.3.1
...
pdevine/im
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb576a6b23 | ||
|
|
15b7ff3a89 | ||
|
|
3ad243466b | ||
|
|
a13e583c49 | ||
|
|
3c1994d0ee | ||
|
|
1b2da3829d | ||
|
|
0f92b19bec | ||
|
|
69be940bf6 | ||
|
|
9638c24c58 | ||
|
|
bb362caf88 | ||
|
|
0c819e167b | ||
|
|
7a1e1c1caf | ||
|
|
0b03b9c32f | ||
|
|
90ca84172c | ||
|
|
6bd8a4b0a1 | ||
|
|
77903ab8b4 | ||
|
|
e22286c9e1 | ||
|
|
107f695929 | ||
|
|
4ecc70d3b4 | ||
|
|
3546bbd08c | ||
|
|
beb49eef65 | ||
|
|
5a28b9cf5f | ||
|
|
a017cf2fea | ||
|
|
19e5a890f7 | ||
|
|
f91c9e3709 | ||
|
|
2df6905ede | ||
|
|
d8be22e47d | ||
|
|
652c273f0e | ||
|
|
88e7705079 | ||
|
|
f9e31da946 | ||
|
|
88bb9e3328 | ||
|
|
3b19cdba2a | ||
|
|
927d98a6cd | ||
|
|
f6c811b320 | ||
|
|
4fe3a556fa | ||
|
|
fc3b4cda89 | ||
|
|
d470ebe78b | ||
|
|
c7bcb00319 | ||
|
|
74d45f0102 | ||
|
|
9fddef3731 | ||
|
|
885cf45087 | ||
|
|
9352eeb752 | ||
|
|
0ad0e738cd | ||
|
|
bdc4308afb | ||
|
|
d29cd4c2ed | ||
|
|
a84c05cf91 | ||
|
|
e3d7f32af7 | ||
|
|
3a75e74e34 | ||
|
|
237dccba1e | ||
|
|
b3f75fc812 | ||
|
|
8200c371ae | ||
|
|
0a8d6ea86d | ||
|
|
8e1050f366 | ||
|
|
eda8a32a09 | ||
|
|
a0a40aa20c | ||
|
|
2697d7f5aa | ||
|
|
1f32276178 | ||
|
|
4c4fe3f87f | ||
|
|
feedf49c71 | ||
|
|
8b00a415ab | ||
|
|
01b80e9ffc | ||
|
|
bd5e432630 | ||
|
|
aec77d6a05 | ||
|
|
6ffb5cb017 | ||
|
|
f7e3b9190f | ||
|
|
980dd15f81 | ||
|
|
01d544d373 | ||
|
|
1dc3ef3aa9 | ||
|
|
8aac22438e | ||
|
|
15c2d8fe14 | ||
|
|
25906d72d1 | ||
|
|
023451ce47 | ||
|
|
9b53e39d8e | ||
|
|
97fae2df95 | ||
|
|
160d9d4900 | ||
|
|
d4e6407464 | ||
|
|
b7f7d8cd15 | ||
|
|
2fa1db4345 | ||
|
|
71b0945fc6 | ||
|
|
5bca2e60a7 | ||
|
|
67472e0e89 | ||
|
|
e9aa5117c4 | ||
|
|
2473bdba5e | ||
|
|
7d1c0047fa | ||
|
|
7b61eba471 | ||
|
|
7edaf6e7e8 | ||
|
|
97ec8cfd4e | ||
|
|
5b3a21b578 | ||
|
|
ad0c19dde4 | ||
|
|
69eb06c40e | ||
|
|
1829fb61bd | ||
|
|
ce67706037 | ||
|
|
685a53534b | ||
|
|
de4fc29773 | ||
|
|
e04c7012c2 | ||
|
|
d4a7216c82 | ||
|
|
a4fdd03c3b | ||
|
|
fc85f50a2b | ||
|
|
86b907f82a | ||
|
|
10d49bce70 | ||
|
|
7ed367419e | ||
|
|
50ee8b5f56 | ||
|
|
03bdac0595 | ||
|
|
f457d63400 | ||
|
|
04210aa6dd | ||
|
|
43f9d92008 | ||
|
|
ed6c8bfe57 | ||
|
|
39f2bc6bfc | ||
|
|
b73b0940ef | ||
|
|
6a07344786 | ||
|
|
8b920f35a4 | ||
|
|
4221e39867 | ||
|
|
a091fadfda | ||
|
|
77ccbf04dc | ||
|
|
4addf6b587 | ||
|
|
85c7f11170 | ||
|
|
df3802a65f | ||
|
|
b732beba6a | ||
|
|
ce1fb4447e | ||
|
|
558a54b098 | ||
|
|
ed52833bb1 | ||
|
|
6f133a0bdd | ||
|
|
f561eecfb8 | ||
|
|
ff7c9060ec | ||
|
|
0ff42e84b0 | ||
|
|
8a9f946ca7 | ||
|
|
3b5210548e | ||
|
|
b0c216584c | ||
|
|
49a5483139 | ||
|
|
6bc5c13758 | ||
|
|
3e614260af | ||
|
|
d87b4a488e | ||
|
|
4c14855ad7 | ||
|
|
dc77bbcfa4 | ||
|
|
d8e2664c33 | ||
|
|
eafc607abb | ||
|
|
781fc2d576 | ||
|
|
df993fa37b | ||
|
|
5e9db9fb0b | ||
|
|
0f3271db88 | ||
|
|
6b252918fb | ||
|
|
c4c84b7a0d | ||
|
|
5c1912769e | ||
|
|
71399aa682 | ||
|
|
463a8aa273 | ||
|
|
3579b4966a | ||
|
|
a250c2cb13 | ||
|
|
15af558423 | ||
|
|
85d9d73a72 | ||
|
|
78140a712c | ||
|
|
1954ec5917 | ||
|
|
0f1910129f | ||
|
|
e2c3f6b3e2 | ||
|
|
8570c1c0ef | ||
|
|
55cd3ddcca | ||
|
|
66fe77f084 | ||
|
|
d1a5227cad | ||
|
|
4f1afd575d | ||
|
|
35b89b2eab |
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1 +1,3 @@
|
|||||||
llm/ext_server/* linguist-vendored
|
llm/ext_server/* linguist-vendored
|
||||||
|
* text=auto
|
||||||
|
*.go text eol=lf
|
||||||
|
|||||||
42
.github/workflows/release.yaml
vendored
42
.github/workflows/release.yaml
vendored
@@ -31,7 +31,7 @@ jobs:
|
|||||||
security set-keychain-settings -lut 3600 build.keychain
|
security set-keychain-settings -lut 3600 build.keychain
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: Build Darwin
|
- name: Build Darwin
|
||||||
env:
|
env:
|
||||||
@@ -87,7 +87,7 @@ jobs:
|
|||||||
write-host "plugin installed"
|
write-host "plugin installed"
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- run: go get ./...
|
- run: go get ./...
|
||||||
- run: |
|
- run: |
|
||||||
@@ -141,7 +141,7 @@ jobs:
|
|||||||
write-host "plugin installed"
|
write-host "plugin installed"
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: 'Install ROCm'
|
- name: 'Install ROCm'
|
||||||
run: |
|
run: |
|
||||||
@@ -187,6 +187,13 @@ jobs:
|
|||||||
generate-windows-cuda:
|
generate-windows-cuda:
|
||||||
environment: release
|
environment: release
|
||||||
runs-on: windows
|
runs-on: windows
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
cuda:
|
||||||
|
- version: "11"
|
||||||
|
url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
|
||||||
|
- version: "12"
|
||||||
|
url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
|
||||||
env:
|
env:
|
||||||
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
|
||||||
steps:
|
steps:
|
||||||
@@ -218,13 +225,13 @@ jobs:
|
|||||||
write-host "plugin installed"
|
write-host "plugin installed"
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: 'Install CUDA'
|
- name: 'Install CUDA ${{ matrix.cuda.version }}'
|
||||||
run: |
|
run: |
|
||||||
$ErrorActionPreference = "Stop"
|
$ErrorActionPreference = "Stop"
|
||||||
write-host "downloading CUDA Installer"
|
write-host "downloading CUDA Installer"
|
||||||
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
|
||||||
write-host "Installing CUDA"
|
write-host "Installing CUDA"
|
||||||
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
|
||||||
write-host "Completed CUDA"
|
write-host "Completed CUDA"
|
||||||
@@ -256,15 +263,16 @@ jobs:
|
|||||||
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda
|
name: generate-windows-cuda-${{ matrix.cuda.version }}
|
||||||
path: |
|
path: |
|
||||||
llm/build/**/bin/*
|
llm/build/**/bin/*
|
||||||
dist/windows-amd64/**
|
dist/windows-amd64/**
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: windows-cuda-deps
|
name: windows-cuda-deps-${{ matrix.cuda.version }}
|
||||||
path: dist/deps/*
|
path: dist/deps/*
|
||||||
|
|
||||||
|
|
||||||
# Import the prior generation steps and build the final windows assets
|
# Import the prior generation steps and build the final windows assets
|
||||||
build-windows:
|
build-windows:
|
||||||
environment: release
|
environment: release
|
||||||
@@ -306,7 +314,7 @@ jobs:
|
|||||||
write-host "plugin installed"
|
write-host "plugin installed"
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- run: go get
|
- run: go get
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
@@ -314,10 +322,16 @@ jobs:
|
|||||||
name: generate-windows-cpu
|
name: generate-windows-cpu
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: generate-windows-cuda
|
name: generate-windows-cuda-11
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: windows-cuda-deps
|
name: generate-windows-cuda-12
|
||||||
|
- uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: windows-cuda-deps-11
|
||||||
|
- uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
name: windows-cuda-deps-12
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: windows-rocm-deps
|
name: windows-rocm-deps
|
||||||
@@ -363,7 +377,6 @@ jobs:
|
|||||||
- run: |
|
- run: |
|
||||||
./scripts/build_linux.sh
|
./scripts/build_linux.sh
|
||||||
./scripts/build_docker.sh
|
./scripts/build_docker.sh
|
||||||
mv dist/deps/* dist/
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: dist-linux-amd64
|
name: dist-linux-amd64
|
||||||
@@ -459,7 +472,10 @@ jobs:
|
|||||||
merge-multiple: true
|
merge-multiple: true
|
||||||
- run: |
|
- run: |
|
||||||
ls -lh dist/
|
ls -lh dist/
|
||||||
(cd dist; sha256sum * > sha256sum.txt)
|
(cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
|
||||||
|
mv sha256sum.txt dist/
|
||||||
|
mv dist/linux-???64 .
|
||||||
|
mv dist/linux-amd64-rocm .
|
||||||
cat dist/sha256sum.txt
|
cat dist/sha256sum.txt
|
||||||
- name: Create or update Release
|
- name: Create or update Release
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
12
.github/workflows/test.yaml
vendored
12
.github/workflows/test.yaml
vendored
@@ -63,7 +63,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- run: go get ./...
|
- run: go get ./...
|
||||||
- run: |
|
- run: |
|
||||||
@@ -163,7 +163,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: 'Install ROCm'
|
- name: 'Install ROCm'
|
||||||
run: |
|
run: |
|
||||||
@@ -200,7 +200,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- name: 'Install CUDA'
|
- name: 'Install CUDA'
|
||||||
run: |
|
run: |
|
||||||
@@ -255,7 +255,7 @@ jobs:
|
|||||||
submodules: recursive
|
submodules: recursive
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: false
|
cache: false
|
||||||
- run: |
|
- run: |
|
||||||
case ${{ matrix.arch }} in
|
case ${{ matrix.arch }} in
|
||||||
@@ -273,7 +273,7 @@ jobs:
|
|||||||
if: ${{ startsWith(matrix.os, 'macos-') }}
|
if: ${{ startsWith(matrix.os, 'macos-') }}
|
||||||
- uses: golangci/golangci-lint-action@v6
|
- uses: golangci/golangci-lint-action@v6
|
||||||
with:
|
with:
|
||||||
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
|
args: --timeout 8m0s -v
|
||||||
test:
|
test:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@@ -297,7 +297,7 @@ jobs:
|
|||||||
submodules: recursive
|
submodules: recursive
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: "stable"
|
go-version-file: go.mod
|
||||||
cache: true
|
cache: true
|
||||||
- run: |
|
- run: |
|
||||||
case ${{ matrix.arch }} in
|
case ${{ matrix.arch }} in
|
||||||
|
|||||||
@@ -7,22 +7,31 @@ linters:
|
|||||||
- bodyclose
|
- bodyclose
|
||||||
- containedctx
|
- containedctx
|
||||||
- contextcheck
|
- contextcheck
|
||||||
|
- errcheck
|
||||||
- exportloopref
|
- exportloopref
|
||||||
|
- gci
|
||||||
- gocheckcompilerdirectives
|
- gocheckcompilerdirectives
|
||||||
# conditionally enable this on linux/macos
|
- gofmt
|
||||||
# - gofmt
|
- gofumpt
|
||||||
# - goimports
|
- gosimple
|
||||||
|
- govet
|
||||||
|
- ineffassign
|
||||||
- intrange
|
- intrange
|
||||||
|
- makezero
|
||||||
- misspell
|
- misspell
|
||||||
- nilerr
|
- nilerr
|
||||||
- nolintlint
|
- nolintlint
|
||||||
- nosprintfhostport
|
- nosprintfhostport
|
||||||
- testifylint
|
- staticcheck
|
||||||
|
- tenv
|
||||||
- unconvert
|
- unconvert
|
||||||
- unused
|
- unused
|
||||||
|
- usestdlibvars
|
||||||
- wastedassign
|
- wastedassign
|
||||||
- whitespace
|
- whitespace
|
||||||
- usestdlibvars
|
linters-settings:
|
||||||
|
gci:
|
||||||
|
sections: [standard, default, localmodule]
|
||||||
severity:
|
severity:
|
||||||
default-severity: error
|
default-severity: error
|
||||||
rules:
|
rules:
|
||||||
|
|||||||
37
CONTRIBUTING.md
Normal file
37
CONTRIBUTING.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# Contributing to Ollama
|
||||||
|
|
||||||
|
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
|
||||||
|
|
||||||
|
## Set up
|
||||||
|
|
||||||
|
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
|
||||||
|
|
||||||
|
## Pull requests
|
||||||
|
|
||||||
|
### Ideal issues
|
||||||
|
|
||||||
|
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
|
||||||
|
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
|
||||||
|
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
|
||||||
|
|
||||||
|
### Issues that are harder to review
|
||||||
|
|
||||||
|
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
|
||||||
|
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
|
||||||
|
* Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
|
||||||
|
|
||||||
|
### Issues that may not be accepted
|
||||||
|
|
||||||
|
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
|
||||||
|
* Changes that add significant friction to the user experience
|
||||||
|
* Changes that create a large future maintenance burden for maintainers and contributors
|
||||||
|
|
||||||
|
### Best practices
|
||||||
|
|
||||||
|
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
|
||||||
|
* Tests: please add test coverage to changes where possible.
|
||||||
|
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
|
||||||
|
|
||||||
|
## Need help?
|
||||||
|
|
||||||
|
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
|
||||||
131
Dockerfile
131
Dockerfile
@@ -1,7 +1,9 @@
|
|||||||
ARG GOLANG_VERSION=1.22.5
|
ARG GOLANG_VERSION=1.22.5
|
||||||
ARG CMAKE_VERSION=3.22.1
|
ARG CMAKE_VERSION=3.22.1
|
||||||
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
|
ARG CUDA_VERSION_11=11.3.1
|
||||||
ARG CUDA_VERSION=11.3.1
|
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
|
||||||
|
ARG CUDA_VERSION_12=12.4.0
|
||||||
|
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||||
ARG ROCM_VERSION=6.1.2
|
ARG ROCM_VERSION=6.1.2
|
||||||
|
|
||||||
# Copy the minimal context we need to run the generate scripts
|
# Copy the minimal context we need to run the generate scripts
|
||||||
@@ -10,7 +12,7 @@ COPY .git .git
|
|||||||
COPY .gitmodules .gitmodules
|
COPY .gitmodules .gitmodules
|
||||||
COPY llm llm
|
COPY llm llm
|
||||||
|
|
||||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
|||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
ARG CUDA_V11_ARCHITECTURES
|
||||||
|
ENV GOARCH amd64
|
||||||
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v11" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
|
||||||
|
ARG CMAKE_VERSION
|
||||||
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V12_ARCHITECTURES
|
||||||
|
ENV GOARCH amd64
|
||||||
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v12" \
|
||||||
|
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
|||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
ARG CUDA_V11_ARCHITECTURES
|
||||||
|
ENV GOARCH arm64
|
||||||
|
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v11" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
|
||||||
|
ARG CMAKE_VERSION
|
||||||
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
|
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG CUDA_V12_ARCHITECTURES
|
||||||
|
ENV GOARCH arm64
|
||||||
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 \
|
||||||
|
OLLAMA_SKIP_CPU_GENERATE=1 \
|
||||||
|
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
|
||||||
|
CUDA_VARIANT="_v12" \
|
||||||
|
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
|
||||||
|
bash gen_linux.sh
|
||||||
|
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
|||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
ARG AMDGPU_TARGETS
|
ARG AMDGPU_TARGETS
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
ENV GOARCH amd64
|
||||||
RUN mkdir /tmp/scratch && \
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
|
||||||
cp ${dep} /tmp/scratch/ || exit 1 ; \
|
RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
|
||||||
done && \
|
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
|
||||||
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
|
|
||||||
mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
|
|
||||||
(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
|
|
||||||
|
|
||||||
|
|
||||||
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
|
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
|||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
|
ENV GOARCH amd64
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
|
||||||
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
|
||||||
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
|
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
|||||||
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
|
ENV GOARCH arm64
|
||||||
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
|
||||||
|
|
||||||
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
|
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
|
||||||
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
|
||||||
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
|
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
|
||||||
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
|
||||||
|
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
@@ -95,12 +151,16 @@ COPY . .
|
|||||||
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
|
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN go build -trimpath .
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
go build -trimpath -o dist/linux-amd64/bin/ollama .
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||||
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
|
|||||||
WORKDIR /go/src/github.com/ollama/ollama
|
WORKDIR /go/src/github.com/ollama/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
|
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
|
||||||
|
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
|
||||||
ARG GOFLAGS
|
ARG GOFLAGS
|
||||||
ARG CGO_CFLAGS
|
ARG CGO_CFLAGS
|
||||||
RUN go build -trimpath .
|
RUN --mount=type=cache,target=/root/.ccache \
|
||||||
|
go build -trimpath -o dist/linux-arm64/bin/ollama .
|
||||||
|
|
||||||
|
# Strip out ROCm dependencies to keep the primary image lean
|
||||||
|
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
|
||||||
|
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
|
||||||
|
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
|
||||||
|
|
||||||
# Runtime stages
|
# Runtime stages
|
||||||
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
|
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
|
||||||
|
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
|
||||||
RUN apt-get update && apt-get install -y ca-certificates
|
RUN apt-get update && apt-get install -y ca-certificates
|
||||||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
|
||||||
|
|
||||||
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
|
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
|
||||||
|
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
|
||||||
RUN apt-get update && apt-get install -y ca-certificates
|
RUN apt-get update && apt-get install -y ca-certificates
|
||||||
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
|
||||||
|
|
||||||
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
|
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
|
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
|
||||||
RUN update-pciids
|
RUN update-pciids
|
||||||
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
|
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
|
||||||
|
RUN ln -s /opt/rocm/lib /lib/ollama
|
||||||
EXPOSE 11434
|
EXPOSE 11434
|
||||||
ENV OLLAMA_HOST 0.0.0.0
|
ENV OLLAMA_HOST 0.0.0.0
|
||||||
|
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ Here are some example models that can be downloaded:
|
|||||||
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
|
||||||
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
|
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
|
||||||
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
|
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
|
||||||
|
| Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
|
||||||
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
|
| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
|
||||||
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
|
| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
|
||||||
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
| Mistral | 7B | 4.1GB | `ollama run mistral` |
|
||||||
@@ -299,6 +300,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
|
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
|
||||||
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
|
||||||
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
|
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
|
||||||
|
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
|
||||||
|
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
|
||||||
|
|
||||||
### Terminal
|
### Terminal
|
||||||
|
|
||||||
@@ -322,6 +325,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [tlm](https://github.com/yusufcanb/tlm)
|
- [tlm](https://github.com/yusufcanb/tlm)
|
||||||
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
|
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
|
||||||
- [gollama](https://github.com/sammcj/gollama)
|
- [gollama](https://github.com/sammcj/gollama)
|
||||||
|
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
|
||||||
|
|
||||||
### Database
|
### Database
|
||||||
|
|
||||||
@@ -337,6 +341,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
### Libraries
|
### Libraries
|
||||||
|
|
||||||
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
|
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
|
||||||
|
- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
|
||||||
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
||||||
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
||||||
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
|
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
|
||||||
|
|||||||
25
SECURITY.md
Normal file
25
SECURITY.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Security
|
||||||
|
|
||||||
|
The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
|
||||||
|
|
||||||
|
## Reporting a vulnerability
|
||||||
|
|
||||||
|
If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
|
||||||
|
|
||||||
|
Please include the following details in your report:
|
||||||
|
- A description of the vulnerability
|
||||||
|
- Steps to reproduce the issue
|
||||||
|
- Your assessment of the potential impact
|
||||||
|
- Any possible mitigations
|
||||||
|
|
||||||
|
## Security best practices
|
||||||
|
|
||||||
|
While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
|
||||||
|
|
||||||
|
- Regularly updating to the latest version of Ollama
|
||||||
|
- Securing access to hosted instances of Ollama
|
||||||
|
- Monitoring systems for unusual activity
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
For any other questions or concerns related to security, please contact us at hello@ollama.com
|
||||||
@@ -18,9 +18,9 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"runtime"
|
"runtime"
|
||||||
@@ -63,13 +63,8 @@ func checkError(resp *http.Response, body []byte) error {
|
|||||||
// If the variable is not specified, a default ollama host and port will be
|
// If the variable is not specified, a default ollama host and port will be
|
||||||
// used.
|
// used.
|
||||||
func ClientFromEnvironment() (*Client, error) {
|
func ClientFromEnvironment() (*Client, error) {
|
||||||
ollamaHost := envconfig.Host
|
|
||||||
|
|
||||||
return &Client{
|
return &Client{
|
||||||
base: &url.URL{
|
base: envconfig.Host(),
|
||||||
Scheme: ollamaHost.Scheme,
|
|
||||||
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
|
|
||||||
},
|
|
||||||
http: http.DefaultClient,
|
http: http.DefaultClient,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
@@ -178,7 +173,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
|
|||||||
}
|
}
|
||||||
|
|
||||||
if errorResponse.Error != "" {
|
if errorResponse.Error != "" {
|
||||||
return fmt.Errorf(errorResponse.Error)
|
return errors.New(errorResponse.Error)
|
||||||
}
|
}
|
||||||
|
|
||||||
if response.StatusCode >= http.StatusBadRequest {
|
if response.StatusCode >= http.StatusBadRequest {
|
||||||
@@ -303,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
|
|||||||
return &lr, nil
|
return &lr, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// List running models.
|
// ListRunning lists running models.
|
||||||
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
|
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
|
||||||
var lr ProcessResponse
|
var lr ProcessResponse
|
||||||
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
|
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
|
||||||
@@ -338,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
|
|||||||
return &resp, nil
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hearbeat checks if the server has started and is responsive; if yes, it
|
// Heartbeat checks if the server has started and is responsive; if yes, it
|
||||||
// returns nil, otherwise an error.
|
// returns nil, otherwise an error.
|
||||||
func (c *Client) Heartbeat(ctx context.Context) error {
|
func (c *Client) Heartbeat(ctx context.Context) error {
|
||||||
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
|
||||||
|
|||||||
@@ -2,8 +2,6 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/ollama/ollama/envconfig"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestClientFromEnvironment(t *testing.T) {
|
func TestClientFromEnvironment(t *testing.T) {
|
||||||
@@ -33,7 +31,6 @@ func TestClientFromEnvironment(t *testing.T) {
|
|||||||
for k, v := range testCases {
|
for k, v := range testCases {
|
||||||
t.Run(k, func(t *testing.T) {
|
t.Run(k, func(t *testing.T) {
|
||||||
t.Setenv("OLLAMA_HOST", v.value)
|
t.Setenv("OLLAMA_HOST", v.value)
|
||||||
envconfig.LoadConfig()
|
|
||||||
|
|
||||||
client, err := ClientFromEnvironment()
|
client, err := ClientFromEnvironment()
|
||||||
if err != v.err {
|
if err != v.err {
|
||||||
|
|||||||
@@ -231,7 +231,6 @@ type Options struct {
|
|||||||
|
|
||||||
// Runner options which must be set when the model is loaded into memory
|
// Runner options which must be set when the model is loaded into memory
|
||||||
type Runner struct {
|
type Runner struct {
|
||||||
UseNUMA bool `json:"numa,omitempty"`
|
|
||||||
NumCtx int `json:"num_ctx,omitempty"`
|
NumCtx int `json:"num_ctx,omitempty"`
|
||||||
NumBatch int `json:"num_batch,omitempty"`
|
NumBatch int `json:"num_batch,omitempty"`
|
||||||
NumGPU int `json:"num_gpu,omitempty"`
|
NumGPU int `json:"num_gpu,omitempty"`
|
||||||
@@ -505,7 +504,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
|
|||||||
for key, val := range m {
|
for key, val := range m {
|
||||||
opt, ok := jsonOpts[key]
|
opt, ok := jsonOpts[key]
|
||||||
if !ok {
|
if !ok {
|
||||||
slog.Warn("invalid option provided", "option", opt.Name)
|
slog.Warn("invalid option provided", "option", key)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
|
|||||||
F16KV: true,
|
F16KV: true,
|
||||||
UseMLock: false,
|
UseMLock: false,
|
||||||
UseMMap: nil,
|
UseMMap: nil,
|
||||||
UseNUMA: false,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"errors"
|
||||||
"math"
|
"math"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -192,7 +192,7 @@ func TestUseMmapFormatParams(t *testing.T) {
|
|||||||
"use_mmap": {"foo"},
|
"use_mmap": {"foo"},
|
||||||
},
|
},
|
||||||
exp: nil,
|
exp: nil,
|
||||||
err: fmt.Errorf("invalid bool value [foo]"),
|
err: errors.New("invalid bool value [foo]"),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
package lifecycle
|
package lifecycle
|
||||||
|
|
||||||
import "fmt"
|
import "errors"
|
||||||
|
|
||||||
func GetStarted() error {
|
func GetStarted() error {
|
||||||
return fmt.Errorf("GetStarted not implemented")
|
return errors.New("not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ func GetStarted() error {
|
|||||||
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
|
Sys: &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
|
||||||
}
|
}
|
||||||
proc, err := os.StartProcess(args[0], args, attrs)
|
proc, err := os.StartProcess(args[0], args, attrs)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to start getting started shell %w", err)
|
return fmt.Errorf("unable to start getting started shell %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import (
|
|||||||
func InitLogging() {
|
func InitLogging() {
|
||||||
level := slog.LevelInfo
|
level := slog.LevelInfo
|
||||||
|
|
||||||
if envconfig.Debug {
|
if envconfig.Debug() {
|
||||||
level = slog.LevelDebug
|
level = slog.LevelDebug
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,7 +27,7 @@ func InitLogging() {
|
|||||||
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
|
// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
|
||||||
} else {
|
} else {
|
||||||
rotateLogs(AppLogFile)
|
rotateLogs(AppLogFile)
|
||||||
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error(fmt.Sprintf("failed to create server log %v", err))
|
slog.Error(fmt.Sprintf("failed to create server log %v", err))
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -5,5 +5,5 @@ package lifecycle
|
|||||||
import "log/slog"
|
import "log/slog"
|
||||||
|
|
||||||
func ShowLogs() {
|
func ShowLogs() {
|
||||||
slog.Warn("ShowLogs not yet implemented")
|
slog.Warn("not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ func TestRotateLogs(t *testing.T) {
|
|||||||
// No log exists
|
// No log exists
|
||||||
rotateLogs(logFile)
|
rotateLogs(logFile)
|
||||||
|
|
||||||
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0644))
|
require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
|
||||||
assert.FileExists(t, logFile)
|
assert.FileExists(t, logFile)
|
||||||
// First rotation
|
// First rotation
|
||||||
rotateLogs(logFile)
|
rotateLogs(logFile)
|
||||||
@@ -32,7 +32,7 @@ func TestRotateLogs(t *testing.T) {
|
|||||||
assert.NoFileExists(t, logFile)
|
assert.NoFileExists(t, logFile)
|
||||||
|
|
||||||
for i := 2; i <= LogRotationCount+1; i++ {
|
for i := 2; i <= LogRotationCount+1; i++ {
|
||||||
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0644))
|
require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
|
||||||
assert.FileExists(t, logFile)
|
assert.FileExists(t, logFile)
|
||||||
rotateLogs(logFile)
|
rotateLogs(logFile)
|
||||||
assert.NoFileExists(t, logFile)
|
assert.NoFileExists(t, logFile)
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
rotateLogs(ServerLogFile)
|
rotateLogs(ServerLogFile)
|
||||||
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
|
logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create server log: %w", err)
|
return nil, fmt.Errorf("failed to create server log: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -46,7 +47,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
|||||||
query.Add("os", runtime.GOOS)
|
query.Add("os", runtime.GOOS)
|
||||||
query.Add("arch", runtime.GOARCH)
|
query.Add("arch", runtime.GOARCH)
|
||||||
query.Add("version", version.Version)
|
query.Add("version", version.Version)
|
||||||
query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))
|
query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
|
||||||
|
|
||||||
nonce, err := auth.NewNonce(rand.Reader, 16)
|
nonce, err := auth.NewNonce(rand.Reader, 16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ package lifecycle
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
||||||
return fmt.Errorf("DoUpgrade not yet implemented")
|
return errors.New("not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package lifecycle
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
@@ -15,7 +16,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
|||||||
return fmt.Errorf("failed to lookup downloads: %s", err)
|
return fmt.Errorf("failed to lookup downloads: %s", err)
|
||||||
}
|
}
|
||||||
if len(files) == 0 {
|
if len(files) == 0 {
|
||||||
return fmt.Errorf("no update downloads found")
|
return errors.New("no update downloads found")
|
||||||
} else if len(files) > 1 {
|
} else if len(files) > 1 {
|
||||||
// Shouldn't happen
|
// Shouldn't happen
|
||||||
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
|
slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
|
||||||
@@ -64,7 +65,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO - some details about why it didn't start, or is this a pedantic error case?
|
// TODO - some details about why it didn't start, or is this a pedantic error case?
|
||||||
return fmt.Errorf("installer process did not start")
|
return errors.New("installer process did not start")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
|
// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
|
||||||
|
|||||||
@@ -87,20 +87,11 @@ DialogFontSize=12
|
|||||||
|
|
||||||
[Files]
|
[Files]
|
||||||
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
|
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
|
||||||
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
|
||||||
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
|
Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
|
||||||
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||||
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
||||||
#if DirExists("..\dist\windows-amd64\cuda")
|
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
|
||||||
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
|
|
||||||
#endif
|
|
||||||
#if DirExists("..\dist\windows-amd64\oneapi")
|
|
||||||
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
|
|
||||||
#endif
|
|
||||||
#if DirExists("..\dist\windows-amd64\rocm")
|
|
||||||
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
[Icons]
|
[Icons]
|
||||||
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
|
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
|
||||||
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
|
|||||||
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
|
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
|
||||||
|
|
||||||
[Run]
|
[Run]
|
||||||
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
|
Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
|
||||||
|
|
||||||
[UninstallRun]
|
[UninstallRun]
|
||||||
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
|
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
|
||||||
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
|
|||||||
|
|
||||||
[Registry]
|
[Registry]
|
||||||
Root: HKCU; Subkey: "Environment"; \
|
Root: HKCU; Subkey: "Environment"; \
|
||||||
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
|
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
|
||||||
Check: NeedsAddPath('{app}')
|
Check: NeedsAddPath('{app}\bin')
|
||||||
|
|
||||||
[Code]
|
[Code]
|
||||||
|
|
||||||
|
|||||||
@@ -3,11 +3,11 @@
|
|||||||
package tray
|
package tray
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"errors"
|
||||||
|
|
||||||
"github.com/ollama/ollama/app/tray/commontray"
|
"github.com/ollama/ollama/app/tray/commontray"
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
||||||
return nil, fmt.Errorf("NOT IMPLEMENTED YET")
|
return nil, errors.New("not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,9 +11,7 @@ import (
|
|||||||
"golang.org/x/sys/windows"
|
"golang.org/x/sys/windows"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var quitOnce sync.Once
|
||||||
quitOnce sync.Once
|
|
||||||
)
|
|
||||||
|
|
||||||
func (t *winTray) Run() {
|
func (t *winTray) Run() {
|
||||||
nativeLoop()
|
nativeLoop()
|
||||||
|
|||||||
@@ -11,12 +11,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
updatAvailableMenuID = 1
|
updateAvailableMenuID = 1
|
||||||
updateMenuID = updatAvailableMenuID + 1
|
updateMenuID = updateAvailableMenuID + 1
|
||||||
separatorMenuID = updateMenuID + 1
|
separatorMenuID = updateMenuID + 1
|
||||||
diagLogsMenuID = separatorMenuID + 1
|
diagLogsMenuID = separatorMenuID + 1
|
||||||
diagSeparatorMenuID = diagLogsMenuID + 1
|
diagSeparatorMenuID = diagLogsMenuID + 1
|
||||||
quitMenuID = diagSeparatorMenuID + 1
|
quitMenuID = diagSeparatorMenuID + 1
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t *winTray) initMenus() error {
|
func (t *winTray) initMenus() error {
|
||||||
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
|
|||||||
func (t *winTray) UpdateAvailable(ver string) error {
|
func (t *winTray) UpdateAvailable(ver string) error {
|
||||||
if !t.updateNotified {
|
if !t.updateNotified {
|
||||||
slog.Debug("updating menu and sending notification for new update")
|
slog.Debug("updating menu and sending notification for new update")
|
||||||
if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
|
if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
|
||||||
return fmt.Errorf("unable to create menu entries %w", err)
|
return fmt.Errorf("unable to create menu entries %w", err)
|
||||||
}
|
}
|
||||||
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
|
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
|
||||||
|
|||||||
@@ -11,10 +11,12 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
|
"syscall"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/ollama/ollama/app/tray/commontray"
|
|
||||||
"golang.org/x/sys/windows"
|
"golang.org/x/sys/windows"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/app/tray/commontray"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
|
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
|
||||||
@@ -414,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
|
|||||||
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
|
iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)
|
||||||
|
|
||||||
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
|
if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
|
||||||
if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
|
if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -432,7 +434,12 @@ func (t *winTray) setIcon(src string) error {
|
|||||||
t.muNID.Lock()
|
t.muNID.Lock()
|
||||||
defer t.muNID.Unlock()
|
defer t.muNID.Unlock()
|
||||||
t.nid.Icon = h
|
t.nid.Icon = h
|
||||||
t.nid.Flags |= NIF_ICON
|
t.nid.Flags |= NIF_ICON | NIF_TIP
|
||||||
|
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
|
||||||
|
copy(t.nid.Tip[:], toolTipUTF16)
|
||||||
|
} else {
|
||||||
|
return err
|
||||||
|
}
|
||||||
t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
|
t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
|
||||||
|
|
||||||
return t.nid.modify()
|
return t.nid.modify()
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ const (
|
|||||||
MIIM_SUBMENU = 0x00000004
|
MIIM_SUBMENU = 0x00000004
|
||||||
MIM_APPLYTOSUBMENUS = 0x80000000
|
MIM_APPLYTOSUBMENUS = 0x80000000
|
||||||
NIF_ICON = 0x00000002
|
NIF_ICON = 0x00000002
|
||||||
|
NIF_TIP = 0x00000004
|
||||||
NIF_INFO = 0x00000010
|
NIF_INFO = 0x00000010
|
||||||
NIF_MESSAGE = 0x00000001
|
NIF_MESSAGE = 0x00000001
|
||||||
SW_HIDE = 0
|
SW_HIDE = 0
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -78,7 +79,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
|
|||||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||||
parts := bytes.Split(publicKey, []byte(" "))
|
parts := bytes.Split(publicKey, []byte(" "))
|
||||||
if len(parts) < 2 {
|
if len(parts) < 2 {
|
||||||
return "", fmt.Errorf("malformed public key")
|
return "", errors.New("malformed public key")
|
||||||
}
|
}
|
||||||
|
|
||||||
signedData, err := privateKey.Sign(rand.Reader, bts)
|
signedData, err := privateKey.Sign(rand.Reader, bts)
|
||||||
|
|||||||
87
cmd/cmd.go
87
cmd/cmd.go
@@ -22,6 +22,7 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -78,6 +79,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
status := "transferring model data"
|
status := "transferring model data"
|
||||||
spinner := progress.NewSpinner(status)
|
spinner := progress.NewSpinner(status)
|
||||||
p.Add(status, spinner)
|
p.Add(status, spinner)
|
||||||
|
defer p.Stop()
|
||||||
|
|
||||||
for i := range modelfile.Commands {
|
for i := range modelfile.Commands {
|
||||||
switch modelfile.Commands[i].Name {
|
switch modelfile.Commands[i].Name {
|
||||||
@@ -112,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
path = tempfile
|
path = tempfile
|
||||||
}
|
}
|
||||||
|
|
||||||
digest, err := createBlob(cmd, client, path)
|
digest, err := createBlob(cmd, client, path, spinner)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -202,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
|
|||||||
// safetensors files might be unresolved git lfs references; skip if they are
|
// safetensors files might be unresolved git lfs references; skip if they are
|
||||||
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
|
||||||
files = append(files, st...)
|
files = append(files, st...)
|
||||||
|
} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||||
|
// covers adapters.safetensors
|
||||||
|
files = append(files, st...)
|
||||||
|
} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
|
||||||
|
// covers adapter_model.safetensors
|
||||||
|
files = append(files, st...)
|
||||||
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
|
||||||
// pytorch files might also be unresolved git lfs references; skip if they are
|
// pytorch files might also be unresolved git lfs references; skip if they are
|
||||||
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
|
||||||
@@ -221,6 +229,14 @@ func tempZipFiles(path string) (string, error) {
|
|||||||
}
|
}
|
||||||
files = append(files, js...)
|
files = append(files, js...)
|
||||||
|
|
||||||
|
// bert models require a nested config.json
|
||||||
|
// TODO(mxyng): merge this with the glob above
|
||||||
|
js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
files = append(files, js...)
|
||||||
|
|
||||||
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
|
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
|
||||||
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
|
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
|
||||||
// tokenizer.model might be a unresolved git lfs reference; error if it is
|
// tokenizer.model might be a unresolved git lfs reference; error if it is
|
||||||
@@ -250,6 +266,11 @@ func tempZipFiles(path string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zfi.Name, err = filepath.Rel(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
zf, err := zipfile.CreateHeader(zfi)
|
zf, err := zipfile.CreateHeader(zfi)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@@ -263,13 +284,20 @@ func tempZipFiles(path string) (string, error) {
|
|||||||
return tempfile.Name(), nil
|
return tempfile.Name(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
|
||||||
bin, err := os.Open(path)
|
bin, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
defer bin.Close()
|
defer bin.Close()
|
||||||
|
|
||||||
|
// Get file info to retrieve the size
|
||||||
|
fileInfo, err := bin.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
fileSize := fileInfo.Size()
|
||||||
|
|
||||||
hash := sha256.New()
|
hash := sha256.New()
|
||||||
if _, err := io.Copy(hash, bin); err != nil {
|
if _, err := io.Copy(hash, bin); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@@ -279,13 +307,43 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var pw progressWriter
|
||||||
|
status := "transferring model data 0%"
|
||||||
|
spinner.SetMessage(status)
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
defer close(done)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(60 * time.Millisecond)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
|
||||||
|
case <-done:
|
||||||
|
spinner.SetMessage("transferring model data 100%")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return digest, nil
|
return digest, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type progressWriter struct {
|
||||||
|
n atomic.Int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *progressWriter) Write(p []byte) (n int, err error) {
|
||||||
|
w.n.Add(int64(len(p)))
|
||||||
|
return len(p), nil
|
||||||
|
}
|
||||||
|
|
||||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||||
interactive := true
|
interactive := true
|
||||||
|
|
||||||
@@ -362,9 +420,24 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
||||||
opts.ParentModel = info.Details.ParentModel
|
opts.ParentModel = info.Details.ParentModel
|
||||||
opts.Messages = append(opts.Messages, info.Messages...)
|
|
||||||
|
|
||||||
if interactive {
|
if interactive {
|
||||||
|
if err := loadModel(cmd, &opts); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, msg := range info.Messages {
|
||||||
|
switch msg.Role {
|
||||||
|
case "user":
|
||||||
|
fmt.Printf(">>> %s\n", msg.Content)
|
||||||
|
case "assistant":
|
||||||
|
state := &displayResponseState{}
|
||||||
|
displayResponse(msg.Content, opts.WordWrap, state)
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return generateInteractive(cmd, opts)
|
return generateInteractive(cmd, opts)
|
||||||
}
|
}
|
||||||
return generate(cmd, opts)
|
return generate(cmd, opts)
|
||||||
@@ -1071,12 +1144,12 @@ func generate(cmd *cobra.Command, opts runOptions) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
func RunServer(_ *cobra.Command, _ []string) error {
|
||||||
if err := initializeKeypair(); err != nil {
|
if err := initializeKeypair(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ln, err := net.Listen("tcp", net.JoinHostPort(envconfig.Host.Host, envconfig.Host.Port))
|
ln, err := net.Listen("tcp", envconfig.Host().Host)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1145,7 +1218,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := startApp(cmd.Context(), client); err != nil {
|
if err := startApp(cmd.Context(), client); err != nil {
|
||||||
return fmt.Errorf("could not connect to ollama app, is it running?")
|
return errors.New("could not connect to ollama app, is it running?")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -48,29 +48,10 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
|
|||||||
KeepAlive: opts.KeepAlive,
|
KeepAlive: opts.KeepAlive,
|
||||||
}
|
}
|
||||||
|
|
||||||
return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
|
||||||
p.StopAndClear()
|
|
||||||
for _, msg := range opts.Messages {
|
|
||||||
switch msg.Role {
|
|
||||||
case "user":
|
|
||||||
fmt.Printf(">>> %s\n", msg.Content)
|
|
||||||
case "assistant":
|
|
||||||
state := &displayResponseState{}
|
|
||||||
displayResponse(msg.Content, opts.WordWrap, state)
|
|
||||||
fmt.Println()
|
|
||||||
fmt.Println()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||||
err := loadModel(cmd, &opts)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
usage := func() {
|
usage := func() {
|
||||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||||
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
||||||
@@ -160,7 +141,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if envconfig.NoHistory {
|
if envconfig.NoHistory() {
|
||||||
scanner.HistoryDisable()
|
scanner.HistoryDisable()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -623,7 +604,7 @@ func getImageData(filePath string) ([]byte, error) {
|
|||||||
// Check if the file size exceeds 100MB
|
// Check if the file size exceeds 100MB
|
||||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||||
if info.Size() > maxSize {
|
if info.Size() > maxSize {
|
||||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
return nil, errors.New("file size exceeds maximum limit (100MB)")
|
||||||
}
|
}
|
||||||
|
|
||||||
buf = make([]byte, info.Size())
|
buf = make([]byte, info.Size())
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !strings.Contains(link, "Ollama.app") {
|
if !strings.Contains(link, "Ollama.app") {
|
||||||
return fmt.Errorf("could not find ollama app")
|
return errors.New("could not find ollama app")
|
||||||
}
|
}
|
||||||
path := strings.Split(link, "Ollama.app")
|
path := strings.Split(link, "Ollama.app")
|
||||||
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
|
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
|
||||||
|
|||||||
@@ -4,11 +4,11 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"errors"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
)
|
)
|
||||||
|
|
||||||
func startApp(ctx context.Context, client *api.Client) error {
|
func startApp(ctx context.Context, client *api.Client) error {
|
||||||
return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
|
return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
|
|||||||
// Finally look in the path
|
// Finally look in the path
|
||||||
appExe, err = exec.LookPath(AppName)
|
appExe, err = exec.LookPath(AppName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not locate ollama app")
|
return errors.New("could not locate ollama app")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,200 +1,228 @@
|
|||||||
package convert
|
package convert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"cmp"
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"slices"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"google.golang.org/protobuf/proto"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/convert/sentencepiece"
|
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
type ModelParameters struct {
|
||||||
_ int32 = iota
|
Architectures []string `json:"architectures"`
|
||||||
tokenTypeNormal
|
VocabSize uint32 `json:"vocab_size"`
|
||||||
tokenTypeUnknown
|
|
||||||
tokenTypeControl
|
|
||||||
tokenTypeUserDefined
|
|
||||||
tokenTypeUnused
|
|
||||||
tokenTypeByte
|
|
||||||
)
|
|
||||||
|
|
||||||
type Params struct {
|
|
||||||
Architectures []string `json:"architectures"`
|
|
||||||
VocabSize int `json:"vocab_size"`
|
|
||||||
HiddenSize int `json:"hidden_size"` // n_embd
|
|
||||||
HiddenLayers int `json:"num_hidden_layers"` // n_layer
|
|
||||||
ContextSize int `json:"max_position_embeddings"`
|
|
||||||
IntermediateSize int `json:"intermediate_size"`
|
|
||||||
AttentionHeads int `json:"num_attention_heads"` // n_head
|
|
||||||
KeyValHeads int `json:"num_key_value_heads"`
|
|
||||||
NormEPS float64 `json:"rms_norm_eps"`
|
|
||||||
BoSTokenID int `json:"bos_token_id"`
|
|
||||||
EoSTokenID int `json:"eos_token_id"`
|
|
||||||
HeadDimension int `json:"head_dim"`
|
|
||||||
PaddingTokenID int `json:"pad_token_id"`
|
|
||||||
RopeFrequencyBase float64 `json:"rope_theta"`
|
|
||||||
|
|
||||||
Experts int `json:"num_local_experts"`
|
|
||||||
ExpertsUsed int `json:"num_experts_per_tok"`
|
|
||||||
|
|
||||||
PreTokenizer string
|
|
||||||
|
|
||||||
ByteOrder
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type ByteOrder interface {
|
type AdapterParameters struct {
|
||||||
binary.ByteOrder
|
Alpha uint32 `json:"lora_alpha"`
|
||||||
binary.AppendByteOrder
|
LoraLayers uint32 `json:"lora_layers"`
|
||||||
|
LoraParameters struct {
|
||||||
|
Rank uint32 `json:"rank"`
|
||||||
|
Alpha float32 `json:"alpha"`
|
||||||
|
Scale float32 `json:"scale"`
|
||||||
|
} `json:"lora_parameters"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelArch interface {
|
func (ModelParameters) KV(t *Tokenizer) llm.KV {
|
||||||
GetTensors() error
|
kv := llm.KV{
|
||||||
LoadVocab() error
|
"general.file_type": uint32(1),
|
||||||
WriteGGUF(io.WriteSeeker) error
|
"general.quantization_version": uint32(2),
|
||||||
|
"tokenizer.ggml.pre": t.Pre,
|
||||||
|
"tokenizer.ggml.model": t.Vocabulary.Model,
|
||||||
|
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
|
||||||
|
"tokenizer.ggml.scores": t.Vocabulary.Scores,
|
||||||
|
"tokenizer.ggml.token_type": t.Vocabulary.Types,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(t.Merges) > 0 {
|
||||||
|
kv["tokenizer.ggml.merges"] = t.Merges
|
||||||
|
}
|
||||||
|
|
||||||
|
if t.Template != "" {
|
||||||
|
kv["tokenizer.chat_template"] = t.Template
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sv := range t.SpecialVocabulary {
|
||||||
|
kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
|
||||||
|
kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelFormat interface {
|
func (p AdapterParameters) KV() llm.KV {
|
||||||
GetLayerName(string) (string, error)
|
var alpha float32
|
||||||
GetTensors(string, *Params) ([]llm.Tensor, error)
|
if p.LoraParameters.Alpha == 0 {
|
||||||
GetParams(string) (*Params, error)
|
alpha = float32(p.Alpha)
|
||||||
GetModelArch(string, string, *Params) (ModelArch, error)
|
} else {
|
||||||
|
alpha = p.LoraParameters.Alpha
|
||||||
|
}
|
||||||
|
|
||||||
|
kv := llm.KV{
|
||||||
|
"adapter.lora.alpha": alpha,
|
||||||
|
"adapter.type": "lora",
|
||||||
|
"general.file_type": uint32(1),
|
||||||
|
"general.type": "adapter",
|
||||||
|
"general.version": "v0.2",
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelData struct {
|
func (ModelParameters) specialTokenTypes() []string {
|
||||||
Path string
|
return []string{
|
||||||
Name string
|
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
|
||||||
Params *Params
|
}
|
||||||
Vocab *Vocab
|
|
||||||
Tensors []llm.Tensor
|
|
||||||
Format ModelFormat
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetModelFormat(dirname string) (ModelFormat, error) {
|
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||||
files, err := filepath.Glob(filepath.Join(dirname, "*"))
|
return llm.WriteGGUF(ws, kv, ts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||||
|
return llm.WriteGGUF(ws, kv, ts)
|
||||||
|
}
|
||||||
|
|
||||||
|
type ModelConverter interface {
|
||||||
|
// KV maps parameters to LLM key-values
|
||||||
|
KV(*Tokenizer) llm.KV
|
||||||
|
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||||
|
Tensors([]Tensor) []llm.Tensor
|
||||||
|
// Replacements returns a list of string pairs to replace in tensor names.
|
||||||
|
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||||
|
Replacements() []string
|
||||||
|
|
||||||
|
// specialTokenTypes returns any special token types the model uses
|
||||||
|
specialTokenTypes() []string
|
||||||
|
// writeFile writes the model to the provided io.WriteSeeker
|
||||||
|
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type moreParser interface {
|
||||||
|
parseMore(fs.FS) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type AdapterConverter interface {
|
||||||
|
// KV maps parameters to LLM key-values
|
||||||
|
KV(llm.KV) llm.KV
|
||||||
|
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
|
||||||
|
Tensors([]Tensor) []llm.Tensor
|
||||||
|
// Replacements returns a list of string pairs to replace in tensor names.
|
||||||
|
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||||
|
Replacements() []string
|
||||||
|
|
||||||
|
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||||
|
}
|
||||||
|
|
||||||
|
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
|
||||||
|
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, fn := range files {
|
var p AdapterParameters
|
||||||
if strings.HasSuffix(fn, ".safetensors") {
|
if err := json.Unmarshal(bts, &p); err != nil {
|
||||||
return &SafetensorFormat{}, nil
|
return err
|
||||||
} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
|
|
||||||
slog.Debug("model is torch")
|
|
||||||
return &TorchFormat{}, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("couldn't determine model format")
|
arch, ok := baseKV["general.architecture"]
|
||||||
}
|
if !ok {
|
||||||
|
return errors.New("architecture not set for the base model")
|
||||||
|
}
|
||||||
|
|
||||||
// Details on gguf's tokenizer can be found at:
|
var conv AdapterConverter
|
||||||
// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
|
switch arch {
|
||||||
type Vocab struct {
|
case "llama":
|
||||||
Tokens []string
|
conv = &llamaAdapter{}
|
||||||
Scores []float32
|
case "gemma2":
|
||||||
Types []int32
|
conv = &gemma2Adapter{}
|
||||||
Merges []string
|
default:
|
||||||
}
|
return errors.New("unsupported architecture")
|
||||||
|
}
|
||||||
|
|
||||||
func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
|
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||||
slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
|
|
||||||
in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// To regenerate sentencepiece from the protobufs use:
|
if err := json.Unmarshal(bts, conv); err != nil {
|
||||||
// protoc -I=./ --go_out=./ sentencepiece_model.proto
|
return err
|
||||||
modelProto := &sentencepiece.ModelProto{}
|
|
||||||
if err := proto.Unmarshal(in, modelProto); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
v := &Vocab{
|
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
|
||||||
Tokens: make([]string, 0),
|
}
|
||||||
Scores: make([]float32, 0),
|
|
||||||
Types: make([]int32, 0),
|
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
||||||
}
|
// and files it finds in the input path.
|
||||||
|
// Supported input model formats include safetensors.
|
||||||
pieces := modelProto.GetPieces()
|
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||||
for _, p := range pieces {
|
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
||||||
v.Tokens = append(v.Tokens, p.GetPiece())
|
bts, err := fs.ReadFile(fsys, "config.json")
|
||||||
v.Scores = append(v.Scores, p.GetScore())
|
if err != nil {
|
||||||
t := p.GetType()
|
return err
|
||||||
switch t {
|
}
|
||||||
case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
|
|
||||||
case sentencepiece.ModelProto_SentencePiece_CONTROL:
|
var p ModelParameters
|
||||||
case sentencepiece.ModelProto_SentencePiece_UNUSED:
|
if err := json.Unmarshal(bts, &p); err != nil {
|
||||||
case sentencepiece.ModelProto_SentencePiece_BYTE:
|
return err
|
||||||
default:
|
}
|
||||||
t = sentencepiece.ModelProto_SentencePiece_NORMAL
|
|
||||||
}
|
if len(p.Architectures) < 1 {
|
||||||
v.Types = append(v.Types, int32(t))
|
return errors.New("unknown architecture")
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
|
var conv ModelConverter
|
||||||
|
switch p.Architectures[0] {
|
||||||
// add any additional tokens
|
case "LlamaForCausalLM", "MistralForCausalLM":
|
||||||
addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
|
conv = &llamaModel{}
|
||||||
if os.IsNotExist(err) {
|
case "MixtralForCausalLM":
|
||||||
return v, nil
|
conv = &mixtralModel{}
|
||||||
} else if err != nil {
|
case "GemmaForCausalLM":
|
||||||
return nil, err
|
conv = &gemmaModel{}
|
||||||
}
|
case "Gemma2ForCausalLM":
|
||||||
|
conv = &gemma2Model{}
|
||||||
slog.Info("reading user defined tokens")
|
case "Phi3ForCausalLM":
|
||||||
|
conv = &phi3Model{}
|
||||||
var extraTokenData map[string]int
|
case "BertModel":
|
||||||
if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
|
conv = &bertModel{}
|
||||||
return nil, err
|
default:
|
||||||
}
|
return errors.New("unsupported architecture")
|
||||||
|
}
|
||||||
type token struct {
|
|
||||||
key string
|
if err := json.Unmarshal(bts, conv); err != nil {
|
||||||
pos int
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
extraTokens := make([]token, 0)
|
if t, ok := conv.(moreParser); ok {
|
||||||
for k, id := range extraTokenData {
|
if err := t.parseMore(fsys); err != nil {
|
||||||
extraTokens = append(extraTokens, token{k, id})
|
return err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
slices.SortFunc(extraTokens, func(a, b token) int {
|
|
||||||
return cmp.Compare(a.pos, b.pos)
|
t, err := parseTokenizer(fsys, conv.specialTokenTypes())
|
||||||
})
|
if err != nil {
|
||||||
|
return err
|
||||||
numToks := len(v.Tokens)
|
}
|
||||||
|
|
||||||
for cnt, t := range extraTokens {
|
if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
|
||||||
// the token id should match the specific index for the total number of tokens
|
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
|
||||||
if t.pos != cnt+numToks {
|
for i := range vocabSize - len(t.Vocabulary.Tokens) {
|
||||||
return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
|
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
|
||||||
}
|
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
|
||||||
v.Tokens = append(v.Tokens, t.key)
|
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
|
||||||
v.Scores = append(v.Scores, -1000.0)
|
}
|
||||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
} else {
|
||||||
}
|
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
|
||||||
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
|
}
|
||||||
|
|
||||||
if params.VocabSize > len(v.Tokens) {
|
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
|
||||||
missingTokens := params.VocabSize - len(v.Tokens)
|
if err != nil {
|
||||||
slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
|
return err
|
||||||
for cnt := range missingTokens {
|
}
|
||||||
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
|
|
||||||
v.Scores = append(v.Scores, -1)
|
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
|
||||||
v.Types = append(v.Types, tokenTypeUserDefined)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return v, nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
174
convert/convert_bert.go
Normal file
174
convert/convert_bert.go
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"encoding/json"
|
||||||
|
"io/fs"
|
||||||
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type bertModel struct {
|
||||||
|
ModelParameters
|
||||||
|
NLayers uint32 `json:"n_layers"`
|
||||||
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
|
NLayer uint32 `json:"n_layer"`
|
||||||
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
|
NCtx uint32 `json:"n_ctx"`
|
||||||
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
|
NEmbd uint32 `json:"n_embd"`
|
||||||
|
IntermediateSize uint32 `json:"intermediate_size"`
|
||||||
|
NInner uint32 `json:"n_inner"`
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NHead uint32 `json:"n_head"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||||
|
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||||
|
NormEpsilon float32 `json:"norm_epsilon"`
|
||||||
|
|
||||||
|
PoolingType uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
_ ModelConverter = (*bertModel)(nil)
|
||||||
|
_ moreParser = (*bertModel)(nil)
|
||||||
|
)
|
||||||
|
|
||||||
|
func (p *bertModel) parseMore(fsys fs.FS) error {
|
||||||
|
bts, err := fs.ReadFile(fsys, "modules.json")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var modules []struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Path string `json:"path"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(bts, &modules); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var pooling string
|
||||||
|
for _, m := range modules {
|
||||||
|
if m.Type == "sentence_transformers.models.Pooling" {
|
||||||
|
pooling = m.Path
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if pooling != "" {
|
||||||
|
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var pc struct {
|
||||||
|
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
|
||||||
|
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(bts, &pc); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if pc.PoolingModeMeanTokens {
|
||||||
|
p.PoolingType = 1
|
||||||
|
} else if pc.PoolingModeCLSToken {
|
||||||
|
p.PoolingType = 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *bertModel) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.ModelParameters.KV(t)
|
||||||
|
kv["general.architecture"] = "bert"
|
||||||
|
kv["bert.attention.causal"] = false
|
||||||
|
kv["bert.pooling_type"] = p.PoolingType
|
||||||
|
|
||||||
|
kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||||
|
|
||||||
|
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
|
||||||
|
kv["bert.context_length"] = contextLength
|
||||||
|
}
|
||||||
|
|
||||||
|
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
|
||||||
|
kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
|
||||||
|
kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
|
||||||
|
}
|
||||||
|
|
||||||
|
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
|
||||||
|
kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||||
|
}
|
||||||
|
|
||||||
|
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
|
||||||
|
kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||||
|
}
|
||||||
|
|
||||||
|
kv["tokenizer.ggml.model"] = "bert"
|
||||||
|
kv["tokenizer.ggml.token_type_count"] = uint32(2)
|
||||||
|
|
||||||
|
// convert to phantom space tokens
|
||||||
|
for i, e := range t.Tokens {
|
||||||
|
if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
|
||||||
|
// noop
|
||||||
|
} else if strings.HasPrefix(e, "##") {
|
||||||
|
t.Tokens[i] = e[2:]
|
||||||
|
} else {
|
||||||
|
t.Tokens[i] = "\u2581" + e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
kv["tokenizer.ggml.tokens"] = t.Tokens
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
if slices.Contains([]string{
|
||||||
|
"embeddings.position_ids",
|
||||||
|
"pooler.dense.weight",
|
||||||
|
"pooler.dense.bias",
|
||||||
|
}, t.Name()) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bertModel) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"encoder.layer", "blk",
|
||||||
|
"encoder.layers", "blk",
|
||||||
|
"embeddings.word_embeddings", "token_embd",
|
||||||
|
"embeddings.token_type_embeddings", "token_types",
|
||||||
|
"embeddings.LayerNorm", "token_embd_norm",
|
||||||
|
"embeddings.position_embeddings", "position_embd",
|
||||||
|
"attention.self.query", "attn_q",
|
||||||
|
"attention.self.key", "attn_k",
|
||||||
|
"attention.self.value", "attn_v",
|
||||||
|
"attention.output.dense", "attn_output",
|
||||||
|
"attention.output.LayerNorm", "attn_output_norm",
|
||||||
|
"intermediate.dense", "ffn_up",
|
||||||
|
"output.dense", "ffn_down",
|
||||||
|
"output.LayerNorm", "layer_output_norm",
|
||||||
|
}
|
||||||
|
}
|
||||||
100
convert/convert_gemma.go
Normal file
100
convert/convert_gemma.go
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type gemmaModel struct {
|
||||||
|
ModelParameters
|
||||||
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
|
HiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
|
IntermediateSize uint32 `json:"intermediate_size"`
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
|
HeadDim uint32 `json:"head_dim"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ ModelConverter = (*gemmaModel)(nil)
|
||||||
|
|
||||||
|
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.ModelParameters.KV(t)
|
||||||
|
kv["general.architecture"] = "gemma"
|
||||||
|
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||||
|
kv["gemma.embedding_length"] = p.HiddenSize
|
||||||
|
kv["gemma.block_count"] = p.HiddenLayers
|
||||||
|
kv["gemma.feed_forward_length"] = p.IntermediateSize
|
||||||
|
kv["gemma.attention.head_count"] = p.NumAttentionHeads
|
||||||
|
kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||||
|
kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||||
|
kv["gemma.attention.key_length"] = p.HeadDim
|
||||||
|
kv["gemma.attention.value_length"] = p.HeadDim
|
||||||
|
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
|
||||||
|
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
|
||||||
|
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
|
||||||
|
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
if strings.HasSuffix(t.Name(), "_norm.weight") {
|
||||||
|
t.SetRepacker(p.addOne)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemmaModel) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"model.embed_tokens", "token_embd",
|
||||||
|
"model.norm", "output_norm",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"input_layernorm", "attn_norm",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"post_attention_layernorm", "ffn_norm",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
|
||||||
|
ones := tensor.Ones(tensor.Float32, int(shape[0]))
|
||||||
|
|
||||||
|
n, err := n.Add(ones)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
43
convert/convert_gemma2.go
Normal file
43
convert/convert_gemma2.go
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type gemma2Model struct {
|
||||||
|
gemmaModel
|
||||||
|
SlidingWindow uint32 `json:"sliding_window"`
|
||||||
|
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
|
||||||
|
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.ModelParameters.KV(t)
|
||||||
|
kv["general.architecture"] = "gemma2"
|
||||||
|
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||||
|
kv["gemma2.embedding_length"] = p.HiddenSize
|
||||||
|
kv["gemma2.block_count"] = p.HiddenLayers
|
||||||
|
kv["gemma2.feed_forward_length"] = p.IntermediateSize
|
||||||
|
kv["gemma2.attention.head_count"] = p.NumAttentionHeads
|
||||||
|
kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||||
|
kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||||
|
kv["gemma2.attention.key_length"] = p.HeadDim
|
||||||
|
kv["gemma2.attention.value_length"] = p.HeadDim
|
||||||
|
kv["gemma2.attention.sliding_window"] = p.SlidingWindow
|
||||||
|
kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
|
||||||
|
kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
|
||||||
|
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
|
||||||
|
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
|
||||||
|
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
|
||||||
|
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Model) Replacements() []string {
|
||||||
|
return append(
|
||||||
|
p.gemmaModel.Replacements(),
|
||||||
|
"post_attention_layernorm", "post_attention_norm",
|
||||||
|
"pre_feedforward_layernorm", "ffn_norm",
|
||||||
|
"post_feedforward_layernorm", "post_ffw_norm",
|
||||||
|
)
|
||||||
|
}
|
||||||
91
convert/convert_gemma2_adapter.go
Normal file
91
convert/convert_gemma2_adapter.go
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type gemma2Adapter struct {
|
||||||
|
AdapterParameters
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ AdapterConverter = (*gemma2Adapter)(nil)
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
|
||||||
|
kv := p.AdapterParameters.KV()
|
||||||
|
kv["general.architecture"] = "gemma2"
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
shape := t.Shape()
|
||||||
|
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||||
|
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||||
|
shape[0], shape[1] = shape[1], shape[0]
|
||||||
|
t.SetRepacker(p.repack)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"base_model.model.", "",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"lora_A.weight", "weight.lora_a",
|
||||||
|
"lora_B.weight", "weight.lora_b",
|
||||||
|
"lora_a", "weight.lora_a",
|
||||||
|
"lora_b", "weight.lora_b",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
if err := n.T(1, 0); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
213
convert/convert_llama.go
Normal file
213
convert/convert_llama.go
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type llamaModel struct {
|
||||||
|
ModelParameters
|
||||||
|
NLayers uint32 `json:"n_layers"`
|
||||||
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
|
NLayer uint32 `json:"n_layer"`
|
||||||
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
|
NCtx uint32 `json:"n_ctx"`
|
||||||
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
|
NEmbd uint32 `json:"n_embd"`
|
||||||
|
IntermediateSize uint32 `json:"intermediate_size"`
|
||||||
|
NInner uint32 `json:"n_inner"`
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NHead uint32 `json:"n_head"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
RopeTheta float32 `json:"rope_theta"`
|
||||||
|
RopeScaling struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
RopeType string `json:"rope_type"`
|
||||||
|
Factor float32 `json:"factor"`
|
||||||
|
LowFrequencyFactor float32 `json:"low_freq_factor"`
|
||||||
|
HighFrequencyFactor float32 `json:"high_freq_factor"`
|
||||||
|
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
|
||||||
|
|
||||||
|
factors ropeFactor
|
||||||
|
} `json:"rope_scaling"`
|
||||||
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
|
LayerNormEPS float32 `json:"layer_norm_eps"`
|
||||||
|
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
|
||||||
|
NormEpsilon float32 `json:"norm_epsilon"`
|
||||||
|
HeadDim uint32 `json:"head_dim"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ ModelConverter = (*llamaModel)(nil)
|
||||||
|
|
||||||
|
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.ModelParameters.KV(t)
|
||||||
|
kv["general.architecture"] = "llama"
|
||||||
|
kv["llama.vocab_size"] = p.VocabSize
|
||||||
|
|
||||||
|
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
|
||||||
|
|
||||||
|
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
|
||||||
|
kv["llama.context_length"] = contextLength
|
||||||
|
}
|
||||||
|
|
||||||
|
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
|
||||||
|
kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
|
||||||
|
kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
|
||||||
|
}
|
||||||
|
|
||||||
|
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
|
||||||
|
kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||||
|
kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.RopeTheta > 0 {
|
||||||
|
kv["llama.rope.freq_base"] = p.RopeTheta
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.RopeScaling.Type == "linear" {
|
||||||
|
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
|
||||||
|
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
|
||||||
|
} else if p.RopeScaling.RopeType == "llama3" {
|
||||||
|
dim := p.HiddenSize / p.NumAttentionHeads
|
||||||
|
for i := uint32(0); i < dim; i += 2 {
|
||||||
|
factor := cmp.Or(p.RopeScaling.Factor, 8.0)
|
||||||
|
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
|
||||||
|
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
|
||||||
|
|
||||||
|
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
|
||||||
|
lambdaLow := float32(original) / factorLow
|
||||||
|
lambdaHigh := float32(original) / factorHigh
|
||||||
|
|
||||||
|
lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
|
||||||
|
if lambda < float64(lambdaHigh) {
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
|
||||||
|
} else if lambda > float64(lambdaLow) {
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
|
||||||
|
} else {
|
||||||
|
smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
|
||||||
|
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.NumKeyValueHeads > 0 {
|
||||||
|
kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.RMSNormEPS > 0 {
|
||||||
|
kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||||
|
}
|
||||||
|
|
||||||
|
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
|
||||||
|
kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.HeadDim > 0 {
|
||||||
|
kv["llama.attention.key_length"] = p.HeadDim
|
||||||
|
kv["llama.attention.value_length"] = p.HeadDim
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
|
||||||
|
if p.RopeScaling.factors != nil {
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: "rope_freqs.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
|
||||||
|
WriterTo: p.RopeScaling.factors,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, t := range ts {
|
||||||
|
if strings.HasSuffix(t.Name(), "attn_q.weight") ||
|
||||||
|
strings.HasSuffix(t.Name(), "attn_k.weight") {
|
||||||
|
t.SetRepacker(p.repack)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaModel) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"lm_head", "output",
|
||||||
|
"model.embed_tokens", "token_embd",
|
||||||
|
"model.norm", "output_norm",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"input_layernorm", "attn_norm",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"post_attention_layernorm", "ffn_norm",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
var dims []int
|
||||||
|
for _, dim := range shape {
|
||||||
|
dims = append(dims, int(dim))
|
||||||
|
}
|
||||||
|
|
||||||
|
var heads uint32
|
||||||
|
if strings.HasSuffix(name, "attn_q.weight") {
|
||||||
|
heads = p.NumAttentionHeads
|
||||||
|
} else if strings.HasSuffix(name, "attn_k.weight") {
|
||||||
|
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(0, 2, 1, 3); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
169
convert/convert_llama_adapter.go
Normal file
169
convert/convert_llama_adapter.go
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pdevine/tensor"
|
||||||
|
"github.com/pdevine/tensor/native"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type llamaAdapter struct {
|
||||||
|
AdapterParameters
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ AdapterConverter = (*llamaAdapter)(nil)
|
||||||
|
|
||||||
|
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
|
||||||
|
kv := p.AdapterParameters.KV()
|
||||||
|
kv["general.architecture"] = "llama"
|
||||||
|
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
|
||||||
|
kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
|
||||||
|
|
||||||
|
p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var out []llm.Tensor
|
||||||
|
for _, t := range ts {
|
||||||
|
shape := t.Shape()
|
||||||
|
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||||
|
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
|
||||||
|
shape[0], shape[1] = shape[1], shape[0]
|
||||||
|
t.SetRepacker(p.repackAndTranspose)
|
||||||
|
} else {
|
||||||
|
t.SetRepacker(p.repack)
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: shape,
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"base_model.model.", "",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"self_attn.q_proj", "attn_q",
|
||||||
|
"self_attn.k_proj", "attn_k",
|
||||||
|
"self_attn.v_proj", "attn_v",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.gate_proj", "ffn_gate",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.up_proj", "ffn_up",
|
||||||
|
"lora_A.weight", "weight.lora_a",
|
||||||
|
"lora_B.weight", "weight.lora_b",
|
||||||
|
"lora_a", "weight.lora_a",
|
||||||
|
"lora_b", "weight.lora_b",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
var heads uint32
|
||||||
|
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||||
|
heads = p.NumAttentionHeads
|
||||||
|
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||||
|
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
} else {
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(0, 2, 1, 3); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
|
||||||
|
dims := []int{int(shape[1]), int(shape[0])}
|
||||||
|
|
||||||
|
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
||||||
|
|
||||||
|
var heads uint32
|
||||||
|
if strings.HasSuffix(name, "attn_q.weight.lora_a") {
|
||||||
|
heads = p.NumAttentionHeads
|
||||||
|
} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
|
||||||
|
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
|
||||||
|
}
|
||||||
|
|
||||||
|
if heads > 0 {
|
||||||
|
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(0, 2, 1, 3); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.T(1, 0); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Reshape(dims...); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := n.Transpose(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := native.SelectF32(n, 1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
for _, t := range ts {
|
||||||
|
f32s = append(f32s, t...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f32s, nil
|
||||||
|
}
|
||||||
94
convert/convert_mixtral.go
Normal file
94
convert/convert_mixtral.go
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type mixtralModel struct {
|
||||||
|
llamaModel
|
||||||
|
NumLocalExperts uint32 `json:"num_local_experts"`
|
||||||
|
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.llamaModel.KV(t)
|
||||||
|
|
||||||
|
if p.NumLocalExperts > 0 {
|
||||||
|
kv["llama.expert_count"] = p.NumLocalExperts
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.NumExpertsPerToken > 0 {
|
||||||
|
kv["llama.expert_used_count"] = p.NumExpertsPerToken
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
oldnew := []string{
|
||||||
|
"model.layers", "blk",
|
||||||
|
"w1", "ffn_gate_exps",
|
||||||
|
"w2", "ffn_down_exps",
|
||||||
|
"w3", "ffn_up_exps",
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range p.NumLocalExperts {
|
||||||
|
oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
|
||||||
|
}
|
||||||
|
|
||||||
|
// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
|
||||||
|
namer := strings.NewReplacer(oldnew...)
|
||||||
|
experts := make(map[string]experts)
|
||||||
|
|
||||||
|
// merge experts into a single tensor while removing them from ts
|
||||||
|
ts = slices.DeleteFunc(ts, func(t Tensor) bool {
|
||||||
|
if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
name := namer.Replace(t.Name())
|
||||||
|
experts[name] = append(experts[name], t)
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
var out []llm.Tensor
|
||||||
|
for n, e := range experts {
|
||||||
|
// TODO(mxyng): sanity check experts
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: n,
|
||||||
|
Kind: e[0].Kind(),
|
||||||
|
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
|
||||||
|
WriterTo: e,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return append(out, p.llamaModel.Tensors(ts)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *mixtralModel) Replacements() []string {
|
||||||
|
return append(
|
||||||
|
p.llamaModel.Replacements(),
|
||||||
|
"block_sparse_moe.gate", "ffn_gate_inp",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
type experts []Tensor
|
||||||
|
|
||||||
|
func (e experts) WriteTo(w io.Writer) (int64, error) {
|
||||||
|
// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
|
||||||
|
for _, t := range e {
|
||||||
|
// the canonical merged experts tensor stacks all experts along a new, 0 axis,
|
||||||
|
// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
|
||||||
|
// this accomplishes the same thing by writing each expert tensor in sequence
|
||||||
|
if _, err := t.WriteTo(w); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
123
convert/convert_phi3.go
Normal file
123
convert/convert_phi3.go
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"encoding/binary"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/llm"
|
||||||
|
)
|
||||||
|
|
||||||
|
type phi3Model struct {
|
||||||
|
ModelParameters
|
||||||
|
NumHiddenLayers uint32 `json:"num_hidden_layers"`
|
||||||
|
NLayers uint32 `json:"n_layers"`
|
||||||
|
HiddenSize uint32 `json:"hidden_size"`
|
||||||
|
NEmbd uint32 `json:"n_embd"`
|
||||||
|
IntermediateSize uint32 `json:"intermediate_size"`
|
||||||
|
NumAttentionHeads uint32 `json:"num_attention_heads"`
|
||||||
|
NHead uint32 `json:"n_head"`
|
||||||
|
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||||
|
NHeadKV uint32 `json:"n_head_kv"`
|
||||||
|
RopeTheta float32 `json:"rope_theta"`
|
||||||
|
RopeScaling struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
LongFactor ropeFactor `json:"long_factor"`
|
||||||
|
ShortFactor ropeFactor `json:"short_factor"`
|
||||||
|
} `json:"rope_scaling"`
|
||||||
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
|
NPositions uint32 `json:"n_positions"`
|
||||||
|
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
|
||||||
|
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||||
|
SlidingWindow uint32 `json:"sliding_window"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ ModelConverter = (*phi3Model)(nil)
|
||||||
|
|
||||||
|
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
|
||||||
|
kv := p.ModelParameters.KV(t)
|
||||||
|
kv["general.architecture"] = "phi3"
|
||||||
|
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||||
|
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
|
||||||
|
kv["phi3.feed_forward_length"] = p.IntermediateSize
|
||||||
|
kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
|
||||||
|
kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||||
|
kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
|
||||||
|
kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
|
||||||
|
kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
|
||||||
|
kv["phi3.rope.freq_base"] = p.RopeTheta
|
||||||
|
kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
|
||||||
|
kv["phi3.attention.sliding_window"] = p.SlidingWindow
|
||||||
|
|
||||||
|
scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
|
||||||
|
|
||||||
|
switch p.RopeScaling.Type {
|
||||||
|
case "":
|
||||||
|
// no scaling
|
||||||
|
case "su", "longrope":
|
||||||
|
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
|
||||||
|
case "yarn":
|
||||||
|
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
|
||||||
|
default:
|
||||||
|
panic("unknown rope scaling type")
|
||||||
|
}
|
||||||
|
|
||||||
|
return kv
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||||
|
var addRopeFactors sync.Once
|
||||||
|
|
||||||
|
out := make([]llm.Tensor, 0, len(ts)+2)
|
||||||
|
for _, t := range ts {
|
||||||
|
if strings.HasPrefix(t.Name(), "blk.0.") {
|
||||||
|
addRopeFactors.Do(func() {
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: "rope_factors_long.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
|
||||||
|
WriterTo: p.RopeScaling.LongFactor,
|
||||||
|
}, llm.Tensor{
|
||||||
|
Name: "rope_factors_short.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
|
||||||
|
WriterTo: p.RopeScaling.ShortFactor,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
out = append(out, llm.Tensor{
|
||||||
|
Name: t.Name(),
|
||||||
|
Kind: t.Kind(),
|
||||||
|
Shape: t.Shape(),
|
||||||
|
WriterTo: t,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *phi3Model) Replacements() []string {
|
||||||
|
return []string{
|
||||||
|
"lm_head", "output",
|
||||||
|
"model.embed_tokens", "token_embd",
|
||||||
|
"model.norm", "output_norm",
|
||||||
|
"model.layers", "blk",
|
||||||
|
"input_layernorm", "attn_norm",
|
||||||
|
"self_attn.qkv_proj", "attn_qkv",
|
||||||
|
"self_attn.o_proj", "attn_output",
|
||||||
|
"mlp.down_proj", "ffn_down",
|
||||||
|
"mlp.gate_up_proj", "ffn_up",
|
||||||
|
"post_attention_layernorm", "ffn_norm",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ropeFactor []float32
|
||||||
|
|
||||||
|
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
|
||||||
|
err := binary.Write(w, binary.LittleEndian, r)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
@@ -1,48 +1,37 @@
|
|||||||
//go:build slow
|
|
||||||
|
|
||||||
package convert
|
package convert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"log/slog"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
)
|
)
|
||||||
|
|
||||||
func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
|
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
mf, err := GetModelFormat(p)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
params, err := mf.GetParams(p)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
arch, err := mf.GetModelArch("", p, params)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := arch.LoadVocab(); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := arch.GetTensors(); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
if err := arch.WriteGGUF(f); err != nil {
|
if err := ConvertModel(fsys, f); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,54 +39,309 @@ func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
defer r.Close()
|
t.Cleanup(func() { r.Close() })
|
||||||
|
|
||||||
m, _, err := llm.DecodeGGML(r)
|
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return m.KV(), m.Tensors()
|
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r, m.KV(), m.Tensors()
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
|
||||||
|
actual := make(map[string]string)
|
||||||
|
for k, v := range kv {
|
||||||
|
if s, ok := v.(json.Marshaler); !ok {
|
||||||
|
actual[k] = fmt.Sprintf("%v", v)
|
||||||
|
} else {
|
||||||
|
bts, err := json.Marshal(s)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tensor := range tensors.Items {
|
||||||
|
sha256sum := sha256.New()
|
||||||
|
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||||
|
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
return actual
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
var level slog.Level
|
||||||
|
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
|
||||||
|
flag.Parse()
|
||||||
|
slog.SetLogLoggerLevel(level)
|
||||||
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConvertFull(t *testing.T) {
|
func TestConvertFull(t *testing.T) {
|
||||||
cases := []struct {
|
cases := []string{
|
||||||
path string
|
"Meta-Llama-3-8B-Instruct",
|
||||||
arch string
|
"Meta-Llama-3.1-8B-Instruct",
|
||||||
tensors int
|
"Mistral-7B-Instruct-v0.2",
|
||||||
layers int
|
"Mixtral-8x7B-Instruct-v0.1",
|
||||||
}{
|
"gemma-2b-it",
|
||||||
{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
|
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
|
||||||
{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
|
"Phi-3-mini-128k-instruct",
|
||||||
{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
|
"all-MiniLM-L6-v2",
|
||||||
{"gemma-2b-it", "gemma", 164, 20},
|
"gemma-2-9b-it",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range cases {
|
for i := range cases {
|
||||||
t.Run(tt.path, func(t *testing.T) {
|
tt := cases[i]
|
||||||
p := filepath.Join("testdata", tt.path)
|
t.Run(tt, func(t *testing.T) {
|
||||||
if _, err := os.Stat(p); err != nil {
|
t.Parallel()
|
||||||
|
|
||||||
|
p := filepath.Join("testdata", tt)
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping in short mode")
|
||||||
|
} else if _, err := os.Stat(p); err != nil {
|
||||||
t.Skipf("%s not found", p)
|
t.Skipf("%s not found", p)
|
||||||
}
|
}
|
||||||
|
|
||||||
kv, tensors := convertFull(t, p)
|
f, kv, tensors := convertFull(t, os.DirFS(p))
|
||||||
|
actual := generateResultsJSON(t, f, kv, tensors)
|
||||||
|
|
||||||
if kv.Architecture() != tt.arch {
|
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
|
||||||
t.Fatalf("expected llama, got %s", kv.Architecture())
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if kv.FileType().String() != "F16" {
|
var expect map[string]string
|
||||||
t.Fatalf("expected F16, got %s", kv.FileType())
|
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tensors) != tt.tensors {
|
keys := maps.Keys(expect)
|
||||||
t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
|
slices.Sort(keys)
|
||||||
}
|
for _, k := range keys {
|
||||||
|
if v, ok := actual[k]; !ok {
|
||||||
layers := tensors.Layers()
|
t.Errorf("missing %s", k)
|
||||||
if len(layers) != tt.layers {
|
} else if v != expect[k] {
|
||||||
t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
|
t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertAdapter(t *testing.T) {
|
||||||
|
type AdapterCase struct {
|
||||||
|
Name string
|
||||||
|
BaseKV map[string]any
|
||||||
|
Expected map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []AdapterCase{
|
||||||
|
{
|
||||||
|
Name: "discollama",
|
||||||
|
BaseKV: map[string]any{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"llama.attention.head_count": uint32(32),
|
||||||
|
"llama.attention.head_count_kv": uint32(8),
|
||||||
|
},
|
||||||
|
Expected: map[string]string{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.parameter_count": "106496",
|
||||||
|
"general.type": "adapter",
|
||||||
|
"general.version": "v0.2",
|
||||||
|
"adapter.lora.alpha": "16",
|
||||||
|
"adapter.type": "lora",
|
||||||
|
"llama.attention.head_count": "32",
|
||||||
|
"llama.attention.head_count_kv": "8",
|
||||||
|
"blk.31.attn_q.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_q.weight.lora_b": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_v.weight.lora_a": "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
|
||||||
|
"blk.31.attn_v.weight.lora_b": "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.Name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
generateLoraTestData(t, tempDir)
|
||||||
|
|
||||||
|
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err := os.Open(f.Name())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
|
||||||
|
|
||||||
|
keys := maps.Keys(c.Expected)
|
||||||
|
slices.Sort(keys)
|
||||||
|
for _, k := range keys {
|
||||||
|
if v, ok := actual[k]; !ok {
|
||||||
|
t.Errorf("missing %s", k)
|
||||||
|
} else if v != c.Expected[k] {
|
||||||
|
t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateLoraTestData(t *testing.T, tempDir string) {
|
||||||
|
type tensorData struct {
|
||||||
|
Offsets []int `json:"data_offsets"`
|
||||||
|
Type string `json:"dtype"`
|
||||||
|
Shape []int `json:"shape"`
|
||||||
|
}
|
||||||
|
offset := 4096 * 8 * 4
|
||||||
|
|
||||||
|
td := map[string]*tensorData{"__metadata__": nil}
|
||||||
|
td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
|
||||||
|
Offsets: []int{0, offset},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{4096, 8},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
|
||||||
|
Offsets: []int{offset, offset * 2},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{8, 4096},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
|
||||||
|
Offsets: []int{offset * 2, offset * 3},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{4096, 8},
|
||||||
|
}
|
||||||
|
td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
|
||||||
|
Offsets: []int{offset * 3, offset*3 + 8*1024*4},
|
||||||
|
Type: "F32",
|
||||||
|
Shape: []int{8, 1024},
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.Marshal(td)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
l := int64(len(data))
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, l)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = buf.Write(data)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// write some data for the tensors
|
||||||
|
|
||||||
|
ones := make([]float32, 4096*8)
|
||||||
|
for i := range ones {
|
||||||
|
ones[i] = float32(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
for range 3 {
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ones = make([]float32, 1024*8)
|
||||||
|
for i := range ones {
|
||||||
|
ones[i] = float32(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = binary.Write(&buf, binary.LittleEndian, ones)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer fdata.Close()
|
||||||
|
|
||||||
|
_, err = fdata.Write(buf.Bytes())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configData := `
|
||||||
|
{
|
||||||
|
"adapter_path": "adapters-test",
|
||||||
|
"batch_size": 8,
|
||||||
|
"config": "config-tiny.json",
|
||||||
|
"data": "../discollama-completion",
|
||||||
|
"grad_checkpoint": null,
|
||||||
|
"iters": 1000,
|
||||||
|
"learning_rate": 1e-05,
|
||||||
|
"lora_layers": 1,
|
||||||
|
"lora_parameters": {
|
||||||
|
"rank": 8,
|
||||||
|
"alpha": 16,
|
||||||
|
"dropout": 0.0,
|
||||||
|
"scale": 2.0
|
||||||
|
},
|
||||||
|
"lr_schedule": null,
|
||||||
|
"max_seq_length": 2048,
|
||||||
|
"model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
|
||||||
|
"resume_adapter_file": null,
|
||||||
|
"save_every": 100,
|
||||||
|
"seed": 0,
|
||||||
|
"steps_per_eval": 200,
|
||||||
|
"steps_per_report": 10,
|
||||||
|
"test": false,
|
||||||
|
"test_batches": 500,
|
||||||
|
"train": true,
|
||||||
|
"use_dora": false,
|
||||||
|
"val_batches": 25
|
||||||
|
}
|
||||||
|
`
|
||||||
|
f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
_, err = f.WriteString(configData)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
58
convert/fs.go
Normal file
58
convert/fs.go
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ZipReader struct {
|
||||||
|
r *zip.Reader
|
||||||
|
p string
|
||||||
|
|
||||||
|
// limit is the maximum size of a file that can be read directly
|
||||||
|
// from the zip archive. Files larger than this size will be extracted
|
||||||
|
limit int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
|
||||||
|
return &ZipReader{r, p, limit}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (z *ZipReader) Open(name string) (fs.File, error) {
|
||||||
|
r, err := z.r.Open(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
if fi, err := r.Stat(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else if fi.Size() < z.limit {
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !filepath.IsLocal(name) {
|
||||||
|
return nil, zip.ErrInsecurePath
|
||||||
|
}
|
||||||
|
|
||||||
|
n := filepath.Join(z.p, name)
|
||||||
|
if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
|
||||||
|
w, err := os.Create(n)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer w.Close()
|
||||||
|
|
||||||
|
if _, err := io.Copy(w, r); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.Open(n)
|
||||||
|
}
|
||||||
102
convert/gemma.go
102
convert/gemma.go
@@ -1,102 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"log/slog"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/pdevine/tensor"
|
|
||||||
"github.com/pdevine/tensor/native"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type GemmaModel struct {
|
|
||||||
ModelData
|
|
||||||
}
|
|
||||||
|
|
||||||
func addOnes(data []float32, vectorSize int) ([]float32, error) {
|
|
||||||
n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
|
|
||||||
ones := tensor.Ones(tensor.Float32, vectorSize)
|
|
||||||
|
|
||||||
n, err := n.Add(ones)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ts, err := native.SelectF32(n, 0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var f32s []float32
|
|
||||||
for _, t := range ts {
|
|
||||||
f32s = append(f32s, t...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return f32s, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *GemmaModel) GetTensors() error {
|
|
||||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
|
|
||||||
for _, l := range t {
|
|
||||||
if strings.HasSuffix(l.Name, "norm.weight") {
|
|
||||||
wt := l.WriterTo.(safetensorWriterTo)
|
|
||||||
wt.repacker = m.Repack
|
|
||||||
l.WriterTo = wt
|
|
||||||
}
|
|
||||||
m.Tensors = append(m.Tensors, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *GemmaModel) LoadVocab() error {
|
|
||||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Vocab = v
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
return addOnes(data, int(shape[0]))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
|
|
||||||
kv := llm.KV{
|
|
||||||
"general.architecture": "gemma",
|
|
||||||
"general.name": m.Name,
|
|
||||||
"gemma.context_length": uint32(m.Params.ContextSize),
|
|
||||||
"gemma.embedding_length": uint32(m.Params.HiddenSize),
|
|
||||||
"gemma.block_count": uint32(m.Params.HiddenLayers),
|
|
||||||
"gemma.feed_forward_length": uint32(m.Params.IntermediateSize),
|
|
||||||
"gemma.attention.head_count": uint32(m.Params.AttentionHeads),
|
|
||||||
"gemma.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
|
||||||
"gemma.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
|
||||||
"gemma.attention.key_length": uint32(m.Params.HeadDimension),
|
|
||||||
"gemma.attention.value_length": uint32(m.Params.HeadDimension),
|
|
||||||
"general.file_type": uint32(1),
|
|
||||||
"tokenizer.ggml.model": "llama",
|
|
||||||
|
|
||||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
|
||||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
|
||||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
|
||||||
|
|
||||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
|
||||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
|
||||||
"tokenizer.ggml.padding_token_id": uint32(m.Params.PaddingTokenID),
|
|
||||||
"tokenizer.ggml.unknown_token_id": uint32(3),
|
|
||||||
"tokenizer.ggml.add_bos_token": true,
|
|
||||||
"tokenizer.ggml.add_eos_token": false,
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
|
||||||
}
|
|
||||||
159
convert/llama.go
159
convert/llama.go
@@ -1,159 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"cmp"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/pdevine/tensor"
|
|
||||||
"github.com/pdevine/tensor/native"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type LlamaModel struct {
|
|
||||||
ModelData
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LlamaModel) GetTensors() error {
|
|
||||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, l := range t {
|
|
||||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
|
||||||
if len(matches) > 0 {
|
|
||||||
switch m.Format.(type) {
|
|
||||||
case *TorchFormat:
|
|
||||||
wt := l.WriterTo.(torchWriterTo)
|
|
||||||
wt.repacker = m.Repack
|
|
||||||
l.WriterTo = wt
|
|
||||||
case *SafetensorFormat:
|
|
||||||
wt := l.WriterTo.(safetensorWriterTo)
|
|
||||||
wt.repacker = m.Repack
|
|
||||||
l.WriterTo = wt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m.Tensors = append(m.Tensors, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LlamaModel) LoadVocab() (err error) {
|
|
||||||
pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
|
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
|
||||||
return nil
|
|
||||||
} else if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
m.Vocab = &Vocab{}
|
|
||||||
for _, t := range ts {
|
|
||||||
m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
|
|
||||||
m.Vocab.Types = append(m.Vocab.Types, t.Type())
|
|
||||||
}
|
|
||||||
|
|
||||||
m.Vocab.Merges = merges
|
|
||||||
m.Params.PreTokenizer = pre
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
|
|
||||||
kv := llm.KV{
|
|
||||||
"general.architecture": "llama",
|
|
||||||
"general.name": m.Name,
|
|
||||||
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
|
|
||||||
"llama.context_length": uint32(m.Params.ContextSize),
|
|
||||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
|
||||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
|
||||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
|
||||||
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
|
|
||||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
|
||||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
|
||||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
|
||||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
|
||||||
"general.file_type": uint32(1),
|
|
||||||
"tokenizer.ggml.model": "gpt2",
|
|
||||||
|
|
||||||
"tokenizer.ggml.pre": m.Params.PreTokenizer,
|
|
||||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
|
||||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
|
||||||
|
|
||||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
|
||||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
|
||||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(m.Vocab.Merges) > 0 {
|
|
||||||
kv["tokenizer.ggml.merges"] = m.Vocab.Merges
|
|
||||||
} else {
|
|
||||||
kv["tokenizer.ggml.scores"] = m.Vocab.Scores
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
return llamaRepack(name, m.Params, data, shape)
|
|
||||||
}
|
|
||||||
|
|
||||||
func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
var dims []int
|
|
||||||
for _, dim := range shape {
|
|
||||||
if dim != 0 {
|
|
||||||
dims = append(dims, int(dim))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var heads int
|
|
||||||
switch {
|
|
||||||
case strings.HasSuffix(name, "attn_q.weight"):
|
|
||||||
heads = params.AttentionHeads
|
|
||||||
case strings.HasSuffix(name, "attn_k.weight"):
|
|
||||||
heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unknown tensor name: %s", name)
|
|
||||||
}
|
|
||||||
|
|
||||||
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
|
||||||
if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := n.T(0, 2, 1, 3); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := n.Reshape(dims...); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := n.Transpose(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ts, err := native.SelectF32(n, 1)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var f32s []float32
|
|
||||||
for _, t := range ts {
|
|
||||||
f32s = append(f32s, t...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return f32s, nil
|
|
||||||
}
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"io"
|
|
||||||
"regexp"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type MistralModel struct {
|
|
||||||
ModelData
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MistralModel) GetTensors() error {
|
|
||||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, l := range t {
|
|
||||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
|
||||||
if len(matches) > 0 {
|
|
||||||
wt := l.WriterTo.(safetensorWriterTo)
|
|
||||||
wt.repacker = m.Repack
|
|
||||||
l.WriterTo = wt
|
|
||||||
}
|
|
||||||
m.Tensors = append(m.Tensors, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MistralModel) LoadVocab() error {
|
|
||||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Vocab = v
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
|
|
||||||
kv := llm.KV{
|
|
||||||
"general.architecture": "llama",
|
|
||||||
"general.name": m.Name,
|
|
||||||
"llama.context_length": uint32(m.Params.ContextSize),
|
|
||||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
|
||||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
|
||||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
|
||||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
|
||||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
|
||||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
|
||||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
|
||||||
"general.file_type": uint32(1),
|
|
||||||
"tokenizer.ggml.model": "llama",
|
|
||||||
|
|
||||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
|
||||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
|
||||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
|
||||||
|
|
||||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
|
||||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
|
||||||
"tokenizer.ggml.add_bos_token": true,
|
|
||||||
"tokenizer.ggml.add_eos_token": false,
|
|
||||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
|
||||||
}
|
|
||||||
|
|
||||||
if m.Params.HeadDimension > 0 {
|
|
||||||
kv["llama.attention.key_length"] = uint32(m.Params.HeadDimension)
|
|
||||||
kv["llama.attention.value_length"] = uint32(m.Params.HeadDimension)
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
return llamaRepack(name, m.Params, data, shape)
|
|
||||||
}
|
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"io"
|
|
||||||
"regexp"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type MixtralModel struct {
|
|
||||||
ModelData
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MixtralModel) GetTensors() error {
|
|
||||||
t, err := m.Format.GetTensors(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
|
||||||
re, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, l := range t {
|
|
||||||
matches := re.FindAllStringSubmatch(l.Name, -1)
|
|
||||||
if len(matches) > 0 {
|
|
||||||
wt := l.WriterTo.(safetensorWriterTo)
|
|
||||||
wt.repacker = m.Repack
|
|
||||||
l.WriterTo = wt
|
|
||||||
}
|
|
||||||
m.Tensors = append(m.Tensors, l)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MixtralModel) LoadVocab() error {
|
|
||||||
v, err := LoadSentencePieceTokens(m.Path, m.Params)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Vocab = v
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
|
|
||||||
kv := llm.KV{
|
|
||||||
"general.architecture": "llama",
|
|
||||||
"general.name": m.Name,
|
|
||||||
"llama.block_count": uint32(m.Params.HiddenLayers),
|
|
||||||
"llama.context_length": uint32(m.Params.ContextSize),
|
|
||||||
"llama.embedding_length": uint32(m.Params.HiddenSize),
|
|
||||||
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
|
|
||||||
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
|
|
||||||
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
|
|
||||||
|
|
||||||
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
|
|
||||||
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
|
|
||||||
|
|
||||||
"llama.expert_count": uint32(m.Params.Experts),
|
|
||||||
"llama.expert_used_count": uint32(m.Params.ExpertsUsed),
|
|
||||||
|
|
||||||
"llama.vocab_size": uint32(len(m.Vocab.Tokens)),
|
|
||||||
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
|
|
||||||
|
|
||||||
"general.file_type": uint32(1),
|
|
||||||
"tokenizer.ggml.model": "llama",
|
|
||||||
|
|
||||||
"tokenizer.ggml.tokens": m.Vocab.Tokens,
|
|
||||||
"tokenizer.ggml.scores": m.Vocab.Scores,
|
|
||||||
"tokenizer.ggml.token_type": m.Vocab.Types,
|
|
||||||
|
|
||||||
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
|
|
||||||
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
|
|
||||||
"tokenizer.ggml.unknown_token_id": uint32(0),
|
|
||||||
"tokenizer.ggml.add_bos_token": true,
|
|
||||||
"tokenizer.ggml.add_eos_token": false,
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
return llamaRepack(name, m.Params, data, shape)
|
|
||||||
}
|
|
||||||
86
convert/reader.go
Normal file
86
convert/reader.go
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Tensor interface {
|
||||||
|
Name() string
|
||||||
|
Shape() []uint64
|
||||||
|
Kind() uint32
|
||||||
|
SetRepacker(repacker)
|
||||||
|
WriteTo(io.Writer) (int64, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tensorBase carries the name, shape and optional repacker of a tensor.
type tensorBase struct {
	name  string
	shape []uint64
	repacker
}

// Name returns the tensor's name.
func (t tensorBase) Name() string {
	return t.name
}

// Shape returns the tensor's dimensions.
func (t tensorBase) Shape() []uint64 {
	return t.shape
}

// Storage kinds reported by Kind.
const (
	tensorKindF32 uint32 = iota
	tensorKindF16
)

// Kind picks the storage kind for the tensor: F32 for the names that are
// always kept in F32 and for one-dimensional tensors, F16 for everything
// else. It panics when the shape is empty.
func (t tensorBase) Kind() uint32 {
	// these tensors are always F32
	alwaysF32 := t.name == "token_types.weight" ||
		strings.HasSuffix(t.name, ".ffn_gate_inp.weight")
	if alwaysF32 {
		return tensorKindF32
	}

	rank := len(t.shape)
	if rank == 0 {
		panic("invalid tensor shape")
	}
	if rank == 1 {
		return tensorKindF32
	}
	return tensorKindF16
}

// SetRepacker installs fn as the tensor's repacker.
func (t *tensorBase) SetRepacker(fn repacker) {
	t.repacker = fn
}

// repacker transforms tensor data; it is called with the tensor name, the
// data as float32 values and the tensor shape, and returns the new data.
type repacker func(string, []float32, []uint64) ([]float32, error)
|
||||||
|
|
||||||
|
func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
||||||
|
patterns := []struct {
|
||||||
|
Pattern string
|
||||||
|
Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
|
||||||
|
}{
|
||||||
|
{"model-*-of-*.safetensors", parseSafetensors},
|
||||||
|
{"model.safetensors", parseSafetensors},
|
||||||
|
{"adapters.safetensors", parseSafetensors},
|
||||||
|
{"adapter_model.safetensors", parseSafetensors},
|
||||||
|
{"pytorch_model-*-of-*.bin", parseTorch},
|
||||||
|
{"pytorch_model.bin", parseTorch},
|
||||||
|
{"consolidated.*.pth", parseTorch},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
matches, err := fs.Glob(fsys, pattern.Pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matches) > 0 {
|
||||||
|
return pattern.Func(fsys, replacer, matches...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errors.New("unknown tensor format")
|
||||||
|
}
|
||||||
151
convert/reader_safetensors.go
Normal file
151
convert/reader_safetensors.go
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/d4l3k/go-bfloat16"
|
||||||
|
"github.com/x448/float16"
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
|
)
|
||||||
|
|
||||||
|
// safetensorMetadata is one entry of the JSON header of a safetensors file.
type safetensorMetadata struct {
	// Type is the "dtype" value of the entry.
	Type string `json:"dtype"`

	// Shape is the "shape" value of the entry.
	Shape []uint64 `json:"shape"`

	// Offsets is the "data_offsets" value of the entry.
	Offsets []int64 `json:"data_offsets"`
}
|
||||||
|
|
||||||
|
func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
|
||||||
|
var ts []Tensor
|
||||||
|
for _, p := range ps {
|
||||||
|
f, err := fsys.Open(p)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var n int64
|
||||||
|
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||||
|
if _, err = io.CopyN(b, f, n); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var headers map[string]safetensorMetadata
|
||||||
|
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
keys := maps.Keys(headers)
|
||||||
|
slices.Sort(keys)
|
||||||
|
|
||||||
|
for _, key := range keys {
|
||||||
|
if value := headers[key]; value.Type != "" {
|
||||||
|
ts = append(ts, safetensor{
|
||||||
|
fs: fsys,
|
||||||
|
path: p,
|
||||||
|
dtype: value.Type,
|
||||||
|
offset: safetensorsPad(n, value.Offsets[0]),
|
||||||
|
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
|
||||||
|
tensorBase: &tensorBase{
|
||||||
|
name: replacer.Replace(key),
|
||||||
|
shape: value.Shape,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// safetensorsPad converts an offset taken from a safetensors header into an
// absolute position in the file: it adds the 8 bytes of the length prefix and
// the header length n to offset.
func safetensorsPad(n, offset int64) int64 {
	const lengthPrefix = 8
	return lengthPrefix + n + offset
}
|
||||||
|
|
||||||
|
type safetensor struct {
|
||||||
|
fs fs.FS
|
||||||
|
path string
|
||||||
|
dtype string
|
||||||
|
offset int64
|
||||||
|
size int64
|
||||||
|
*tensorBase
|
||||||
|
}
|
||||||
|
|
||||||
|
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
|
||||||
|
f, err := st.fs.Open(st.path)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
if seeker, ok := f.(io.Seeker); ok {
|
||||||
|
if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
switch st.dtype {
|
||||||
|
case "F32":
|
||||||
|
f32s = make([]float32, st.size/4)
|
||||||
|
if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
case "F16":
|
||||||
|
u16s := make([]uint16, st.size/2)
|
||||||
|
if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
f32s = make([]float32, len(u16s))
|
||||||
|
for i := range u16s {
|
||||||
|
f32s[i] = float16.Frombits(u16s[i]).Float32()
|
||||||
|
}
|
||||||
|
|
||||||
|
case "BF16":
|
||||||
|
u8s := make([]uint8, st.size)
|
||||||
|
if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
f32s = bfloat16.DecodeFloat32(u8s)
|
||||||
|
default:
|
||||||
|
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
|
||||||
|
}
|
||||||
|
|
||||||
|
if st.repacker != nil {
|
||||||
|
f32s, err = st.repacker(st.Name(), f32s, st.Shape())
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch st.Kind() {
|
||||||
|
case tensorKindF32:
|
||||||
|
return 0, binary.Write(w, binary.LittleEndian, f32s)
|
||||||
|
case tensorKindF16:
|
||||||
|
f16s := make([]uint16, len(f32s))
|
||||||
|
for i := range f32s {
|
||||||
|
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, binary.Write(w, binary.LittleEndian, f16s)
|
||||||
|
default:
|
||||||
|
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
|
||||||
|
}
|
||||||
|
}
|
||||||
48
convert/reader_torch.go
Normal file
48
convert/reader_torch.go
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/nlpodyssey/gopickle/pytorch"
|
||||||
|
"github.com/nlpodyssey/gopickle/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
|
||||||
|
var ts []Tensor
|
||||||
|
for _, p := range ps {
|
||||||
|
pt, err := pytorch.Load(p)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, k := range pt.(*types.Dict).Keys() {
|
||||||
|
t := pt.(*types.Dict).MustGet(k)
|
||||||
|
|
||||||
|
var shape []uint64
|
||||||
|
for dim := range t.(*pytorch.Tensor).Size {
|
||||||
|
shape = append(shape, uint64(dim))
|
||||||
|
}
|
||||||
|
|
||||||
|
ts = append(ts, torch{
|
||||||
|
storage: t.(*pytorch.Tensor).Source,
|
||||||
|
tensorBase: &tensorBase{
|
||||||
|
name: replacer.Replace(k.(string)),
|
||||||
|
shape: shape,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type torch struct {
|
||||||
|
storage pytorch.StorageInterface
|
||||||
|
*tensorBase
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pt torch) WriteTo(w io.Writer) (int64, error) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
@@ -1,309 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/d4l3k/go-bfloat16"
|
|
||||||
"github.com/x448/float16"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type safetensorWriterTo struct {
|
|
||||||
t *llm.Tensor
|
|
||||||
|
|
||||||
params *Params
|
|
||||||
bo ByteOrder
|
|
||||||
|
|
||||||
filename string
|
|
||||||
dtype string
|
|
||||||
|
|
||||||
offset, size int64
|
|
||||||
repacker func(string, []float32, []uint64) ([]float32, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
type safetensorMetadata struct {
|
|
||||||
Type string `json:"dtype"`
|
|
||||||
Shape []uint64 `json:"shape"`
|
|
||||||
Offsets []int64 `json:"data_offsets"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// SafetensorFormat is the stateless receiver for the safetensors reading
// methods in this file (GetTensors, GetParams, GetLayerName, GetModelArch).
type SafetensorFormat struct{}
|
|
||||||
|
|
||||||
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
|
||||||
var tensors []llm.Tensor
|
|
||||||
matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var offset uint64
|
|
||||||
for _, f := range matches {
|
|
||||||
var t []llm.Tensor
|
|
||||||
var err error
|
|
||||||
t, offset, err = m.readTensors(f, offset, params)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
tensors = append(tensors, t...)
|
|
||||||
}
|
|
||||||
return tensors, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
|
|
||||||
f, err := os.Open(fn)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
var n int64
|
|
||||||
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
b := bytes.NewBuffer(make([]byte, 0, n))
|
|
||||||
if _, err = io.CopyN(b, f, n); err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var headers map[string]safetensorMetadata
|
|
||||||
if err := json.NewDecoder(b).Decode(&headers); err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var keys []string
|
|
||||||
for key := range headers {
|
|
||||||
if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
|
|
||||||
keys = append(keys, key)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
slices.Sort(keys)
|
|
||||||
|
|
||||||
var tensors []llm.Tensor
|
|
||||||
for _, key := range keys {
|
|
||||||
value := headers[key]
|
|
||||||
|
|
||||||
var kind uint32
|
|
||||||
switch len(value.Shape) {
|
|
||||||
case 0:
|
|
||||||
// valuedata
|
|
||||||
continue
|
|
||||||
case 2:
|
|
||||||
kind = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
name, err := m.GetLayerName(key)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
shape := make([]uint64, len(value.Shape))
|
|
||||||
copy(shape, value.Shape)
|
|
||||||
|
|
||||||
pad := func(s int64) int64 {
|
|
||||||
return 8 + n + s
|
|
||||||
}
|
|
||||||
|
|
||||||
t := llm.Tensor{
|
|
||||||
Name: name,
|
|
||||||
Kind: kind,
|
|
||||||
Offset: offset,
|
|
||||||
Shape: shape,
|
|
||||||
}
|
|
||||||
|
|
||||||
t.WriterTo = safetensorWriterTo{
|
|
||||||
t: &t,
|
|
||||||
params: params,
|
|
||||||
bo: params.ByteOrder,
|
|
||||||
filename: fn,
|
|
||||||
dtype: value.Type,
|
|
||||||
offset: pad(value.Offsets[0]),
|
|
||||||
size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += t.Size()
|
|
||||||
tensors = append(tensors, t)
|
|
||||||
}
|
|
||||||
|
|
||||||
return tensors, offset, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
|
|
||||||
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
var params Params
|
|
||||||
|
|
||||||
if err := json.NewDecoder(f).Decode(¶ms); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
params.ByteOrder = binary.LittleEndian
|
|
||||||
return ¶ms, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
|
|
||||||
directMap := map[string]string{
|
|
||||||
"model.embed_tokens.weight": "token_embd.weight",
|
|
||||||
"lm_head.weight": "output.weight",
|
|
||||||
"model.norm.weight": "output_norm.weight",
|
|
||||||
}
|
|
||||||
|
|
||||||
tMap := map[string]string{
|
|
||||||
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
|
||||||
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
|
||||||
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
|
||||||
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
|
||||||
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
|
||||||
"model.layers.(\\d+).block_sparse_moe.gate.weight": "blk.$1.ffn_gate_inp.weight",
|
|
||||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
|
|
||||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
|
|
||||||
"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
|
|
||||||
}
|
|
||||||
|
|
||||||
v, ok := directMap[n]
|
|
||||||
if ok {
|
|
||||||
return v, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// quick hack to rename the layers to gguf format
|
|
||||||
for k, v := range tMap {
|
|
||||||
re := regexp.MustCompile(k)
|
|
||||||
newName := re.ReplaceAllString(n, v)
|
|
||||||
if newName != n {
|
|
||||||
return newName, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
|
|
||||||
f, err := os.Open(r.filename)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var f32s []float32
|
|
||||||
switch r.dtype {
|
|
||||||
case "F32":
|
|
||||||
f32s = make([]float32, r.size/4)
|
|
||||||
if err = binary.Read(f, r.bo, f32s); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
case "F16":
|
|
||||||
u16s := make([]uint16, r.size/2)
|
|
||||||
if err = binary.Read(f, r.bo, u16s); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, b := range u16s {
|
|
||||||
f32s = append(f32s, float16.Frombits(b).Float32())
|
|
||||||
}
|
|
||||||
|
|
||||||
case "BF16":
|
|
||||||
u8s := make([]uint8, r.size)
|
|
||||||
if err = binary.Read(f, r.bo, u8s); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
f32s = bfloat16.DecodeFloat32(u8s)
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("unknown data type: %s", r.dtype)
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.repacker != nil {
|
|
||||||
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch r.t.Kind {
|
|
||||||
case 0:
|
|
||||||
return 0, binary.Write(w, r.bo, f32s)
|
|
||||||
case 1:
|
|
||||||
f16s := make([]uint16, len(f32s))
|
|
||||||
for i := range f32s {
|
|
||||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0, binary.Write(w, r.bo, f16s)
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
|
|
||||||
switch len(params.Architectures) {
|
|
||||||
case 0:
|
|
||||||
return nil, fmt.Errorf("No architecture specified to convert")
|
|
||||||
case 1:
|
|
||||||
switch params.Architectures[0] {
|
|
||||||
case "LlamaForCausalLM":
|
|
||||||
return &LlamaModel{
|
|
||||||
ModelData{
|
|
||||||
Name: name,
|
|
||||||
Path: dirPath,
|
|
||||||
Params: params,
|
|
||||||
Format: m,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
case "MistralForCausalLM":
|
|
||||||
return &MistralModel{
|
|
||||||
ModelData{
|
|
||||||
Name: name,
|
|
||||||
Path: dirPath,
|
|
||||||
Params: params,
|
|
||||||
Format: m,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
case "MixtralForCausalLM":
|
|
||||||
return &MixtralModel{
|
|
||||||
ModelData{
|
|
||||||
Name: name,
|
|
||||||
Path: dirPath,
|
|
||||||
Params: params,
|
|
||||||
Format: m,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
case "GemmaForCausalLM":
|
|
||||||
return &GemmaModel{
|
|
||||||
ModelData{
|
|
||||||
Name: name,
|
|
||||||
Path: dirPath,
|
|
||||||
Params: params,
|
|
||||||
Format: m,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, fmt.Errorf("Unknown error")
|
|
||||||
}
|
|
||||||
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
313
convert/testdata/Meta-Llama-3-8B-Instruct.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"llama.block_count": "32",
|
||||||
|
"llama.context_length": "8192",
|
||||||
|
"llama.embedding_length": "4096",
|
||||||
|
"llama.feed_forward_length": "14336",
|
||||||
|
"llama.rope.dimension_count": "128",
|
||||||
|
"llama.rope.freq_base": "500000",
|
||||||
|
"llama.vocab_size": "128256",
|
||||||
|
"llama.attention.head_count": "32",
|
||||||
|
"llama.attention.head_count_kv": "8",
|
||||||
|
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||||
|
"tokenizer.ggml.model": "gpt2",
|
||||||
|
"tokenizer.ggml.pre": "llama-bpe",
|
||||||
|
"tokenizer.ggml.bos_token_id": "128000",
|
||||||
|
"tokenizer.ggml.eos_token_id": "128009",
|
||||||
|
"tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
|
||||||
|
"tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
|
||||||
|
"tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
|
||||||
|
"token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
|
||||||
|
"blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
|
||||||
|
"blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
|
||||||
|
"blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
|
||||||
|
"blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
|
||||||
|
"blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
|
||||||
|
"blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
|
||||||
|
"blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
|
||||||
|
"blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
|
||||||
|
"blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
|
||||||
|
"blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
|
||||||
|
"blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
|
||||||
|
"blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
|
||||||
|
"blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
|
||||||
|
"blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
|
||||||
|
"blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
|
||||||
|
"blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
|
||||||
|
"blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
|
||||||
|
"blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
|
||||||
|
"blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
|
||||||
|
"blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
|
||||||
|
"blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
|
||||||
|
"blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
|
||||||
|
"blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
|
||||||
|
"blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
|
||||||
|
"blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
|
||||||
|
"blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
|
||||||
|
"blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
|
||||||
|
"blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
|
||||||
|
"blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
|
||||||
|
"blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
|
||||||
|
"blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
|
||||||
|
"blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
|
||||||
|
"blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
|
||||||
|
"blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
|
||||||
|
"blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
|
||||||
|
"blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
|
||||||
|
"blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
|
||||||
|
"blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
|
||||||
|
"blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
|
||||||
|
"blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
|
||||||
|
"blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
|
||||||
|
"blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
|
||||||
|
"blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
|
||||||
|
"blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
|
||||||
|
"blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
|
||||||
|
"blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
|
||||||
|
"blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
|
||||||
|
"blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
|
||||||
|
"blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
|
||||||
|
"blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
|
||||||
|
"blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
|
||||||
|
"blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
|
||||||
|
"blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
|
||||||
|
"blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
|
||||||
|
"blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
|
||||||
|
"blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
|
||||||
|
"blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
|
||||||
|
"blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
|
||||||
|
"blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
|
||||||
|
"blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
|
||||||
|
"blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
|
||||||
|
"blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
|
||||||
|
"blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
|
||||||
|
"blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
|
||||||
|
"blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
|
||||||
|
"blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
|
||||||
|
"blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
|
||||||
|
"blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
|
||||||
|
"blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
|
||||||
|
"blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
|
||||||
|
"blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
|
||||||
|
"blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
|
||||||
|
"blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
|
||||||
|
"blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
|
||||||
|
"blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
|
||||||
|
"blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
|
||||||
|
"blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
|
||||||
|
"blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
|
||||||
|
"blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
|
||||||
|
"blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
|
||||||
|
"blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
|
||||||
|
"blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
|
||||||
|
"blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
|
||||||
|
"blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
|
||||||
|
"blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
|
||||||
|
"blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
|
||||||
|
"blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
|
||||||
|
"blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
|
||||||
|
"blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
|
||||||
|
"blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
|
||||||
|
"blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
|
||||||
|
"blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
|
||||||
|
"blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
|
||||||
|
"blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
|
||||||
|
"blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
|
||||||
|
"blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
|
||||||
|
"blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
|
||||||
|
"blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
|
||||||
|
"blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
|
||||||
|
"blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
|
||||||
|
"blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
|
||||||
|
"blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
|
||||||
|
"blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
|
||||||
|
"blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
|
||||||
|
"blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
|
||||||
|
"blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
|
||||||
|
"blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
|
||||||
|
"blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
|
||||||
|
"blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
|
||||||
|
"blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
|
||||||
|
"blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
|
||||||
|
"blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
|
||||||
|
"blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
|
||||||
|
"blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
|
||||||
|
"blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
|
||||||
|
"blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
|
||||||
|
"blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
|
||||||
|
"blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
|
||||||
|
"blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
|
||||||
|
"blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
|
||||||
|
"blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
|
||||||
|
"blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
|
||||||
|
"blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
|
||||||
|
"blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
|
||||||
|
"blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
|
||||||
|
"blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
|
||||||
|
"blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
|
||||||
|
"blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
|
||||||
|
"blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
|
||||||
|
"blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
|
||||||
|
"blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
|
||||||
|
"blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
|
||||||
|
"blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
|
||||||
|
"blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
|
||||||
|
"blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
|
||||||
|
"blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
|
||||||
|
"blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
|
||||||
|
"blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
|
||||||
|
"blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
|
||||||
|
"blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
|
||||||
|
"blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
|
||||||
|
"blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
|
||||||
|
"blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
|
||||||
|
"blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
|
||||||
|
"blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
|
||||||
|
"blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
|
||||||
|
"blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
|
||||||
|
"blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
|
||||||
|
"blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
|
||||||
|
"blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
|
||||||
|
"blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
|
||||||
|
"blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
|
||||||
|
"blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
|
||||||
|
"blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
|
||||||
|
"blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
|
||||||
|
"blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
|
||||||
|
"blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
|
||||||
|
"blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
|
||||||
|
"blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
|
||||||
|
"blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
|
||||||
|
"blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
|
||||||
|
"blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
|
||||||
|
"blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
|
||||||
|
"blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
|
||||||
|
"blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
|
||||||
|
"blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
|
||||||
|
"blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
|
||||||
|
"blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
|
||||||
|
"blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
|
||||||
|
"blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
|
||||||
|
"blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
|
||||||
|
"blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
|
||||||
|
"blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
|
||||||
|
"blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
|
||||||
|
"blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
|
||||||
|
"blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
|
||||||
|
"blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
|
||||||
|
"blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
|
||||||
|
"blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
|
||||||
|
"blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
|
||||||
|
"blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
|
||||||
|
"blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
|
||||||
|
"blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
|
||||||
|
"blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
|
||||||
|
"blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
|
||||||
|
"blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
|
||||||
|
"blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
|
||||||
|
"blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
|
||||||
|
"blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
|
||||||
|
"blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
|
||||||
|
"blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
|
||||||
|
"blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
|
||||||
|
"blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
|
||||||
|
"blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
|
||||||
|
"blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
|
||||||
|
"blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
|
||||||
|
"blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
|
||||||
|
"blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
|
||||||
|
"blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
|
||||||
|
"blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
|
||||||
|
"blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
|
||||||
|
"blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
|
||||||
|
"blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
|
||||||
|
"blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
|
||||||
|
"blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
|
||||||
|
"blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
|
||||||
|
"blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
|
||||||
|
"blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
|
||||||
|
"blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
|
||||||
|
"blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
|
||||||
|
"blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
|
||||||
|
"blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
|
||||||
|
"blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
|
||||||
|
"blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
|
||||||
|
"blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
|
||||||
|
"blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
|
||||||
|
"blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
|
||||||
|
"blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
|
||||||
|
"blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
|
||||||
|
"blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
|
||||||
|
"blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
|
||||||
|
"blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
|
||||||
|
"blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
|
||||||
|
"blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
|
||||||
|
"blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
|
||||||
|
"blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
|
||||||
|
"blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
|
||||||
|
"blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
|
||||||
|
"blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
|
||||||
|
"blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
|
||||||
|
"blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
|
||||||
|
"blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
|
||||||
|
"blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
|
||||||
|
"blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
|
||||||
|
"blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
|
||||||
|
"blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
|
||||||
|
"blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
|
||||||
|
"blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
|
||||||
|
"blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
|
||||||
|
"blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
|
||||||
|
"blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
|
||||||
|
"blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
|
||||||
|
"blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
|
||||||
|
"blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
|
||||||
|
"blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
|
||||||
|
"blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
|
||||||
|
"blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
|
||||||
|
"blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
|
||||||
|
"blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
|
||||||
|
"blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
|
||||||
|
"blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
|
||||||
|
"blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
|
||||||
|
"blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
|
||||||
|
"blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
|
||||||
|
"blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
|
||||||
|
"blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
|
||||||
|
"blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
|
||||||
|
"blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
|
||||||
|
"blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
|
||||||
|
"blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
|
||||||
|
"blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
|
||||||
|
"blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
|
||||||
|
"blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
|
||||||
|
"blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
|
||||||
|
"blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
|
||||||
|
"blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
|
||||||
|
"blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
|
||||||
|
"blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
|
||||||
|
"blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
|
||||||
|
"blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
|
||||||
|
"blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
|
||||||
|
"blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
|
||||||
|
"blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
|
||||||
|
"blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
|
||||||
|
"blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
|
||||||
|
"blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
|
||||||
|
"blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
|
||||||
|
"blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
|
||||||
|
"blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
|
||||||
|
"blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
|
||||||
|
"blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
|
||||||
|
"blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
|
||||||
|
"blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
|
||||||
|
"blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
|
||||||
|
"blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
|
||||||
|
"blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
|
||||||
|
"blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
|
||||||
|
"blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
|
||||||
|
"output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
|
||||||
|
"output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
|
||||||
|
}
|
||||||
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
3
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
|
||||||
|
}
|
||||||
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
313
convert/testdata/Mistral-7B-Instruct-v0.2.json
vendored
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"llama.block_count": "32",
|
||||||
|
"llama.context_length": "32768",
|
||||||
|
"llama.embedding_length": "4096",
|
||||||
|
"llama.feed_forward_length": "14336",
|
||||||
|
"llama.attention.head_count": "32",
|
||||||
|
"llama.attention.head_count_kv": "8",
|
||||||
|
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||||
|
"llama.rope.dimension_count": "128",
|
||||||
|
"tokenizer.ggml.model": "llama",
|
||||||
|
"tokenizer.ggml.add_bos_token": "true",
|
||||||
|
"tokenizer.ggml.add_eos_token": "false",
|
||||||
|
"tokenizer.ggml.bos_token_id": "1",
|
||||||
|
"tokenizer.ggml.eos_token_id": "2",
|
||||||
|
"tokenizer.ggml.unknown_token_id": "0",
|
||||||
|
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||||
|
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||||
|
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||||
|
"token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
|
||||||
|
"blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
|
||||||
|
"blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
|
||||||
|
"blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
|
||||||
|
"blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
|
||||||
|
"blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
|
||||||
|
"blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
|
||||||
|
"blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
|
||||||
|
"blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
|
||||||
|
"blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
|
||||||
|
"blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
|
||||||
|
"blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
|
||||||
|
"blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
|
||||||
|
"blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
|
||||||
|
"blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
|
||||||
|
"blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
|
||||||
|
"blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
|
||||||
|
"blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
|
||||||
|
"blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
|
||||||
|
"blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
|
||||||
|
"blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
|
||||||
|
"blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
|
||||||
|
"blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
|
||||||
|
"blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
|
||||||
|
"blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
|
||||||
|
"blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
|
||||||
|
"blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
|
||||||
|
"blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
|
||||||
|
"blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
|
||||||
|
"blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
|
||||||
|
"blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
|
||||||
|
"blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
|
||||||
|
"blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
|
||||||
|
"blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
|
||||||
|
"blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
|
||||||
|
"blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
|
||||||
|
"blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
|
||||||
|
"blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
|
||||||
|
"blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
|
||||||
|
"blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
|
||||||
|
"blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
|
||||||
|
"blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
|
||||||
|
"blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
|
||||||
|
"blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
|
||||||
|
"blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
|
||||||
|
"blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
|
||||||
|
"blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
|
||||||
|
"blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
|
||||||
|
"blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
|
||||||
|
"blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
|
||||||
|
"blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
|
||||||
|
"blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
|
||||||
|
"blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
|
||||||
|
"blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
|
||||||
|
"blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
|
||||||
|
"blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
|
||||||
|
"blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
|
||||||
|
"blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
|
||||||
|
"blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
|
||||||
|
"blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
|
||||||
|
"blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
|
||||||
|
"blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
|
||||||
|
"blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
|
||||||
|
"blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
|
||||||
|
"blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
|
||||||
|
"blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
|
||||||
|
"blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
|
||||||
|
"blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
|
||||||
|
"blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
|
||||||
|
"blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
|
||||||
|
"blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
|
||||||
|
"blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
|
||||||
|
"blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
|
||||||
|
"blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
|
||||||
|
"blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
|
||||||
|
"blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
|
||||||
|
"blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
|
||||||
|
"blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
|
||||||
|
"blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
|
||||||
|
"blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
|
||||||
|
"blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
|
||||||
|
"blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
|
||||||
|
"blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
|
||||||
|
"blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
|
||||||
|
"blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
|
||||||
|
"blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
|
||||||
|
"blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
|
||||||
|
"blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
|
||||||
|
"blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
|
||||||
|
"blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
|
||||||
|
"blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
|
||||||
|
"blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
|
||||||
|
"blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
|
||||||
|
"blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
|
||||||
|
"blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
|
||||||
|
"blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
|
||||||
|
"blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
|
||||||
|
"blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
|
||||||
|
"blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
|
||||||
|
"blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
|
||||||
|
"blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
|
||||||
|
"blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
|
||||||
|
"blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
|
||||||
|
"blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
|
||||||
|
"blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
|
||||||
|
"blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
|
||||||
|
"blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
|
||||||
|
"blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
|
||||||
|
"blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
|
||||||
|
"blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
|
||||||
|
"blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
|
||||||
|
"blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
|
||||||
|
"blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
|
||||||
|
"blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
|
||||||
|
"blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
|
||||||
|
"blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
|
||||||
|
"blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
|
||||||
|
"blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
|
||||||
|
"blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
|
||||||
|
"blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
|
||||||
|
"blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
|
||||||
|
"blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
|
||||||
|
"blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
|
||||||
|
"blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
|
||||||
|
"blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
|
||||||
|
"blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
|
||||||
|
"blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
|
||||||
|
"blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
|
||||||
|
"blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
|
||||||
|
"blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
|
||||||
|
"blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
|
||||||
|
"blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
|
||||||
|
"blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
|
||||||
|
"blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
|
||||||
|
"blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
|
||||||
|
"blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
|
||||||
|
"blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
|
||||||
|
"blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
|
||||||
|
"blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
|
||||||
|
"blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
|
||||||
|
"blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
|
||||||
|
"blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
|
||||||
|
"blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
|
||||||
|
"blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
|
||||||
|
"blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
|
||||||
|
"blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
|
||||||
|
"blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
|
||||||
|
"blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
|
||||||
|
"blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
|
||||||
|
"blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
|
||||||
|
"blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
|
||||||
|
"blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
|
||||||
|
"blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
|
||||||
|
"blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
|
||||||
|
"blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
|
||||||
|
"blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
|
||||||
|
"blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
|
||||||
|
"blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
|
||||||
|
"blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
|
||||||
|
"blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
|
||||||
|
"blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
|
||||||
|
"blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
|
||||||
|
"blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
|
||||||
|
"blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
|
||||||
|
"blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
|
||||||
|
"blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
|
||||||
|
"blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
|
||||||
|
"blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
|
||||||
|
"blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
|
||||||
|
"blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
|
||||||
|
"blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
|
||||||
|
"blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
|
||||||
|
"blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
|
||||||
|
"blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
|
||||||
|
"blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
|
||||||
|
"blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
|
||||||
|
"blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
|
||||||
|
"blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
|
||||||
|
"blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
|
||||||
|
"blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
|
||||||
|
"blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
|
||||||
|
"blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
|
||||||
|
"blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
|
||||||
|
"blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
|
||||||
|
"blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
|
||||||
|
"blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
|
||||||
|
"blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
|
||||||
|
"blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
|
||||||
|
"blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
|
||||||
|
"blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
|
||||||
|
"blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
|
||||||
|
"blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
|
||||||
|
"blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
|
||||||
|
"blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
|
||||||
|
"blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
|
||||||
|
"blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
|
||||||
|
"blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
|
||||||
|
"blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
|
||||||
|
"blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
|
||||||
|
"blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
|
||||||
|
"blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
|
||||||
|
"blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
|
||||||
|
"blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
|
||||||
|
"blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
|
||||||
|
"blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
|
||||||
|
"blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
|
||||||
|
"blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
|
||||||
|
"blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
|
||||||
|
"blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
|
||||||
|
"blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
|
||||||
|
"blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
|
||||||
|
"blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
|
||||||
|
"blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
|
||||||
|
"blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
|
||||||
|
"blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
|
||||||
|
"blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
|
||||||
|
"blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
|
||||||
|
"blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
|
||||||
|
"blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
|
||||||
|
"blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
|
||||||
|
"blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
|
||||||
|
"blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
|
||||||
|
"blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
|
||||||
|
"blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
|
||||||
|
"blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
|
||||||
|
"blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
|
||||||
|
"blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
|
||||||
|
"blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
|
||||||
|
"blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
|
||||||
|
"blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
|
||||||
|
"blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
|
||||||
|
"blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
|
||||||
|
"blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
|
||||||
|
"blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
|
||||||
|
"blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
|
||||||
|
"blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
|
||||||
|
"blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
|
||||||
|
"blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
|
||||||
|
"blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
|
||||||
|
"blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
|
||||||
|
"blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
|
||||||
|
"blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
|
||||||
|
"blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
|
||||||
|
"blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
|
||||||
|
"blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
|
||||||
|
"blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
|
||||||
|
"blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
|
||||||
|
"blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
|
||||||
|
"blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
|
||||||
|
"blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
|
||||||
|
"blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
|
||||||
|
"blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
|
||||||
|
"blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
|
||||||
|
"blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
|
||||||
|
"blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
|
||||||
|
"blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
|
||||||
|
"blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
|
||||||
|
"blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
|
||||||
|
"blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
|
||||||
|
"blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
|
||||||
|
"blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
|
||||||
|
"blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
|
||||||
|
"blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
|
||||||
|
"blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
|
||||||
|
"blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
|
||||||
|
"blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
|
||||||
|
"blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
|
||||||
|
"blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
|
||||||
|
"blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
|
||||||
|
"blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
|
||||||
|
"blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
|
||||||
|
"blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
|
||||||
|
"blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
|
||||||
|
"blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
|
||||||
|
"blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
|
||||||
|
"blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
|
||||||
|
"blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
|
||||||
|
"blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
|
||||||
|
"blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
|
||||||
|
"blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
|
||||||
|
"blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
|
||||||
|
"blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
|
||||||
|
"blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
|
||||||
|
"blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
|
||||||
|
"blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
|
||||||
|
"blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
|
||||||
|
"blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
|
||||||
|
"blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
|
||||||
|
"blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
|
||||||
|
"output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
|
||||||
|
"output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
|
||||||
|
}
|
||||||
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
348
convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
vendored
Normal file
@@ -0,0 +1,348 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"llama.block_count": "32",
|
||||||
|
"llama.context_length": "32768",
|
||||||
|
"llama.embedding_length": "4096",
|
||||||
|
"llama.feed_forward_length": "14336",
|
||||||
|
"llama.rope.dimension_count": "128",
|
||||||
|
"llama.rope.freq_base": "1e+06",
|
||||||
|
"llama.attention.head_count": "32",
|
||||||
|
"llama.attention.head_count_kv": "8",
|
||||||
|
"llama.attention.layer_norm_rms_epsilon": "1e-05",
|
||||||
|
"llama.expert_count": "8",
|
||||||
|
"llama.expert_used_count": "2",
|
||||||
|
"tokenizer.ggml.model": "llama",
|
||||||
|
"tokenizer.ggml.add_bos_token": "true",
|
||||||
|
"tokenizer.ggml.add_eos_token": "false",
|
||||||
|
"tokenizer.ggml.bos_token_id": "1",
|
||||||
|
"tokenizer.ggml.eos_token_id": "2",
|
||||||
|
"tokenizer.ggml.unknown_token_id": "0",
|
||||||
|
"tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
|
||||||
|
"tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
|
||||||
|
"tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
|
||||||
|
"token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
|
||||||
|
"blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
|
||||||
|
"blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
|
||||||
|
"blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
|
||||||
|
"blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
|
||||||
|
"blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
|
||||||
|
"blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
|
||||||
|
"blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
|
||||||
|
"blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
|
||||||
|
"blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
|
||||||
|
"blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
|
||||||
|
"blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
|
||||||
|
"blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
|
||||||
|
"blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
|
||||||
|
"blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
|
||||||
|
"blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
|
||||||
|
"blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
|
||||||
|
"blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
|
||||||
|
"blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
|
||||||
|
"blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
|
||||||
|
"blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
|
||||||
|
"blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
|
||||||
|
"blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
|
||||||
|
"blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
|
||||||
|
"blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
|
||||||
|
"blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
|
||||||
|
"blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
|
||||||
|
"blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
|
||||||
|
"blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
|
||||||
|
"blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
|
||||||
|
"blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
|
||||||
|
"blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
|
||||||
|
"blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
|
||||||
|
"blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
|
||||||
|
"blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
|
||||||
|
"blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
|
||||||
|
"blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
|
||||||
|
"blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
|
||||||
|
"blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
|
||||||
|
"blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
|
||||||
|
"blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
|
||||||
|
"blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
|
||||||
|
"blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
|
||||||
|
"blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
|
||||||
|
"blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
|
||||||
|
"blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
|
||||||
|
"blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
|
||||||
|
"blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
|
||||||
|
"blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
|
||||||
|
"blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
|
||||||
|
"blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
|
||||||
|
"blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
|
||||||
|
"blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
|
||||||
|
"blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
|
||||||
|
"blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
|
||||||
|
"blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
|
||||||
|
"blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
|
||||||
|
"blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
|
||||||
|
"blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
|
||||||
|
"blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
|
||||||
|
"blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
|
||||||
|
"blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
|
||||||
|
"blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
|
||||||
|
"blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
|
||||||
|
"blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
|
||||||
|
"blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
|
||||||
|
"blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
|
||||||
|
"blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
|
||||||
|
"blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
|
||||||
|
"blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
|
||||||
|
"blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
|
||||||
|
"blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
|
||||||
|
"blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
|
||||||
|
"blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
|
||||||
|
"blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
|
||||||
|
"blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
|
||||||
|
"blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
|
||||||
|
"blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
|
||||||
|
"blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
|
||||||
|
"blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
|
||||||
|
"blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
|
||||||
|
"blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
|
||||||
|
"blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
|
||||||
|
"blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
|
||||||
|
"blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
|
||||||
|
"blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
|
||||||
|
"blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
|
||||||
|
"blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
|
||||||
|
"blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
|
||||||
|
"blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
|
||||||
|
"blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
|
||||||
|
"blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
|
||||||
|
"blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
|
||||||
|
"blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
|
||||||
|
"blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
|
||||||
|
"blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
|
||||||
|
"blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
|
||||||
|
"blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
|
||||||
|
"blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
|
||||||
|
"blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
|
||||||
|
"blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
|
||||||
|
"blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
|
||||||
|
"blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
|
||||||
|
"blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
|
||||||
|
"blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
|
||||||
|
"blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
|
||||||
|
"blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
|
||||||
|
"blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
|
||||||
|
"blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
|
||||||
|
"blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
|
||||||
|
"blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
|
||||||
|
"blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
|
||||||
|
"blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
|
||||||
|
"blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
|
||||||
|
"blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
|
||||||
|
"blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
|
||||||
|
"blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
|
||||||
|
"blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
|
||||||
|
"blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
|
||||||
|
"blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
|
||||||
|
"blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
|
||||||
|
"blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
|
||||||
|
"blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
|
||||||
|
"blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
|
||||||
|
"blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
|
||||||
|
"blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
|
||||||
|
"blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
|
||||||
|
"blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
|
||||||
|
"blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
|
||||||
|
"blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
|
||||||
|
"blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
|
||||||
|
"blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
|
||||||
|
"blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
|
||||||
|
"blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
|
||||||
|
"blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
|
||||||
|
"blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
|
||||||
|
"blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
|
||||||
|
"blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
|
||||||
|
"blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
|
||||||
|
"blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
|
||||||
|
"blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
|
||||||
|
"blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
|
||||||
|
"blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
|
||||||
|
"blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
|
||||||
|
"blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
|
||||||
|
"blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
|
||||||
|
"blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
|
||||||
|
"blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
|
||||||
|
"blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
|
||||||
|
"blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
|
||||||
|
"blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
|
||||||
|
"blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
|
||||||
|
"blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
|
||||||
|
"blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
|
||||||
|
"blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
|
||||||
|
"blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
|
||||||
|
"blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
|
||||||
|
"blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
|
||||||
|
"blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
|
||||||
|
"blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
|
||||||
|
"blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
|
||||||
|
"blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
|
||||||
|
"blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
|
||||||
|
"blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
|
||||||
|
"blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
|
||||||
|
"blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
|
||||||
|
"blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
|
||||||
|
"blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
|
||||||
|
"blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
|
||||||
|
"blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
|
||||||
|
"blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
|
||||||
|
"blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
|
||||||
|
"blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
|
||||||
|
"blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
|
||||||
|
"blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
|
||||||
|
"blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
|
||||||
|
"blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
|
||||||
|
"blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
|
||||||
|
"blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
|
||||||
|
"blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
|
||||||
|
"blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
|
||||||
|
"blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
|
||||||
|
"blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
|
||||||
|
"blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
|
||||||
|
"blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
|
||||||
|
"blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
|
||||||
|
"blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
|
||||||
|
"blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
|
||||||
|
"blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
|
||||||
|
"blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
|
||||||
|
"blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
|
||||||
|
"blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
|
||||||
|
"blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
|
||||||
|
"blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
|
||||||
|
"blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
|
||||||
|
"blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
|
||||||
|
"blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
|
||||||
|
"blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
|
||||||
|
"blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
|
||||||
|
"blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
|
||||||
|
"blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
|
||||||
|
"blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
|
||||||
|
"blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
|
||||||
|
"blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
|
||||||
|
"blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
|
||||||
|
"blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
|
||||||
|
"blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
|
||||||
|
"blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
|
||||||
|
"blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
|
||||||
|
"blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
|
||||||
|
"blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
|
||||||
|
"blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
|
||||||
|
"blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
|
||||||
|
"blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
|
||||||
|
"blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
|
||||||
|
"blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
|
||||||
|
"blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
|
||||||
|
"blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
|
||||||
|
"blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
|
||||||
|
"blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
|
||||||
|
"blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
|
||||||
|
"blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
|
||||||
|
"blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
|
||||||
|
"blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
|
||||||
|
"blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
|
||||||
|
"blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
|
||||||
|
"blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
|
||||||
|
"blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
|
||||||
|
"blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
|
||||||
|
"blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
|
||||||
|
"blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
|
||||||
|
"blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
|
||||||
|
"blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
|
||||||
|
"blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
|
||||||
|
"blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
|
||||||
|
"blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
|
||||||
|
"blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
|
||||||
|
"blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
|
||||||
|
"blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
|
||||||
|
"blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
|
||||||
|
"blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
|
||||||
|
"blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
|
||||||
|
"blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
|
||||||
|
"blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
|
||||||
|
"blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
|
||||||
|
"blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
|
||||||
|
"blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
|
||||||
|
"blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
|
||||||
|
"blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
|
||||||
|
"blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
|
||||||
|
"blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
|
||||||
|
"blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
|
||||||
|
"blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
|
||||||
|
"blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
|
||||||
|
"blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
|
||||||
|
"blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
|
||||||
|
"blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
|
||||||
|
"blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
|
||||||
|
"blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
|
||||||
|
"blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
|
||||||
|
"blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
|
||||||
|
"blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
|
||||||
|
"blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
|
||||||
|
"blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
|
||||||
|
"blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
|
||||||
|
"blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
|
||||||
|
"blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
|
||||||
|
"blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
|
||||||
|
"blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
|
||||||
|
"blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
|
||||||
|
"blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
|
||||||
|
"blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
|
||||||
|
"blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
|
||||||
|
"blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
|
||||||
|
"blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
|
||||||
|
"blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
|
||||||
|
"blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
|
||||||
|
"blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
|
||||||
|
"blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
|
||||||
|
"blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
|
||||||
|
"blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
|
||||||
|
"blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
|
||||||
|
"blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
|
||||||
|
"blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
|
||||||
|
"blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
|
||||||
|
"blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
|
||||||
|
"blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
|
||||||
|
"blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
|
||||||
|
"blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
|
||||||
|
"blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
|
||||||
|
"blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
|
||||||
|
"blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
|
||||||
|
"blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
|
||||||
|
"blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
|
||||||
|
"blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
|
||||||
|
"blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
|
||||||
|
"blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
|
||||||
|
"blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
|
||||||
|
"blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
|
||||||
|
"blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
|
||||||
|
"blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
|
||||||
|
"blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
|
||||||
|
"blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
|
||||||
|
"blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
|
||||||
|
"blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
|
||||||
|
"blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
|
||||||
|
"blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
|
||||||
|
"blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
|
||||||
|
"blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
|
||||||
|
"blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
|
||||||
|
"blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
|
||||||
|
"blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
|
||||||
|
"blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
|
||||||
|
"blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
|
||||||
|
"blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
|
||||||
|
"blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
|
||||||
|
"blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
|
||||||
|
"blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
|
||||||
|
"blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
|
||||||
|
"blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
|
||||||
|
"blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
|
||||||
|
"output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
|
||||||
|
"output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
|
||||||
|
}
|
||||||
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
225
convert/testdata/Phi-3-mini-128k-instruct.json
vendored
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "phi3",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"phi3.block_count": "32",
|
||||||
|
"phi3.context_length": "131072",
|
||||||
|
"phi3.embedding_length": "3072",
|
||||||
|
"phi3.feed_forward_length": "8192",
|
||||||
|
"phi3.rope.scaling.original_context_length": "4096",
|
||||||
|
"phi3.rope.dimension_count": "96",
|
||||||
|
"phi3.rope.freq_base": "10000",
|
||||||
|
"phi3.rope.scaling.attn_factor": "1.1902381",
|
||||||
|
"phi3.attention.head_count": "32",
|
||||||
|
"phi3.attention.head_count_kv": "32",
|
||||||
|
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
|
||||||
|
"phi3.attention.sliding_window": "262144",
|
||||||
|
"tokenizer.ggml.model": "llama",
|
||||||
|
"tokenizer.ggml.pre": "default",
|
||||||
|
"tokenizer.ggml.add_bos_token": "false",
|
||||||
|
"tokenizer.ggml.add_eos_token": "false",
|
||||||
|
"tokenizer.ggml.bos_token_id": "1",
|
||||||
|
"tokenizer.ggml.eos_token_id": "32000",
|
||||||
|
"tokenizer.ggml.unknown_token_id": "0",
|
||||||
|
"tokenizer.ggml.padding_token_id": "32000",
|
||||||
|
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
|
||||||
|
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
|
||||||
|
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
|
||||||
|
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
|
||||||
|
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
|
||||||
|
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
|
||||||
|
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
|
||||||
|
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
|
||||||
|
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
|
||||||
|
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
|
||||||
|
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
|
||||||
|
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
|
||||||
|
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
|
||||||
|
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
|
||||||
|
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
|
||||||
|
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
|
||||||
|
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
|
||||||
|
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
|
||||||
|
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
|
||||||
|
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
|
||||||
|
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
|
||||||
|
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
|
||||||
|
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
|
||||||
|
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
|
||||||
|
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
|
||||||
|
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
|
||||||
|
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
|
||||||
|
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
|
||||||
|
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
|
||||||
|
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
|
||||||
|
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
|
||||||
|
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
|
||||||
|
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
|
||||||
|
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
|
||||||
|
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
|
||||||
|
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
|
||||||
|
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
|
||||||
|
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
|
||||||
|
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
|
||||||
|
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
|
||||||
|
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
|
||||||
|
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
|
||||||
|
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
|
||||||
|
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
|
||||||
|
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
|
||||||
|
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
|
||||||
|
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
|
||||||
|
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
|
||||||
|
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
|
||||||
|
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
|
||||||
|
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
|
||||||
|
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
|
||||||
|
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
|
||||||
|
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
|
||||||
|
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
|
||||||
|
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
|
||||||
|
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
|
||||||
|
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
|
||||||
|
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
|
||||||
|
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
|
||||||
|
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
|
||||||
|
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
|
||||||
|
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
|
||||||
|
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
|
||||||
|
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
|
||||||
|
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
|
||||||
|
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
|
||||||
|
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
|
||||||
|
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
|
||||||
|
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
|
||||||
|
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
|
||||||
|
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
|
||||||
|
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
|
||||||
|
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
|
||||||
|
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
|
||||||
|
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
|
||||||
|
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
|
||||||
|
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
|
||||||
|
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
|
||||||
|
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
|
||||||
|
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
|
||||||
|
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
|
||||||
|
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
|
||||||
|
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
|
||||||
|
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
|
||||||
|
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
|
||||||
|
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
|
||||||
|
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
|
||||||
|
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
|
||||||
|
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
|
||||||
|
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
|
||||||
|
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
|
||||||
|
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
|
||||||
|
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
|
||||||
|
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
|
||||||
|
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
|
||||||
|
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
|
||||||
|
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
|
||||||
|
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
|
||||||
|
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
|
||||||
|
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
|
||||||
|
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
|
||||||
|
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
|
||||||
|
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
|
||||||
|
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
|
||||||
|
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
|
||||||
|
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
|
||||||
|
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
|
||||||
|
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
|
||||||
|
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
|
||||||
|
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
|
||||||
|
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
|
||||||
|
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
|
||||||
|
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
|
||||||
|
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
|
||||||
|
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
|
||||||
|
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
|
||||||
|
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
|
||||||
|
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
|
||||||
|
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
|
||||||
|
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
|
||||||
|
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
|
||||||
|
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
|
||||||
|
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
|
||||||
|
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
|
||||||
|
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
|
||||||
|
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
|
||||||
|
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
|
||||||
|
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
|
||||||
|
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
|
||||||
|
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
|
||||||
|
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
|
||||||
|
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
|
||||||
|
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
|
||||||
|
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
|
||||||
|
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
|
||||||
|
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
|
||||||
|
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
|
||||||
|
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
|
||||||
|
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
|
||||||
|
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
|
||||||
|
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
|
||||||
|
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
|
||||||
|
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
|
||||||
|
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
|
||||||
|
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
|
||||||
|
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
|
||||||
|
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
|
||||||
|
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
|
||||||
|
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
|
||||||
|
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
|
||||||
|
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
|
||||||
|
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
|
||||||
|
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
|
||||||
|
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
|
||||||
|
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
|
||||||
|
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
|
||||||
|
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
|
||||||
|
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
|
||||||
|
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
|
||||||
|
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
|
||||||
|
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
|
||||||
|
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
|
||||||
|
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
|
||||||
|
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
|
||||||
|
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
|
||||||
|
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
|
||||||
|
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
|
||||||
|
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
|
||||||
|
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
|
||||||
|
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
|
||||||
|
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
|
||||||
|
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
|
||||||
|
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
|
||||||
|
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
|
||||||
|
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
|
||||||
|
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
|
||||||
|
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
|
||||||
|
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
|
||||||
|
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
|
||||||
|
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
|
||||||
|
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
|
||||||
|
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
|
||||||
|
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
|
||||||
|
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
|
||||||
|
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
|
||||||
|
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
|
||||||
|
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
|
||||||
|
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
|
||||||
|
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
|
||||||
|
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
|
||||||
|
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
|
||||||
|
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
|
||||||
|
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
|
||||||
|
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
|
||||||
|
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
|
||||||
|
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
|
||||||
|
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
|
||||||
|
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
|
||||||
|
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
|
||||||
|
}
|
||||||
124
convert/testdata/all-MiniLM-L6-v2.json
vendored
Normal file
124
convert/testdata/all-MiniLM-L6-v2.json
vendored
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "bert",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"bert.attention.causal": "false",
|
||||||
|
"bert.attention.head_count": "12",
|
||||||
|
"bert.attention.layer_norm_epsilon": "1e-12",
|
||||||
|
"bert.block_count": "6",
|
||||||
|
"bert.context_length": "512",
|
||||||
|
"bert.embedding_length": "384",
|
||||||
|
"bert.feed_forward_length": "1536",
|
||||||
|
"bert.pooling_type": "1",
|
||||||
|
"tokenizer.ggml.model": "bert",
|
||||||
|
"tokenizer.ggml.padding_token_id": "0",
|
||||||
|
"tokenizer.ggml.unknown_token_id": "100",
|
||||||
|
"tokenizer.ggml.cls_token_id": "101",
|
||||||
|
"tokenizer.ggml.seperator_token_id": "102",
|
||||||
|
"tokenizer.ggml.mask_token_id": "103",
|
||||||
|
"tokenizer.ggml.token_type_count": "2",
|
||||||
|
"tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
|
||||||
|
"tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
|
||||||
|
"tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
|
||||||
|
"token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
|
||||||
|
"position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
|
||||||
|
"token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
|
||||||
|
"token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
|
||||||
|
"token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
|
||||||
|
"blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
|
||||||
|
"blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
|
||||||
|
"blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
|
||||||
|
"blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
|
||||||
|
"blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
|
||||||
|
"blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
|
||||||
|
"blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
|
||||||
|
"blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
|
||||||
|
"blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
|
||||||
|
"blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
|
||||||
|
"blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
|
||||||
|
"blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
|
||||||
|
"blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
|
||||||
|
"blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
|
||||||
|
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
|
||||||
|
"blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
|
||||||
|
"blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
|
||||||
|
"blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
|
||||||
|
"blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
|
||||||
|
"blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
|
||||||
|
"blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
|
||||||
|
"blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
|
||||||
|
"blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
|
||||||
|
"blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
|
||||||
|
"blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
|
||||||
|
"blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
|
||||||
|
"blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
|
||||||
|
"blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
|
||||||
|
"blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
|
||||||
|
"blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
|
||||||
|
"blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
|
||||||
|
"blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
|
||||||
|
"blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
|
||||||
|
"blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
|
||||||
|
"blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
|
||||||
|
"blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
|
||||||
|
"blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
|
||||||
|
"blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
|
||||||
|
"blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
|
||||||
|
"blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
|
||||||
|
"blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
|
||||||
|
"blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
|
||||||
|
"blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
|
||||||
|
"blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
|
||||||
|
"blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
|
||||||
|
"blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
|
||||||
|
"blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
|
||||||
|
"blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
|
||||||
|
"blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
|
||||||
|
"blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
|
||||||
|
"blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
|
||||||
|
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
|
||||||
|
"blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
|
||||||
|
"blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
|
||||||
|
"blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
|
||||||
|
"blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
|
||||||
|
"blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
|
||||||
|
"blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
|
||||||
|
"blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
|
||||||
|
"blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
|
||||||
|
"blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
|
||||||
|
"blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
|
||||||
|
"blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
|
||||||
|
"blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
|
||||||
|
"blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
|
||||||
|
"blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
|
||||||
|
"blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
|
||||||
|
"blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
|
||||||
|
"blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
|
||||||
|
"blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
|
||||||
|
"blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
|
||||||
|
"blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
|
||||||
|
"blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
|
||||||
|
"blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
|
||||||
|
"blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
|
||||||
|
"blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
|
||||||
|
"blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
|
||||||
|
"blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
|
||||||
|
"blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
|
||||||
|
"blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
|
||||||
|
"blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
|
||||||
|
"blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
|
||||||
|
"blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
|
||||||
|
"blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
|
||||||
|
"blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
|
||||||
|
"blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
|
||||||
|
"blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
|
||||||
|
"blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
|
||||||
|
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
|
||||||
|
"blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
|
||||||
|
"blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
|
||||||
|
"blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
|
||||||
|
"blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
|
||||||
|
"blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
|
||||||
|
"blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
|
||||||
|
"blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
|
||||||
|
}
|
||||||
6
convert/testdata/gemma-2-9b-it.json
vendored
Normal file
6
convert/testdata/gemma-2-9b-it.json
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "gemma2",
|
||||||
|
"gemma2.attention.sliding_window": "4096",
|
||||||
|
"gemma2.attn_logit_softcapping": "50",
|
||||||
|
"gemma2.final_logit_softcapping": "30"
|
||||||
|
}
|
||||||
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
188
convert/testdata/gemma-2b-it.json
vendored
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
{
|
||||||
|
"general.architecture": "gemma",
|
||||||
|
"general.file_type": "1",
|
||||||
|
"general.quantization_version": "2",
|
||||||
|
"gemma.block_count": "18",
|
||||||
|
"gemma.context_length": "8192",
|
||||||
|
"gemma.embedding_length": "2048",
|
||||||
|
"gemma.feed_forward_length": "16384",
|
||||||
|
"gemma.attention.head_count": "8",
|
||||||
|
"gemma.attention.head_count_kv": "1",
|
||||||
|
"gemma.attention.key_length": "256",
|
||||||
|
"gemma.attention.value_length": "256",
|
||||||
|
"gemma.attention.layer_norm_rms_epsilon": "1e-06",
|
||||||
|
"tokenizer.ggml.model": "llama",
|
||||||
|
"tokenizer.ggml.add_bos_token": "true",
|
||||||
|
"tokenizer.ggml.add_eos_token": "false",
|
||||||
|
"tokenizer.ggml.bos_token_id": "2",
|
||||||
|
"tokenizer.ggml.eos_token_id": "1",
|
||||||
|
"tokenizer.ggml.padding_token_id": "0",
|
||||||
|
"tokenizer.ggml.unknown_token_id": "3",
|
||||||
|
"tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
|
||||||
|
"tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
|
||||||
|
"tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
|
||||||
|
"token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
|
||||||
|
"blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
|
||||||
|
"blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
|
||||||
|
"blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
|
||||||
|
"blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
|
||||||
|
"blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
|
||||||
|
"blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
|
||||||
|
"blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
|
||||||
|
"blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
|
||||||
|
"blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
|
||||||
|
"blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
|
||||||
|
"blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
|
||||||
|
"blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
|
||||||
|
"blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
|
||||||
|
"blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
|
||||||
|
"blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
|
||||||
|
"blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
|
||||||
|
"blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
|
||||||
|
"blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
|
||||||
|
"blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
|
||||||
|
"blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
|
||||||
|
"blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
|
||||||
|
"blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
|
||||||
|
"blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
|
||||||
|
"blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
|
||||||
|
"blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
|
||||||
|
"blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
|
||||||
|
"blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
|
||||||
|
"blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
|
||||||
|
"blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
|
||||||
|
"blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
|
||||||
|
"blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
|
||||||
|
"blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
|
||||||
|
"blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
|
||||||
|
"blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
|
||||||
|
"blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
|
||||||
|
"blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
|
||||||
|
"blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
|
||||||
|
"blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
|
||||||
|
"blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
|
||||||
|
"blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
|
||||||
|
"blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
|
||||||
|
"blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
|
||||||
|
"blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
|
||||||
|
"blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
|
||||||
|
"blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
|
||||||
|
"blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
|
||||||
|
"blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
|
||||||
|
"blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
|
||||||
|
"blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
|
||||||
|
"blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
|
||||||
|
"blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
|
||||||
|
"blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
|
||||||
|
"blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
|
||||||
|
"blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
|
||||||
|
"blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
|
||||||
|
"blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
|
||||||
|
"blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
|
||||||
|
"blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
|
||||||
|
"blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
|
||||||
|
"blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
|
||||||
|
"blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
|
||||||
|
"blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
|
||||||
|
"blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
|
||||||
|
"blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
|
||||||
|
"blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
|
||||||
|
"blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
|
||||||
|
"blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
|
||||||
|
"blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
|
||||||
|
"blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
|
||||||
|
"blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
|
||||||
|
"blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
|
||||||
|
"blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
|
||||||
|
"blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
|
||||||
|
"blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
|
||||||
|
"blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
|
||||||
|
"blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
|
||||||
|
"blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
|
||||||
|
"blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
|
||||||
|
"blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
|
||||||
|
"blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
|
||||||
|
"blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
|
||||||
|
"blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
|
||||||
|
"blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
|
||||||
|
"blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
|
||||||
|
"blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
|
||||||
|
"blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
|
||||||
|
"blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
|
||||||
|
"blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
|
||||||
|
"blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
|
||||||
|
"blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
|
||||||
|
"blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
|
||||||
|
"blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
|
||||||
|
"blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
|
||||||
|
"blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
|
||||||
|
"blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
|
||||||
|
"blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
|
||||||
|
"blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
|
||||||
|
"blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
|
||||||
|
"blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
|
||||||
|
"blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
|
||||||
|
"blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
|
||||||
|
"blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
|
||||||
|
"blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
|
||||||
|
"blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
|
||||||
|
"blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
|
||||||
|
"blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
|
||||||
|
"blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
|
||||||
|
"blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
|
||||||
|
"blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
|
||||||
|
"blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
|
||||||
|
"blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
|
||||||
|
"blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
|
||||||
|
"blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
|
||||||
|
"blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
|
||||||
|
"blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
|
||||||
|
"blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
|
||||||
|
"blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
|
||||||
|
"blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
|
||||||
|
"blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
|
||||||
|
"blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
|
||||||
|
"blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
|
||||||
|
"blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
|
||||||
|
"blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
|
||||||
|
"blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
|
||||||
|
"blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
|
||||||
|
"blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
|
||||||
|
"blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
|
||||||
|
"blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
|
||||||
|
"blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
|
||||||
|
"blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
|
||||||
|
"blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
|
||||||
|
"blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
|
||||||
|
"blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
|
||||||
|
"blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
|
||||||
|
"blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
|
||||||
|
"blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
|
||||||
|
"blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
|
||||||
|
"blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
|
||||||
|
"blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
|
||||||
|
"blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
|
||||||
|
"blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
|
||||||
|
"blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
|
||||||
|
"blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
|
||||||
|
"blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
|
||||||
|
"blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
|
||||||
|
"blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
|
||||||
|
"blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
|
||||||
|
"blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
|
||||||
|
"blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
|
||||||
|
"blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
|
||||||
|
"blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
|
||||||
|
"blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
|
||||||
|
"blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
|
||||||
|
"blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
|
||||||
|
"blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
|
||||||
|
"blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
|
||||||
|
"blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
|
||||||
|
"blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
|
||||||
|
"blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
|
||||||
|
"blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
|
||||||
|
"blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
|
||||||
|
"blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
|
||||||
|
"output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
|
||||||
|
}
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
package convert
|
package convert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"cmp"
|
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"slices"
|
"slices"
|
||||||
@@ -12,10 +14,140 @@ import (
|
|||||||
"golang.org/x/exp/maps"
|
"golang.org/x/exp/maps"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
_ int32 = iota
|
||||||
|
tokenTypeNormal
|
||||||
|
tokenTypeUnknown
|
||||||
|
tokenTypeControl
|
||||||
|
tokenTypeUserDefined
|
||||||
|
tokenTypeUnused
|
||||||
|
tokenTypeByte
|
||||||
|
)
|
||||||
|
|
||||||
type Tokenizer struct {
|
type Tokenizer struct {
|
||||||
Version string `json:"version"`
|
*Vocabulary
|
||||||
AddedTokens []Token `json:"added_tokens"`
|
SpecialVocabulary []*SpecialVocabulary
|
||||||
Model TokenizerModel `json:"model"`
|
Merges []string
|
||||||
|
|
||||||
|
Pre string
|
||||||
|
Template string
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
|
||||||
|
v, err := parseVocabulary(fsys)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Tokenizer{
|
||||||
|
Vocabulary: v,
|
||||||
|
Pre: "default",
|
||||||
|
}
|
||||||
|
|
||||||
|
addedTokens := make(map[string]token)
|
||||||
|
if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var tt tokenizer
|
||||||
|
if err := json.NewDecoder(f).Decode(&tt); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, t := range tt.AddedTokens {
|
||||||
|
addedTokens[t.Content] = t
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Merges = tt.Model.Merges
|
||||||
|
|
||||||
|
sha256sum := sha256.New()
|
||||||
|
for _, pt := range tt.PreTokenizer.PreTokenizers {
|
||||||
|
switch pt.Type {
|
||||||
|
case "Split":
|
||||||
|
if pt.Pattern.Regex != "" {
|
||||||
|
// create a checksum of all Split pretokenizers which should be sufficient
|
||||||
|
// to identify the pretokenizer
|
||||||
|
sha256sum.Write([]byte(pt.Pattern.Regex))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
|
||||||
|
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
|
||||||
|
t.Pre = "llama-bpe"
|
||||||
|
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
|
||||||
|
t.Pre = "deepseek-llm"
|
||||||
|
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
|
||||||
|
t.Pre = "deepseek-coder"
|
||||||
|
case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
|
||||||
|
// noop, empty pretokenizer
|
||||||
|
default:
|
||||||
|
slog.Warn("unknown pretokenizer, using default", "digest", digest)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var p map[string]json.RawMessage
|
||||||
|
if err := json.NewDecoder(f).Decode(&p); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if template, ok := p["chat_template"]; ok {
|
||||||
|
if err := json.Unmarshal(template, &t.Template); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, st := range specialTokenTypes {
|
||||||
|
sv := SpecialVocabulary{Type: st}
|
||||||
|
if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
|
||||||
|
if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
|
||||||
|
var content string
|
||||||
|
if err := json.Unmarshal(bts, &content); err != nil {
|
||||||
|
var mm map[string]any
|
||||||
|
if err := json.Unmarshal(bts, &mm); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
content, ok = mm["content"].(string)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sv.Content = content
|
||||||
|
}
|
||||||
|
|
||||||
|
if id, ok := addedTokens[sv.Content]; ok {
|
||||||
|
sv.ID = id.ID
|
||||||
|
t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type tokenizer struct {
|
||||||
|
Version string `json:"version"`
|
||||||
|
AddedTokens []token `json:"added_tokens"`
|
||||||
|
Model struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Vocab map[string]int `json:"vocab"`
|
||||||
|
Merges []string `json:"merges"`
|
||||||
|
} `json:"model"`
|
||||||
|
|
||||||
PreTokenizer struct {
|
PreTokenizer struct {
|
||||||
PreTokenizers []struct {
|
PreTokenizers []struct {
|
||||||
@@ -27,80 +159,108 @@ type Tokenizer struct {
|
|||||||
} `json:"pre_tokenizer"`
|
} `json:"pre_tokenizer"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type TokenizerModel struct {
|
type token struct {
|
||||||
Type string `json:"type"`
|
|
||||||
Vocab map[string]int `json:"vocab"`
|
|
||||||
Merges []string `json:"merges"`
|
|
||||||
Tokens []Token
|
|
||||||
}
|
|
||||||
|
|
||||||
type Token struct {
|
|
||||||
ID int `json:"id"`
|
ID int `json:"id"`
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
Special bool `json:"special"`
|
Special bool `json:"special"`
|
||||||
UserDefined bool
|
UserDefined bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Token) Type() int32 {
|
type Vocabulary struct {
|
||||||
switch {
|
Model string
|
||||||
case t.Special:
|
Tokens []string
|
||||||
return tokenTypeControl
|
Scores []float32
|
||||||
case t.UserDefined:
|
Types []int32
|
||||||
return tokenTypeUserDefined
|
|
||||||
default:
|
|
||||||
return tokenTypeNormal
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Tokenizer) maxID() int {
|
func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
|
||||||
return max(
|
f, err := fsys.Open("tokenizer.json")
|
||||||
slices.Max(maps.Values(t.Model.Vocab)),
|
|
||||||
slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
|
|
||||||
return cmp.Compare(a.ID, b.ID)
|
|
||||||
}).ID,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
|
|
||||||
f, err := os.Open(dirpath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
return nil, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
var t Tokenizer
|
var t tokenizer
|
||||||
if err := json.NewDecoder(f).Decode(&t); err != nil {
|
if err := json.NewDecoder(f).Decode(&t); err != nil {
|
||||||
return "", nil, nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tokens = make([]Token, t.maxID()+1)
|
tokens := make(map[int]token, len(t.Model.Vocab))
|
||||||
for k, v := range t.Model.Vocab {
|
for k, v := range t.Model.Vocab {
|
||||||
tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
|
tokens[v] = token{
|
||||||
}
|
ID: v,
|
||||||
|
Content: k,
|
||||||
for _, v := range t.AddedTokens {
|
|
||||||
v.UserDefined = true
|
|
||||||
tokens[v.ID] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
sha256sum := sha256.New()
|
|
||||||
for _, pt := range t.PreTokenizer.PreTokenizers {
|
|
||||||
if pt.Type == "Split" && pt.Pattern.Regex != "" {
|
|
||||||
sha256sum.Write([]byte(pt.Pattern.Regex))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
|
for _, token := range t.AddedTokens {
|
||||||
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
|
token.UserDefined = true
|
||||||
pre = "llama-bpe"
|
tokens[token.ID] = token
|
||||||
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
|
|
||||||
pre = "deepseek-llm"
|
|
||||||
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
|
|
||||||
pre = "deepseek-coder"
|
|
||||||
default:
|
|
||||||
slog.Warn("unknown pretokenizer, using default", "digest", digest)
|
|
||||||
pre = "default"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pre, tokens, t.Model.Merges, nil
|
keys := maps.Keys(tokens)
|
||||||
|
slices.Sort(keys)
|
||||||
|
|
||||||
|
v := Vocabulary{Model: "gpt2"}
|
||||||
|
for _, k := range keys {
|
||||||
|
token := tokens[k]
|
||||||
|
v.Tokens = append(v.Tokens, token.Content)
|
||||||
|
v.Scores = append(v.Scores, float32(token.ID))
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case token.Special:
|
||||||
|
v.Types = append(v.Types, tokenTypeControl)
|
||||||
|
case token.UserDefined:
|
||||||
|
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||||
|
default:
|
||||||
|
v.Types = append(v.Types, tokenTypeNormal)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
|
||||||
|
patterns := []struct {
|
||||||
|
Pattern string
|
||||||
|
Func func(fs.FS) (*Vocabulary, error)
|
||||||
|
}{
|
||||||
|
{"tokenizer.model", parseSentencePiece},
|
||||||
|
{"tokenizer.json", parseVocabularyFromTokenizer},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
|
||||||
|
continue
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return pattern.Func(fsys)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errors.New("unknown tensor format")
|
||||||
|
}
|
||||||
|
|
||||||
|
type SpecialVocabulary struct {
|
||||||
|
Type string
|
||||||
|
ID int
|
||||||
|
Content string
|
||||||
|
AddToken bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sv SpecialVocabulary) Key() string {
|
||||||
|
switch t := sv.Type; t {
|
||||||
|
case "bos", "eos", "cls", "mask":
|
||||||
|
return t
|
||||||
|
case "unk":
|
||||||
|
return "unknown"
|
||||||
|
case "sep":
|
||||||
|
//nolint:misspell // this is an upstream typo
|
||||||
|
return "seperator"
|
||||||
|
case "pad":
|
||||||
|
return "padding"
|
||||||
|
}
|
||||||
|
|
||||||
|
panic("unknown special vocabulary type")
|
||||||
}
|
}
|
||||||
|
|||||||
113
convert/tokenizer_spm.go
Normal file
113
convert/tokenizer_spm.go
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
package convert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"slices"
|
||||||
|
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/convert/sentencepiece"
|
||||||
|
)
|
||||||
|
|
||||||
|
func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
|
||||||
|
ast, err := parseAdditionalSpecialTokens(fsys)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
bts, err := fs.ReadFile(fsys, "tokenizer.model")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var spm sentencepiece.ModelProto
|
||||||
|
if err := proto.Unmarshal(bts, &spm); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
v := Vocabulary{Model: "llama"}
|
||||||
|
for _, piece := range spm.GetPieces() {
|
||||||
|
v.Tokens = append(v.Tokens, piece.GetPiece())
|
||||||
|
v.Scores = append(v.Scores, piece.GetScore())
|
||||||
|
|
||||||
|
switch t := piece.GetType(); t {
|
||||||
|
case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
|
||||||
|
sentencepiece.ModelProto_SentencePiece_CONTROL,
|
||||||
|
sentencepiece.ModelProto_SentencePiece_UNUSED,
|
||||||
|
sentencepiece.ModelProto_SentencePiece_BYTE:
|
||||||
|
v.Types = append(v.Types, int32(t))
|
||||||
|
default:
|
||||||
|
tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
|
||||||
|
if slices.Contains(ast, piece.GetPiece()) {
|
||||||
|
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
|
||||||
|
}
|
||||||
|
|
||||||
|
v.Types = append(v.Types, tt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := fsys.Open("added_tokens.json")
|
||||||
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
|
return &v, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var atm map[string]int
|
||||||
|
if err := json.NewDecoder(f).Decode(&atm); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
type t struct {
|
||||||
|
id int
|
||||||
|
content string
|
||||||
|
}
|
||||||
|
|
||||||
|
var ts []t
|
||||||
|
for content, id := range atm {
|
||||||
|
ts = append(ts, t{id, content})
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.SortFunc(ts, func(i, j t) int {
|
||||||
|
return cmp.Compare(i.id, j.id)
|
||||||
|
})
|
||||||
|
|
||||||
|
n := len(v.Tokens)
|
||||||
|
for i, t := range ts {
|
||||||
|
if t.id != i+n {
|
||||||
|
return nil, fmt.Errorf("invalid token id: %d", t.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
v.Tokens = append(v.Tokens, t.content)
|
||||||
|
v.Scores = append(v.Scores, -1000.0)
|
||||||
|
v.Types = append(v.Types, tokenTypeUserDefined)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
|
||||||
|
f, err := fsys.Open("special_tokens_map.json")
|
||||||
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
|
return nil, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var m struct {
|
||||||
|
AdditionalSpecialTokens []string `json:"additional_special_tokens"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(f).Decode(&m); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return m.AdditionalSpecialTokens, nil
|
||||||
|
}
|
||||||
287
convert/torch.go
287
convert/torch.go
@@ -1,287 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"log/slog"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/nlpodyssey/gopickle/pytorch"
|
|
||||||
"github.com/nlpodyssey/gopickle/types"
|
|
||||||
"github.com/x448/float16"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
)
|
|
||||||
|
|
||||||
type torchWriterTo struct {
|
|
||||||
t *llm.Tensor
|
|
||||||
|
|
||||||
params *Params
|
|
||||||
bo ByteOrder
|
|
||||||
|
|
||||||
storage pytorch.StorageInterface
|
|
||||||
repacker func(string, []float32, []uint64) ([]float32, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
type TorchFormat struct{}
|
|
||||||
|
|
||||||
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
|
|
||||||
slog.Debug("getting torch tensors")
|
|
||||||
|
|
||||||
var files []string
|
|
||||||
if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
|
|
||||||
files = append(files, pt...)
|
|
||||||
} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
|
|
||||||
files = append(files, pt...)
|
|
||||||
}
|
|
||||||
|
|
||||||
var offset uint64
|
|
||||||
var tensors []llm.Tensor
|
|
||||||
for _, fn := range files {
|
|
||||||
m, err := pytorch.Load(fn)
|
|
||||||
if err != nil {
|
|
||||||
slog.Error(fmt.Sprintf("error unpickling: %q", err))
|
|
||||||
return []llm.Tensor{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, k := range m.(*types.Dict).Keys() {
|
|
||||||
if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
t, _ := m.(*types.Dict).Get(k)
|
|
||||||
tshape := t.(*pytorch.Tensor).Size
|
|
||||||
|
|
||||||
var size uint64
|
|
||||||
var kind uint32
|
|
||||||
switch len(tshape) {
|
|
||||||
case 0:
|
|
||||||
continue
|
|
||||||
case 1:
|
|
||||||
// convert to float32
|
|
||||||
kind = 0
|
|
||||||
size = uint64(tshape[0] * 4)
|
|
||||||
case 2:
|
|
||||||
// convert to float16
|
|
||||||
kind = 1
|
|
||||||
size = uint64(tshape[0] * tshape[1] * 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
ggufName, err := tf.GetLayerName(k.(string))
|
|
||||||
if err != nil {
|
|
||||||
slog.Error(err.Error())
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
|
|
||||||
|
|
||||||
shape := []uint64{0, 0, 0, 0}
|
|
||||||
for i := range tshape {
|
|
||||||
shape[i] = uint64(tshape[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
tensor := llm.Tensor{
|
|
||||||
Name: ggufName,
|
|
||||||
Kind: kind,
|
|
||||||
Offset: offset, // calculate the offset
|
|
||||||
Shape: shape,
|
|
||||||
}
|
|
||||||
|
|
||||||
tensor.WriterTo = torchWriterTo{
|
|
||||||
t: &tensor,
|
|
||||||
params: params,
|
|
||||||
bo: params.ByteOrder,
|
|
||||||
storage: t.(*pytorch.Tensor).Source,
|
|
||||||
}
|
|
||||||
|
|
||||||
tensors = append(tensors, tensor)
|
|
||||||
offset += size
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return tensors, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getAltParams(dirpath string) (*Params, error) {
|
|
||||||
f, err := os.Open(filepath.Join(dirpath, "params.json"))
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("no params.json")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
type TorchParams struct {
|
|
||||||
HiddenSize int `json:"dim"`
|
|
||||||
AttentionHeads int `json:"n_heads"`
|
|
||||||
KeyValHeads int `json:"n_kv_heads"`
|
|
||||||
HiddenLayers int `json:"n_layers"`
|
|
||||||
RopeTheta float64 `json:"rope_theta"`
|
|
||||||
NormEPS float64 `json:"norm_eps"`
|
|
||||||
}
|
|
||||||
|
|
||||||
var tparams TorchParams
|
|
||||||
|
|
||||||
d := json.NewDecoder(f)
|
|
||||||
err = d.Decode(&tparams)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
params := &Params{
|
|
||||||
Architectures: []string{"LlamaForCausalLM"},
|
|
||||||
HiddenSize: tparams.HiddenSize,
|
|
||||||
AttentionHeads: tparams.AttentionHeads,
|
|
||||||
KeyValHeads: tparams.KeyValHeads,
|
|
||||||
HiddenLayers: tparams.HiddenLayers,
|
|
||||||
NormEPS: tparams.NormEPS,
|
|
||||||
}
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case tparams.RopeTheta == 1000000:
|
|
||||||
// Codellama
|
|
||||||
params.ContextSize = 16384
|
|
||||||
case tparams.NormEPS == 1e-06:
|
|
||||||
// llama2
|
|
||||||
slog.Debug("Found llama2 - setting context size to 4096")
|
|
||||||
params.ContextSize = 4096
|
|
||||||
default:
|
|
||||||
params.ContextSize = 2048
|
|
||||||
}
|
|
||||||
|
|
||||||
params.ByteOrder = binary.LittleEndian
|
|
||||||
return params, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
|
|
||||||
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
// try params.json instead
|
|
||||||
return getAltParams(dirpath)
|
|
||||||
} else {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var params Params
|
|
||||||
d := json.NewDecoder(f)
|
|
||||||
err = d.Decode(¶ms)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
params.ByteOrder = binary.LittleEndian
|
|
||||||
return ¶ms, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *TorchFormat) GetLayerName(n string) (string, error) {
|
|
||||||
directMap := map[string]string{
|
|
||||||
"tok_embeddings.weight": "token_embd.weight",
|
|
||||||
"output.weight": "output.weight",
|
|
||||||
"norm.weight": "output_norm.weight",
|
|
||||||
"rope.freqs": "rope_freqs.weight",
|
|
||||||
"model.embed_tokens.weight": "token_embd.weight",
|
|
||||||
"lm_head.weight": "output.weight",
|
|
||||||
"model.norm.weight": "output_norm.weight",
|
|
||||||
}
|
|
||||||
|
|
||||||
lMap := map[string]string{
|
|
||||||
"layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight",
|
|
||||||
"layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight",
|
|
||||||
"layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight",
|
|
||||||
"layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight",
|
|
||||||
"layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight",
|
|
||||||
"layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight",
|
|
||||||
"layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight",
|
|
||||||
"layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight",
|
|
||||||
"layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight",
|
|
||||||
"layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight",
|
|
||||||
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
|
||||||
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
|
||||||
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
|
||||||
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
|
||||||
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
|
||||||
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
|
||||||
}
|
|
||||||
|
|
||||||
v, ok := directMap[n]
|
|
||||||
if ok {
|
|
||||||
return v, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// quick hack to rename the layers to gguf format
|
|
||||||
for k, v := range lMap {
|
|
||||||
re := regexp.MustCompile(k)
|
|
||||||
newName := re.ReplaceAllString(n, v)
|
|
||||||
if newName != n {
|
|
||||||
return newName, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
|
|
||||||
var f32s []float32
|
|
||||||
switch s := r.storage.(type) {
|
|
||||||
case *pytorch.FloatStorage:
|
|
||||||
f32s = s.Data
|
|
||||||
case *pytorch.HalfStorage:
|
|
||||||
f32s = s.Data
|
|
||||||
case *pytorch.BFloat16Storage:
|
|
||||||
f32s = s.Data
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("unknown data type: %T", s)
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.repacker != nil {
|
|
||||||
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch r.t.Kind {
|
|
||||||
case 0:
|
|
||||||
return 0, binary.Write(w, r.bo, f32s)
|
|
||||||
case 1:
|
|
||||||
f16s := make([]uint16, len(f32s))
|
|
||||||
for i := range f32s {
|
|
||||||
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0, binary.Write(w, r.bo, f16s)
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
|
|
||||||
switch len(params.Architectures) {
|
|
||||||
case 0:
|
|
||||||
return nil, fmt.Errorf("No architecture specified to convert")
|
|
||||||
case 1:
|
|
||||||
switch params.Architectures[0] {
|
|
||||||
case "LlamaForCausalLM":
|
|
||||||
return &LlamaModel{
|
|
||||||
ModelData{
|
|
||||||
Name: name,
|
|
||||||
Path: dirPath,
|
|
||||||
Params: params,
|
|
||||||
Format: m,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, fmt.Errorf("Unknown error")
|
|
||||||
}
|
|
||||||
@@ -669,7 +669,7 @@ curl http://localhost:11434/api/chat -d '{
|
|||||||
|
|
||||||
```
|
```
|
||||||
curl http://localhost:11434/api/chat -d '{
|
curl http://localhost:11434/api/chat -d '{
|
||||||
"model": "mistral",
|
"model": "llama3.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@@ -708,7 +708,7 @@ curl http://localhost:11434/api/chat -d '{
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"model": "mistral:7b-instruct-v0.3-q4_K_M",
|
"model": "llama3.1",
|
||||||
"created_at": "2024-07-22T20:33:28.123648Z",
|
"created_at": "2024-07-22T20:33:28.123648Z",
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
@@ -1175,7 +1175,10 @@ curl http://localhost:11434/api/embed -d '{
|
|||||||
"embeddings": [[
|
"embeddings": [[
|
||||||
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
|
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
|
||||||
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
|
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
|
||||||
]]
|
]],
|
||||||
|
"total_duration": 14143917,
|
||||||
|
"load_duration": 1019500,
|
||||||
|
"prompt_eval_count": 8
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
142
docs/docker.md
142
docs/docker.md
@@ -1,71 +1,71 @@
|
|||||||
# Ollama Docker image
|
# Ollama Docker image
|
||||||
|
|
||||||
### CPU only
|
### CPU only
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||||
```
|
```
|
||||||
|
|
||||||
### Nvidia GPU
|
### Nvidia GPU
|
||||||
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
|
||||||
|
|
||||||
#### Install with Apt
|
#### Install with Apt
|
||||||
1. Configure the repository
|
1. Configure the repository
|
||||||
```bash
|
```bash
|
||||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
|
||||||
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
|
||||||
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
|
||||||
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
```
|
```
|
||||||
2. Install the NVIDIA Container Toolkit packages
|
2. Install the NVIDIA Container Toolkit packages
|
||||||
```bash
|
```bash
|
||||||
sudo apt-get install -y nvidia-container-toolkit
|
sudo apt-get install -y nvidia-container-toolkit
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Install with Yum or Dnf
|
#### Install with Yum or Dnf
|
||||||
1. Configure the repository
|
1. Configure the repository
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
|
||||||
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the NVIDIA Container Toolkit packages
|
2. Install the NVIDIA Container Toolkit packages
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo yum install -y nvidia-container-toolkit
|
sudo yum install -y nvidia-container-toolkit
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Configure Docker to use Nvidia driver
|
#### Configure Docker to use Nvidia driver
|
||||||
```
|
```
|
||||||
sudo nvidia-ctk runtime configure --runtime=docker
|
sudo nvidia-ctk runtime configure --runtime=docker
|
||||||
sudo systemctl restart docker
|
sudo systemctl restart docker
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Start the container
|
#### Start the container
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
|
||||||
```
|
```
|
||||||
|
|
||||||
### AMD GPU
|
### AMD GPU
|
||||||
|
|
||||||
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
|
||||||
```
|
```
|
||||||
|
|
||||||
### Run model locally
|
### Run model locally
|
||||||
|
|
||||||
Now you can run a model:
|
Now you can run a model:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker exec -it ollama ollama run llama3.1
|
docker exec -it ollama ollama run llama3.1
|
||||||
```
|
```
|
||||||
|
|
||||||
### Try different models
|
### Try different models
|
||||||
|
|
||||||
More models can be found on the [Ollama library](https://ollama.com/library).
|
More models can be found on the [Ollama library](https://ollama.com/library).
|
||||||
|
|||||||
@@ -111,7 +111,10 @@ On Windows, Ollama inherits your user and system environment variables.
|
|||||||
|
|
||||||
## How do I use Ollama behind a proxy?
|
## How do I use Ollama behind a proxy?
|
||||||
|
|
||||||
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values. When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
|
Ollama pulls models from the Internet and may require a proxy server to access the models. Use `HTTPS_PROXY` to redirect outbound requests through the proxy. Ensure the proxy certificate is installed as a system certificate. Refer to the section above for how to use environment variables on your platform.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Avoid setting `HTTP_PROXY`. Ollama does not use HTTP for model pulls, only HTTPS. Setting `HTTP_PROXY` may interrupt client connections to the server.
|
||||||
|
|
||||||
### How do I use Ollama behind a proxy in Docker?
|
### How do I use Ollama behind a proxy in Docker?
|
||||||
|
|
||||||
@@ -276,4 +279,4 @@ Note: Windows with Radeon GPUs currently default to 1 model maximum due to limit
|
|||||||
|
|
||||||
## How does Ollama load models on multiple GPUs?
|
## How does Ollama load models on multiple GPUs?
|
||||||
|
|
||||||
Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
|
Installing multiple GPUs of the same brand can be a great way to increase your available VRAM to load larger models. When you load a new model, Ollama evaluates the required VRAM for the model against what is currently available. If the model will entirely fit on any single GPU, Ollama will load the model on that GPU. This typically provides the best performance as it reduces the amount of data transferring across the PCI bus during inference. If the model does not fit entirely on one GPU, then it will be spread across all the available GPUs.
|
||||||
|
|||||||
BIN
docs/images/ollama-keys.png
Normal file
BIN
docs/images/ollama-keys.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 141 KiB |
BIN
docs/images/signup.png
Normal file
BIN
docs/images/signup.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 80 KiB |
186
docs/import.md
186
docs/import.md
@@ -1,42 +1,129 @@
|
|||||||
# Import
|
# Importing a model
|
||||||
|
|
||||||
GGUF models and select Safetensors models can be imported directly into Ollama.
|
## Table of Contents
|
||||||
|
|
||||||
## Import GGUF
|
* [Importing a Safetensors adapter](#Importing-a-fine-tuned-adapter-from-Safetensors-weights)
|
||||||
|
* [Importing a Safetensors model](#Importing-a-model-from-Safetensors-weights)
|
||||||
|
* [Importing a GGUF file](#Importing-a-GGUF-based-model-or-adapter)
|
||||||
|
* [Sharing models on ollama.com](#Sharing-your-model-on-ollamacom)
|
||||||
|
|
||||||
A binary GGUF file can be imported directly into Ollama through a Modelfile.
|
## Importing a fine tuned adapter from Safetensors weights
|
||||||
|
|
||||||
|
First, create a `Modelfile` with a `FROM` command pointing at the base model you used for fine tuning, and an `ADAPTER` command which points to the directory with your Safetensors adapter:
|
||||||
|
|
||||||
```dockerfile
|
```dockerfile
|
||||||
FROM /path/to/file.gguf
|
FROM <base model name>
|
||||||
|
ADAPTER /path/to/safetensors/adapter/directory
|
||||||
```
|
```
|
||||||
|
|
||||||
## Import Safetensors
|
Make sure that you use the same base model in the `FROM` command as you used to create the adapter, otherwise you will get erratic results. Most frameworks use different quantization methods, so it's best to use non-quantized (i.e. non-QLoRA) adapters. If your adapter is in the same directory as your `Modelfile`, use `ADAPTER .` to specify the adapter path.
|
||||||
|
|
||||||
If the model being imported is one of these architectures, it can be imported directly into Ollama through a Modelfile:
|
Now run `ollama create` from the directory where the `Modelfile` was created:
|
||||||
|
|
||||||
- LlamaForCausalLM
|
```bash
|
||||||
- MistralForCausalLM
|
ollama create my-model
|
||||||
- GemmaForCausalLM
|
```
|
||||||
|
|
||||||
|
Lastly, test the model:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ollama run my-model
|
||||||
|
```
|
||||||
|
|
||||||
|
Ollama supports importing adapters based on several different model architectures including:
|
||||||
|
|
||||||
|
* Llama (including Llama 2, Llama 3, and Llama 3.1);
|
||||||
|
* Mistral (including Mistral 1, Mistral 2, and Mixtral); and
|
||||||
|
* Gemma (including Gemma 1 and Gemma 2)
|
||||||
|
|
||||||
|
You can create the adapter using a fine tuning framework or tool which can output adapters in the Safetensors format, such as:
|
||||||
|
|
||||||
|
* Hugging Face [fine tuning framework](https://huggingface.co/docs/transformers/en/training)
|
||||||
|
* [Unsloth](https://github.com/unslothai/unsloth)
|
||||||
|
* [MLX](https://github.com/ml-explore/mlx)
|
||||||
|
|
||||||
|
|
||||||
|
## Importing a model from Safetensors weights
|
||||||
|
|
||||||
|
First, create a `Modelfile` with a `FROM` command which points to the directory containing your Safetensors weights:
|
||||||
|
|
||||||
```dockerfile
|
```dockerfile
|
||||||
FROM /path/to/safetensors/directory
|
FROM /path/to/safetensors/directory
|
||||||
```
|
```
|
||||||
|
|
||||||
For architectures not directly convertible by Ollama, see llama.cpp's [guide](https://github.com/ggerganov/llama.cpp/blob/master/README.md#prepare-and-quantize) on conversion. After conversion, see [Import GGUF](#import-gguf).
|
If you create the Modelfile in the same directory as the weights, you can use the command `FROM .`.
|
||||||
|
|
||||||
## Automatic Quantization
|
Now run the `ollama create` command from the directory where you created the `Modelfile`:
|
||||||
|
|
||||||
> [!NOTE]
|
```shell
|
||||||
> Automatic quantization requires v0.1.35 or higher.
|
ollama create my-model
|
||||||
|
```
|
||||||
|
|
||||||
Ollama is capable of quantizing FP16 or FP32 models to any of the supported quantizations with the `-q/--quantize` flag in `ollama create`.
|
Lastly, test the model:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ollama run my-model
|
||||||
|
```
|
||||||
|
|
||||||
|
Ollama supports importing models for several different architectures including:
|
||||||
|
|
||||||
|
* Llama (including Llama 2, Llama 3, and Llama 3.1);
|
||||||
|
* Mistral (including Mistral 1, Mistral 2, and Mixtral);
|
||||||
|
* Gemma (including Gemma 1 and Gemma 2); and
|
||||||
|
* Phi3
|
||||||
|
|
||||||
|
This includes importing foundation models as well as any fine tuned models which have been _fused_ with a foundation model.
|
||||||
|
|
||||||
|
|
||||||
|
## Importing a GGUF based model or adapter
|
||||||
|
|
||||||
|
If you have a GGUF based model or adapter it is possible to import it into Ollama. You can obtain a GGUF model or adapter by:
|
||||||
|
|
||||||
|
* converting a Safetensors model with the `convert_hf_to_gguf.py` from Llama.cpp;
|
||||||
|
* converting a Safetensors adapter with the `convert_lora_to_gguf.py` from Llama.cpp; or
|
||||||
|
* downloading a model or adapter from a place such as HuggingFace
|
||||||
|
|
||||||
|
To import a GGUF model, create a `Modelfile` containing:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM /path/to/file.gguf
|
||||||
|
```
|
||||||
|
|
||||||
|
For a GGUF adapter, create the `Modelfile` with:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM <model name>
|
||||||
|
ADAPTER /path/to/file.gguf
|
||||||
|
```
|
||||||
|
|
||||||
|
When importing a GGUF adapter, it's important to use the same base model as the base model that the adapter was created with. You can use:
|
||||||
|
|
||||||
|
* a model from Ollama
|
||||||
|
* a GGUF file
|
||||||
|
* a Safetensors based model
|
||||||
|
|
||||||
|
Once you have created your `Modelfile`, use the `ollama create` command to build the model.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ollama create my-model
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quantizing a Model
|
||||||
|
|
||||||
|
Quantizing a model allows you to run models faster and with less memory consumption but at reduced accuracy. This allows you to run a model on more modest hardware.
|
||||||
|
|
||||||
|
Ollama can quantize FP16 and FP32 based models into different quantization levels using the `-q/--quantize` flag with the `ollama create` command.
|
||||||
|
|
||||||
|
First, create a Modelfile with the FP16 or FP32 based model you wish to quantize.
|
||||||
|
|
||||||
```dockerfile
|
```dockerfile
|
||||||
FROM /path/to/my/gemma/f16/model
|
FROM /path/to/my/gemma/f16/model
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Use `ollama create` to then create the quantized model.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ ollama create -q Q4_K_M mymodel
|
$ ollama create --quantize q4_K_M mymodel
|
||||||
transferring model data
|
transferring model data
|
||||||
quantizing F16 model to Q4_K_M
|
quantizing F16 model to Q4_K_M
|
||||||
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
|
creating new layer sha256:735e246cc1abfd06e9cdcf95504d6789a6cd1ad7577108a70d9902fef503c1bd
|
||||||
@@ -47,42 +134,53 @@ success
|
|||||||
|
|
||||||
### Supported Quantizations
|
### Supported Quantizations
|
||||||
|
|
||||||
- `Q4_0`
|
- `q4_0`
|
||||||
- `Q4_1`
|
- `q4_1`
|
||||||
- `Q5_0`
|
- `q5_0`
|
||||||
- `Q5_1`
|
- `q5_1`
|
||||||
- `Q8_0`
|
- `q8_0`
|
||||||
|
|
||||||
#### K-means Quantizations
|
#### K-means Quantizations
|
||||||
|
|
||||||
- `Q3_K_S`
|
- `q3_K_S`
|
||||||
- `Q3_K_M`
|
- `q3_K_M`
|
||||||
- `Q3_K_L`
|
- `q3_K_L`
|
||||||
- `Q4_K_S`
|
- `q4_K_S`
|
||||||
- `Q4_K_M`
|
- `q4_K_M`
|
||||||
- `Q5_K_S`
|
- `q5_K_S`
|
||||||
- `Q5_K_M`
|
- `q5_K_M`
|
||||||
- `Q6_K`
|
- `q6_K`
|
||||||
|
|
||||||
## Template Detection
|
|
||||||
|
|
||||||
> [!NOTE]
|
## Sharing your model on ollama.com
|
||||||
> Template detection requires v0.1.42 or higher.
|
|
||||||
|
|
||||||
Ollama uses model metadata, specifically `tokenizer.chat_template`, to automatically create a template appropriate for the model you're importing.
|
You can share any model you have created by pushing it to [ollama.com](https://ollama.com) so that other users can try it out.
|
||||||
|
|
||||||
```dockerfile
|
First, use your browser to go to the [Ollama Sign-Up](https://ollama.com/signup) page. If you already have an account, you can skip this step.
|
||||||
FROM /path/to/my/gemma/model
|
|
||||||
```
|

|
||||||
|
|
||||||
|
The `Username` field will be used as part of your model's name (e.g. `jmorganca/mymodel`), so make sure you are comfortable with the username that you have selected.
|
||||||
|
|
||||||
|
Now that you have created an account and are signed in, go to the [Ollama Keys Settings](https://ollama.com/settings/keys) page.
|
||||||
|
|
||||||
|
Follow the directions on the page to determine where your Ollama Public Key is located.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Click on the `Add Ollama Public Key` button, and copy and paste the contents of your Ollama Public Key into the text field.
|
||||||
|
|
||||||
|
To push a model to [ollama.com](https://ollama.com), first make sure that it is named correctly with your username. You may have to use the `ollama cp` command to copy
|
||||||
|
your model to give it the correct name. Once you're happy with your model's name, use the `ollama push` command to push it to [ollama.com](https://ollama.com).
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ ollama create mymodel
|
ollama cp mymodel myuser/mymodel
|
||||||
transferring model data
|
ollama push myuser/mymodel
|
||||||
using autodetected template gemma-instruct
|
```
|
||||||
creating new layer sha256:baa2a0edc27d19cc6b7537578a9a7ba1a4e3214dc185ed5ae43692b319af7b84
|
|
||||||
creating new layer sha256:ba66c3309914dbef07e5149a648fd1877f030d337a4f240d444ea335008943cb
|
Once your model has been pushed, other users can pull and run it by using the command:
|
||||||
writing manifest
|
|
||||||
success
|
```shell
|
||||||
|
ollama run myuser/mymodel
|
||||||
```
|
```
|
||||||
|
|
||||||
Defining a template in the Modelfile will disable this feature which may be useful if you want to use a different template than the autodetected one.
|
|
||||||
|
|||||||
@@ -20,13 +20,12 @@ GPU.
|
|||||||
|
|
||||||
## Manual install
|
## Manual install
|
||||||
|
|
||||||
### Download the `ollama` binary
|
### Download `ollama`
|
||||||
|
|
||||||
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
|
Download and extract the Linux package:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
|
||||||
sudo chmod +x /usr/bin/ollama
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Adding Ollama as a startup service (recommended)
|
### Adding Ollama as a startup service (recommended)
|
||||||
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||||||
Or by downloading the ollama binary:
|
Or by downloading the ollama binary:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
|
||||||
sudo chmod +x /usr/bin/ollama
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installing specific versions
|
## Installing specific versions
|
||||||
|
|||||||
176
docs/openai.md
176
docs/openai.md
@@ -27,6 +27,37 @@ chat_completion = client.chat.completions.create(
|
|||||||
],
|
],
|
||||||
model='llama3',
|
model='llama3',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="llava",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "What's in this image?"},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDS
sxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPq
smVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
max_tokens=300,
|
||||||
|
)
|
||||||
|
|
||||||
|
completion = client.completions.create(
|
||||||
|
model="llama3",
|
||||||
|
prompt="Say this is a test",
|
||||||
|
)
|
||||||
|
|
||||||
|
list_completion = client.models.list()
|
||||||
|
|
||||||
|
model = client.models.retrieve("llama3")
|
||||||
|
|
||||||
|
embeddings = client.embeddings.create(
|
||||||
|
model="all-minilm",
|
||||||
|
input=["why is the sky blue?", "why is the grass green?"],
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### OpenAI JavaScript library
|
### OpenAI JavaScript library
|
||||||
@@ -42,14 +73,44 @@ const openai = new OpenAI({
|
|||||||
})
|
})
|
||||||
|
|
||||||
const chatCompletion = await openai.chat.completions.create({
|
const chatCompletion = await openai.chat.completions.create({
|
||||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||||
model: 'llama3',
|
model: 'llama3',
|
||||||
|
})
|
||||||
|
|
||||||
|
const response = await openai.chat.completions.create({
|
||||||
|
model: "llava",
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: [
|
||||||
|
{ type: "text", text: "What's in this image?" },
|
||||||
|
{
|
||||||
|
type: "image_url",
|
||||||
|
image_url: "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsx
NHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsm
VesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
const completion = await openai.completions.create({
|
||||||
|
model: "llama3",
|
||||||
|
prompt: "Say this is a test.",
|
||||||
|
})
|
||||||
|
|
||||||
|
const listCompletion = await openai.models.list()
|
||||||
|
|
||||||
|
const model = await openai.models.retrieve("llama3")
|
||||||
|
|
||||||
|
const embedding = await openai.embeddings.create({
|
||||||
|
model: "all-minilm",
|
||||||
|
input: ["why is the sky blue?", "why is the grass green?"],
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
|
|
||||||
### `curl`
|
### `curl`
|
||||||
|
|
||||||
```
|
``` shell
|
||||||
curl http://localhost:11434/v1/chat/completions \
|
curl http://localhost:11434/v1/chat/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{
|
-d '{
|
||||||
@@ -66,6 +127,47 @@ curl http://localhost:11434/v1/chat/completions \
|
|||||||
]
|
]
|
||||||
}'
|
}'
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "llava",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What'\''s in this image?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHog
zFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesK
a49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 300
|
||||||
|
}'
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "llama3",
|
||||||
|
"prompt": "Say this is a test"
|
||||||
|
}'
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/models
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/models/llama3
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/embeddings \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "all-minilm",
|
||||||
|
"input": ["why is the sky blue?", "why is the grass green?"]
|
||||||
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
## Endpoints
|
## Endpoints
|
||||||
@@ -78,8 +180,8 @@ curl http://localhost:11434/v1/chat/completions \
|
|||||||
- [x] Streaming
|
- [x] Streaming
|
||||||
- [x] JSON mode
|
- [x] JSON mode
|
||||||
- [x] Reproducible outputs
|
- [x] Reproducible outputs
|
||||||
|
- [x] Vision
|
||||||
- [x] Tools (streaming support coming soon)
|
- [x] Tools (streaming support coming soon)
|
||||||
- [ ] Vision
|
|
||||||
- [ ] Logprobs
|
- [ ] Logprobs
|
||||||
|
|
||||||
#### Supported request fields
|
#### Supported request fields
|
||||||
@@ -87,7 +189,10 @@ curl http://localhost:11434/v1/chat/completions \
|
|||||||
- [x] `model`
|
- [x] `model`
|
||||||
- [x] `messages`
|
- [x] `messages`
|
||||||
- [x] Text `content`
|
- [x] Text `content`
|
||||||
- [ ] Array of `content` parts
|
- [x] Image `content`
|
||||||
|
- [x] Base64 encoded image
|
||||||
|
- [ ] Image URL
|
||||||
|
- [x] Array of `content` parts
|
||||||
- [x] `frequency_penalty`
|
- [x] `frequency_penalty`
|
||||||
- [x] `presence_penalty`
|
- [x] `presence_penalty`
|
||||||
- [x] `response_format`
|
- [x] `response_format`
|
||||||
@@ -103,6 +208,67 @@ curl http://localhost:11434/v1/chat/completions \
|
|||||||
- [ ] `user`
|
- [ ] `user`
|
||||||
- [ ] `n`
|
- [ ] `n`
|
||||||
|
|
||||||
|
### `/v1/completions`
|
||||||
|
|
||||||
|
#### Supported features
|
||||||
|
|
||||||
|
- [x] Completions
|
||||||
|
- [x] Streaming
|
||||||
|
- [x] JSON mode
|
||||||
|
- [x] Reproducible outputs
|
||||||
|
- [ ] Logprobs
|
||||||
|
|
||||||
|
#### Supported request fields
|
||||||
|
|
||||||
|
- [x] `model`
|
||||||
|
- [x] `prompt`
|
||||||
|
- [x] `frequency_penalty`
|
||||||
|
- [x] `presence_penalty`
|
||||||
|
- [x] `seed`
|
||||||
|
- [x] `stop`
|
||||||
|
- [x] `stream`
|
||||||
|
- [x] `temperature`
|
||||||
|
- [x] `top_p`
|
||||||
|
- [x] `max_tokens`
|
||||||
|
- [x] `suffix`
|
||||||
|
- [ ] `best_of`
|
||||||
|
- [ ] `echo`
|
||||||
|
- [ ] `logit_bias`
|
||||||
|
- [ ] `user`
|
||||||
|
- [ ] `n`
|
||||||
|
|
||||||
|
#### Notes
|
||||||
|
|
||||||
|
- `prompt` currently only accepts a string
|
||||||
|
|
||||||
|
### `/v1/models`
|
||||||
|
|
||||||
|
#### Notes
|
||||||
|
|
||||||
|
- `created` corresponds to when the model was last modified
|
||||||
|
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
|
||||||
|
|
||||||
|
### `/v1/models/{model}`
|
||||||
|
|
||||||
|
#### Notes
|
||||||
|
|
||||||
|
- `created` corresponds to when the model was last modified
|
||||||
|
- `owned_by` corresponds to the ollama username, defaulting to `"library"`
|
||||||
|
|
||||||
|
### `/v1/embeddings`
|
||||||
|
|
||||||
|
#### Supported request fields
|
||||||
|
|
||||||
|
- [x] `model`
|
||||||
|
- [x] `input`
|
||||||
|
- [x] string
|
||||||
|
- [x] array of strings
|
||||||
|
- [ ] array of tokens
|
||||||
|
- [ ] array of token arrays
|
||||||
|
- [ ] `encoding_format`
|
||||||
|
- [ ] `dimensions`
|
||||||
|
- [ ] `user`
|
||||||
|
|
||||||
## Models
|
## Models
|
||||||
|
|
||||||
Before using a model, pull it locally `ollama pull`:
|
Before using a model, pull it locally `ollama pull`:
|
||||||
|
|||||||
@@ -112,15 +112,9 @@ Keep the following tips and best practices in mind when working with Go template
|
|||||||
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
|
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
|
||||||
|
|
||||||
```gotmpl
|
```gotmpl
|
||||||
{{- if .System }}<|im_start|>system
|
|
||||||
{{ .System }}<|im_end|>
|
|
||||||
{{ end }}
|
|
||||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||||
{{ .Content }}<|im_end|>
|
{{ .Content }}<|im_end|>
|
||||||
{{ end }}<|im_start|>assistant
|
{{ end }}<|im_start|>assistant
|
||||||
{{ else }}
|
|
||||||
{{ if .System }}<|im_start|>system
|
|
||||||
{{ .System }}<|im_end|>
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Example Tools
|
### Example Tools
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ cat ~/.ollama/logs/server.log
|
|||||||
On **Linux** systems with systemd, the logs can be found with this command:
|
On **Linux** systems with systemd, the logs can be found with this command:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
journalctl -u ollama
|
journalctl -u ollama --no-pager
|
||||||
```
|
```
|
||||||
|
|
||||||
When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
|
When you run Ollama in a **container**, the logs go to stdout/stderr in the container:
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
package envconfig
|
package envconfig
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
"math"
|
||||||
"net"
|
"net"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
@@ -14,296 +14,16 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type OllamaHost struct {
|
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
|
||||||
Scheme string
|
// Default is scheme "http" and host "127.0.0.1:11434"
|
||||||
Host string
|
func Host() *url.URL {
|
||||||
Port string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (o OllamaHost) String() string {
|
|
||||||
return fmt.Sprintf("%s://%s:%s", o.Scheme, o.Host, o.Port)
|
|
||||||
}
|
|
||||||
|
|
||||||
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
|
|
||||||
|
|
||||||
var (
|
|
||||||
// Set via OLLAMA_ORIGINS in the environment
|
|
||||||
AllowOrigins []string
|
|
||||||
// Set via OLLAMA_DEBUG in the environment
|
|
||||||
Debug bool
|
|
||||||
// Experimental flash attention
|
|
||||||
FlashAttention bool
|
|
||||||
// Set via OLLAMA_HOST in the environment
|
|
||||||
Host *OllamaHost
|
|
||||||
// Set via OLLAMA_KEEP_ALIVE in the environment
|
|
||||||
KeepAlive time.Duration
|
|
||||||
// Set via OLLAMA_LLM_LIBRARY in the environment
|
|
||||||
LLMLibrary string
|
|
||||||
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
|
|
||||||
MaxRunners int
|
|
||||||
// Set via OLLAMA_MAX_QUEUE in the environment
|
|
||||||
MaxQueuedRequests int
|
|
||||||
// Set via OLLAMA_MODELS in the environment
|
|
||||||
ModelsDir string
|
|
||||||
// Set via OLLAMA_NOHISTORY in the environment
|
|
||||||
NoHistory bool
|
|
||||||
// Set via OLLAMA_NOPRUNE in the environment
|
|
||||||
NoPrune bool
|
|
||||||
// Set via OLLAMA_NUM_PARALLEL in the environment
|
|
||||||
NumParallel int
|
|
||||||
// Set via OLLAMA_RUNNERS_DIR in the environment
|
|
||||||
RunnersDir string
|
|
||||||
// Set via OLLAMA_SCHED_SPREAD in the environment
|
|
||||||
SchedSpread bool
|
|
||||||
// Set via OLLAMA_TMPDIR in the environment
|
|
||||||
TmpDir string
|
|
||||||
// Set via OLLAMA_INTEL_GPU in the environment
|
|
||||||
IntelGpu bool
|
|
||||||
|
|
||||||
// Set via CUDA_VISIBLE_DEVICES in the environment
|
|
||||||
CudaVisibleDevices string
|
|
||||||
// Set via HIP_VISIBLE_DEVICES in the environment
|
|
||||||
HipVisibleDevices string
|
|
||||||
// Set via ROCR_VISIBLE_DEVICES in the environment
|
|
||||||
RocrVisibleDevices string
|
|
||||||
// Set via GPU_DEVICE_ORDINAL in the environment
|
|
||||||
GpuDeviceOrdinal string
|
|
||||||
// Set via HSA_OVERRIDE_GFX_VERSION in the environment
|
|
||||||
HsaOverrideGfxVersion string
|
|
||||||
)
|
|
||||||
|
|
||||||
type EnvVar struct {
|
|
||||||
Name string
|
|
||||||
Value any
|
|
||||||
Description string
|
|
||||||
}
|
|
||||||
|
|
||||||
func AsMap() map[string]EnvVar {
|
|
||||||
ret := map[string]EnvVar{
|
|
||||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
|
||||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
|
|
||||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host, "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
|
||||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
|
|
||||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
|
|
||||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models per GPU"},
|
|
||||||
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
|
|
||||||
"OLLAMA_MODELS": {"OLLAMA_MODELS", ModelsDir, "The path to the models directory"},
|
|
||||||
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
|
|
||||||
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
|
|
||||||
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests"},
|
|
||||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
|
|
||||||
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
|
|
||||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread, "Always schedule model across all GPUs"},
|
|
||||||
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
|
|
||||||
}
|
|
||||||
if runtime.GOOS != "darwin" {
|
|
||||||
ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices, "Set which NVIDIA devices are visible"}
|
|
||||||
ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices, "Set which AMD devices are visible"}
|
|
||||||
ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
|
|
||||||
ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
|
|
||||||
ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
|
|
||||||
ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
func Values() map[string]string {
|
|
||||||
vals := make(map[string]string)
|
|
||||||
for k, v := range AsMap() {
|
|
||||||
vals[k] = fmt.Sprintf("%v", v.Value)
|
|
||||||
}
|
|
||||||
return vals
|
|
||||||
}
|
|
||||||
|
|
||||||
var defaultAllowOrigins = []string{
|
|
||||||
"localhost",
|
|
||||||
"127.0.0.1",
|
|
||||||
"0.0.0.0",
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean quotes and spaces from the value
|
|
||||||
func clean(key string) string {
|
|
||||||
return strings.Trim(os.Getenv(key), "\"' ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
// default values
|
|
||||||
NumParallel = 0 // Autoselect
|
|
||||||
MaxRunners = 0 // Autoselect
|
|
||||||
MaxQueuedRequests = 512
|
|
||||||
KeepAlive = 5 * time.Minute
|
|
||||||
|
|
||||||
LoadConfig()
|
|
||||||
}
|
|
||||||
|
|
||||||
func LoadConfig() {
|
|
||||||
if debug := clean("OLLAMA_DEBUG"); debug != "" {
|
|
||||||
d, err := strconv.ParseBool(debug)
|
|
||||||
if err == nil {
|
|
||||||
Debug = d
|
|
||||||
} else {
|
|
||||||
Debug = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
|
|
||||||
d, err := strconv.ParseBool(fa)
|
|
||||||
if err == nil {
|
|
||||||
FlashAttention = d
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
RunnersDir = clean("OLLAMA_RUNNERS_DIR")
|
|
||||||
if runtime.GOOS == "windows" && RunnersDir == "" {
|
|
||||||
// On Windows we do not carry the payloads inside the main executable
|
|
||||||
appExe, err := os.Executable()
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("failed to lookup executable path", "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
cwd, err := os.Getwd()
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("failed to lookup working directory", "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var paths []string
|
|
||||||
for _, root := range []string{filepath.Dir(appExe), cwd} {
|
|
||||||
paths = append(paths,
|
|
||||||
root,
|
|
||||||
filepath.Join(root, "windows-"+runtime.GOARCH),
|
|
||||||
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try a few variations to improve developer experience when building from source in the local tree
|
|
||||||
for _, p := range paths {
|
|
||||||
candidate := filepath.Join(p, "ollama_runners")
|
|
||||||
_, err := os.Stat(candidate)
|
|
||||||
if err == nil {
|
|
||||||
RunnersDir = candidate
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if RunnersDir == "" {
|
|
||||||
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TmpDir = clean("OLLAMA_TMPDIR")
|
|
||||||
|
|
||||||
LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
|
|
||||||
|
|
||||||
if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
|
|
||||||
val, err := strconv.Atoi(onp)
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("invalid setting, ignoring", "OLLAMA_NUM_PARALLEL", onp, "error", err)
|
|
||||||
} else {
|
|
||||||
NumParallel = val
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
|
|
||||||
NoHistory = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if spread := clean("OLLAMA_SCHED_SPREAD"); spread != "" {
|
|
||||||
s, err := strconv.ParseBool(spread)
|
|
||||||
if err == nil {
|
|
||||||
SchedSpread = s
|
|
||||||
} else {
|
|
||||||
SchedSpread = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
|
|
||||||
NoPrune = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if origins := clean("OLLAMA_ORIGINS"); origins != "" {
|
|
||||||
AllowOrigins = strings.Split(origins, ",")
|
|
||||||
}
|
|
||||||
for _, allowOrigin := range defaultAllowOrigins {
|
|
||||||
AllowOrigins = append(AllowOrigins,
|
|
||||||
fmt.Sprintf("http://%s", allowOrigin),
|
|
||||||
fmt.Sprintf("https://%s", allowOrigin),
|
|
||||||
fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
|
|
||||||
fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
AllowOrigins = append(AllowOrigins,
|
|
||||||
"app://*",
|
|
||||||
"file://*",
|
|
||||||
"tauri://*",
|
|
||||||
)
|
|
||||||
|
|
||||||
maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
|
|
||||||
if maxRunners != "" {
|
|
||||||
m, err := strconv.Atoi(maxRunners)
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("invalid setting, ignoring", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
|
|
||||||
} else {
|
|
||||||
MaxRunners = m
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
|
|
||||||
p, err := strconv.Atoi(onp)
|
|
||||||
if err != nil || p <= 0 {
|
|
||||||
slog.Error("invalid setting, ignoring", "OLLAMA_MAX_QUEUE", onp, "error", err)
|
|
||||||
} else {
|
|
||||||
MaxQueuedRequests = p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ka := clean("OLLAMA_KEEP_ALIVE")
|
|
||||||
if ka != "" {
|
|
||||||
loadKeepAlive(ka)
|
|
||||||
}
|
|
||||||
|
|
||||||
var err error
|
|
||||||
ModelsDir, err = getModelsDir()
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("invalid setting", "OLLAMA_MODELS", ModelsDir, "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
Host, err = getOllamaHost()
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
|
|
||||||
}
|
|
||||||
|
|
||||||
if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
|
|
||||||
IntelGpu = set
|
|
||||||
}
|
|
||||||
|
|
||||||
CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
|
|
||||||
HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
|
|
||||||
RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
|
|
||||||
GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
|
|
||||||
HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")
|
|
||||||
}
|
|
||||||
|
|
||||||
func getModelsDir() (string, error) {
|
|
||||||
if models, exists := os.LookupEnv("OLLAMA_MODELS"); exists {
|
|
||||||
return models, nil
|
|
||||||
}
|
|
||||||
home, err := os.UserHomeDir()
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return filepath.Join(home, ".ollama", "models"), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getOllamaHost() (*OllamaHost, error) {
|
|
||||||
defaultPort := "11434"
|
defaultPort := "11434"
|
||||||
|
|
||||||
hostVar := os.Getenv("OLLAMA_HOST")
|
s := strings.TrimSpace(Var("OLLAMA_HOST"))
|
||||||
hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
|
scheme, hostport, ok := strings.Cut(s, "://")
|
||||||
|
|
||||||
scheme, hostport, ok := strings.Cut(hostVar, "://")
|
|
||||||
switch {
|
switch {
|
||||||
case !ok:
|
case !ok:
|
||||||
scheme, hostport = "http", hostVar
|
scheme, hostport = "http", s
|
||||||
case scheme == "http":
|
case scheme == "http":
|
||||||
defaultPort = "80"
|
defaultPort = "80"
|
||||||
case scheme == "https":
|
case scheme == "https":
|
||||||
@@ -323,38 +43,242 @@ func getOllamaHost() (*OllamaHost, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
|
if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
|
||||||
return &OllamaHost{
|
slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
|
||||||
|
return &url.URL{
|
||||||
Scheme: scheme,
|
Scheme: scheme,
|
||||||
Host: host,
|
Host: net.JoinHostPort(host, defaultPort),
|
||||||
Port: defaultPort,
|
}
|
||||||
}, ErrInvalidHostPort
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &OllamaHost{
|
return &url.URL{
|
||||||
Scheme: scheme,
|
Scheme: scheme,
|
||||||
Host: host,
|
Host: net.JoinHostPort(host, port),
|
||||||
Port: port,
|
}
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadKeepAlive(ka string) {
|
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
|
||||||
v, err := strconv.Atoi(ka)
|
func Origins() (origins []string) {
|
||||||
|
if s := Var("OLLAMA_ORIGINS"); s != "" {
|
||||||
|
origins = strings.Split(s, ",")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
|
||||||
|
origins = append(origins,
|
||||||
|
fmt.Sprintf("http://%s", origin),
|
||||||
|
fmt.Sprintf("https://%s", origin),
|
||||||
|
fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
|
||||||
|
fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
origins = append(origins,
|
||||||
|
"app://*",
|
||||||
|
"file://*",
|
||||||
|
"tauri://*",
|
||||||
|
)
|
||||||
|
|
||||||
|
return origins
|
||||||
|
}
|
||||||
|
|
||||||
|
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
|
||||||
|
// Default is $HOME/.ollama/models
|
||||||
|
func Models() string {
|
||||||
|
if s := Var("OLLAMA_MODELS"); s != "" {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
home, err := os.UserHomeDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
d, err := time.ParseDuration(ka)
|
panic(err)
|
||||||
if err == nil {
|
}
|
||||||
if d < 0 {
|
|
||||||
KeepAlive = time.Duration(math.MaxInt64)
|
return filepath.Join(home, ".ollama", "models")
|
||||||
|
}
|
||||||
|
|
||||||
|
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
|
||||||
|
// Negative values are treated as infinite. Zero is treated as no keep alive.
|
||||||
|
// Default is 5 minutes.
|
||||||
|
func KeepAlive() (keepAlive time.Duration) {
|
||||||
|
keepAlive = 5 * time.Minute
|
||||||
|
if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
|
||||||
|
if d, err := time.ParseDuration(s); err == nil {
|
||||||
|
keepAlive = d
|
||||||
|
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||||
|
keepAlive = time.Duration(n) * time.Second
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if keepAlive < 0 {
|
||||||
|
return time.Duration(math.MaxInt64)
|
||||||
|
}
|
||||||
|
|
||||||
|
return keepAlive
|
||||||
|
}
|
||||||
|
|
||||||
|
func Bool(k string) func() bool {
|
||||||
|
return func() bool {
|
||||||
|
if s := Var(k); s != "" {
|
||||||
|
b, err := strconv.ParseBool(s)
|
||||||
|
if err != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Debug enables additional debug information.
|
||||||
|
Debug = Bool("OLLAMA_DEBUG")
|
||||||
|
// FlashAttention enables the experimental flash attention feature.
|
||||||
|
FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
|
||||||
|
// NoHistory disables readline history.
|
||||||
|
NoHistory = Bool("OLLAMA_NOHISTORY")
|
||||||
|
// NoPrune disables pruning of model blobs on startup.
|
||||||
|
NoPrune = Bool("OLLAMA_NOPRUNE")
|
||||||
|
// SchedSpread allows scheduling models across all GPUs.
|
||||||
|
SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
|
||||||
|
// IntelGPU enables experimental Intel GPU detection.
|
||||||
|
IntelGPU = Bool("OLLAMA_INTEL_GPU")
|
||||||
|
)
|
||||||
|
|
||||||
|
func String(s string) func() string {
|
||||||
|
return func() string {
|
||||||
|
return Var(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
LLMLibrary = String("OLLAMA_LLM_LIBRARY")
|
||||||
|
TmpDir = String("OLLAMA_TMPDIR")
|
||||||
|
|
||||||
|
CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
|
||||||
|
HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
|
||||||
|
RocrVisibleDevices = String("ROCR_VISIBLE_DEVICES")
|
||||||
|
GpuDeviceOrdinal = String("GPU_DEVICE_ORDINAL")
|
||||||
|
HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
|
||||||
|
)
|
||||||
|
|
||||||
|
func RunnersDir() (p string) {
|
||||||
|
if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
if runtime.GOOS != "windows" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if p == "" {
|
||||||
|
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// On Windows we do not carry the payloads inside the main executable
|
||||||
|
exe, err := os.Executable()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var paths []string
|
||||||
|
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
|
||||||
|
paths = append(paths,
|
||||||
|
root,
|
||||||
|
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
|
||||||
|
filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try a few variations to improve developer experience when building from source in the local tree
|
||||||
|
for _, path := range paths {
|
||||||
|
candidate := filepath.Join(path, "lib", "ollama", "runners")
|
||||||
|
if _, err := os.Stat(candidate); err == nil {
|
||||||
|
p = candidate
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func Uint(key string, defaultValue uint) func() uint {
|
||||||
|
return func() uint {
|
||||||
|
if s := Var(key); s != "" {
|
||||||
|
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
|
||||||
|
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
|
||||||
} else {
|
} else {
|
||||||
KeepAlive = d
|
return uint(n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
d := time.Duration(v) * time.Second
|
return defaultValue
|
||||||
if d < 0 {
|
|
||||||
KeepAlive = time.Duration(math.MaxInt64)
|
|
||||||
} else {
|
|
||||||
KeepAlive = d
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
|
||||||
|
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
|
||||||
|
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
|
||||||
|
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
|
||||||
|
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
|
||||||
|
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
|
||||||
|
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
|
||||||
|
MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
type EnvVar struct {
|
||||||
|
Name string
|
||||||
|
Value any
|
||||||
|
Description string
|
||||||
|
}
|
||||||
|
|
||||||
|
func AsMap() map[string]EnvVar {
|
||||||
|
ret := map[string]EnvVar{
|
||||||
|
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||||
|
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
|
||||||
|
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||||
|
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||||
|
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||||
|
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
||||||
|
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
||||||
|
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|
||||||
|
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
|
||||||
|
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
|
||||||
|
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
|
||||||
|
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
|
||||||
|
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
|
||||||
|
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||||
|
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
|
||||||
|
}
|
||||||
|
if runtime.GOOS != "darwin" {
|
||||||
|
ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
|
||||||
|
ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
|
||||||
|
ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
|
||||||
|
ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
|
||||||
|
ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
|
||||||
|
ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func Values() map[string]string {
|
||||||
|
vals := make(map[string]string)
|
||||||
|
for k, v := range AsMap() {
|
||||||
|
vals[k] = fmt.Sprintf("%v", v.Value)
|
||||||
|
}
|
||||||
|
return vals
|
||||||
|
}
|
||||||
|
|
||||||
|
// Var returns an environment variable stripped of leading and trailing quotes or spaces
|
||||||
|
func Var(key string) string {
|
||||||
|
return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,87 +1,234 @@
|
|||||||
package envconfig
|
package envconfig
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"math"
|
"math"
|
||||||
"net"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/google/go-cmp/cmp"
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestConfig(t *testing.T) {
|
func TestHost(t *testing.T) {
|
||||||
Debug = false // Reset whatever was loaded in init()
|
cases := map[string]struct {
|
||||||
t.Setenv("OLLAMA_DEBUG", "")
|
|
||||||
LoadConfig()
|
|
||||||
require.False(t, Debug)
|
|
||||||
t.Setenv("OLLAMA_DEBUG", "false")
|
|
||||||
LoadConfig()
|
|
||||||
require.False(t, Debug)
|
|
||||||
t.Setenv("OLLAMA_DEBUG", "1")
|
|
||||||
LoadConfig()
|
|
||||||
require.True(t, Debug)
|
|
||||||
t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
|
|
||||||
LoadConfig()
|
|
||||||
require.True(t, FlashAttention)
|
|
||||||
t.Setenv("OLLAMA_KEEP_ALIVE", "")
|
|
||||||
LoadConfig()
|
|
||||||
require.Equal(t, 5*time.Minute, KeepAlive)
|
|
||||||
t.Setenv("OLLAMA_KEEP_ALIVE", "3")
|
|
||||||
LoadConfig()
|
|
||||||
require.Equal(t, 3*time.Second, KeepAlive)
|
|
||||||
t.Setenv("OLLAMA_KEEP_ALIVE", "1h")
|
|
||||||
LoadConfig()
|
|
||||||
require.Equal(t, 1*time.Hour, KeepAlive)
|
|
||||||
t.Setenv("OLLAMA_KEEP_ALIVE", "-1s")
|
|
||||||
LoadConfig()
|
|
||||||
require.Equal(t, time.Duration(math.MaxInt64), KeepAlive)
|
|
||||||
t.Setenv("OLLAMA_KEEP_ALIVE", "-1")
|
|
||||||
LoadConfig()
|
|
||||||
require.Equal(t, time.Duration(math.MaxInt64), KeepAlive)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestClientFromEnvironment(t *testing.T) {
|
|
||||||
type testCase struct {
|
|
||||||
value string
|
value string
|
||||||
expect string
|
expect string
|
||||||
err error
|
}{
|
||||||
|
"empty": {"", "127.0.0.1:11434"},
|
||||||
|
"only address": {"1.2.3.4", "1.2.3.4:11434"},
|
||||||
|
"only port": {":1234", ":1234"},
|
||||||
|
"address and port": {"1.2.3.4:1234", "1.2.3.4:1234"},
|
||||||
|
"hostname": {"example.com", "example.com:11434"},
|
||||||
|
"hostname and port": {"example.com:1234", "example.com:1234"},
|
||||||
|
"zero port": {":0", ":0"},
|
||||||
|
"too large port": {":66000", ":11434"},
|
||||||
|
"too small port": {":-1", ":11434"},
|
||||||
|
"ipv6 localhost": {"[::1]", "[::1]:11434"},
|
||||||
|
"ipv6 world open": {"[::]", "[::]:11434"},
|
||||||
|
"ipv6 no brackets": {"::1", "[::1]:11434"},
|
||||||
|
"ipv6 + port": {"[::1]:1337", "[::1]:1337"},
|
||||||
|
"extra space": {" 1.2.3.4 ", "1.2.3.4:11434"},
|
||||||
|
"extra quotes": {"\"1.2.3.4\"", "1.2.3.4:11434"},
|
||||||
|
"extra space+quotes": {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
|
||||||
|
"extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
|
||||||
|
"http": {"http://1.2.3.4", "1.2.3.4:80"},
|
||||||
|
"http port": {"http://1.2.3.4:4321", "1.2.3.4:4321"},
|
||||||
|
"https": {"https://1.2.3.4", "1.2.3.4:443"},
|
||||||
|
"https port": {"https://1.2.3.4:4321", "1.2.3.4:4321"},
|
||||||
}
|
}
|
||||||
|
|
||||||
hostTestCases := map[string]*testCase{
|
for name, tt := range cases {
|
||||||
"empty": {value: "", expect: "127.0.0.1:11434"},
|
t.Run(name, func(t *testing.T) {
|
||||||
"only address": {value: "1.2.3.4", expect: "1.2.3.4:11434"},
|
t.Setenv("OLLAMA_HOST", tt.value)
|
||||||
"only port": {value: ":1234", expect: ":1234"},
|
if host := Host(); host.Host != tt.expect {
|
||||||
"address and port": {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
|
t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
|
||||||
"hostname": {value: "example.com", expect: "example.com:11434"},
|
}
|
||||||
"hostname and port": {value: "example.com:1234", expect: "example.com:1234"},
|
})
|
||||||
"zero port": {value: ":0", expect: ":0"},
|
}
|
||||||
"too large port": {value: ":66000", err: ErrInvalidHostPort},
|
}
|
||||||
"too small port": {value: ":-1", err: ErrInvalidHostPort},
|
|
||||||
"ipv6 localhost": {value: "[::1]", expect: "[::1]:11434"},
|
func TestOrigins(t *testing.T) {
|
||||||
"ipv6 world open": {value: "[::]", expect: "[::]:11434"},
|
cases := []struct {
|
||||||
"ipv6 no brackets": {value: "::1", expect: "[::1]:11434"},
|
value string
|
||||||
"ipv6 + port": {value: "[::1]:1337", expect: "[::1]:1337"},
|
expect []string
|
||||||
"extra space": {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
|
}{
|
||||||
"extra quotes": {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
|
{"", []string{
|
||||||
"extra space+quotes": {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
|
"http://localhost",
|
||||||
"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
|
"https://localhost",
|
||||||
}
|
"http://localhost:*",
|
||||||
|
"https://localhost:*",
|
||||||
for k, v := range hostTestCases {
|
"http://127.0.0.1",
|
||||||
t.Run(k, func(t *testing.T) {
|
"https://127.0.0.1",
|
||||||
t.Setenv("OLLAMA_HOST", v.value)
|
"http://127.0.0.1:*",
|
||||||
LoadConfig()
|
"https://127.0.0.1:*",
|
||||||
|
"http://0.0.0.0",
|
||||||
oh, err := getOllamaHost()
|
"https://0.0.0.0",
|
||||||
if err != v.err {
|
"http://0.0.0.0:*",
|
||||||
t.Fatalf("expected %s, got %s", v.err, err)
|
"https://0.0.0.0:*",
|
||||||
}
|
"app://*",
|
||||||
|
"file://*",
|
||||||
if err == nil {
|
"tauri://*",
|
||||||
host := net.JoinHostPort(oh.Host, oh.Port)
|
}},
|
||||||
assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
|
{"http://10.0.0.1", []string{
|
||||||
|
"http://10.0.0.1",
|
||||||
|
"http://localhost",
|
||||||
|
"https://localhost",
|
||||||
|
"http://localhost:*",
|
||||||
|
"https://localhost:*",
|
||||||
|
"http://127.0.0.1",
|
||||||
|
"https://127.0.0.1",
|
||||||
|
"http://127.0.0.1:*",
|
||||||
|
"https://127.0.0.1:*",
|
||||||
|
"http://0.0.0.0",
|
||||||
|
"https://0.0.0.0",
|
||||||
|
"http://0.0.0.0:*",
|
||||||
|
"https://0.0.0.0:*",
|
||||||
|
"app://*",
|
||||||
|
"file://*",
|
||||||
|
"tauri://*",
|
||||||
|
}},
|
||||||
|
{"http://172.16.0.1,https://192.168.0.1", []string{
|
||||||
|
"http://172.16.0.1",
|
||||||
|
"https://192.168.0.1",
|
||||||
|
"http://localhost",
|
||||||
|
"https://localhost",
|
||||||
|
"http://localhost:*",
|
||||||
|
"https://localhost:*",
|
||||||
|
"http://127.0.0.1",
|
||||||
|
"https://127.0.0.1",
|
||||||
|
"http://127.0.0.1:*",
|
||||||
|
"https://127.0.0.1:*",
|
||||||
|
"http://0.0.0.0",
|
||||||
|
"https://0.0.0.0",
|
||||||
|
"http://0.0.0.0:*",
|
||||||
|
"https://0.0.0.0:*",
|
||||||
|
"app://*",
|
||||||
|
"file://*",
|
||||||
|
"tauri://*",
|
||||||
|
}},
|
||||||
|
{"http://totally.safe,http://definitely.legit", []string{
|
||||||
|
"http://totally.safe",
|
||||||
|
"http://definitely.legit",
|
||||||
|
"http://localhost",
|
||||||
|
"https://localhost",
|
||||||
|
"http://localhost:*",
|
||||||
|
"https://localhost:*",
|
||||||
|
"http://127.0.0.1",
|
||||||
|
"https://127.0.0.1",
|
||||||
|
"http://127.0.0.1:*",
|
||||||
|
"https://127.0.0.1:*",
|
||||||
|
"http://0.0.0.0",
|
||||||
|
"https://0.0.0.0",
|
||||||
|
"http://0.0.0.0:*",
|
||||||
|
"https://0.0.0.0:*",
|
||||||
|
"app://*",
|
||||||
|
"file://*",
|
||||||
|
"tauri://*",
|
||||||
|
}},
|
||||||
|
}
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.value, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_ORIGINS", tt.value)
|
||||||
|
|
||||||
|
if diff := cmp.Diff(Origins(), tt.expect); diff != "" {
|
||||||
|
t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBool(t *testing.T) {
|
||||||
|
cases := map[string]bool{
|
||||||
|
"": false,
|
||||||
|
"true": true,
|
||||||
|
"false": false,
|
||||||
|
"1": true,
|
||||||
|
"0": false,
|
||||||
|
// invalid values
|
||||||
|
"random": true,
|
||||||
|
"something": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range cases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_BOOL", k)
|
||||||
|
if b := Bool("OLLAMA_BOOL")(); b != v {
|
||||||
|
t.Errorf("%s: expected %t, got %t", k, v, b)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUint(t *testing.T) {
|
||||||
|
cases := map[string]uint{
|
||||||
|
"0": 0,
|
||||||
|
"1": 1,
|
||||||
|
"1337": 1337,
|
||||||
|
// default values
|
||||||
|
"": 11434,
|
||||||
|
"-1": 11434,
|
||||||
|
"0o10": 11434,
|
||||||
|
"0x10": 11434,
|
||||||
|
"string": 11434,
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range cases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_UINT", k)
|
||||||
|
if i := Uint("OLLAMA_UINT", 11434)(); i != v {
|
||||||
|
t.Errorf("%s: expected %d, got %d", k, v, i)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestKeepAlive(t *testing.T) {
|
||||||
|
cases := map[string]time.Duration{
|
||||||
|
"": 5 * time.Minute,
|
||||||
|
"1s": time.Second,
|
||||||
|
"1m": time.Minute,
|
||||||
|
"1h": time.Hour,
|
||||||
|
"5m0s": 5 * time.Minute,
|
||||||
|
"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
|
||||||
|
"0": time.Duration(0),
|
||||||
|
"60": 60 * time.Second,
|
||||||
|
"120": 2 * time.Minute,
|
||||||
|
"3600": time.Hour,
|
||||||
|
"-0": time.Duration(0),
|
||||||
|
"-1": time.Duration(math.MaxInt64),
|
||||||
|
"-1m": time.Duration(math.MaxInt64),
|
||||||
|
// invalid values
|
||||||
|
" ": 5 * time.Minute,
|
||||||
|
"???": 5 * time.Minute,
|
||||||
|
"1d": 5 * time.Minute,
|
||||||
|
"1y": 5 * time.Minute,
|
||||||
|
"1w": 5 * time.Minute,
|
||||||
|
}
|
||||||
|
|
||||||
|
for tt, expect := range cases {
|
||||||
|
t.Run(tt, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_KEEP_ALIVE", tt)
|
||||||
|
if actual := KeepAlive(); actual != expect {
|
||||||
|
t.Errorf("%s: expected %s, got %s", tt, expect, actual)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVar(t *testing.T) {
|
||||||
|
cases := map[string]string{
|
||||||
|
"value": "value",
|
||||||
|
" value ": "value",
|
||||||
|
" 'value' ": "value",
|
||||||
|
` "value" `: "value",
|
||||||
|
" ' value ' ": " value ",
|
||||||
|
` " value " `: " value ",
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range cases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
t.Setenv("OLLAMA_VAR", k)
|
||||||
|
if s := Var("OLLAMA_VAR"); s != v {
|
||||||
|
t.Errorf("%s: expected %q, got %q", k, v, s)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package format
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
|
|||||||
case b >= Thousand:
|
case b >= Thousand:
|
||||||
return fmt.Sprintf("%.0fK", float64(b)/Thousand)
|
return fmt.Sprintf("%.0fK", float64(b)/Thousand)
|
||||||
default:
|
default:
|
||||||
return fmt.Sprintf("%d", b)
|
return strconv.FormatUint(b, 10)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -1,6 +1,6 @@
|
|||||||
module github.com/ollama/ollama
|
module github.com/ollama/ollama
|
||||||
|
|
||||||
go 1.22.0
|
go 1.22.5
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/containerd/console v1.0.3
|
github.com/containerd/console v1.0.3
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"errors"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
|
|||||||
// Installer payload location if we're running the installed binary
|
// Installer payload location if we're running the installed binary
|
||||||
exe, err := os.Executable()
|
exe, err := os.Executable()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
|
rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
|
||||||
if rocmLibUsable(rocmTargetDir) {
|
if rocmLibUsable(rocmTargetDir) {
|
||||||
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
|
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
|
||||||
return rocmTargetDir, nil
|
return rocmTargetDir, nil
|
||||||
@@ -95,5 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -76,7 +77,7 @@ func (hl *HipLib) Release() {
|
|||||||
|
|
||||||
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||||
if hl.dll == 0 {
|
if hl.dll == 0 {
|
||||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
return 0, 0, errors.New("dll has been unloaded")
|
||||||
}
|
}
|
||||||
var version int
|
var version int
|
||||||
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
||||||
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
|
|||||||
|
|
||||||
func (hl *HipLib) HipSetDevice(device int) error {
|
func (hl *HipLib) HipSetDevice(device int) error {
|
||||||
if hl.dll == 0 {
|
if hl.dll == 0 {
|
||||||
return fmt.Errorf("dll has been unloaded")
|
return errors.New("dll has been unloaded")
|
||||||
}
|
}
|
||||||
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
||||||
if status != hipSuccess {
|
if status != hipSuccess {
|
||||||
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
|
|||||||
|
|
||||||
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
||||||
if hl.dll == 0 {
|
if hl.dll == 0 {
|
||||||
return nil, fmt.Errorf("dll has been unloaded")
|
return nil, errors.New("dll has been unloaded")
|
||||||
}
|
}
|
||||||
var props hipDevicePropMinimal
|
var props hipDevicePropMinimal
|
||||||
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
||||||
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
|
|||||||
// free, total, err
|
// free, total, err
|
||||||
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
||||||
if hl.dll == 0 {
|
if hl.dll == 0 {
|
||||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
return 0, 0, errors.New("dll has been unloaded")
|
||||||
}
|
}
|
||||||
var totalMemory uint64
|
var totalMemory uint64
|
||||||
var freeMemory uint64
|
var freeMemory uint64
|
||||||
|
|||||||
@@ -61,9 +61,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
|||||||
|
|
||||||
// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
|
// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
|
||||||
var visibleDevices []string
|
var visibleDevices []string
|
||||||
hipVD := envconfig.HipVisibleDevices // zero based index only
|
hipVD := envconfig.HipVisibleDevices() // zero based index only
|
||||||
rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID
|
rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
|
||||||
gpuDO := envconfig.GpuDeviceOrdinal // zero based index
|
gpuDO := envconfig.GpuDeviceOrdinal() // zero based index
|
||||||
switch {
|
switch {
|
||||||
// TODO is this priorty order right?
|
// TODO is this priorty order right?
|
||||||
case hipVD != "":
|
case hipVD != "":
|
||||||
@@ -76,7 +76,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
|||||||
visibleDevices = strings.Split(gpuDO, ",")
|
visibleDevices = strings.Split(gpuDO, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
gfxOverride := envconfig.HsaOverrideGfxVersion
|
gfxOverride := envconfig.HsaOverrideGfxVersion()
|
||||||
var supported []string
|
var supported []string
|
||||||
libDir := ""
|
libDir := ""
|
||||||
|
|
||||||
@@ -393,7 +393,7 @@ func AMDValidateLibDir() (string, error) {
|
|||||||
|
|
||||||
// If we still haven't found a usable rocm, the user will have to install it on their own
|
// If we still haven't found a usable rocm, the user will have to install it on their own
|
||||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
|
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
|
||||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||||
}
|
}
|
||||||
|
|
||||||
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package gpu
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"errors"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -53,7 +53,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var supported []string
|
var supported []string
|
||||||
gfxOverride := envconfig.HsaOverrideGfxVersion
|
gfxOverride := envconfig.HsaOverrideGfxVersion()
|
||||||
if gfxOverride == "" {
|
if gfxOverride == "" {
|
||||||
supported, err = GetSupportedGFX(libDir)
|
supported, err = GetSupportedGFX(libDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -85,7 +85,7 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
|||||||
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
||||||
gfx := string(props.GcnArchName[:n])
|
gfx := string(props.GcnArchName[:n])
|
||||||
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
|
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
|
||||||
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||||
// TODO Why isn't props.iGPU accurate!?
|
// TODO Why isn't props.iGPU accurate!?
|
||||||
if strings.EqualFold(name, iGPUName) {
|
if strings.EqualFold(name, iGPUName) {
|
||||||
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
|
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
|
||||||
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
|
|||||||
// Installer payload (if we're running from some other location)
|
// Installer payload (if we're running from some other location)
|
||||||
localAppData := os.Getenv("LOCALAPPDATA")
|
localAppData := os.Getenv("LOCALAPPDATA")
|
||||||
appDir := filepath.Join(localAppData, "Programs", "Ollama")
|
appDir := filepath.Join(localAppData, "Programs", "Ollama")
|
||||||
rocmTargetDir := filepath.Join(appDir, "rocm")
|
rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
|
||||||
if rocmLibUsable(rocmTargetDir) {
|
if rocmLibUsable(rocmTargetDir) {
|
||||||
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
|
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
|
||||||
return rocmTargetDir, nil
|
return rocmTargetDir, nil
|
||||||
@@ -161,7 +161,7 @@ func AMDValidateLibDir() (string, error) {
|
|||||||
|
|
||||||
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
||||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
||||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
return "", errors.New("no suitable rocm found, falling back to CPU")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ func PayloadsDir() (string, error) {
|
|||||||
defer lock.Unlock()
|
defer lock.Unlock()
|
||||||
var err error
|
var err error
|
||||||
if payloadsDir == "" {
|
if payloadsDir == "" {
|
||||||
runnersDir := envconfig.RunnersDir
|
runnersDir := envconfig.RunnersDir()
|
||||||
|
|
||||||
if runnersDir != "" {
|
if runnersDir != "" {
|
||||||
payloadsDir = runnersDir
|
payloadsDir = runnersDir
|
||||||
@@ -35,27 +35,23 @@ func PayloadsDir() (string, error) {
|
|||||||
|
|
||||||
// The remainder only applies on non-windows where we still carry payloads in the main executable
|
// The remainder only applies on non-windows where we still carry payloads in the main executable
|
||||||
cleanupTmpDirs()
|
cleanupTmpDirs()
|
||||||
tmpDir := envconfig.TmpDir
|
tmpDir := envconfig.TmpDir()
|
||||||
if tmpDir == "" {
|
if tmpDir == "" {
|
||||||
tmpDir, err = os.MkdirTemp("", "ollama")
|
tmpDir, err = os.MkdirTemp("", "ollama")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
err = os.MkdirAll(tmpDir, 0755)
|
err = os.MkdirAll(tmpDir, 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
|
return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track our pid so we can clean up orphaned tmpdirs
|
// Track our pid so we can clean up orphaned tmpdirs
|
||||||
pidFilePath := filepath.Join(tmpDir, "ollama.pid")
|
n := filepath.Join(tmpDir, "ollama.pid")
|
||||||
pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
|
if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
|
||||||
if err != nil {
|
return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We create a distinct subdirectory for payloads within the tmpdir
|
// We create a distinct subdirectory for payloads within the tmpdir
|
||||||
@@ -67,37 +63,44 @@ func PayloadsDir() (string, error) {
|
|||||||
|
|
||||||
// Best effort to clean up prior tmpdirs
|
// Best effort to clean up prior tmpdirs
|
||||||
func cleanupTmpDirs() {
|
func cleanupTmpDirs() {
|
||||||
dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
|
matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, d := range dirs {
|
|
||||||
info, err := os.Stat(d)
|
for _, match := range matches {
|
||||||
if err != nil || !info.IsDir() {
|
raw, err := os.ReadFile(match)
|
||||||
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
|
slog.Debug("not a ollama runtime directory, skipping", "path", match)
|
||||||
continue
|
continue
|
||||||
}
|
} else if err != nil {
|
||||||
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
|
slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
|
||||||
if err != nil {
|
|
||||||
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
|
|
||||||
// No pid, ignore this tmpdir
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
pid, err := strconv.Atoi(string(raw))
|
pid, err := strconv.Atoi(string(raw))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("failed to parse pid", "path", d, "error", err)
|
slog.Warn("invalid pid, skipping", "path", match, "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
proc, err := os.FindProcess(pid)
|
p, err := os.FindProcess(pid)
|
||||||
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||||
slog.Warn("found running ollama", "pid", pid, "path", d)
|
slog.Warn("process still running, skipping", "pid", pid, "path", match)
|
||||||
// Another running ollama, ignore this tmpdir
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.Remove(d); err != nil {
|
if err := os.Remove(match); err != nil {
|
||||||
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
|
slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
runners := filepath.Join(filepath.Dir(match), "runners")
|
||||||
|
if err := os.RemoveAll(runners); err != nil {
|
||||||
|
slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.Remove(filepath.Dir(match)); err != nil {
|
||||||
|
slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -105,7 +108,7 @@ func cleanupTmpDirs() {
|
|||||||
func Cleanup() {
|
func Cleanup() {
|
||||||
lock.Lock()
|
lock.Lock()
|
||||||
defer lock.Unlock()
|
defer lock.Unlock()
|
||||||
runnersDir := envconfig.RunnersDir
|
runnersDir := envconfig.RunnersDir()
|
||||||
if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
|
if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
|
||||||
// We want to fully clean up the tmpdir parent of the payloads dir
|
// We want to fully clean up the tmpdir parent of the payloads dir
|
||||||
tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
|
tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
package gpu
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/sys/cpu"
|
"golang.org/x/sys/cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
|
|||||||
// else LCD
|
// else LCD
|
||||||
return CPUCapabilityNone
|
return CPUCapabilityNone
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func IsNUMA() bool {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
// numa support in llama.cpp is linux only
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
ids := map[string]interface{}{}
|
||||||
|
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
|
||||||
|
for _, packageId := range packageIds {
|
||||||
|
id, err := os.ReadFile(packageId)
|
||||||
|
if err == nil {
|
||||||
|
ids[strings.TrimSpace(string(id))] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(ids) > 1
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,9 +4,17 @@ package gpu
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
|
||||||
|
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||||
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
|
||||||
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
||||||
ids := []string{}
|
ids := []string{}
|
||||||
for _, info := range gpuInfo {
|
for _, info := range gpuInfo {
|
||||||
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
|||||||
}
|
}
|
||||||
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
|
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||||
|
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||||
|
if CudaTegra != "" {
|
||||||
|
ver := strings.Split(CudaTegra, ".")
|
||||||
|
if len(ver) > 0 {
|
||||||
|
return "jetpack" + ver[0]
|
||||||
|
}
|
||||||
|
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
|
||||||
|
r := regexp.MustCompile(` R(\d+) `)
|
||||||
|
m := r.FindSubmatch(data)
|
||||||
|
if len(m) != 2 {
|
||||||
|
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
|
||||||
|
} else {
|
||||||
|
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
|
||||||
|
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
|
||||||
|
// https://developer.nvidia.com/embedded/jetpack-archive
|
||||||
|
switch l4t {
|
||||||
|
case 35:
|
||||||
|
return "jetpack5"
|
||||||
|
case 36:
|
||||||
|
return "jetpack6"
|
||||||
|
default:
|
||||||
|
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 {
|
||||||
|
return "v11"
|
||||||
|
}
|
||||||
|
return "v12"
|
||||||
|
}
|
||||||
|
|||||||
144
gpu/gpu.go
144
gpu/gpu.go
@@ -7,9 +7,9 @@ package gpu
|
|||||||
#cgo windows LDFLAGS: -lpthread
|
#cgo windows LDFLAGS: -lpthread
|
||||||
|
|
||||||
#include "gpu_info.h"
|
#include "gpu_info.h"
|
||||||
|
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -64,13 +64,8 @@ var RocmComputeMin = 9
|
|||||||
// TODO find a better way to detect iGPU instead of minimum memory
|
// TODO find a better way to detect iGPU instead of minimum memory
|
||||||
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
|
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
|
||||||
|
|
||||||
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
|
|
||||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
|
||||||
|
|
||||||
// Note: gpuMutex must already be held
|
// Note: gpuMutex must already be held
|
||||||
func initCudaHandles() *cudaHandles {
|
func initCudaHandles() *cudaHandles {
|
||||||
|
|
||||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||||
|
|
||||||
cHandles := &cudaHandles{}
|
cHandles := &cudaHandles{}
|
||||||
@@ -211,14 +206,16 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("error looking up system memory", "error", err)
|
slog.Warn("error looking up system memory", "error", err)
|
||||||
}
|
}
|
||||||
cpus = []CPUInfo{CPUInfo{
|
cpus = []CPUInfo{
|
||||||
GpuInfo: GpuInfo{
|
{
|
||||||
memInfo: mem,
|
GpuInfo: GpuInfo{
|
||||||
Library: "cpu",
|
memInfo: mem,
|
||||||
Variant: cpuCapability,
|
Library: "cpu",
|
||||||
ID: "0",
|
Variant: cpuCapability.String(),
|
||||||
|
ID: "0",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}}
|
}
|
||||||
|
|
||||||
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
// Fallback to CPU mode if we're lacking required vector extensions on x86
|
||||||
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
|
||||||
@@ -228,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
return GpuInfoList{cpus[0].GpuInfo}
|
return GpuInfoList{cpus[0].GpuInfo}
|
||||||
}
|
}
|
||||||
|
|
||||||
// On windows we bundle the nvidia library one level above the runner dir
|
depPath := LibraryDir()
|
||||||
depPath := ""
|
|
||||||
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
|
|
||||||
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load ALL libraries
|
// Load ALL libraries
|
||||||
cHandles = initCudaHandles()
|
cHandles = initCudaHandles()
|
||||||
@@ -268,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
||||||
|
gpuInfo.computeMajor = int(memInfo.major)
|
||||||
|
gpuInfo.computeMinor = int(memInfo.minor)
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
gpuInfo.DependencyPath = depPath
|
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
|
||||||
gpuInfo.DriverMajor = driverMajor
|
gpuInfo.DriverMajor = driverMajor
|
||||||
gpuInfo.DriverMinor = driverMinor
|
gpuInfo.DriverMinor = driverMinor
|
||||||
|
variant := cudaVariant(gpuInfo)
|
||||||
|
if depPath != "" {
|
||||||
|
gpuInfo.DependencyPath = depPath
|
||||||
|
// Check for variant specific directory
|
||||||
|
if variant != "" {
|
||||||
|
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
|
||||||
|
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
|
gpuInfo.Variant = variant
|
||||||
|
|
||||||
// query the management library as well so we can record any skew between the two
|
// query the management library as well so we can record any skew between the two
|
||||||
// which represents overhead on the GPU we must set aside on subsequent updates
|
// which represents overhead on the GPU we must set aside on subsequent updates
|
||||||
@@ -302,40 +307,36 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Intel
|
// Intel
|
||||||
if envconfig.IntelGpu {
|
if envconfig.IntelGPU() {
|
||||||
oHandles = initOneAPIHandles()
|
oHandles = initOneAPIHandles()
|
||||||
// On windows we bundle the oneapi library one level above the runner dir
|
if oHandles != nil && oHandles.oneapi != nil {
|
||||||
depPath = ""
|
for d := range oHandles.oneapi.num_drivers {
|
||||||
if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
|
if oHandles.oneapi == nil {
|
||||||
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
|
// shouldn't happen
|
||||||
}
|
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
|
||||||
|
continue
|
||||||
for d := range oHandles.oneapi.num_drivers {
|
}
|
||||||
if oHandles.oneapi == nil {
|
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
|
||||||
// shouldn't happen
|
for i := range devCount {
|
||||||
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
|
gpuInfo := OneapiGPUInfo{
|
||||||
continue
|
GpuInfo: GpuInfo{
|
||||||
}
|
Library: "oneapi",
|
||||||
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
|
},
|
||||||
for i := range devCount {
|
driverIndex: int(d),
|
||||||
gpuInfo := OneapiGPUInfo{
|
gpuIndex: int(i),
|
||||||
GpuInfo: GpuInfo{
|
}
|
||||||
Library: "oneapi",
|
// TODO - split bootstrapping from updating free memory
|
||||||
},
|
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
|
||||||
driverIndex: int(d),
|
// TODO - convert this to MinimumMemory based on testing...
|
||||||
gpuIndex: int(i),
|
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
||||||
|
memInfo.free = C.uint64_t(totalFreeMem)
|
||||||
|
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
|
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
|
gpuInfo.DependencyPath = depPath
|
||||||
|
oneapiGPUs = append(oneapiGPUs, gpuInfo)
|
||||||
}
|
}
|
||||||
// TODO - split bootstrapping from updating free memory
|
|
||||||
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
|
|
||||||
// TODO - convert this to MinimumMemory based on testing...
|
|
||||||
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
|
||||||
memInfo.free = C.uint64_t(totalFreeMem)
|
|
||||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
|
||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
|
||||||
gpuInfo.DependencyPath = depPath
|
|
||||||
oneapiGPUs = append(oneapiGPUs, gpuInfo)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -463,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
|
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
|
||||||
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
|
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
|
||||||
var ldPaths []string
|
var ldPaths []string
|
||||||
var patterns []string
|
|
||||||
gpuLibPaths := []string{}
|
gpuLibPaths := []string{}
|
||||||
slog.Debug("Searching for GPU library", "name", baseLibName)
|
slog.Debug("Searching for GPU library", "name", baseLibName)
|
||||||
|
|
||||||
|
// Start with our bundled libraries
|
||||||
|
patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
|
||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "windows":
|
case "windows":
|
||||||
ldPaths = strings.Split(os.Getenv("PATH"), ";")
|
ldPaths = strings.Split(os.Getenv("PATH"), ";")
|
||||||
@@ -475,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
|
|||||||
default:
|
default:
|
||||||
return gpuLibPaths
|
return gpuLibPaths
|
||||||
}
|
}
|
||||||
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
|
|
||||||
|
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
|
||||||
for _, ldPath := range ldPaths {
|
for _, ldPath := range ldPaths {
|
||||||
d, err := filepath.Abs(ldPath)
|
d, err := filepath.Abs(ldPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
|
patterns = append(patterns, filepath.Join(d, baseLibName))
|
||||||
}
|
}
|
||||||
patterns = append(patterns, defaultPatterns...)
|
patterns = append(patterns, defaultPatterns...)
|
||||||
slog.Debug("gpu library search", "globs", patterns)
|
slog.Debug("gpu library search", "globs", patterns)
|
||||||
@@ -611,7 +615,7 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getVerboseState() C.uint16_t {
|
func getVerboseState() C.uint16_t {
|
||||||
if envconfig.Debug {
|
if envconfig.Debug() {
|
||||||
return C.uint16_t(1)
|
return C.uint16_t(1)
|
||||||
}
|
}
|
||||||
return C.uint16_t(0)
|
return C.uint16_t(0)
|
||||||
@@ -637,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
|||||||
return "", ""
|
return "", ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func LibraryDir() string {
|
||||||
|
// On Windows/linux we bundle the dependencies at the same level as the executable
|
||||||
|
appExe, err := os.Executable()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to lookup executable path", "error", err)
|
||||||
|
}
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("failed to lookup working directory", "error", err)
|
||||||
|
}
|
||||||
|
// Scan for any of our dependeices, and pick first match
|
||||||
|
for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
|
||||||
|
libDep := filepath.Join("lib", "ollama")
|
||||||
|
if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
|
||||||
|
return filepath.Join(root, libDep)
|
||||||
|
}
|
||||||
|
// Developer mode, local build
|
||||||
|
if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
|
||||||
|
return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
|
||||||
|
return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slog.Warn("unable to locate gpu dependency libraries")
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ package gpu
|
|||||||
#include "gpu_info_darwin.h"
|
#include "gpu_info_darwin.h"
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"runtime"
|
"runtime"
|
||||||
|
|
||||||
@@ -24,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
|
|||||||
return []GpuInfo{
|
return []GpuInfo{
|
||||||
{
|
{
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: GetCPUCapability(),
|
Variant: GetCPUCapability().String(),
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -47,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
|
|||||||
return []GpuInfo{
|
return []GpuInfo{
|
||||||
{
|
{
|
||||||
Library: "cpu",
|
Library: "cpu",
|
||||||
Variant: GetCPUCapability(),
|
Variant: GetCPUCapability().String(),
|
||||||
memInfo: mem,
|
memInfo: mem,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -67,4 +67,4 @@ void cpu_check_ram(mem_info_t *resp);
|
|||||||
#include "gpu_info_oneapi.h"
|
#include "gpu_info_oneapi.h"
|
||||||
|
|
||||||
#endif // __GPU_INFO_H__
|
#endif // __GPU_INFO_H__
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
||||||
|
|||||||
@@ -43,10 +43,12 @@ var OneapiGlobs = []string{
|
|||||||
"/usr/lib*/libze_intel_gpu.so*",
|
"/usr/lib*/libze_intel_gpu.so*",
|
||||||
}
|
}
|
||||||
|
|
||||||
var CudartMgmtName = "libcudart.so*"
|
var (
|
||||||
var NvcudaMgmtName = "libcuda.so*"
|
CudartMgmtName = "libcudart.so*"
|
||||||
var NvmlMgmtName = "" // not currently wired on linux
|
NvcudaMgmtName = "libcuda.so*"
|
||||||
var OneapiMgmtName = "libze_intel_gpu.so"
|
NvmlMgmtName = "" // not currently wired on linux
|
||||||
|
OneapiMgmtName = "libze_intel_gpu.so*"
|
||||||
|
)
|
||||||
|
|
||||||
func GetCPUMem() (memInfo, error) {
|
func GetCPUMem() (memInfo, error) {
|
||||||
var mem memInfo
|
var mem memInfo
|
||||||
|
|||||||
@@ -32,4 +32,29 @@ func TestCPUMemInfo(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestByLibrary(t *testing.T) {
|
||||||
|
type testCase struct {
|
||||||
|
input []GpuInfo
|
||||||
|
expect int
|
||||||
|
}
|
||||||
|
|
||||||
|
testCases := map[string]*testCase{
|
||||||
|
"empty": {input: []GpuInfo{}, expect: 0},
|
||||||
|
"cpu": {input: []GpuInfo{{Library: "cpu"}}, expect: 1},
|
||||||
|
"cpu + GPU": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}}, expect: 2},
|
||||||
|
"cpu + 2 GPU no variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}, {Library: "cuda"}}, expect: 2},
|
||||||
|
"cpu + 2 GPU same variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v11"}}, expect: 2},
|
||||||
|
"cpu + 2 GPU diff variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v12"}}, expect: 3},
|
||||||
|
}
|
||||||
|
|
||||||
|
for k, v := range testCases {
|
||||||
|
t.Run(k, func(t *testing.T) {
|
||||||
|
resp := (GpuInfoList)(v.input).ByLibrary()
|
||||||
|
if len(resp) != v.expect {
|
||||||
|
t.Fatalf("expected length %d, got %d => %+v", v.expect, len(resp), resp)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
||||||
|
|||||||
@@ -40,10 +40,12 @@ var OneapiGlobs = []string{
|
|||||||
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
|
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
|
||||||
}
|
}
|
||||||
|
|
||||||
var CudartMgmtName = "cudart64_*.dll"
|
var (
|
||||||
var NvcudaMgmtName = "nvcuda.dll"
|
CudartMgmtName = "cudart64_*.dll"
|
||||||
var NvmlMgmtName = "nvml.dll"
|
NvcudaMgmtName = "nvcuda.dll"
|
||||||
var OneapiMgmtName = "ze_intel_gpu64.dll"
|
NvmlMgmtName = "nvml.dll"
|
||||||
|
OneapiMgmtName = "ze_intel_gpu64.dll"
|
||||||
|
)
|
||||||
|
|
||||||
func GetCPUMem() (memInfo, error) {
|
func GetCPUMem() (memInfo, error) {
|
||||||
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
|
||||||
|
|||||||
15
gpu/types.go
15
gpu/types.go
@@ -19,7 +19,7 @@ type GpuInfo struct {
|
|||||||
Library string `json:"library,omitempty"`
|
Library string `json:"library,omitempty"`
|
||||||
|
|
||||||
// Optional variant to select (e.g. versions, cpu feature flags)
|
// Optional variant to select (e.g. versions, cpu feature flags)
|
||||||
Variant CPUCapability `json:"variant"`
|
Variant string `json:"variant"`
|
||||||
|
|
||||||
// MinimumMemory represents the minimum memory required to use the GPU
|
// MinimumMemory represents the minimum memory required to use the GPU
|
||||||
MinimumMemory uint64 `json:"-"`
|
MinimumMemory uint64 `json:"-"`
|
||||||
@@ -53,8 +53,10 @@ type CPUInfo struct {
|
|||||||
|
|
||||||
type CudaGPUInfo struct {
|
type CudaGPUInfo struct {
|
||||||
GpuInfo
|
GpuInfo
|
||||||
OSOverhead uint64 // Memory overhead between the driver library and management library
|
OSOverhead uint64 // Memory overhead between the driver library and management library
|
||||||
index int //nolint:unused,nolintlint
|
index int //nolint:unused,nolintlint
|
||||||
|
computeMajor int //nolint:unused,nolintlint
|
||||||
|
computeMinor int //nolint:unused,nolintlint
|
||||||
}
|
}
|
||||||
type CudaGPUInfoList []CudaGPUInfo
|
type CudaGPUInfoList []CudaGPUInfo
|
||||||
|
|
||||||
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
|
|||||||
for _, info := range l {
|
for _, info := range l {
|
||||||
found := false
|
found := false
|
||||||
requested := info.Library
|
requested := info.Library
|
||||||
if info.Variant != CPUCapabilityNone {
|
if info.Variant != CPUCapabilityNone.String() {
|
||||||
requested += "_" + info.Variant.String()
|
requested += "_" + info.Variant
|
||||||
}
|
}
|
||||||
for i, lib := range libs {
|
for i, lib := range libs {
|
||||||
if lib == requested {
|
if lib == requested {
|
||||||
@@ -92,7 +94,7 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !found {
|
if !found {
|
||||||
libs = append(libs, info.Library)
|
libs = append(libs, requested)
|
||||||
resp = append(resp, []GpuInfo{info})
|
resp = append(resp, []GpuInfo{info})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
|
|||||||
slog.Info("inference compute",
|
slog.Info("inference compute",
|
||||||
"id", g.ID,
|
"id", g.ID,
|
||||||
"library", g.Library,
|
"library", g.Library,
|
||||||
|
"variant", g.Variant,
|
||||||
"compute", g.Compute,
|
"compute", g.Compute,
|
||||||
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
|
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
|
||||||
"name", g.Name,
|
"name", g.Name,
|
||||||
|
|||||||
@@ -45,14 +45,7 @@ func TestUnicodeModelDir(t *testing.T) {
|
|||||||
defer os.RemoveAll(modelDir)
|
defer os.RemoveAll(modelDir)
|
||||||
slog.Info("unicode", "OLLAMA_MODELS", modelDir)
|
slog.Info("unicode", "OLLAMA_MODELS", modelDir)
|
||||||
|
|
||||||
oldModelsDir := os.Getenv("OLLAMA_MODELS")
|
t.Setenv("OLLAMA_MODELS", modelDir)
|
||||||
if oldModelsDir == "" {
|
|
||||||
defer os.Unsetenv("OLLAMA_MODELS")
|
|
||||||
} else {
|
|
||||||
defer os.Setenv("OLLAMA_MODELS", oldModelsDir)
|
|
||||||
}
|
|
||||||
err = os.Setenv("OLLAMA_MODELS", modelDir)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|||||||
@@ -11,8 +11,10 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/format"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMultiModelConcurrency(t *testing.T) {
|
func TestMultiModelConcurrency(t *testing.T) {
|
||||||
@@ -39,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
resp = [2][]string{
|
resp = [2][]string{
|
||||||
[]string{"sunlight"},
|
{"sunlight"},
|
||||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
@@ -69,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
|
|||||||
reqLimit := len(req)
|
reqLimit := len(req)
|
||||||
iterLimit := 5
|
iterLimit := 5
|
||||||
|
|
||||||
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
|
if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
|
||||||
if vram != "" {
|
maxVram, err := strconv.ParseUint(s, 10, 64)
|
||||||
max, err := strconv.ParseUint(vram, 10, 64)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
// Don't hammer on small VRAM cards...
|
// Don't hammer on small VRAM cards...
|
||||||
if max < 4*1024*1024*1024 {
|
if maxVram < 4*format.GibiByte {
|
||||||
reqLimit = min(reqLimit, 2)
|
reqLimit = min(reqLimit, 2)
|
||||||
iterLimit = 2
|
iterLimit = 2
|
||||||
}
|
}
|
||||||
@@ -106,13 +107,16 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
|
|||||||
|
|
||||||
// Stress the system if we know how much VRAM it has, and attempt to load more models than will fit
|
// Stress the system if we know how much VRAM it has, and attempt to load more models than will fit
|
||||||
func TestMultiModelStress(t *testing.T) {
|
func TestMultiModelStress(t *testing.T) {
|
||||||
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
|
s := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
|
||||||
if vram == "" {
|
if s == "" {
|
||||||
t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test")
|
t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test")
|
||||||
}
|
}
|
||||||
max, err := strconv.ParseUint(vram, 10, 64)
|
|
||||||
require.NoError(t, err)
|
maxVram, err := strconv.ParseUint(s, 10, 64)
|
||||||
const MB = uint64(1024 * 1024)
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
type model struct {
|
type model struct {
|
||||||
name string
|
name string
|
||||||
size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM
|
size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM
|
||||||
@@ -121,83 +125,82 @@ func TestMultiModelStress(t *testing.T) {
|
|||||||
smallModels := []model{
|
smallModels := []model{
|
||||||
{
|
{
|
||||||
name: "orca-mini",
|
name: "orca-mini",
|
||||||
size: 2992 * MB,
|
size: 2992 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "phi",
|
name: "phi",
|
||||||
size: 2616 * MB,
|
size: 2616 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "gemma:2b",
|
name: "gemma:2b",
|
||||||
size: 2364 * MB,
|
size: 2364 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "stable-code:3b",
|
name: "stable-code:3b",
|
||||||
size: 2608 * MB,
|
size: 2608 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "starcoder2:3b",
|
name: "starcoder2:3b",
|
||||||
size: 2166 * MB,
|
size: 2166 * format.MebiByte,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
mediumModels := []model{
|
mediumModels := []model{
|
||||||
{
|
{
|
||||||
name: "llama2",
|
name: "llama2",
|
||||||
size: 5118 * MB,
|
size: 5118 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "mistral",
|
name: "mistral",
|
||||||
size: 4620 * MB,
|
size: 4620 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "orca-mini:7b",
|
name: "orca-mini:7b",
|
||||||
size: 5118 * MB,
|
size: 5118 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "dolphin-mistral",
|
name: "dolphin-mistral",
|
||||||
size: 4620 * MB,
|
size: 4620 * format.MebiByte,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "gemma:7b",
|
name: "gemma:7b",
|
||||||
size: 5000 * MB,
|
size: 5000 * format.MebiByte,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "codellama:7b",
|
||||||
|
size: 5118 * format.MebiByte,
|
||||||
},
|
},
|
||||||
// TODO - uncomment this once #3565 is merged and this is rebased on it
|
|
||||||
// {
|
|
||||||
// name: "codellama:7b",
|
|
||||||
// size: 5118 * MB,
|
|
||||||
// },
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// These seem to be too slow to be useful...
|
// These seem to be too slow to be useful...
|
||||||
// largeModels := []model{
|
// largeModels := []model{
|
||||||
// {
|
// {
|
||||||
// name: "llama2:13b",
|
// name: "llama2:13b",
|
||||||
// size: 7400 * MB,
|
// size: 7400 * format.MebiByte,
|
||||||
// },
|
// },
|
||||||
// {
|
// {
|
||||||
// name: "codellama:13b",
|
// name: "codellama:13b",
|
||||||
// size: 7400 * MB,
|
// size: 7400 * format.MebiByte,
|
||||||
// },
|
// },
|
||||||
// {
|
// {
|
||||||
// name: "orca-mini:13b",
|
// name: "orca-mini:13b",
|
||||||
// size: 7400 * MB,
|
// size: 7400 * format.MebiByte,
|
||||||
// },
|
// },
|
||||||
// {
|
// {
|
||||||
// name: "gemma:7b",
|
// name: "gemma:7b",
|
||||||
// size: 5000 * MB,
|
// size: 5000 * format.MebiByte,
|
||||||
// },
|
// },
|
||||||
// {
|
// {
|
||||||
// name: "starcoder2:15b",
|
// name: "starcoder2:15b",
|
||||||
// size: 9100 * MB,
|
// size: 9100 * format.MebiByte,
|
||||||
// },
|
// },
|
||||||
// }
|
// }
|
||||||
|
|
||||||
var chosenModels []model
|
var chosenModels []model
|
||||||
switch {
|
switch {
|
||||||
case max < 10000*MB:
|
case maxVram < 10000*format.MebiByte:
|
||||||
slog.Info("selecting small models")
|
slog.Info("selecting small models")
|
||||||
chosenModels = smallModels
|
chosenModels = smallModels
|
||||||
// case max < 30000*MB:
|
// case maxVram < 30000*format.MebiByte:
|
||||||
default:
|
default:
|
||||||
slog.Info("selecting medium models")
|
slog.Info("selecting medium models")
|
||||||
chosenModels = mediumModels
|
chosenModels = mediumModels
|
||||||
@@ -226,15 +229,15 @@ func TestMultiModelStress(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
consumed := uint64(256 * MB) // Assume some baseline usage
|
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
|
||||||
for i := 0; i < len(req); i++ {
|
for i := 0; i < len(req); i++ {
|
||||||
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
|
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
|
||||||
if i > 1 && consumed > max {
|
if i > 1 && consumed > maxVram {
|
||||||
slog.Info("achieved target vram exhaustion", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
|
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
consumed += chosenModels[i].size
|
consumed += chosenModels[i].size
|
||||||
slog.Info("target vram", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
|
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(i int) {
|
go func(i int) {
|
||||||
|
|||||||
@@ -70,8 +70,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
|
|||||||
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
|
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
|
||||||
}
|
}
|
||||||
|
|
||||||
if res.PromptEvalCount != 8 {
|
if res.PromptEvalCount != 6 {
|
||||||
t.Fatalf("expected 8 prompt tokens, got %d", res.PromptEvalCount)
|
t.Fatalf("expected 6 prompt tokens, got %d", res.PromptEvalCount)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -102,8 +102,8 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
|
|||||||
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
|
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
|
||||||
}
|
}
|
||||||
|
|
||||||
if res.PromptEvalCount != 16 {
|
if res.PromptEvalCount != 12 {
|
||||||
t.Fatalf("expected 16 prompt tokens, got %d", res.PromptEvalCount)
|
t.Fatalf("expected 12 prompt tokens, got %d", res.PromptEvalCount)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,8 +35,8 @@ var (
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
resp = [2][]string{
|
resp = [2][]string{
|
||||||
[]string{"sunlight"},
|
{"sunlight"},
|
||||||
[]string{"england", "english", "massachusetts", "pilgrims"},
|
{"england", "english", "massachusetts", "pilgrims"},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ package integration
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -14,8 +13,10 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMaxQueue(t *testing.T) {
|
func TestMaxQueue(t *testing.T) {
|
||||||
@@ -27,13 +28,10 @@ func TestMaxQueue(t *testing.T) {
|
|||||||
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
|
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless your on GPU
|
||||||
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
|
||||||
threadCount := 32
|
threadCount := 32
|
||||||
mq := os.Getenv("OLLAMA_MAX_QUEUE")
|
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
|
||||||
if mq != "" {
|
threadCount = int(maxQueue)
|
||||||
var err error
|
|
||||||
threadCount, err = strconv.Atoi(mq)
|
|
||||||
require.NoError(t, err)
|
|
||||||
} else {
|
} else {
|
||||||
os.Setenv("OLLAMA_MAX_QUEUE", fmt.Sprintf("%d", threadCount))
|
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
|
||||||
}
|
}
|
||||||
|
|
||||||
req := api.GenerateRequest{
|
req := api.GenerateRequest{
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
|||||||
fn := func(resp api.ProgressResponse) error {
|
fn := func(resp api.ProgressResponse) error {
|
||||||
// fmt.Print(".")
|
// fmt.Print(".")
|
||||||
if !stallTimer.Reset(stallDuration) {
|
if !stallTimer.Reset(stallDuration) {
|
||||||
return fmt.Errorf("stall was detected, aborting status reporting")
|
return errors.New("stall was detected, aborting status reporting")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -180,7 +180,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
|
|||||||
|
|
||||||
select {
|
select {
|
||||||
case <-stallTimer.C:
|
case <-stallTimer.C:
|
||||||
return fmt.Errorf("download stalled")
|
return errors.New("download stalled")
|
||||||
case <-done:
|
case <-done:
|
||||||
return pullError
|
return pullError
|
||||||
}
|
}
|
||||||
@@ -243,7 +243,7 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
|
|||||||
// fmt.Print(".")
|
// fmt.Print(".")
|
||||||
buf.Write([]byte(response.Response))
|
buf.Write([]byte(response.Response))
|
||||||
if !stallTimer.Reset(streamTimeout) {
|
if !stallTimer.Reset(streamTimeout) {
|
||||||
return fmt.Errorf("stall was detected while streaming response, aborting")
|
return errors.New("stall was detected while streaming response, aborting")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
[][]string{
|
[][]string{
|
||||||
[]string{"sunlight"},
|
{"sunlight"},
|
||||||
[]string{"soil", "organic", "earth", "black", "tan"},
|
{"soil", "organic", "earth", "black", "tan"},
|
||||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||||
[]string{"fourth", "july", "declaration", "independence"},
|
{"fourth", "july", "declaration", "independence"},
|
||||||
[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
|
{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
25
llm/ext_server/CMakeLists.txt
vendored
25
llm/ext_server/CMakeLists.txt
vendored
@@ -1,13 +1,14 @@
|
|||||||
set(TARGET ollama_llama_server)
|
set(TARGET ollama_llama_server)
|
||||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})
|
||||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||||
target_compile_definitions(${TARGET} PRIVATE
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
target_compile_definitions(${TARGET} PRIVATE
|
||||||
)
|
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||||
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
|
)
|
||||||
if (WIN32)
|
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
|
||||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
if (WIN32)
|
||||||
endif()
|
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||||
|
endif()
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
60
llm/ext_server/server.cpp
vendored
60
llm/ext_server/server.cpp
vendored
@@ -44,6 +44,7 @@
|
|||||||
#include <errhandlingapi.h>
|
#include <errhandlingapi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
@@ -402,7 +403,9 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
auto init_result = llama_init_from_gpt_params(params);
|
||||||
|
model = init_result.model;
|
||||||
|
ctx = init_result.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||||
@@ -1221,7 +1224,6 @@ struct llama_server_context
|
|||||||
res.result_json = json
|
res.result_json = json
|
||||||
{
|
{
|
||||||
{"embedding", std::vector<float>(embd, embd + n_embd)},
|
{"embedding", std::vector<float>(embd, embd + n_embd)},
|
||||||
{"timings", slot.get_formated_timings()},
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1427,7 +1429,13 @@ struct llama_server_context
|
|||||||
switch (task.type)
|
switch (task.type)
|
||||||
{
|
{
|
||||||
case TASK_TYPE_COMPLETION: {
|
case TASK_TYPE_COMPLETION: {
|
||||||
server_slot *slot = prefix_slot(task.data["prompt"]);
|
server_slot *slot = nullptr;
|
||||||
|
if (task.embedding_mode) {
|
||||||
|
// Embedding seq_id (aka slot id) must always be <= token length, so always use slot 0
|
||||||
|
slot = slots[0].available() ? &slots[0] : nullptr;
|
||||||
|
} else {
|
||||||
|
slot = prefix_slot(task.data["prompt"]);
|
||||||
|
}
|
||||||
if (slot == nullptr)
|
if (slot == nullptr)
|
||||||
{
|
{
|
||||||
// if no slot is available, we defer this task for processing later
|
// if no slot is available, we defer this task for processing later
|
||||||
@@ -2420,7 +2428,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
params.lora_adapters.push_back({
|
||||||
|
std::string(argv[i]),
|
||||||
|
1.0,
|
||||||
|
});
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
}
|
}
|
||||||
else if (arg == "--lora-scaled")
|
else if (arg == "--lora-scaled")
|
||||||
@@ -2436,7 +2447,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
|
params.lora_adapters.push_back({
|
||||||
|
lora_adapter,
|
||||||
|
std::stof(argv[i])
|
||||||
|
});
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
}
|
}
|
||||||
else if (arg == "-v" || arg == "--verbose")
|
else if (arg == "-v" || arg == "--verbose")
|
||||||
@@ -3184,37 +3198,17 @@ int main(int argc, char **argv) {
|
|||||||
prompt = "";
|
prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prompt.size() == 1) {
|
|
||||||
prompt = prompt[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
// create and queue the task
|
// create and queue the task
|
||||||
json responses;
|
const int task_id = llama.queue_tasks.get_new_id();
|
||||||
{
|
llama.queue_results.add_waiting_task_id(task_id);
|
||||||
const int id_task = llama.queue_tasks.get_new_id();
|
llama.request_completion(task_id, {{"prompt", prompt}}, true, -1);
|
||||||
llama.queue_results.add_waiting_task_id(id_task);
|
|
||||||
llama.request_completion(id_task, {{"prompt", prompt}}, true, -1);
|
|
||||||
|
|
||||||
// get the result
|
// get the result
|
||||||
task_result result = llama.queue_results.recv(id_task);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
llama.queue_results.remove_waiting_task_id(id_task);
|
llama.queue_results.remove_waiting_task_id(task_id);
|
||||||
if (result.error) {
|
|
||||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
responses = result.result_json.value("results", std::vector<json>{result.result_json});
|
// send the result
|
||||||
json embeddings = json::array();
|
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||||
|
|
||||||
int prompt_n = 0;
|
|
||||||
for (auto & elem : responses) {
|
|
||||||
embeddings.push_back(elem.at("embedding"));
|
|
||||||
prompt_n += elem.at("timings").at("prompt_n").get<int>();
|
|
||||||
}
|
|
||||||
|
|
||||||
// send the result
|
|
||||||
json embedding_res = json{{"embedding", embeddings}, {"prompt_n", prompt_n}};
|
|
||||||
return res.set_content(embedding_res.dump(), "application/json; charset=utf-8");
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?
|
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?
|
||||||
|
|||||||
@@ -9,11 +9,14 @@ init_vars() {
|
|||||||
ARCH="arm64"
|
ARCH="arm64"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
|
echo "GOARCH must be set"
|
||||||
|
echo "this script is meant to be run from within go generate"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
LLAMACPP_DIR=../llama.cpp
|
LLAMACPP_DIR=../llama.cpp
|
||||||
CMAKE_DEFS=""
|
CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on"
|
||||||
CMAKE_TARGETS="--target ollama_llama_server"
|
CMAKE_TARGETS="--target ollama_llama_server"
|
||||||
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
||||||
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
|
||||||
@@ -27,6 +30,7 @@ init_vars() {
|
|||||||
WHOLE_ARCHIVE="-Wl,-force_load"
|
WHOLE_ARCHIVE="-Wl,-force_load"
|
||||||
NO_WHOLE_ARCHIVE=""
|
NO_WHOLE_ARCHIVE=""
|
||||||
GCC_ARCH="-arch ${ARCH}"
|
GCC_ARCH="-arch ${ARCH}"
|
||||||
|
DIST_BASE=../../dist/darwin-${GOARCH}/
|
||||||
;;
|
;;
|
||||||
"Linux")
|
"Linux")
|
||||||
LIB_EXT="so"
|
LIB_EXT="so"
|
||||||
@@ -35,6 +39,7 @@ init_vars() {
|
|||||||
|
|
||||||
# Cross compiling not supported on linux - Use docker
|
# Cross compiling not supported on linux - Use docker
|
||||||
GCC_ARCH=""
|
GCC_ARCH=""
|
||||||
|
DIST_BASE=../../dist/linux-${GOARCH}/
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
;;
|
;;
|
||||||
@@ -42,6 +47,7 @@ init_vars() {
|
|||||||
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
|
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
|
||||||
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
|
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
|
||||||
fi
|
fi
|
||||||
|
GZIP=$(which pigz 2>/dev/null || echo "gzip")
|
||||||
}
|
}
|
||||||
|
|
||||||
git_module_setup() {
|
git_module_setup() {
|
||||||
@@ -85,26 +91,36 @@ build() {
|
|||||||
|
|
||||||
compress() {
|
compress() {
|
||||||
echo "Compressing payloads to reduce overall binary size..."
|
echo "Compressing payloads to reduce overall binary size..."
|
||||||
pids=""
|
|
||||||
rm -rf ${BUILD_DIR}/bin/*.gz
|
rm -rf ${BUILD_DIR}/bin/*.gz
|
||||||
for f in ${BUILD_DIR}/bin/* ; do
|
for f in ${BUILD_DIR}/bin/* ; do
|
||||||
gzip -n --best -f ${f} &
|
${GZIP} -n --best -f ${f} &
|
||||||
pids+=" $!"
|
compress_pids+=" $!"
|
||||||
done
|
done
|
||||||
# check for lib directory
|
# check for lib directory
|
||||||
if [ -d ${BUILD_DIR}/lib ]; then
|
if [ -d ${BUILD_DIR}/lib ]; then
|
||||||
for f in ${BUILD_DIR}/lib/* ; do
|
for f in ${BUILD_DIR}/lib/* ; do
|
||||||
gzip -n --best -f ${f} &
|
${GZIP} -n --best -f ${f} &
|
||||||
pids+=" $!"
|
compress_pids+=" $!"
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
echo
|
echo
|
||||||
for pid in ${pids}; do
|
}
|
||||||
|
|
||||||
|
wait_for_compress() {
|
||||||
|
for pid in ${compress_pids}; do
|
||||||
wait $pid
|
wait $pid
|
||||||
done
|
done
|
||||||
echo "Finished compression"
|
echo "Finished compression"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
install() {
|
||||||
|
echo "Installing libraries to bin dir ${BUILD_DIR}/bin/"
|
||||||
|
for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT}); do
|
||||||
|
rm -f "${BUILD_DIR}/bin/$(basename ${lib})"
|
||||||
|
cp -af "${lib}" "${BUILD_DIR}/bin/"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# Keep the local tree clean after we're done with the build
|
# Keep the local tree clean after we're done with the build
|
||||||
cleanup() {
|
cleanup() {
|
||||||
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
|
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user