Compare commits
24 Commits
main
...
parth/cons
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a4265c278a | ||
![]() |
ffd6428758 | ||
![]() |
aa6d5151df | ||
![]() |
25edfa6fdb | ||
![]() |
cc2e44b885 | ||
![]() |
d5f8670f0a | ||
![]() |
524029cd6d | ||
![]() |
b973dedb4b | ||
![]() |
c56a8b7749 | ||
![]() |
198fde82aa | ||
![]() |
77f709ebd5 | ||
![]() |
73098a2973 | ||
![]() |
e93db4d20e | ||
![]() |
a2a73ce5e0 | ||
![]() |
6ba557f25b | ||
![]() |
a7c8cc06da | ||
![]() |
089bbb537d | ||
![]() |
7cd9fbbbb1 | ||
![]() |
b91487f289 | ||
![]() |
5b19d4941a | ||
![]() |
5e73f24e16 | ||
![]() |
4aac178cac | ||
![]() |
800e21d060 | ||
![]() |
5827999e9e |
9
.gitattributes
vendored
9
.gitattributes
vendored
@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored
|
||||
llama/**/*.m linguist-vendored
|
||||
llama/**/*.metal linguist-vendored
|
||||
|
||||
ml/backend/**/*.c linguist-vendored
|
||||
ml/backend/**/*.h linguist-vendored
|
||||
ml/backend/**/*.cpp linguist-vendored
|
||||
ml/backend/**/*.hpp linguist-vendored
|
||||
ml/backend/**/*.cu linguist-vendored
|
||||
ml/backend/**/*.cuh linguist-vendored
|
||||
ml/backend/**/*.m linguist-vendored
|
||||
ml/backend/**/*.metal linguist-vendored
|
||||
|
||||
* text=auto
|
||||
*.go text eol=lf
|
||||
|
310
.github/workflows/test.yaml
vendored
310
.github/workflows/test.yaml
vendored
@ -1,11 +1,5 @@
|
||||
name: test
|
||||
|
||||
env:
|
||||
ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
|
||||
MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
|
||||
CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
|
||||
CUDA_12_WINDOWS_VER: 12.4
|
||||
|
||||
concurrency:
|
||||
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
|
||||
# cancels running CI jobs and starts all new ones.
|
||||
@ -27,7 +21,7 @@ jobs:
|
||||
changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
|
||||
changed: ${{ steps.changes.outputs.changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@ -35,309 +29,61 @@ jobs:
|
||||
- id: changes
|
||||
run: |
|
||||
changed() {
|
||||
git diff-tree -r --no-commit-id --name-only \
|
||||
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
|
||||
${{ github.event.pull_request.head.sha }} \
|
||||
local BASE=${{ github.event.pull_request.base.sha }}
|
||||
local HEAD=${{ github.event.pull_request.head.sha }}
|
||||
local MERGE_BASE=$(git merge-base $BASE $HEAD)
|
||||
git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
|
||||
| xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
|
||||
}
|
||||
|
||||
{
|
||||
echo RUNNERS=$(changed 'llama/**')
|
||||
} >>$GITHUB_OUTPUT
|
||||
echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
|
||||
|
||||
runners-linux-cuda:
|
||||
linux:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
if: ${{ needs.changes.outputs.changed == 'True' }}
|
||||
strategy:
|
||||
matrix:
|
||||
cuda-version:
|
||||
- '11.8.0'
|
||||
runs-on: linux
|
||||
container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
|
||||
include:
|
||||
- container: nvidia/cuda:11.8.0-devel-ubuntu22.04
|
||||
preset: CUDA
|
||||
- container: rocm/dev-ubuntu-22.04:6.1.2
|
||||
preset: ROCm
|
||||
extra-packages: rocm-libs
|
||||
runs-on: ubuntu-latest
|
||||
container: ${{ matrix.container }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- run: |
|
||||
apt-get update && apt-get install -y git build-essential curl
|
||||
apt-get update
|
||||
apt-get install -y cmake pkg-config ${{ matrix.extra-packages }}
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
git config --global --add safe.directory /__w/ollama/ollama
|
||||
cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
|
||||
make -j $cores cuda_v11
|
||||
runners-linux-rocm:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
strategy:
|
||||
matrix:
|
||||
rocm-version:
|
||||
- '6.1.2'
|
||||
runs-on: linux
|
||||
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
|
||||
steps:
|
||||
- run: |
|
||||
apt-get update && apt-get install -y git build-essential curl rocm-libs
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
git config --global --add safe.directory /__w/ollama/ollama
|
||||
cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
|
||||
make -j $cores rocm
|
||||
cmake --preset ${{ matrix.preset }}
|
||||
cmake --build --preset ${{ matrix.preset }} --parallel
|
||||
|
||||
# ROCm generation step
|
||||
runners-windows-rocm:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
runs-on: windows
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: Set make jobs default
|
||||
run: |
|
||||
echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
|
||||
# ROCM installation steps
|
||||
- name: 'Cache ROCm installer'
|
||||
id: cache-rocm
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: rocm-install.exe
|
||||
key: ${{ env.ROCM_WINDOWS_URL }}
|
||||
- name: 'Conditionally Download ROCm'
|
||||
if: steps.cache-rocm.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
$ErrorActionPreference = "Stop"
|
||||
Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
|
||||
- name: 'Install ROCm'
|
||||
run: |
|
||||
Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
||||
- name: 'Verify ROCm'
|
||||
run: |
|
||||
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
||||
echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
|
||||
- name: Add msys paths
|
||||
run: |
|
||||
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install msys2 tools
|
||||
run: |
|
||||
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
|
||||
- name: make rocm runner
|
||||
run: |
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make -C llama print-HIP_PATH print-HIP_LIB_DIR
|
||||
make rocm
|
||||
|
||||
# CUDA generation step
|
||||
runners-windows-cuda:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
runs-on: windows
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: Set make jobs default
|
||||
run: |
|
||||
echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
|
||||
# CUDA installation steps
|
||||
- name: 'Cache CUDA installer'
|
||||
id: cache-cuda
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: cuda-install.exe
|
||||
key: ${{ env.CUDA_12_WINDOWS_URL }}
|
||||
- name: 'Conditionally Download CUDA'
|
||||
if: steps.cache-cuda.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
$ErrorActionPreference = "Stop"
|
||||
Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
|
||||
- name: 'Install CUDA'
|
||||
run: |
|
||||
$subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
|
||||
Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
|
||||
- name: 'Verify CUDA'
|
||||
run: |
|
||||
& (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
|
||||
$cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
|
||||
$cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
|
||||
echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
|
||||
- name: Add msys paths
|
||||
run: |
|
||||
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install msys2 tools
|
||||
run: |
|
||||
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
- name: make cuda runner
|
||||
run: |
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
|
||||
|
||||
runners-cpu:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64, arm64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
ARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- name: Add msys paths
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install msys2 tools
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
- name: 'Build Windows Go Runners'
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
$gopath=(get-command go).source | split-path -parent
|
||||
$gccpath=(get-command gcc).source | split-path -parent
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$gccpath;$env:PATH"
|
||||
echo $env:PATH
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make -j 4
|
||||
- name: 'Build Unix Go Runners'
|
||||
if: ${{ ! startsWith(matrix.os, 'windows-') }}
|
||||
run: make -j 4
|
||||
- run: go build .
|
||||
|
||||
lint:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64, arm64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
- os: macos-latest
|
||||
arch: amd64
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Add msys paths
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install msys2 tools
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: false
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
amd64) echo ARCH=x86_64 ;;
|
||||
arm64) echo ARCH=arm64 ;;
|
||||
esac >>$GITHUB_ENV
|
||||
shell: bash
|
||||
- uses: golangci/golangci-lint-action@v6
|
||||
with:
|
||||
args: --timeout 10m0s -v
|
||||
test:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Add msys paths
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||
- name: Install msys2 tools
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
run: |
|
||||
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
- run: |
|
||||
case ${{ matrix.arch }} in
|
||||
amd64) echo ARCH=amd64 ;;
|
||||
arm64) echo ARCH=arm64 ;;
|
||||
esac >>$GITHUB_ENV
|
||||
shell: bash
|
||||
- uses: golangci/golangci-lint-action@v6
|
||||
with:
|
||||
args: --timeout 10m0s -v
|
||||
- run: go test ./...
|
||||
|
||||
patches:
|
||||
needs: [changes]
|
||||
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Verify patches carry all the changes
|
||||
- name: Verify patches apply cleanly and do not change files
|
||||
run: |
|
||||
make apply-patches sync && git diff --compact-summary --exit-code llama
|
||||
make -f Makefile2 clean checkout sync
|
||||
git diff --compact-summary --exit-code
|
||||
|
45
CMakeLists.txt
Normal file
45
CMakeLists.txt
Normal file
@ -0,0 +1,45 @@
|
||||
cmake_minimum_required(VERSION 3.21)
|
||||
|
||||
project(Ollama C CXX)
|
||||
|
||||
include(CheckLanguage)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
set(GGML_BUILD ON)
|
||||
set(GGML_SHARED ON)
|
||||
set(GGML_CCACHE ON)
|
||||
set(GGML_BACKEND_DL ON)
|
||||
set(GGML_BACKEND_SHARED ON)
|
||||
set(GGML_SCHED_MAX_COPIES 4)
|
||||
set(GGML_CPU_ALL_VARIANTS ON)
|
||||
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
|
||||
set(GGML_LLAMAFILE ON)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
|
||||
|
||||
set(GGML_CPU ON)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
|
||||
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
|
||||
|
||||
check_language(CUDA)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
|
||||
endif()
|
||||
|
||||
check_language(HIP)
|
||||
if(CMAKE_HIP_COMPILER)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
|
||||
endif()
|
96
CMakePresets.json
Normal file
96
CMakePresets.json
Normal file
@ -0,0 +1,96 @@
|
||||
{
|
||||
"version": 3,
|
||||
"configurePresets": [
|
||||
{
|
||||
"name": "Default",
|
||||
"binaryDir": "${sourceDir}/build",
|
||||
"cacheVariables": {
|
||||
"CMAKE_BUILD_TYPE": "Release"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "CUDA",
|
||||
"inherits": [ "Default" ]
|
||||
},
|
||||
{
|
||||
"name": "CUDA 11",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "CUDA 12",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "JetPack 5",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "72;87"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "JetPack 6",
|
||||
"inherits": [ "CUDA" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_CUDA_ARCHITECTURES": "87"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "ROCm",
|
||||
"inherits": [ "Default" ]
|
||||
},
|
||||
{
|
||||
"name": "ROCm 6",
|
||||
"inherits": [ "ROCm" ],
|
||||
"cacheVariables": {
|
||||
"CMAKE_HIP_ARCHITECTURES": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
|
||||
}
|
||||
}
|
||||
],
|
||||
"buildPresets": [
|
||||
{
|
||||
"name": "Default",
|
||||
"configurePreset": "Default"
|
||||
},
|
||||
{
|
||||
"name": "CUDA",
|
||||
"configurePreset": "CUDA",
|
||||
"targets": [ "ggml-cuda" ]
|
||||
},
|
||||
{
|
||||
"name": "CUDA 11",
|
||||
"inherits": [ "CUDA" ],
|
||||
"configurePreset": "CUDA 11"
|
||||
},
|
||||
{
|
||||
"name": "CUDA 12",
|
||||
"inherits": [ "CUDA" ],
|
||||
"configurePreset": "CUDA 12"
|
||||
},
|
||||
{
|
||||
"name": "JetPack 5",
|
||||
"inherits": [ "CUDA" ],
|
||||
"configurePreset": "JetPack 5"
|
||||
},
|
||||
{
|
||||
"name": "JetPack 6",
|
||||
"inherits": [ "CUDA" ],
|
||||
"configurePreset": "JetPack 6"
|
||||
},
|
||||
{
|
||||
"name": "ROCm",
|
||||
"configurePreset": "ROCm",
|
||||
"targets": [ "ggml-hip" ]
|
||||
},
|
||||
{
|
||||
"name": "ROCm 6",
|
||||
"inherits": [ "ROCm" ],
|
||||
"configurePreset": "ROCm 6"
|
||||
}
|
||||
]
|
||||
}
|
66
Dockerfile2
Normal file
66
Dockerfile2
Normal file
@ -0,0 +1,66 @@
|
||||
ARG CUDA_11_VERSION=11.3
|
||||
ARG CUDA_12_VERSION=12.4
|
||||
ARG ROCM_VERSION=6.1.2
|
||||
ARG JETPACK_5_VERSION=r35.4.1
|
||||
ARG JETPACK_6_VERSION=r36.2.0
|
||||
ARG CMAKE_VERSION=3.31.2
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base
|
||||
ARG CMAKE_VERSION
|
||||
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1
|
||||
RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
|
||||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
|
||||
|
||||
# FROM --platform=linux/arm64 rockylinux:8 AS base
|
||||
# ARG CMAKE_VERSION
|
||||
# RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
|
||||
# RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
|
||||
|
||||
FROM base AS amd64
|
||||
ARG CUDA_11_VERSION
|
||||
ARG CUDA_12_VERSION
|
||||
RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \
|
||||
&& yum install -y cuda-toolkit-${CUDA_12_VERSION//./-}
|
||||
COPY CMakeLists.txt CMakeLists.txt
|
||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
|
||||
FROM --platform=linux/amd64 amd64 AS cuda_11
|
||||
ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH
|
||||
RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
|
||||
RUN cmake --build build --target ggml-cuda -j
|
||||
|
||||
FROM --platform=linux/amd64 amd64 AS cuda_12
|
||||
ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH
|
||||
RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||
RUN cmake --build build --target ggml-cuda -j
|
||||
|
||||
FROM --platform=linux/amd64 amd64 AS rocm
|
||||
RUN cmake -S . -B build -DCMAKE_HIP_ARCHITECTURES="gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
|
||||
RUN cmake --build build --target ggml-hip -j
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5
|
||||
ARG CMAKE_VERSION
|
||||
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
|
||||
COPY CMakeLists.txt .
|
||||
COPY ml/backend/ggml/ggml .
|
||||
RUN cmake -S . -B build \
|
||||
-DCMAKE_CUDA_ARCHITECTURES="72;87"
|
||||
RUN cmake --build build --target ggml-cuda
|
||||
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6
|
||||
ARG CMAKE_VERSION
|
||||
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
|
||||
COPY CMakeLists.txt .
|
||||
COPY ml/backend/ggml/ggml .
|
||||
RUN cmake -S . -B build \
|
||||
-DCMAKE_CUDA_ARCHITECTURES="87"
|
||||
RUN cmake --build build --target ggml-cuda
|
||||
|
||||
FROM --platform=linux/amd64 golang:1.23
|
||||
COPY --from=cuda_11 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-11.so
|
||||
COPY --from=cuda_12 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-12.so
|
||||
COPY --from=rocm build/ml/backend/ggml/ggml/src/ggml-hip/libggml-hip.so libggml-hip.so
|
||||
|
||||
# FROM --platform=linux/arm64 golang:1.23
|
||||
# COPY --from=jetpack_5 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-5.so
|
||||
# COPY --from=jetpack_6 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-6.so
|
103
Makefile
103
Makefile
@ -1,103 +0,0 @@
|
||||
# top level makefile for Ollama
|
||||
include make/common-defs.make
|
||||
|
||||
|
||||
# Determine which if any GPU runners we should build
|
||||
include make/cuda-v11-defs.make
|
||||
include make/cuda-v12-defs.make
|
||||
include make/rocm-defs.make
|
||||
|
||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||
ifeq ($(ARCH),amd64)
|
||||
RUNNER_TARGETS=cpu
|
||||
endif
|
||||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
||||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
||||
ifneq ($(CUDA_11_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v11
|
||||
endif
|
||||
ifneq ($(CUDA_12_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v12
|
||||
endif
|
||||
endif
|
||||
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
|
||||
ifneq ($(CUDA_12_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v12
|
||||
else ifneq ($(CUDA_11_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v11
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
|
||||
ifneq ($(HIP_COMPILER),)
|
||||
RUNNER_TARGETS += rocm
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
all: runners exe
|
||||
|
||||
dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe
|
||||
|
||||
dist_%:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.$* dist
|
||||
|
||||
runners: $(RUNNER_TARGETS)
|
||||
|
||||
$(RUNNER_TARGETS):
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.$@
|
||||
|
||||
exe dist_exe:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.ollama $@
|
||||
|
||||
help-sync apply-patches create-patches sync sync-clean:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.sync $@
|
||||
|
||||
test integration lint:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.test $@
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE)
|
||||
go clean -cache
|
||||
|
||||
help:
|
||||
@echo "The following make targets will help you build Ollama"
|
||||
@echo ""
|
||||
@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
|
||||
@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable"
|
||||
@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
|
||||
@echo " make dist # Build the runners and primary ollama executable for distribution"
|
||||
@echo " make help-sync # Help information on vendor update targets"
|
||||
@echo " make help-runners # Help information on runner targets"
|
||||
@echo ""
|
||||
@echo "The following make targets will help you test Ollama"
|
||||
@echo ""
|
||||
@echo " make test # Run unit tests"
|
||||
@echo " make integration # Run integration tests. You must 'make all' first"
|
||||
@echo " make lint # Run lint and style tests"
|
||||
@echo ""
|
||||
@echo "For more information see 'docs/development.md'"
|
||||
@echo ""
|
||||
|
||||
|
||||
help-runners:
|
||||
@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
|
||||
@echo ""
|
||||
@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)"
|
||||
@echo ""
|
||||
@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
|
||||
@echo "CUDA_PATH=$(CUDA_PATH)"
|
||||
@echo " CUDA_11_PATH=$(CUDA_11_PATH)"
|
||||
@echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)"
|
||||
@echo " CUDA_12_PATH=$(CUDA_12_PATH)"
|
||||
@echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)"
|
||||
@echo ""
|
||||
@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
|
||||
@echo "HIP_PATH=$(HIP_PATH)"
|
||||
@echo " HIP_COMPILER=$(HIP_COMPILER)"
|
||||
|
||||
.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS)
|
||||
|
||||
# Handy debugging for make variables
|
||||
print-%:
|
||||
@echo '$*=$($*)'
|
46
Makefile2
Normal file
46
Makefile2
Normal file
@ -0,0 +1,46 @@
|
||||
UPSTREAM=https://github.com/ggerganov/llama.cpp.git
|
||||
WORKDIR=llama/vendor
|
||||
FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
|
||||
|
||||
all: sync
|
||||
|
||||
.PHONY: sync
|
||||
sync: llama/llama.cpp ml/backend/ggml/ggml
|
||||
|
||||
.PHONY: llama/llama.cpp
|
||||
llama/llama.cpp: llama/vendor/ apply_patches
|
||||
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
|
||||
|
||||
.PHONY: ml/backend/ggml/ggml apply_patches
|
||||
ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
|
||||
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
|
||||
|
||||
PATCHES=$(wildcard llama/patches/*.patch)
|
||||
|
||||
.PHONY: apply_patches
|
||||
.NOTPARALLEL:
|
||||
apply_patches: $(addsuffix ed, $(PATCHES))
|
||||
|
||||
%.patched: %.patch
|
||||
@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
|
||||
|
||||
.PHONY: checkout
|
||||
checkout: $(WORKDIR)
|
||||
git -C $(WORKDIR) fetch
|
||||
git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
|
||||
|
||||
$(WORKDIR):
|
||||
git clone $(UPSTREAM) $(WORKDIR)
|
||||
|
||||
.PHONE: format_patches
|
||||
format_patches: llama/patches
|
||||
git -C $(WORKDIR) format-patch \
|
||||
--no-signature \
|
||||
--no-numbered \
|
||||
--zero-commit \
|
||||
-o $(realpath $<) \
|
||||
$(FETCH_HEAD)
|
||||
|
||||
.PHONE: clean
|
||||
clean: checkout
|
||||
$(RM) $(addsuffix ed, $(PATCHES))
|
63
cache/cache.go
vendored
Normal file
63
cache/cache.go
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
package cache
|
||||
|
||||
import (
|
||||
"github.com/ollama/ollama/ml"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
Position int
|
||||
}
|
||||
|
||||
type Cache interface {
|
||||
Sub(i int) Cache
|
||||
Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor)
|
||||
}
|
||||
|
||||
type Simple struct {
|
||||
DType ml.DType
|
||||
Capacity int
|
||||
|
||||
keys, values []ml.Tensor
|
||||
}
|
||||
|
||||
func (c *Simple) Sub(i int) Cache {
|
||||
if i >= len(c.keys) {
|
||||
c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
|
||||
c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
|
||||
}
|
||||
|
||||
return &Simple{
|
||||
keys: c.keys[i : i+1],
|
||||
values: c.values[i : i+1],
|
||||
Capacity: c.Capacity,
|
||||
DType: c.DType,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Simple) Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor) {
|
||||
if c.keys[0] == nil || c.values[0] == nil {
|
||||
c.keys[0] = ctx.Zeros(c.DType, int(key.Dim(0)*key.Dim(1))*c.Capacity)
|
||||
c.values[0] = ctx.Zeros(c.DType, int(value.Dim(0)*value.Dim(1))*c.Capacity)
|
||||
}
|
||||
|
||||
ctx.Forward(key.Copy(ctx, c.keys[0].View(ctx, int(key.Stride(2))*opts.Position, int(key.Dim(0)*key.Dim(1)*key.Dim(2)))))
|
||||
ctx.Forward(value.Copy(ctx, c.values[0].View(ctx, int(value.Stride(2))*opts.Position, int(value.Dim(0)*value.Dim(1)*value.Dim(2)))))
|
||||
|
||||
n := min(c.Capacity, int(key.Dim(2))+opts.Position)
|
||||
|
||||
key = c.keys[0].View(ctx, 0,
|
||||
int(key.Dim(0)), int(key.Stride(1)),
|
||||
int(key.Dim(1)), int(key.Stride(2)),
|
||||
n,
|
||||
)
|
||||
|
||||
value = c.values[0].View(ctx, 0,
|
||||
int(value.Dim(0)), int(value.Stride(1)),
|
||||
int(value.Dim(1)), int(value.Stride(2)),
|
||||
n,
|
||||
)
|
||||
|
||||
// TODO shift context if necessary
|
||||
|
||||
return key, value
|
||||
}
|
@ -9,7 +9,7 @@ import (
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type ModelParameters struct {
|
||||
@ -27,8 +27,8 @@ type AdapterParameters struct {
|
||||
} `json:"lora_parameters"`
|
||||
}
|
||||
|
||||
func (ModelParameters) KV(t *Tokenizer) llm.KV {
|
||||
kv := llm.KV{
|
||||
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
|
||||
kv := ggml.KV{
|
||||
"general.file_type": uint32(1),
|
||||
"general.quantization_version": uint32(2),
|
||||
"tokenizer.ggml.pre": t.Pre,
|
||||
@ -54,7 +54,7 @@ func (ModelParameters) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p AdapterParameters) KV() llm.KV {
|
||||
func (p AdapterParameters) KV() ggml.KV {
|
||||
var alpha float32
|
||||
if p.LoraParameters.Alpha == 0 {
|
||||
alpha = float32(p.Alpha)
|
||||
@ -62,7 +62,7 @@ func (p AdapterParameters) KV() llm.KV {
|
||||
alpha = p.LoraParameters.Alpha
|
||||
}
|
||||
|
||||
kv := llm.KV{
|
||||
kv := ggml.KV{
|
||||
"adapter.lora.alpha": alpha,
|
||||
"adapter.type": "lora",
|
||||
"general.file_type": uint32(1),
|
||||
@ -79,19 +79,19 @@ func (ModelParameters) specialTokenTypes() []string {
|
||||
}
|
||||
}
|
||||
|
||||
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
return llm.WriteGGUF(ws, kv, ts)
|
||||
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(ws, kv, ts)
|
||||
}
|
||||
|
||||
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
|
||||
return llm.WriteGGUF(ws, kv, ts)
|
||||
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
||||
return ggml.WriteGGUF(ws, kv, ts)
|
||||
}
|
||||
|
||||
type ModelConverter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(*Tokenizer) llm.KV
|
||||
KV(*Tokenizer) ggml.KV
|
||||
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
|
||||
Tensors([]Tensor) []llm.Tensor
|
||||
Tensors([]Tensor) []ggml.Tensor
|
||||
// Replacements returns a list of string pairs to replace in tensor names.
|
||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
@ -99,7 +99,7 @@ type ModelConverter interface {
|
||||
// specialTokenTypes returns any special token types the model uses
|
||||
specialTokenTypes() []string
|
||||
// writeFile writes the model to the provided io.WriteSeeker
|
||||
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
||||
}
|
||||
|
||||
type moreParser interface {
|
||||
@ -108,17 +108,17 @@ type moreParser interface {
|
||||
|
||||
type AdapterConverter interface {
|
||||
// KV maps parameters to LLM key-values
|
||||
KV(llm.KV) llm.KV
|
||||
KV(ggml.KV) ggml.KV
|
||||
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
|
||||
Tensors([]Tensor) []llm.Tensor
|
||||
Tensors([]Tensor) []ggml.Tensor
|
||||
// Replacements returns a list of string pairs to replace in tensor names.
|
||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||
Replacements() []string
|
||||
|
||||
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
|
||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
||||
}
|
||||
|
||||
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
|
||||
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
|
||||
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type bertModel struct {
|
||||
@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *bertModel) KV(t *Tokenizer) llm.KV {
|
||||
func (p *bertModel) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "bert"
|
||||
kv["bert.attention.causal"] = false
|
||||
@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
for _, t := range ts {
|
||||
if slices.Contains([]string{
|
||||
"embeddings.position_ids",
|
||||
@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -6,7 +6,7 @@ import (
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type gemmaModel struct {
|
||||
@ -23,7 +23,7 @@ type gemmaModel struct {
|
||||
|
||||
var _ ModelConverter = (*gemmaModel)(nil)
|
||||
|
||||
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
|
||||
func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma"
|
||||
kv["gemma.context_length"] = p.MaxPositionEmbeddings
|
||||
@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
for _, t := range ts {
|
||||
if strings.HasSuffix(t.Name(), "_norm.weight") {
|
||||
t.SetRepacker(p.addOne)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -1,8 +1,6 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
import "github.com/ollama/ollama/fs/ggml"
|
||||
|
||||
type gemma2Model struct {
|
||||
gemmaModel
|
||||
@ -11,7 +9,7 @@ type gemma2Model struct {
|
||||
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
|
||||
}
|
||||
|
||||
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
|
||||
func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "gemma2"
|
||||
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
|
||||
|
@ -6,7 +6,7 @@ import (
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type gemma2Adapter struct {
|
||||
@ -15,14 +15,14 @@ type gemma2Adapter struct {
|
||||
|
||||
var _ AdapterConverter = (*gemma2Adapter)(nil)
|
||||
|
||||
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
|
||||
func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "gemma2"
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type llamaModel struct {
|
||||
@ -46,7 +46,7 @@ type llamaModel struct {
|
||||
|
||||
var _ ModelConverter = (*llamaModel)(nil)
|
||||
|
||||
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
|
||||
func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.vocab_size"] = p.VocabSize
|
||||
@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
|
||||
if p.RopeScaling.factors != nil {
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: "rope_freqs.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
|
||||
@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type llamaAdapter struct {
|
||||
@ -18,7 +18,7 @@ type llamaAdapter struct {
|
||||
|
||||
var _ AdapterConverter = (*llamaAdapter)(nil)
|
||||
|
||||
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
|
||||
func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
|
||||
kv := p.AdapterParameters.KV()
|
||||
kv["general.architecture"] = "llama"
|
||||
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
|
||||
@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
for _, t := range ts {
|
||||
shape := t.Shape()
|
||||
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
|
||||
@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
|
||||
t.SetRepacker(p.repack)
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: shape,
|
||||
|
@ -6,7 +6,7 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type mixtralModel struct {
|
||||
@ -15,7 +15,7 @@ type mixtralModel struct {
|
||||
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
|
||||
}
|
||||
|
||||
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
|
||||
func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.llamaModel.KV(t)
|
||||
|
||||
if p.NumLocalExperts > 0 {
|
||||
@ -29,7 +29,7 @@ func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
oldnew := []string{
|
||||
"model.layers", "blk",
|
||||
"w1", "ffn_gate_exps",
|
||||
@ -56,10 +56,10 @@ func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
|
||||
return true
|
||||
})
|
||||
|
||||
var out []llm.Tensor
|
||||
var out []ggml.Tensor
|
||||
for n, e := range experts {
|
||||
// TODO(mxyng): sanity check experts
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: n,
|
||||
Kind: e[0].Kind(),
|
||||
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type phi3Model struct {
|
||||
@ -37,7 +37,7 @@ type phi3Model struct {
|
||||
|
||||
var _ ModelConverter = (*phi3Model)(nil)
|
||||
|
||||
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
|
||||
func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
|
||||
kv := p.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "phi3"
|
||||
kv["phi3.context_length"] = p.MaxPositionEmbeddings
|
||||
@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||
func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var addRopeFactors sync.Once
|
||||
|
||||
out := make([]llm.Tensor, 0, len(ts)+2)
|
||||
out := make([]ggml.Tensor, 0, len(ts)+2)
|
||||
for _, t := range ts {
|
||||
if strings.HasPrefix(t.Name(), "blk.0.") {
|
||||
addRopeFactors.Do(func() {
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: "rope_factors_long.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
|
||||
WriterTo: p.RopeScaling.LongFactor,
|
||||
}, llm.Tensor{
|
||||
}, ggml.Tensor{
|
||||
Name: "rope_factors_short.weight",
|
||||
Kind: 0,
|
||||
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
|
||||
@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||
})
|
||||
}
|
||||
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -1,6 +1,7 @@
|
||||
package convert
|
||||
|
||||
import "github.com/ollama/ollama/llm"
|
||||
import "github.com/ollama/ollama/fs/ggml"
|
||||
|
||||
|
||||
type qwen2Model struct {
|
||||
ModelParameters
|
||||
@ -21,7 +22,7 @@ type qwen2Model struct {
|
||||
|
||||
var _ ModelConverter = (*qwen2Model)(nil)
|
||||
|
||||
func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
|
||||
func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
|
||||
kv := q.ModelParameters.KV(t)
|
||||
kv["general.architecture"] = "qwen2"
|
||||
kv["qwen2.block_count"] = q.HiddenLayers
|
||||
@ -45,10 +46,10 @@ func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
|
||||
return kv
|
||||
}
|
||||
|
||||
func (q *qwen2Model) Tensors(ts []Tensor) []llm.Tensor {
|
||||
var out []llm.Tensor
|
||||
func (q *qwen2Model) Tensors(ts []Tensor) []ggml.Tensor {
|
||||
var out []ggml.Tensor
|
||||
for _, t := range ts {
|
||||
out = append(out, llm.Tensor{
|
||||
out = append(out, ggml.Tensor{
|
||||
Name: t.Name(),
|
||||
Kind: t.Kind(),
|
||||
Shape: t.Shape(),
|
||||
|
@ -20,7 +20,7 @@ import (
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
)
|
||||
|
||||
type tensorData struct {
|
||||
@ -29,7 +29,7 @@ type tensorData struct {
|
||||
Shape []int `json:"shape"`
|
||||
}
|
||||
|
||||
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
|
||||
func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
|
||||
t.Helper()
|
||||
|
||||
f, err := os.CreateTemp(t.TempDir(), "f16")
|
||||
@ -48,7 +48,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
|
||||
}
|
||||
t.Cleanup(func() { r.Close() })
|
||||
|
||||
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||
m, _, err := ggml.Decode(r, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -60,7 +60,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
|
||||
return r, m.KV(), m.Tensors()
|
||||
}
|
||||
|
||||
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
|
||||
func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tensors) map[string]string {
|
||||
actual := make(map[string]string)
|
||||
for k, v := range kv {
|
||||
if s, ok := v.(json.Marshaler); !ok {
|
||||
@ -75,7 +75,7 @@ func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tenso
|
||||
}
|
||||
}
|
||||
|
||||
for _, tensor := range tensors.Items {
|
||||
for _, tensor := range tensors.Items() {
|
||||
sha256sum := sha256.New()
|
||||
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
|
||||
if _, err := io.Copy(sha256sum, sr); err != nil {
|
||||
@ -331,7 +331,7 @@ func TestConvertAdapter(t *testing.T) {
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
m, _, err := llm.DecodeGGML(r, math.MaxInt)
|
||||
m, _, err := ggml.Decode(r, math.MaxInt)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -718,23 +718,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||
func LibraryDirs() []string {
|
||||
// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
|
||||
// This can be simplified once we no longer carry runners as payloads
|
||||
paths := []string{}
|
||||
appExe, err := os.Executable()
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
slog.Warn("failed to lookup executable path", "error", err)
|
||||
} else {
|
||||
appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
|
||||
if _, err := os.Stat(appRelative); err == nil {
|
||||
paths = append(paths, appRelative)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
rDir := runners.Locate()
|
||||
if err != nil {
|
||||
slog.Warn("unable to locate gpu dependency libraries", "error", err)
|
||||
} else {
|
||||
paths = append(paths, filepath.Dir(rDir))
|
||||
|
||||
lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
|
||||
if _, err := os.Stat(lib); err != nil {
|
||||
return nil
|
||||
}
|
||||
return paths
|
||||
|
||||
return []string{lib}
|
||||
}
|
||||
|
||||
func GetSystemInfo() SystemInfo {
|
||||
|
@ -1,15 +1,15 @@
|
||||
package llm
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ollama/ollama/util/bufioutil"
|
||||
"github.com/ollama/ollama/fs/util/bufioutil"
|
||||
)
|
||||
|
||||
type GGML struct {
|
||||
@ -19,145 +19,168 @@ type GGML struct {
|
||||
|
||||
type model interface {
|
||||
KV() KV
|
||||
Tensors() *Tensors
|
||||
Tensors() Tensors
|
||||
}
|
||||
|
||||
type KV map[string]any
|
||||
|
||||
func (kv KV) u64(key string) uint64 {
|
||||
switch v := kv[key].(type) {
|
||||
case uint64:
|
||||
return v
|
||||
case uint32:
|
||||
return uint64(v)
|
||||
case float64:
|
||||
return uint64(v)
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (kv KV) Architecture() string {
|
||||
if s, ok := kv["general.architecture"].(string); ok {
|
||||
return s
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
return kv.String("general.architecture", "unknown")
|
||||
}
|
||||
|
||||
func (kv KV) Kind() string {
|
||||
if s, ok := kv["general.type"].(string); ok {
|
||||
return s
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
return kv.String("general.type", "unknown")
|
||||
}
|
||||
|
||||
func (kv KV) ParameterCount() uint64 {
|
||||
return kv.u64("general.parameter_count")
|
||||
return keyValue[uint64](kv, "general.parameter_count")
|
||||
}
|
||||
|
||||
func (kv KV) FileType() fileType {
|
||||
if u64 := kv.u64("general.file_type"); u64 > 0 {
|
||||
return fileType(uint32(u64))
|
||||
if t := kv.Uint("general.file_type"); t > 0 {
|
||||
return fileType(t)
|
||||
}
|
||||
|
||||
return fileTypeUnknown
|
||||
}
|
||||
|
||||
func (kv KV) BlockCount() uint64 {
|
||||
return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
|
||||
return uint64(kv.Uint("block_count"))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingLength() uint64 {
|
||||
return uint64(kv.Uint("embedding_length"))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCount() uint64 {
|
||||
return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
|
||||
return uint64(kv.Uint("attention.head_count"))
|
||||
}
|
||||
|
||||
func (kv KV) HeadCountKV() uint64 {
|
||||
if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
|
||||
return headCountKV
|
||||
}
|
||||
|
||||
return 1
|
||||
return uint64(kv.Uint("attention.head_count_kv", 1))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCount() uint64 {
|
||||
if heads := kv.HeadCount(); heads > 0 {
|
||||
return kv.EmbeddingLength() / kv.HeadCount()
|
||||
return kv.EmbeddingLength() / heads
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountK() uint64 {
|
||||
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
|
||||
return k
|
||||
}
|
||||
|
||||
return kv.EmbeddingHeadCount()
|
||||
return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCount())))
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountV() uint64 {
|
||||
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
|
||||
return v
|
||||
}
|
||||
|
||||
return kv.EmbeddingHeadCount()
|
||||
return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCount())))
|
||||
}
|
||||
|
||||
func (kv KV) GQA() uint64 {
|
||||
return kv.HeadCount() / kv.HeadCountKV()
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingLength() uint64 {
|
||||
return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
|
||||
}
|
||||
|
||||
func (kv KV) ContextLength() uint64 {
|
||||
return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
|
||||
return uint64(kv.Uint("context_length"))
|
||||
}
|
||||
|
||||
func (kv KV) ChatTemplate() string {
|
||||
s, _ := kv["tokenizer.chat_template"].(string)
|
||||
return kv.String("tokenizer.chat_template")
|
||||
}
|
||||
|
||||
func (kv KV) String(key string, defaultValue ...string) string {
|
||||
return keyValue(kv, key, append(defaultValue, "")...)
|
||||
}
|
||||
|
||||
func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
|
||||
return keyValue(kv, key, append(defaultValue, 0)...)
|
||||
}
|
||||
|
||||
func (kv KV) Float(key string, defaultValue ...float32) float32 {
|
||||
return keyValue(kv, key, append(defaultValue, 0)...)
|
||||
}
|
||||
|
||||
func (kv KV) Strings(key string, defaultValue ...[]string) []string {
|
||||
r := keyValue(kv, key, &array{})
|
||||
s := make([]string, r.size)
|
||||
for i := range r.size {
|
||||
s[i] = r.values[i].(string)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
type Tensors struct {
|
||||
Items []*Tensor
|
||||
Offset uint64
|
||||
func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
|
||||
r := keyValue(kv, key, &array{})
|
||||
s := make([]uint32, r.size)
|
||||
for i := range r.size {
|
||||
s[i] = uint32(r.values[i].(int32))
|
||||
}
|
||||
|
||||
layers map[string]Layer
|
||||
layersOnce sync.Once
|
||||
return s
|
||||
}
|
||||
|
||||
func (ts *Tensors) Layers() map[string]Layer {
|
||||
ts.layersOnce.Do(func() {
|
||||
ts.layers = make(map[string]Layer)
|
||||
for _, t := range ts.Items {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
|
||||
if len(parts) > index+2 {
|
||||
// blk and mm should have a number after them, join it
|
||||
parts = append(
|
||||
[]string{strings.Join(parts[:index+2], ".")},
|
||||
parts[index+2:]...)
|
||||
}
|
||||
}
|
||||
func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
|
||||
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
|
||||
key = kv.Architecture() + "." + key
|
||||
}
|
||||
|
||||
if _, ok := ts.layers[parts[0]]; !ok {
|
||||
ts.layers[parts[0]] = make(Layer)
|
||||
}
|
||||
if val, ok := kv[key]; ok {
|
||||
return val.(T)
|
||||
}
|
||||
|
||||
ts.layers[parts[0]][strings.Join(parts[1:], ".")] = t
|
||||
slog.Warn("key not found", "key", key, "default", defaultValue[0])
|
||||
return defaultValue[0]
|
||||
}
|
||||
|
||||
type Tensors struct {
|
||||
items []*Tensor
|
||||
Offset uint64
|
||||
}
|
||||
|
||||
func (s Tensors) Items(prefix ...string) []*Tensor {
|
||||
if len(prefix) == 0 {
|
||||
return s.items
|
||||
}
|
||||
|
||||
var items []*Tensor
|
||||
for _, t := range s.items {
|
||||
if strings.HasPrefix(t.Name, prefix[0]) {
|
||||
items = append(items, t)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return ts.layers
|
||||
return items
|
||||
}
|
||||
|
||||
func (ts Tensors) Layers() map[string]Layer {
|
||||
layers := make(map[string]Layer)
|
||||
for _, t := range ts.items {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if i := slices.Index(parts, "blk"); i > 0 {
|
||||
parts = append([]string{
|
||||
strings.Join(parts[:i], "."),
|
||||
strings.Join(parts[i:i+2], "."),
|
||||
}, parts[i+2:]...)
|
||||
} else if i == 0 {
|
||||
parts = append([]string{
|
||||
strings.Join(parts[i:i+2], "."),
|
||||
}, parts[i+2:]...)
|
||||
}
|
||||
|
||||
if _, ok := layers[parts[0]]; !ok {
|
||||
layers[parts[0]] = make(Layer)
|
||||
}
|
||||
|
||||
layers[parts[0]][strings.Join(parts[1:], ".")] = t
|
||||
}
|
||||
|
||||
return layers
|
||||
}
|
||||
|
||||
type Layer map[string]*Tensor
|
||||
|
||||
func (l Layer) size() (size uint64) {
|
||||
func (l Layer) Size() (size uint64) {
|
||||
for _, t := range l {
|
||||
size += t.Size()
|
||||
}
|
||||
@ -255,8 +278,6 @@ func (t Tensor) typeSize() uint64 {
|
||||
return 8
|
||||
case 29: // IQ1_M
|
||||
return blockSize/8 + blockSize/16 + blockSize/32
|
||||
case 30: // BF16
|
||||
return 2
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
@ -295,7 +316,7 @@ const (
|
||||
|
||||
var ErrUnsupportedFormat = errors.New("unsupported model format")
|
||||
|
||||
func DetectGGMLType(b []byte) string {
|
||||
func DetectContentType(b []byte) string {
|
||||
switch binary.LittleEndian.Uint32(b[:4]) {
|
||||
case FILE_MAGIC_GGML:
|
||||
return "ggml"
|
||||
@ -312,12 +333,12 @@ func DetectGGMLType(b []byte) string {
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeGGML decodes a GGML model from the given reader.
|
||||
// Decode decodes a GGML model from the given reader.
|
||||
//
|
||||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
|
||||
// the maxArraySize is negative, all arrays are collected.
|
||||
func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
if maxArraySize == 0 {
|
||||
maxArraySize = 1024
|
||||
}
|
||||
@ -331,10 +352,6 @@ func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
|
||||
var c container
|
||||
switch magic {
|
||||
case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
|
||||
return nil, 0, ErrUnsupportedFormat
|
||||
case FILE_MAGIC_GGLA:
|
||||
c = &containerGGLA{}
|
||||
case FILE_MAGIC_GGUF_LE:
|
||||
c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
@ -530,21 +547,20 @@ func (llm GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partia
|
||||
}
|
||||
|
||||
// SupportsKVCacheType checks if the requested cache type is supported
|
||||
func (ggml GGML) SupportsKVCacheType(cacheType string) bool {
|
||||
validKVCacheTypes := []string{"f16", "q8_0", "q4_0"}
|
||||
return slices.Contains(validKVCacheTypes, cacheType)
|
||||
func (llm GGML) SupportsKVCacheType(cacheType string) bool {
|
||||
return slices.Contains([]string{"f16", "q8_0", "q4_0"}, cacheType)
|
||||
}
|
||||
|
||||
// SupportsFlashAttention checks if the model supports flash attention
|
||||
func (ggml GGML) SupportsFlashAttention() bool {
|
||||
_, isEmbedding := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]
|
||||
func (llm GGML) SupportsFlashAttention() bool {
|
||||
_, isEmbedding := llm.KV()[fmt.Sprintf("%s.pooling_type", llm.KV().Architecture())]
|
||||
if isEmbedding {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check head counts match and are non-zero
|
||||
headCountK := ggml.KV().EmbeddingHeadCountK()
|
||||
headCountV := ggml.KV().EmbeddingHeadCountV()
|
||||
headCountK := llm.KV().EmbeddingHeadCountK()
|
||||
headCountV := llm.KV().EmbeddingHeadCountV()
|
||||
return headCountK != 0 && headCountV != 0 && headCountK == headCountV
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package llm
|
||||
package ggml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@ -8,10 +8,9 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
@ -110,9 +109,9 @@ func (llm *gguf) KV() KV {
|
||||
return llm.kv
|
||||
}
|
||||
|
||||
func (llm *gguf) Tensors() *Tensors {
|
||||
return &Tensors{
|
||||
Items: llm.tensors,
|
||||
func (llm *gguf) Tensors() Tensors {
|
||||
return Tensors{
|
||||
items: llm.tensors,
|
||||
Offset: llm.tensorOffset,
|
||||
}
|
||||
}
|
||||
@ -523,7 +522,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
||||
return err
|
||||
}
|
||||
|
||||
keys := maps.Keys(kv)
|
||||
keys := slices.Collect(maps.Keys(kv))
|
||||
slices.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
@ -1,4 +1,4 @@
|
||||
package llm
|
||||
package ggml
|
||||
|
||||
import "fmt"
|
||||
|
||||
@ -32,10 +32,9 @@ const (
|
||||
fileTypeIQ1_S
|
||||
fileTypeIQ4_NL
|
||||
fileTypeIQ3_S
|
||||
fileTypeIQ3_M
|
||||
fileTypeIQ2_S
|
||||
fileTypeIQ2_M
|
||||
fileTypeIQ4_XS
|
||||
fileTypeIQ2_M
|
||||
fileTypeIQ1_M
|
||||
fileTypeBF16
|
||||
|
||||
@ -94,8 +93,6 @@ func ParseFileType(s string) (fileType, error) {
|
||||
return fileTypeIQ4_NL, nil
|
||||
case "IQ3_S":
|
||||
return fileTypeIQ3_S, nil
|
||||
case "IQ3_M":
|
||||
return fileTypeIQ3_M, nil
|
||||
case "IQ2_S":
|
||||
return fileTypeIQ2_S, nil
|
||||
case "IQ4_XS":
|
||||
@ -163,8 +160,6 @@ func (t fileType) String() string {
|
||||
return "IQ4_NL"
|
||||
case fileTypeIQ3_S:
|
||||
return "IQ3_S"
|
||||
case fileTypeIQ3_M:
|
||||
return "IQ3_M"
|
||||
case fileTypeIQ2_S:
|
||||
return "IQ2_S"
|
||||
case fileTypeIQ4_XS:
|
6
go.mod
6
go.mod
@ -17,12 +17,15 @@ require (
|
||||
require (
|
||||
github.com/agnivade/levenshtein v1.1.1
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
||||
github.com/dlclark/regexp2 v1.11.4
|
||||
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
||||
github.com/google/go-cmp v0.6.0
|
||||
github.com/mattn/go-runewidth v0.0.14
|
||||
github.com/nlpodyssey/gopickle v0.3.0
|
||||
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
|
||||
golang.org/x/image v0.22.0
|
||||
golang.org/x/tools v0.28.0
|
||||
gonum.org/v1/gonum v0.15.0
|
||||
)
|
||||
|
||||
require (
|
||||
@ -42,7 +45,6 @@ require (
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gonum.org/v1/gonum v0.15.0 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
gorgonia.org/vecf64 v0.9.0 // indirect
|
||||
)
|
||||
@ -70,7 +72,7 @@ require (
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
golang.org/x/crypto v0.31.0
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
|
||||
golang.org/x/net v0.25.0 // indirect
|
||||
golang.org/x/net v0.32.0 // indirect
|
||||
golang.org/x/sys v0.28.0
|
||||
golang.org/x/term v0.27.0
|
||||
golang.org/x/text v0.21.0
|
||||
|
8
go.sum
8
go.sum
@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
|
||||
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
||||
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
|
||||
github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
|
||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
@ -255,8 +257,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
|
||||
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
|
||||
golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@ -307,6 +309,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
|
||||
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
22
grammar/bench_test.go
Normal file
22
grammar/bench_test.go
Normal file
@ -0,0 +1,22 @@
|
||||
//go:build go1.24
|
||||
|
||||
package grammar
|
||||
|
||||
import "testing"
|
||||
|
||||
func BenchmarkFromSchema(b *testing.B) {
|
||||
for tt := range testCases(b) {
|
||||
b.Run("", func(b *testing.B) {
|
||||
s := []byte(tt.schema)
|
||||
|
||||
b.ReportAllocs()
|
||||
for b.Loop() {
|
||||
_, err := FromSchema(nil, s)
|
||||
if err != nil {
|
||||
b.Fatalf("GrammarFromSchema: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
227
grammar/grammar.go
Normal file
227
grammar/grammar.go
Normal file
@ -0,0 +1,227 @@
|
||||
package grammar
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"iter"
|
||||
"strconv"
|
||||
|
||||
"github.com/ollama/ollama/grammar/jsonschema"
|
||||
)
|
||||
|
||||
const jsonTerms = `
|
||||
# Unicode
|
||||
#
|
||||
# Unicode characters can be specified directly in the grammar, for example
|
||||
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
|
||||
# (\UXXXXXXXX).
|
||||
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
|
||||
|
||||
# JSON grammar from RFC 7159
|
||||
null ::= "null"
|
||||
object ::= "{" (kv ("," kv)*)? "}"
|
||||
array ::= "[" (value ("," value)*)? "]"
|
||||
kv ::= string ":" value
|
||||
integer ::= "0" | [1-9] [0-9]*
|
||||
number ::= "-"? integer frac? exp?
|
||||
frac ::= "." [0-9]+
|
||||
exp ::= ("e" | "E") ("+" | "-") [0-9]+
|
||||
string ::= "\"" char* "\""
|
||||
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
|
||||
char ::= [^"\\] | escape
|
||||
space ::= (" " | "\t" | "\n" | "\r")*
|
||||
hex ::= [0-9] | [a-f] | [A-F]
|
||||
boolean ::= "true" | "false"
|
||||
value ::= object | array | string | number | boolean | "null"
|
||||
|
||||
# User-defined
|
||||
`
|
||||
|
||||
// FromSchema generates a grammar from a JSON schema.
|
||||
func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
|
||||
var s *jsonschema.Schema
|
||||
if err := json.Unmarshal(jsonSchema, &s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var g builder
|
||||
|
||||
// "root" is the only rule that is guaranteed to exist, so we start
|
||||
// with its length for padding, and then adjust it as we go.
|
||||
g.pad = len("root")
|
||||
for id := range dependencies("root", s) {
|
||||
g.pad = max(g.pad, len(id))
|
||||
}
|
||||
|
||||
g.b.WriteString(jsonTerms)
|
||||
|
||||
ids := make(map[*jsonschema.Schema]string)
|
||||
for id, s := range dependencies("root", s) {
|
||||
ids[s] = id
|
||||
g.define(id)
|
||||
if err := fromSchema(&g, ids, s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
g.define("root")
|
||||
if err := fromSchema(&g, ids, s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
g.define("") // finalize the last rule
|
||||
return g.b.Bytes(), nil
|
||||
}
|
||||
|
||||
func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
|
||||
switch typ := s.EffectiveType(); typ {
|
||||
case "array":
|
||||
if len(s.PrefixItems) == 0 && s.Items == nil {
|
||||
g.u("array")
|
||||
} else {
|
||||
g.q("[")
|
||||
for i, s := range s.PrefixItems {
|
||||
if i > 0 {
|
||||
g.q(",")
|
||||
}
|
||||
g.u(ids[s])
|
||||
}
|
||||
if s.Items != nil {
|
||||
g.u("(")
|
||||
if len(s.PrefixItems) > 0 {
|
||||
g.q(",")
|
||||
}
|
||||
g.u(ids[s.Items])
|
||||
g.u(")*")
|
||||
}
|
||||
g.q("]")
|
||||
}
|
||||
case "object":
|
||||
if len(s.Properties) == 0 {
|
||||
g.u("object")
|
||||
} else {
|
||||
g.q("{")
|
||||
for i, p := range s.Properties {
|
||||
name := ids[p]
|
||||
if i > 0 {
|
||||
g.q(",")
|
||||
}
|
||||
g.q(p.Name)
|
||||
g.q(":")
|
||||
g.u(name)
|
||||
}
|
||||
g.q("}")
|
||||
}
|
||||
case "number":
|
||||
buildConstrainedNumber(g, s)
|
||||
case "string":
|
||||
if len(s.Enum) == 0 {
|
||||
g.u("string")
|
||||
} else {
|
||||
g.u("(")
|
||||
for i, e := range s.Enum {
|
||||
if i > 0 {
|
||||
g.q("|")
|
||||
}
|
||||
g.q(string(e))
|
||||
}
|
||||
g.u(")")
|
||||
}
|
||||
case "boolean", "value", "null", "integer":
|
||||
g.u(typ)
|
||||
default:
|
||||
return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// dependencies returns a sequence of all child dependencies of the schema in
|
||||
// post-order.
|
||||
//
|
||||
// The first value is the id/pointer to the dependency, and the second value
|
||||
// is the schema.
|
||||
func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
|
||||
return func(yield func(string, *jsonschema.Schema) bool) {
|
||||
for i, p := range s.Properties {
|
||||
id := fmt.Sprintf("%s_%d", id, i)
|
||||
for did, d := range dependencies(id, p) {
|
||||
if !yield(did, d) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !yield(id, p) {
|
||||
return
|
||||
}
|
||||
}
|
||||
for i, p := range s.PrefixItems {
|
||||
id := fmt.Sprintf("tuple_%d", i)
|
||||
for did, d := range dependencies(id, p) {
|
||||
id := fmt.Sprintf("%s_%s", id, did)
|
||||
if !yield(id, d) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !yield(id, p) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if s.Items != nil {
|
||||
id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
|
||||
for did, d := range dependencies(id, s.Items) {
|
||||
if !yield(did, d) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !yield(id, s.Items) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
b bytes.Buffer
|
||||
pad int
|
||||
rules int
|
||||
items int
|
||||
}
|
||||
|
||||
// define terminates the current rule, if any, and then either starts a new
|
||||
// rule or does nothing else if the name is empty.
|
||||
func (b *builder) define(name string) {
|
||||
if b.rules > 0 {
|
||||
b.b.WriteString(";\n")
|
||||
}
|
||||
if name == "" {
|
||||
return
|
||||
}
|
||||
fmt.Fprintf(&b.b, "% -*s", b.pad, name)
|
||||
b.b.WriteString(" ::=")
|
||||
b.rules++
|
||||
b.items = 0
|
||||
}
|
||||
|
||||
// quote appends a terminal to the current rule.
|
||||
func (b *builder) q(s string) {
|
||||
if b.items > 0 {
|
||||
b.b.WriteString(" ")
|
||||
}
|
||||
b.b.WriteString(" ")
|
||||
b.b.WriteString(strconv.Quote(s))
|
||||
}
|
||||
|
||||
// u appends a non-terminal to the current rule.
|
||||
func (b *builder) u(s string) {
|
||||
if b.items > 0 {
|
||||
b.b.WriteString(" ")
|
||||
}
|
||||
b.b.WriteString(" ")
|
||||
b.b.WriteString(s)
|
||||
}
|
||||
|
||||
func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
|
||||
if s.Minimum == 0 && s.Maximum == 0 {
|
||||
b.u("TODO")
|
||||
} else {
|
||||
b.u("number")
|
||||
}
|
||||
}
|
75
grammar/grammar_test.go
Normal file
75
grammar/grammar_test.go
Normal file
@ -0,0 +1,75 @@
|
||||
package grammar
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"cmp"
|
||||
"iter"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
_ "embed"
|
||||
|
||||
"github.com/ollama/ollama/grammar/internal/diff"
|
||||
)
|
||||
|
||||
func TestFromSchema(t *testing.T) {
|
||||
for tt := range testCases(t) {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
g, err := FromSchema(nil, []byte(tt.schema))
|
||||
if err != nil {
|
||||
t.Fatalf("FromSchema: %v", err)
|
||||
}
|
||||
got := string(g)
|
||||
got = strings.TrimPrefix(got, jsonTerms)
|
||||
if got != tt.want {
|
||||
t.Logf("schema:\n%s", tt.schema)
|
||||
t.Fatal(string(diff.Diff("got", []byte(got), "want", []byte(tt.want))))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type testCase struct {
|
||||
name string
|
||||
schema string
|
||||
want string
|
||||
}
|
||||
|
||||
//go:embed testdata/schemas.txt
|
||||
var tests string
|
||||
|
||||
func testCases(t testing.TB) iter.Seq[testCase] {
|
||||
t.Helper()
|
||||
return func(yield func(testCase) bool) {
|
||||
t.Helper()
|
||||
sc := bufio.NewScanner(strings.NewReader(tests))
|
||||
name := ""
|
||||
for sc.Scan() {
|
||||
line := strings.TrimSpace(sc.Text())
|
||||
if line == "" {
|
||||
name = ""
|
||||
continue
|
||||
}
|
||||
if line[0] == '#' {
|
||||
name = cmp.Or(name, strings.TrimSpace(line[1:]))
|
||||
continue
|
||||
}
|
||||
s := sc.Text()
|
||||
g := ""
|
||||
for sc.Scan() {
|
||||
line = strings.TrimSpace(sc.Text())
|
||||
if line == "" || line[0] == '#' {
|
||||
break
|
||||
}
|
||||
g += sc.Text() + "\n"
|
||||
}
|
||||
if !yield(testCase{name, s, g}) {
|
||||
return
|
||||
}
|
||||
name = strings.TrimSpace(strings.TrimPrefix(line, "#"))
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
t.Fatalf("error reading tests: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
261
grammar/internal/diff/diff.go
Normal file
261
grammar/internal/diff/diff.go
Normal file
@ -0,0 +1,261 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A pair is a pair of values tracked for both the x and y side of a diff.
|
||||
// It is typically a pair of line indexes.
|
||||
type pair struct{ x, y int }
|
||||
|
||||
// Diff returns an anchored diff of the two texts old and new
|
||||
// in the “unified diff” format. If old and new are identical,
|
||||
// Diff returns a nil slice (no output).
|
||||
//
|
||||
// Unix diff implementations typically look for a diff with
|
||||
// the smallest number of lines inserted and removed,
|
||||
// which can in the worst case take time quadratic in the
|
||||
// number of lines in the texts. As a result, many implementations
|
||||
// either can be made to run for a long time or cut off the search
|
||||
// after a predetermined amount of work.
|
||||
//
|
||||
// In contrast, this implementation looks for a diff with the
|
||||
// smallest number of “unique” lines inserted and removed,
|
||||
// where unique means a line that appears just once in both old and new.
|
||||
// We call this an “anchored diff” because the unique lines anchor
|
||||
// the chosen matching regions. An anchored diff is usually clearer
|
||||
// than a standard diff, because the algorithm does not try to
|
||||
// reuse unrelated blank lines or closing braces.
|
||||
// The algorithm also guarantees to run in O(n log n) time
|
||||
// instead of the standard O(n²) time.
|
||||
//
|
||||
// Some systems call this approach a “patience diff,” named for
|
||||
// the “patience sorting” algorithm, itself named for a solitaire card game.
|
||||
// We avoid that name for two reasons. First, the name has been used
|
||||
// for a few different variants of the algorithm, so it is imprecise.
|
||||
// Second, the name is frequently interpreted as meaning that you have
|
||||
// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
|
||||
// when in fact the algorithm is faster than the standard one.
|
||||
func Diff(oldName string, old []byte, newName string, new []byte) []byte {
|
||||
if bytes.Equal(old, new) {
|
||||
return nil
|
||||
}
|
||||
x := lines(old)
|
||||
y := lines(new)
|
||||
|
||||
// Print diff header.
|
||||
var out bytes.Buffer
|
||||
fmt.Fprintf(&out, "diff %s %s\n", oldName, newName)
|
||||
fmt.Fprintf(&out, "--- %s\n", oldName)
|
||||
fmt.Fprintf(&out, "+++ %s\n", newName)
|
||||
|
||||
// Loop over matches to consider,
|
||||
// expanding each match to include surrounding lines,
|
||||
// and then printing diff chunks.
|
||||
// To avoid setup/teardown cases outside the loop,
|
||||
// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
|
||||
// in the sequence of matches.
|
||||
var (
|
||||
done pair // printed up to x[:done.x] and y[:done.y]
|
||||
chunk pair // start lines of current chunk
|
||||
count pair // number of lines from each side in current chunk
|
||||
ctext []string // lines for current chunk
|
||||
)
|
||||
for _, m := range tgs(x, y) {
|
||||
if m.x < done.x {
|
||||
// Already handled scanning forward from earlier match.
|
||||
continue
|
||||
}
|
||||
|
||||
// Expand matching lines as far as possible,
|
||||
// establishing that x[start.x:end.x] == y[start.y:end.y].
|
||||
// Note that on the first (or last) iteration we may (or definitely do)
|
||||
// have an empty match: start.x==end.x and start.y==end.y.
|
||||
start := m
|
||||
for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] {
|
||||
start.x--
|
||||
start.y--
|
||||
}
|
||||
end := m
|
||||
for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] {
|
||||
end.x++
|
||||
end.y++
|
||||
}
|
||||
|
||||
// Emit the mismatched lines before start into this chunk.
|
||||
// (No effect on first sentinel iteration, when start = {0,0}.)
|
||||
for _, s := range x[done.x:start.x] {
|
||||
ctext = append(ctext, "-"+s)
|
||||
count.x++
|
||||
}
|
||||
for _, s := range y[done.y:start.y] {
|
||||
ctext = append(ctext, "+"+s)
|
||||
count.y++
|
||||
}
|
||||
|
||||
// If we're not at EOF and have too few common lines,
|
||||
// the chunk includes all the common lines and continues.
|
||||
const C = 3 // number of context lines
|
||||
if (end.x < len(x) || end.y < len(y)) &&
|
||||
(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) {
|
||||
for _, s := range x[start.x:end.x] {
|
||||
ctext = append(ctext, " "+s)
|
||||
count.x++
|
||||
count.y++
|
||||
}
|
||||
done = end
|
||||
continue
|
||||
}
|
||||
|
||||
// End chunk with common lines for context.
|
||||
if len(ctext) > 0 {
|
||||
n := end.x - start.x
|
||||
if n > C {
|
||||
n = C
|
||||
}
|
||||
for _, s := range x[start.x : start.x+n] {
|
||||
ctext = append(ctext, " "+s)
|
||||
count.x++
|
||||
count.y++
|
||||
}
|
||||
done = pair{start.x + n, start.y + n}
|
||||
|
||||
// Format and emit chunk.
|
||||
// Convert line numbers to 1-indexed.
|
||||
// Special case: empty file shows up as 0,0 not 1,0.
|
||||
if count.x > 0 {
|
||||
chunk.x++
|
||||
}
|
||||
if count.y > 0 {
|
||||
chunk.y++
|
||||
}
|
||||
fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
|
||||
for _, s := range ctext {
|
||||
out.WriteString(s)
|
||||
}
|
||||
count.x = 0
|
||||
count.y = 0
|
||||
ctext = ctext[:0]
|
||||
}
|
||||
|
||||
// If we reached EOF, we're done.
|
||||
if end.x >= len(x) && end.y >= len(y) {
|
||||
break
|
||||
}
|
||||
|
||||
// Otherwise start a new chunk.
|
||||
chunk = pair{end.x - C, end.y - C}
|
||||
for _, s := range x[chunk.x:end.x] {
|
||||
ctext = append(ctext, " "+s)
|
||||
count.x++
|
||||
count.y++
|
||||
}
|
||||
done = end
|
||||
}
|
||||
|
||||
return out.Bytes()
|
||||
}
|
||||
|
||||
// lines returns the lines in the file x, including newlines.
|
||||
// If the file does not end in a newline, one is supplied
|
||||
// along with a warning about the missing newline.
|
||||
func lines(x []byte) []string {
|
||||
l := strings.SplitAfter(string(x), "\n")
|
||||
if l[len(l)-1] == "" {
|
||||
l = l[:len(l)-1]
|
||||
} else {
|
||||
// Treat last line as having a message about the missing newline attached,
|
||||
// using the same text as BSD/GNU diff (including the leading backslash).
|
||||
l[len(l)-1] += "\n\\ No newline at end of file\n"
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
// tgs returns the pairs of indexes of the longest common subsequence
|
||||
// of unique lines in x and y, where a unique line is one that appears
|
||||
// once in x and once in y.
|
||||
//
|
||||
// The longest common subsequence algorithm is as described in
|
||||
// Thomas G. Szymanski, “A Special Case of the Maximal Common
|
||||
// Subsequence Problem,” Princeton TR #170 (January 1975),
|
||||
// available at https://research.swtch.com/tgs170.pdf.
|
||||
func tgs(x, y []string) []pair {
|
||||
// Count the number of times each string appears in a and b.
|
||||
// We only care about 0, 1, many, counted as 0, -1, -2
|
||||
// for the x side and 0, -4, -8 for the y side.
|
||||
// Using negative numbers now lets us distinguish positive line numbers later.
|
||||
m := make(map[string]int)
|
||||
for _, s := range x {
|
||||
if c := m[s]; c > -2 {
|
||||
m[s] = c - 1
|
||||
}
|
||||
}
|
||||
for _, s := range y {
|
||||
if c := m[s]; c > -8 {
|
||||
m[s] = c - 4
|
||||
}
|
||||
}
|
||||
|
||||
// Now unique strings can be identified by m[s] = -1+-4.
|
||||
//
|
||||
// Gather the indexes of those strings in x and y, building:
|
||||
// xi[i] = increasing indexes of unique strings in x.
|
||||
// yi[i] = increasing indexes of unique strings in y.
|
||||
// inv[i] = index j such that x[xi[i]] = y[yi[j]].
|
||||
var xi, yi, inv []int
|
||||
for i, s := range y {
|
||||
if m[s] == -1+-4 {
|
||||
m[s] = len(yi)
|
||||
yi = append(yi, i)
|
||||
}
|
||||
}
|
||||
for i, s := range x {
|
||||
if j, ok := m[s]; ok && j >= 0 {
|
||||
xi = append(xi, i)
|
||||
inv = append(inv, j)
|
||||
}
|
||||
}
|
||||
|
||||
// Apply Algorithm A from Szymanski's paper.
|
||||
// In those terms, A = J = inv and B = [0, n).
|
||||
// We add sentinel pairs {0,0}, and {len(x),len(y)}
|
||||
// to the returned sequence, to help the processing loop.
|
||||
J := inv
|
||||
n := len(xi)
|
||||
T := make([]int, n)
|
||||
L := make([]int, n)
|
||||
for i := range T {
|
||||
T[i] = n + 1
|
||||
}
|
||||
for i := range n {
|
||||
k := sort.Search(n, func(k int) bool {
|
||||
return T[k] >= J[i]
|
||||
})
|
||||
T[k] = J[i]
|
||||
L[i] = k + 1
|
||||
}
|
||||
k := 0
|
||||
for _, v := range L {
|
||||
if k < v {
|
||||
k = v
|
||||
}
|
||||
}
|
||||
seq := make([]pair, 2+k)
|
||||
seq[1+k] = pair{len(x), len(y)} // sentinel at end
|
||||
lastj := n
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
if L[i] == k && J[i] < lastj {
|
||||
seq[k] = pair{xi[i], yi[J[i]]}
|
||||
k--
|
||||
}
|
||||
}
|
||||
seq[0] = pair{0, 0} // sentinel at start
|
||||
return seq
|
||||
}
|
44
grammar/internal/diff/diff_test.go
Normal file
44
grammar/internal/diff/diff_test.go
Normal file
@ -0,0 +1,44 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/txtar"
|
||||
)
|
||||
|
||||
func clean(text []byte) []byte {
|
||||
text = bytes.ReplaceAll(text, []byte("$\n"), []byte("\n"))
|
||||
text = bytes.TrimSuffix(text, []byte("^D\n"))
|
||||
return text
|
||||
}
|
||||
|
||||
func Test(t *testing.T) {
|
||||
files, _ := filepath.Glob("testdata/*.txt")
|
||||
if len(files) == 0 {
|
||||
t.Fatalf("no testdata")
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
t.Run(filepath.Base(file), func(t *testing.T) {
|
||||
a, err := txtar.ParseFile(file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(a.Files) != 3 || a.Files[2].Name != "diff" {
|
||||
t.Fatalf("%s: want three files, third named \"diff\"", file)
|
||||
}
|
||||
diffs := Diff(a.Files[0].Name, clean(a.Files[0].Data), a.Files[1].Name, clean(a.Files[1].Data))
|
||||
want := clean(a.Files[2].Data)
|
||||
if !bytes.Equal(diffs, want) {
|
||||
t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file,
|
||||
diffs, want, Diff("have", diffs, "want", want))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
13
grammar/internal/diff/testdata/allnew.txt
vendored
Normal file
13
grammar/internal/diff/testdata/allnew.txt
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
-- old --
|
||||
-- new --
|
||||
a
|
||||
b
|
||||
c
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -0,0 +1,3 @@
|
||||
+a
|
||||
+b
|
||||
+c
|
13
grammar/internal/diff/testdata/allold.txt
vendored
Normal file
13
grammar/internal/diff/testdata/allold.txt
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c
|
||||
-- new --
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,3 +0,0 @@
|
||||
-a
|
||||
-b
|
||||
-c
|
35
grammar/internal/diff/testdata/basic.txt
vendored
Normal file
35
grammar/internal/diff/testdata/basic.txt
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.”
|
||||
https://www.cs.dartmouth.edu/~doug/diff.pdf
|
||||
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
f
|
||||
g
|
||||
-- new --
|
||||
w
|
||||
a
|
||||
b
|
||||
x
|
||||
y
|
||||
z
|
||||
e
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,7 +1,7 @@
|
||||
+w
|
||||
a
|
||||
b
|
||||
-c
|
||||
-d
|
||||
+x
|
||||
+y
|
||||
+z
|
||||
e
|
||||
-f
|
||||
-g
|
40
grammar/internal/diff/testdata/dups.txt
vendored
Normal file
40
grammar/internal/diff/testdata/dups.txt
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
-- old --
|
||||
a
|
||||
|
||||
b
|
||||
|
||||
c
|
||||
|
||||
d
|
||||
|
||||
e
|
||||
|
||||
f
|
||||
-- new --
|
||||
a
|
||||
|
||||
B
|
||||
|
||||
C
|
||||
|
||||
d
|
||||
|
||||
e
|
||||
|
||||
f
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,8 +1,8 @@
|
||||
a
|
||||
$
|
||||
-b
|
||||
-
|
||||
-c
|
||||
+B
|
||||
+
|
||||
+C
|
||||
$
|
||||
d
|
||||
$
|
38
grammar/internal/diff/testdata/end.txt
vendored
Normal file
38
grammar/internal/diff/testdata/end.txt
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
-- old --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
eight
|
||||
nine
|
||||
ten
|
||||
eleven
|
||||
-- new --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -5,7 +5,6 @@
|
||||
5
|
||||
6
|
||||
7
|
||||
-eight
|
||||
-nine
|
||||
-ten
|
||||
-eleven
|
||||
+8
|
||||
+9
|
||||
+10
|
9
grammar/internal/diff/testdata/eof.txt
vendored
Normal file
9
grammar/internal/diff/testdata/eof.txt
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c^D
|
||||
-- new --
|
||||
a
|
||||
b
|
||||
c^D
|
||||
-- diff --
|
18
grammar/internal/diff/testdata/eof1.txt
vendored
Normal file
18
grammar/internal/diff/testdata/eof1.txt
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c
|
||||
-- new --
|
||||
a
|
||||
b
|
||||
c^D
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,3 +1,3 @@
|
||||
a
|
||||
b
|
||||
-c
|
||||
+c
|
||||
\ No newline at end of file
|
18
grammar/internal/diff/testdata/eof2.txt
vendored
Normal file
18
grammar/internal/diff/testdata/eof2.txt
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c^D
|
||||
-- new --
|
||||
a
|
||||
b
|
||||
c
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,3 +1,3 @@
|
||||
a
|
||||
b
|
||||
-c
|
||||
\ No newline at end of file
|
||||
+c
|
62
grammar/internal/diff/testdata/long.txt
vendored
Normal file
62
grammar/internal/diff/testdata/long.txt
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
-- old --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
14½
|
||||
15
|
||||
16
|
||||
17
|
||||
18
|
||||
19
|
||||
20
|
||||
-- new --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
17
|
||||
18
|
||||
19
|
||||
20
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -4,7 +4,6 @@
|
||||
4
|
||||
5
|
||||
6
|
||||
-7
|
||||
8
|
||||
9
|
||||
10
|
||||
@@ -12,9 +11,6 @@
|
||||
12
|
||||
13
|
||||
14
|
||||
-14½
|
||||
-15
|
||||
-16
|
||||
17
|
||||
18
|
||||
19
|
5
grammar/internal/diff/testdata/same.txt
vendored
Normal file
5
grammar/internal/diff/testdata/same.txt
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
-- old --
|
||||
hello world
|
||||
-- new --
|
||||
hello world
|
||||
-- diff --
|
34
grammar/internal/diff/testdata/start.txt
vendored
Normal file
34
grammar/internal/diff/testdata/start.txt
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
-- old --
|
||||
e
|
||||
pi
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
-- new --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,5 +1,6 @@
|
||||
-e
|
||||
-pi
|
||||
+1
|
||||
+2
|
||||
+3
|
||||
4
|
||||
5
|
||||
6
|
40
grammar/internal/diff/testdata/triv.txt
vendored
Normal file
40
grammar/internal/diff/testdata/triv.txt
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
Another example from Hunt and McIlroy,
|
||||
“An Algorithm for Differential File Comparison.”
|
||||
https://www.cs.dartmouth.edu/~doug/diff.pdf
|
||||
|
||||
Anchored diff gives up on finding anything,
|
||||
since there are no unique lines.
|
||||
|
||||
-- old --
|
||||
a
|
||||
b
|
||||
c
|
||||
a
|
||||
b
|
||||
b
|
||||
a
|
||||
-- new --
|
||||
c
|
||||
a
|
||||
b
|
||||
a
|
||||
b
|
||||
c
|
||||
-- diff --
|
||||
diff old new
|
||||
--- old
|
||||
+++ new
|
||||
@@ -1,7 +1,6 @@
|
||||
-a
|
||||
-b
|
||||
-c
|
||||
-a
|
||||
-b
|
||||
-b
|
||||
-a
|
||||
+c
|
||||
+a
|
||||
+b
|
||||
+a
|
||||
+b
|
||||
+c
|
171
grammar/jsonschema/decode.go
Normal file
171
grammar/jsonschema/decode.go
Normal file
@ -0,0 +1,171 @@
|
||||
package jsonschema
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Schema holds a JSON schema.
|
||||
type Schema struct {
|
||||
// Name is the name of the property. For the parent/root property, this
|
||||
// is "root". For child properties, this is the name of the property.
|
||||
Name string `json:"-"`
|
||||
|
||||
// Type is the type of the property.
|
||||
//
|
||||
// TODO: Union types (e.g. make this a []string).
|
||||
Type string
|
||||
|
||||
// PrefixItems is a list of schemas for each item in a tuple. By
|
||||
// default, the tuple is "closed." unless Items is set to true or a
|
||||
// valid Schema.
|
||||
PrefixItems []*Schema
|
||||
|
||||
// Items is the schema for each item in a list.
|
||||
//
|
||||
// If it is missing, or its JSON value is "null" or "false", it is nil.
|
||||
// If the JSON value is "true", it is set to the empty Schema. If the
|
||||
// JSON value is an object, it will be decoded as a Schema.
|
||||
Items *Schema
|
||||
|
||||
// MinItems specifies the minimum number of items allowed in a list.
|
||||
MinItems int
|
||||
|
||||
// MaxItems specifies the maximum number of items allowed in a list.
|
||||
MaxItems int
|
||||
|
||||
// Properties is the schema for each property of an object.
|
||||
Properties []*Schema
|
||||
|
||||
// Format is the format of the property. This is used to validate the
|
||||
// property against a specific format.
|
||||
//
|
||||
// It is the callers responsibility to validate the property against
|
||||
// the format.
|
||||
Format string
|
||||
|
||||
// Minimum specifies the minimum value for numeric properties.
|
||||
Minimum float64
|
||||
|
||||
// Maximum specifies the maximum value for numeric properties.
|
||||
Maximum float64
|
||||
|
||||
// Enum is a list of valid values for the property.
|
||||
Enum []json.RawMessage
|
||||
}
|
||||
|
||||
func (s *Schema) UnmarshalJSON(data []byte) error {
|
||||
type S Schema
|
||||
w := struct {
|
||||
Properties props
|
||||
Items items
|
||||
*S
|
||||
}{
|
||||
S: (*S)(s),
|
||||
}
|
||||
if err := json.Unmarshal(data, &w); err != nil {
|
||||
return err
|
||||
}
|
||||
if w.Items.set {
|
||||
s.Items = &w.Items.Schema
|
||||
}
|
||||
s.Properties = w.Properties
|
||||
return nil
|
||||
}
|
||||
|
||||
type items struct {
|
||||
Schema
|
||||
set bool
|
||||
}
|
||||
|
||||
func (s *items) UnmarshalJSON(data []byte) error {
|
||||
switch b := data[0]; b {
|
||||
case 't':
|
||||
*s = items{set: true}
|
||||
case '{':
|
||||
type I items
|
||||
if err := json.Unmarshal(data, (*I)(s)); err != nil {
|
||||
return err
|
||||
}
|
||||
s.set = true
|
||||
case 'n', 'f':
|
||||
default:
|
||||
return errors.New("invalid Items")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EffectiveType returns the effective type of the schema. If the Type field is
|
||||
// not empty, it is returned; otherwise:
|
||||
//
|
||||
// - If the schema has both Properties and Items, it returns an empty string.
|
||||
// - If the schema has Properties, it returns "object".
|
||||
// - If the schema has Items, it returns "array".
|
||||
// - If the schema has neither Properties nor Items, it returns "value".
|
||||
//
|
||||
// The returned string is never empty.
|
||||
func (d *Schema) EffectiveType() string {
|
||||
if d.Type == "" {
|
||||
if len(d.Properties) > 0 {
|
||||
return "object"
|
||||
}
|
||||
if len(d.PrefixItems) > 0 || d.Items != nil {
|
||||
return "array"
|
||||
}
|
||||
return "value"
|
||||
}
|
||||
return d.Type
|
||||
}
|
||||
|
||||
// props is an ordered list of properties. The order of the properties
|
||||
// is the order in which they were defined in the schema.
|
||||
type props []*Schema
|
||||
|
||||
var _ json.Unmarshaler = (*props)(nil)
|
||||
|
||||
func (v *props) UnmarshalJSON(data []byte) error {
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
if data[0] != '{' {
|
||||
return errors.New("expected object")
|
||||
}
|
||||
|
||||
d := json.NewDecoder(bytes.NewReader(data))
|
||||
|
||||
// TODO(bmizerany): Consider DisallowUnknownFields. Currently, we, like
|
||||
// llama.cpp, ignore unknown fields, which could be lead to unexpected
|
||||
// behavior for clients of this package, since they may not be aware
|
||||
// that "additionalFields", "itemsPrefix", etc, are being ignored.
|
||||
//
|
||||
// For now, just do what llama.cpp does.
|
||||
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t != json.Delim('{') {
|
||||
return errors.New("expected object")
|
||||
}
|
||||
for d.More() {
|
||||
// Use the first token (map key) as the property name, then
|
||||
// decode the rest of the object fields into a Schema and
|
||||
// append.
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == json.Delim('}') {
|
||||
return nil
|
||||
}
|
||||
s := &Schema{
|
||||
Name: t.(string),
|
||||
}
|
||||
if err := d.Decode(s); err != nil {
|
||||
return err
|
||||
}
|
||||
*v = append(*v, s)
|
||||
}
|
||||
return nil
|
||||
}
|
104
grammar/jsonschema/decode_test.go
Normal file
104
grammar/jsonschema/decode_test.go
Normal file
@ -0,0 +1,104 @@
|
||||
package jsonschema
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
const testSchemaBasic = `
|
||||
{
|
||||
"properties": {
|
||||
"tupleClosedEmpty": { "prefixItems": [] },
|
||||
"tupleClosedMissing": { "prefixItems": [{}] },
|
||||
"tupleClosedNull": { "prefixItems": [{}], "items": null },
|
||||
"tupleClosedFalse": { "prefixItems": [{}], "items": false },
|
||||
"tupleOpenTrue": { "prefixItems": [{}], "items": true },
|
||||
"tupleOpenEmpty": { "prefixItems": [{}], "items": {} },
|
||||
"tupleOpenTyped": { "prefixItems": [{}], "items": {"type": "boolean"} },
|
||||
"tupleOpenMax": { "prefixItems": [{}], "items": true, "maxItems": 3},
|
||||
|
||||
"array": { "items": {"type": "number"} },
|
||||
|
||||
"null": { "type": "null" },
|
||||
"string": { "type": "string" },
|
||||
"boolean": { "type": "boolean" }
|
||||
}
|
||||
}
|
||||
`
|
||||
|
||||
func TestSchemaUnmarshal(t *testing.T) {
|
||||
var got *Schema
|
||||
if err := json.Unmarshal([]byte(testSchemaBasic), &got); err != nil {
|
||||
t.Fatalf("Unmarshal: %v", err)
|
||||
}
|
||||
want := &Schema{
|
||||
Properties: []*Schema{
|
||||
{Name: "tupleClosedEmpty", PrefixItems: []*Schema{}, Items: nil},
|
||||
{Name: "tupleClosedMissing", PrefixItems: []*Schema{{}}, Items: nil},
|
||||
{Name: "tupleClosedNull", PrefixItems: []*Schema{{}}, Items: nil},
|
||||
{Name: "tupleClosedFalse", PrefixItems: []*Schema{{}}, Items: nil},
|
||||
|
||||
{Name: "tupleOpenTrue", PrefixItems: []*Schema{{}}, Items: &Schema{}},
|
||||
{Name: "tupleOpenEmpty", PrefixItems: []*Schema{{}}, Items: &Schema{}},
|
||||
{Name: "tupleOpenTyped", PrefixItems: []*Schema{{}}, Items: &Schema{Type: "boolean"}},
|
||||
{Name: "tupleOpenMax", PrefixItems: []*Schema{{}}, Items: &Schema{}, MaxItems: 3},
|
||||
|
||||
{Name: "array", Items: &Schema{Type: "number"}},
|
||||
|
||||
{Name: "null", Type: "null"},
|
||||
{Name: "string", Type: "string"},
|
||||
{Name: "boolean", Type: "boolean"},
|
||||
},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("(-want, +got)\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEffectiveType(t *testing.T) {
|
||||
const schema = `
|
||||
{"properties": {
|
||||
"o": {"type": "object"},
|
||||
"a": {"type": "array"},
|
||||
"n": {"type": "number"},
|
||||
"s": {"type": "string"},
|
||||
"z": {"type": "null"},
|
||||
"b": {"type": "boolean"},
|
||||
|
||||
"t0": {"prefixItems": [{}], "items": {"type": "number"}},
|
||||
"t1": {"items": {"type": "number"}, "maxItems": 3},
|
||||
|
||||
"v": {"maxItems": 3}
|
||||
}}
|
||||
`
|
||||
|
||||
var s *Schema
|
||||
if err := json.Unmarshal([]byte(schema), &s); err != nil {
|
||||
t.Fatalf("json.Unmarshal: %v", err)
|
||||
}
|
||||
|
||||
var got []string
|
||||
for _, p := range s.Properties {
|
||||
got = append(got, p.EffectiveType())
|
||||
}
|
||||
|
||||
want := strings.Fields(`
|
||||
object
|
||||
array
|
||||
number
|
||||
string
|
||||
null
|
||||
boolean
|
||||
array
|
||||
array
|
||||
value
|
||||
`)
|
||||
if !reflect.DeepEqual(want, got) {
|
||||
t.Errorf("\ngot:\n\t%v\nwant:\n\t%v", got, want)
|
||||
}
|
||||
}
|
76
grammar/testdata/schemas.txt
vendored
Normal file
76
grammar/testdata/schemas.txt
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
# This file holds tests for JSON schema to EBNF grammar conversions.
|
||||
#
|
||||
# The format is a JSON schema, followed by the expected EBNF grammar. Each test
|
||||
# MAY be preceded by a comment that describes the test (e.g. the test name), followed by
|
||||
# the JSON schema and the expected EBNF grammar. If no comment is present, the test
|
||||
# name the tests number in the file (e.g. "#0", "#1", etc.)
|
||||
#
|
||||
# Blank lines signify the end or start of a new test. Comments can be added
|
||||
# anywhere in the file, but they must be preceded by a '#' character and start at
|
||||
# the beginning of the line.
|
||||
|
||||
# default
|
||||
{}
|
||||
root ::= value;
|
||||
|
||||
{"properties": {}}
|
||||
root ::= value;
|
||||
|
||||
# array
|
||||
{"properties": {"a": {"type": "array", "items": {"type": "string"}}}}
|
||||
root_0_tuple_0 ::= string;
|
||||
root_0 ::= "[" ( root_0_tuple_0 )* "]";
|
||||
root ::= "{" "a" ":" root_0 "}";
|
||||
|
||||
# array with nested array
|
||||
{"type": "array", "items": {"type": "array", "items": {"type": "string"}}}
|
||||
root_tuple_0_tuple_0 ::= string;
|
||||
root_tuple_0 ::= "[" ( root_tuple_0_tuple_0 )* "]";
|
||||
root ::= "[" ( root_tuple_0 )* "]";
|
||||
|
||||
# object
|
||||
{"properties": {"e": {}}}
|
||||
root_0 ::= value;
|
||||
root ::= "{" "e" ":" root_0 "}";
|
||||
|
||||
# object with nested object
|
||||
{"properties": {"o": {"type": "object", "properties": {"e": {}}}}}
|
||||
root_0_0 ::= value;
|
||||
root_0 ::= "{" "e" ":" root_0_0 "}";
|
||||
root ::= "{" "o" ":" root_0 "}";
|
||||
|
||||
# boolean
|
||||
{"type": "boolean"}
|
||||
root ::= boolean;
|
||||
|
||||
# number
|
||||
{"properties": {"n": {"type": "number", "minimum": 123, "maximum": 4567}}}
|
||||
root_0 ::= number;
|
||||
root ::= "{" "n" ":" root_0 "}";
|
||||
|
||||
# string
|
||||
{"type": "string"}
|
||||
root ::= string;
|
||||
|
||||
# string with enum
|
||||
{"type": "string", "enum": ["a", "b", "c"]}
|
||||
root ::= ( "\"a\"" "|" "\"b\"" "|" "\"c\"" );
|
||||
|
||||
# spaces in key
|
||||
{"properties": {"a b": {}}}
|
||||
root_0 ::= value;
|
||||
root ::= "{" "a b" ":" root_0 "}";
|
||||
|
||||
# issue7978
|
||||
{ "type": "object", "properties": { "steps": { "type": "array", "items": { "type": "object", "properties": { "explanation": { "type": "string" }, "output": { "type": "string" } }, "required": [ "explanation", "output" ], "additionalProperties": false } }, "final_answer": { "type": "string" } }, "required": [ "steps", "final_answer" ], "additionalProperties": false }
|
||||
root_0_tuple_0_0 ::= string;
|
||||
root_0_tuple_0_1 ::= string;
|
||||
root_0_tuple_0 ::= "{" "explanation" ":" root_0_tuple_0_0 "," "output" ":" root_0_tuple_0_1 "}";
|
||||
root_0 ::= "[" ( root_0_tuple_0 )* "]";
|
||||
root_1 ::= string;
|
||||
root ::= "{" "steps" ":" root_0 "," "final_answer" ":" root_1 "}";
|
||||
|
||||
# !! # special characters in key
|
||||
# !! {"properties": {"a!b": {}}}
|
||||
# !! !invalid character '!' in key
|
||||
# !!
|
@ -37,8 +37,7 @@ go build -tags avx .
|
||||
```shell
|
||||
# go doesn't recognize `-mfma` as a valid compiler flag
|
||||
# see https://github.com/golang/go/issues/17895
|
||||
go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
|
||||
go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
|
||||
go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
|
||||
go build -tags=avx,avx2 .
|
||||
```
|
||||
|
||||
|
34
llama/amx.h
vendored
34
llama/amx.h
vendored
@ -1,34 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
|
||||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
|
||||
#endif
|
51
llama/ggml-blas.h
vendored
51
llama/ggml-blas.h
vendored
@ -1,51 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
||||
|
||||
// number of threads used for conversion to float
|
||||
// for openblas and blis, this will also set the number of threads used for blas operations
|
||||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
34
llama/ggml-cpu-aarch64.h
vendored
34
llama/ggml-cpu-aarch64.h
vendored
@ -1,34 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml-cpu-traits.h"
|
||||
#include "ggml.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
|
64
llama/ggml-cpu-traits.h
vendored
64
llama/ggml-cpu-traits.h
vendored
@ -1,64 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <vector>
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// return true if op part of extra "accelerator"
|
||||
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
|
||||
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
namespace ggml::cpu {
|
||||
// register in tensor->extra
|
||||
class tensor_traits {
|
||||
public:
|
||||
virtual ~tensor_traits();
|
||||
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
|
||||
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
|
||||
};
|
||||
|
||||
class extra_buffer_type {
|
||||
public:
|
||||
virtual ~extra_buffer_type();
|
||||
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
|
||||
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
|
||||
};
|
||||
} // namespace ggml::cpu
|
||||
|
||||
// implemented in ggml-cpu.cpp.
|
||||
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
|
||||
|
||||
#endif
|
31
llama/ggml-cuda/acc.cuh
vendored
31
llama/ggml-cuda/acc.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_ACC_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
60
llama/ggml-cuda/arange.cu
vendored
60
llama/ggml-cuda/arange.cu
vendored
@ -1,60 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arange.cuh"
|
||||
|
||||
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
|
||||
// blockIDx.x: idx of ne0 / BLOCK_SIZE
|
||||
int nidx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (nidx >= ne0) {
|
||||
return;
|
||||
}
|
||||
dst[nidx] = start + step * nidx;
|
||||
}
|
||||
|
||||
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
|
||||
int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
|
||||
arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
|
||||
}
|
||||
|
||||
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
float start;
|
||||
float stop;
|
||||
float step;
|
||||
memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
|
||||
memcpy(&stop, (float *)dst->op_params + 1, sizeof(float));
|
||||
memcpy(&step, (float *)dst->op_params + 2, sizeof(float));
|
||||
|
||||
int64_t steps = (int64_t)ceil((stop - start) / step);
|
||||
GGML_ASSERT(ggml_nelements(dst) == steps);
|
||||
|
||||
arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
|
||||
}
|
31
llama/ggml-cuda/arange.cuh
vendored
31
llama/ggml-cuda/arange.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_ARANGE_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/argmax.cuh
vendored
29
llama/ggml-cuda/argmax.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/argsort.cuh
vendored
29
llama/ggml-cuda/argsort.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
35
llama/ggml-cuda/binbcast.cuh
vendored
35
llama/ggml-cuda/binbcast.cuh
vendored
@ -1,35 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
60
llama/ggml-cuda/clamp.cu
vendored
60
llama/ggml-cuda/clamp.cu
vendored
@ -1,60 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "clamp.cuh"
|
||||
|
||||
static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
|
||||
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i >= k) {
|
||||
return;
|
||||
}
|
||||
|
||||
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
|
||||
}
|
||||
|
||||
static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
|
||||
const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
|
||||
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
|
||||
}
|
||||
|
||||
|
||||
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const float * src0_d = (const float *)src0->data;
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
|
||||
float min;
|
||||
float max;
|
||||
memcpy(&min, dst->op_params, sizeof(float));
|
||||
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
||||
|
||||
clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
|
||||
}
|
31
llama/ggml-cuda/clamp.cuh
vendored
31
llama/ggml-cuda/clamp.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_CLAMP_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/concat.cuh
vendored
31
llama/ggml-cuda/concat.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_CONCAT_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/conv-transpose-1d.cuh
vendored
31
llama/ggml-cuda/conv-transpose-1d.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
39
llama/ggml-cuda/convert.cuh
vendored
39
llama/ggml-cuda/convert.cuh
vendored
@ -1,39 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
|
||||
|
||||
template<typename T>
|
||||
using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
|
||||
|
||||
typedef to_t_cuda_t<float> to_fp32_cuda_t;
|
||||
typedef to_t_cuda_t<half> to_fp16_cuda_t;
|
||||
|
||||
to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
|
||||
|
||||
to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
|
31
llama/ggml-cuda/count-equal.cuh
vendored
31
llama/ggml-cuda/count-equal.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128
|
||||
|
||||
void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
35
llama/ggml-cuda/cpy.cuh
vendored
35
llama/ggml-cuda/cpy.cuh
vendored
@ -1,35 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_CPY_BLOCK_SIZE 64
|
||||
|
||||
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);
|
||||
|
||||
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);
|
33
llama/ggml-cuda/cross-entropy-loss.cuh
vendored
33
llama/ggml-cuda/cross-entropy-loss.cuh
vendored
@ -1,33 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/diagmask.cuh
vendored
31
llama/ggml-cuda/diagmask.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
|
||||
|
||||
void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/fattn-tile-f16.cuh
vendored
29
llama/ggml-cuda/fattn-tile-f16.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/fattn-tile-f32.cuh
vendored
29
llama/ggml-cuda/fattn-tile-f32.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/fattn.cuh
vendored
29
llama/ggml-cuda/fattn.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/getrows.cuh
vendored
31
llama/ggml-cuda/getrows.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_GET_ROWS_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/im2col.cuh
vendored
31
llama/ggml-cuda/im2col.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_IM2COL_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
38
llama/ggml-cuda/mmv.cuh
vendored
38
llama/ggml-cuda/mmv.cuh
vendored
@ -1,38 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
|
||||
#define MMV_MAX_ROWS 512
|
||||
|
||||
void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_mul_mat_vec(
|
||||
ggml_backend_cuda_context & ctx,
|
||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
|
||||
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
|
||||
const int64_t src1_padded_row_size, cudaStream_t stream);
|
35
llama/ggml-cuda/mmvq.cuh
vendored
35
llama/ggml-cuda/mmvq.cuh
vendored
@ -1,35 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
|
||||
|
||||
void ggml_cuda_op_mul_mat_vec_q(
|
||||
ggml_backend_cuda_context & ctx,
|
||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
|
||||
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
|
||||
const int64_t src1_padded_row_size, cudaStream_t stream);
|
33
llama/ggml-cuda/norm.cuh
vendored
33
llama/ggml-cuda/norm.cuh
vendored
@ -1,33 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/opt-step-adamw.cuh
vendored
31
llama/ggml-cuda/opt-step-adamw.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
29
llama/ggml-cuda/out-prod.cuh
vendored
29
llama/ggml-cuda/out-prod.cuh
vendored
@ -1,29 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
32
llama/ggml-cuda/pad.cuh
vendored
32
llama/ggml-cuda/pad.cuh
vendored
@ -1,32 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_PAD_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/pool2d.cuh
vendored
31
llama/ggml-cuda/pool2d.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_POOL2D_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
50
llama/ggml-cuda/quantize.cuh
vendored
50
llama/ggml-cuda/quantize.cuh
vendored
@ -1,50 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.cuh"
|
||||
#include "mmq.cuh"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#define CUDA_QUANTIZE_BLOCK_SIZE 256
|
||||
#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
|
||||
|
||||
static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
|
||||
static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
|
||||
|
||||
typedef void (*quantize_cuda_t)(
|
||||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
|
||||
const ggml_type type_x, cudaStream_t stream);
|
||||
|
||||
void quantize_row_q8_1_cuda(
|
||||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
|
||||
const ggml_type type_x, cudaStream_t stream);
|
||||
|
||||
void quantize_mmq_q8_1_cuda(
|
||||
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
|
||||
const ggml_type type_x, cudaStream_t stream);
|
31
llama/ggml-cuda/rope.cuh
vendored
31
llama/ggml-cuda/rope.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_ROPE_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
57
llama/ggml-cuda/scale.cu
vendored
57
llama/ggml-cuda/scale.cu
vendored
@ -1,57 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "scale.cuh"
|
||||
|
||||
static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
|
||||
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i >= k) {
|
||||
return;
|
||||
}
|
||||
|
||||
dst[i] = scale * x[i];
|
||||
}
|
||||
|
||||
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
|
||||
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
|
||||
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
|
||||
}
|
||||
|
||||
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const float * src0_d = (const float *)src0->data;
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
|
||||
float scale;
|
||||
memcpy(&scale, dst->op_params, sizeof(float));
|
||||
|
||||
scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
|
||||
}
|
31
llama/ggml-cuda/scale.cuh
vendored
31
llama/ggml-cuda/scale.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_SCALE_BLOCK_SIZE 256
|
||||
|
||||
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/softmax.cuh
vendored
31
llama/ggml-cuda/softmax.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
|
||||
|
||||
void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
31
llama/ggml-cuda/sum.cuh
vendored
31
llama/ggml-cuda/sum.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);
|
||||
|
||||
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
65
llama/ggml-cuda/sumrows.cu
vendored
65
llama/ggml-cuda/sumrows.cu
vendored
@ -1,65 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "sumrows.cuh"
|
||||
|
||||
static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
|
||||
const int row = blockIdx.x;
|
||||
const int col = threadIdx.x;
|
||||
|
||||
float sum = 0.0f;
|
||||
for (int i = col; i < ncols; i += blockDim.x) {
|
||||
sum += x[row * ncols + i];
|
||||
}
|
||||
|
||||
sum = warp_reduce_sum(sum);
|
||||
|
||||
if (col == 0) {
|
||||
dst[row] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
|
||||
const dim3 block_dims(WARP_SIZE, 1, 1);
|
||||
const dim3 block_nums(nrows, 1, 1);
|
||||
k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
|
||||
}
|
||||
|
||||
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const float * src0_d = (const float *)src0->data;
|
||||
float * dst_d = (float *)dst->data;
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||
|
||||
const int64_t ncols = src0->ne[0];
|
||||
const int64_t nrows = ggml_nrows(src0);
|
||||
|
||||
sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
|
||||
}
|
31
llama/ggml-cuda/sumrows.cuh
vendored
31
llama/ggml-cuda/sumrows.cuh
vendored
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
|
||||
|
||||
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
|
@ -1,31 +0,0 @@
|
||||
/**
|
||||
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
|
||||
|
||||
#include "../fattn-vec-f16.cuh"
|
||||
|
||||
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user