From c6bcdc4223c50071b59a19c42cc54ec9932f696f Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 13 May 2025 13:12:54 -0700 Subject: [PATCH] Revert "remove cuda v11 (#10569)" (#10692) Bring back v11 until we can better warn users that their driver is too old. This reverts commit fa393554b927f154145488c852297a2330cb5f13. --- .github/workflows/release.yaml | 6 ++++++ .github/workflows/test.yaml | 6 +++--- CMakePresets.json | 13 +++++++++++++ Dockerfile | 17 ++++++++++++++++- discover/cuda_common.go | 3 --- discover/path.go | 2 +- docs/gpu.md | 2 +- docs/troubleshooting.md | 2 +- llm/server.go | 2 +- scripts/build_windows.ps1 | 14 ++++++++++++++ scripts/env.sh | 2 ++ 11 files changed, 58 insertions(+), 11 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b2e122469..f423106e7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -103,6 +103,11 @@ jobs: arch: [amd64] preset: ['CPU'] include: + - os: windows + arch: amd64 + preset: 'CUDA 11' + install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe + cuda-version: '11.3' - os: windows arch: amd64 preset: 'CUDA 12' @@ -319,6 +324,7 @@ jobs: case "$COMPONENT" in bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; + lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;; lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2e7093391..27e229fcf 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -46,7 +46,7 @@ jobs: include: - preset: CPU - preset: CUDA - container: nvidia/cuda:12.8.1-devel-ubuntu22.04 + container: nvidia/cuda:11.8.0-devel-ubuntu22.04 flags: '-DCMAKE_CUDA_ARCHITECTURES=87' - preset: ROCm container: rocm/dev-ubuntu-22.04:6.1.2 @@ -78,7 +78,7 @@ jobs: include: - preset: CPU - preset: CUDA - install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe + install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe flags: '-DCMAKE_CUDA_ARCHITECTURES=80' - preset: ROCm install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe @@ -102,7 +102,7 @@ jobs: $ErrorActionPreference = "Stop" if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') { Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe" - Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait + Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait } $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path diff --git a/CMakePresets.json b/CMakePresets.json index 2f29e041e..0b70d8ba3 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -17,6 +17,14 @@ "name": "CUDA", "inherits": [ "Default" ] }, + { + "name": "CUDA 11", + "inherits": [ "CUDA" ], + "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86", + "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets" + } + }, { "name": "CUDA 12", "inherits": [ "CUDA" ], @@ -70,6 +78,11 @@ "configurePreset": "CUDA", "targets": [ "ggml-cuda" ] }, + { + "name": "CUDA 11", + "inherits": [ "CUDA" ], + "configurePreset": "CUDA 11" + }, { "name": "CUDA 12", "inherits": [ "CUDA" ], diff --git a/Dockerfile b/Dockerfile index 1196dc535..4c6619e77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,10 +7,14 @@ ARG JETPACK5VERSION=r35.4.1 ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 +# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 RUN yum install -y yum-utils \ - && dnf install -y ccache \ + && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \ + && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \ + && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \ && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo +ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH FROM --platform=linux/arm64 almalinux:8 AS base-arm64 # install epel-release for ccache @@ -34,6 +38,15 @@ RUN --mount=type=cache,target=/root/.ccache \ && cmake --build --parallel --preset 'CPU' \ && cmake --install build --component CPU --strip --parallel 8 +FROM base AS cuda-11 +ARG CUDA11VERSION=11.3 +RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-} +ENV PATH=/usr/local/cuda-11/bin:$PATH +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'CUDA 11' \ + && cmake --build --parallel --preset 'CUDA 11' \ + && cmake --install build --component CUDA --strip --parallel 8 + FROM base AS cuda-12 ARG CUDA12VERSION=12.8 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-} @@ -85,9 +98,11 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ go build -trimpath -buildmode=pie -o /bin/ollama . FROM --platform=linux/amd64 scratch AS amd64 +COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 FROM --platform=linux/arm64 scratch AS arm64 +COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5 COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6 diff --git a/discover/cuda_common.go b/discover/cuda_common.go index f46c7cfa5..048295297 100644 --- a/discover/cuda_common.go +++ b/discover/cuda_common.go @@ -3,7 +3,6 @@ package discover import ( - "fmt" "log/slog" "os" "regexp" @@ -60,8 +59,6 @@ func cudaVariant(gpuInfo CudaGPUInfo) string { // driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) { - // The detected driver is older than Feb 2023 - slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor)) return "v11" } return "v12" diff --git a/discover/path.go b/discover/path.go index 68e63009a..8a20d8c21 100644 --- a/discover/path.go +++ b/discover/path.go @@ -12,7 +12,7 @@ import ( // '../lib/ollama' on Linux and the executable's directory on macOS // note: distribution builds, additional GPU-specific libraries are // found in subdirectories of the returned path, such as -// 'cuda_v12', 'rocm', etc. +// 'cuda_v11', 'cuda_v12', 'rocm', etc. var LibOllamaPath string = func() string { exe, err := os.Executable() if err != nil { diff --git a/docs/gpu.md b/docs/gpu.md index 61ff6e458..b54c66ab6 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -1,6 +1,6 @@ # GPU ## Nvidia -Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer. +Ollama supports Nvidia GPUs with compute capability 5.0+. Check your compute compatibility to see if your card is supported: [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 995b33aca..ba5487fef 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto In the server log, you will see a message that looks something like this (varies from release to release): ``` -Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5] +Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5] ``` **Experimental LLM Library Override** diff --git a/llm/server.go b/llm/server.go index 085e0980c..a64669c2f 100644 --- a/llm/server.go +++ b/llm/server.go @@ -311,7 +311,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a params = append(params, "--mmproj", projectors[0]) } - // iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc. + // iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc. // adding each library's respective path to the LD_LIBRARY_PATH, until finally running // without any LD_LIBRARY_PATH flags for { diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index eaac2c600..e4c0b3d93 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -27,6 +27,7 @@ function checkEnv() { $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] } # Locate CUDA versions + # Note: this assumes every version found will be built $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') if ($cudaList.length -eq 0) { $d=(get-command -ea 'silentlycontinue' nvcc).path @@ -93,6 +94,19 @@ function buildOllama() { $hashEnv = @{} Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value } + if ("$script:CUDA_DIRS".Contains("v11")) { + $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }} + $env:CUDAToolkit_ROOT=$hashEnv[$v11] + write-host "Building CUDA v11 backend libraries" + # Note: cuda v11 requires msvc 2019 so force the older generator + # to avoid 2022 (or newer) from being used as the default + & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + & cmake --install build --component "CUDA" --strip + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + } if ("$script:CUDA_DIRS".Contains("v12")) { $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }} $env:CUDAToolkit_ROOT=$hashEnv[$v12] diff --git a/scripts/env.sh b/scripts/env.sh index 65a970bdc..c5e6f530a 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -10,7 +10,9 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \ --build-arg=GOFLAGS \ --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ --build-arg=OLLAMA_SKIP_CUDA_GENERATE \ + --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \ --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \ + --build-arg=CUDA_V11_ARCHITECTURES \ --build-arg=CUDA_V12_ARCHITECTURES \ --build-arg=OLLAMA_SKIP_ROCM_GENERATE \ --build-arg=OLLAMA_FAST_BUILD \