llama.cpp subdirs

2024-12-19 11:36:57 -08:00 · 2024-12-19 11:36:57 -08:00 · bca6ed0ccc
commit bca6ed0ccc
parent 756bfebe1b
56 changed files with 352 additions and 817 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -9,10 +9,15 @@ find_package(Threads REQUIRED)
 set(CMAKE_BUILD_TYPE Release)
 set(BUILD_SHARED_LIBS ON)
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 set(GGML_CCACHE ON)
 set(GGML_SCHED_MAX_COPIES 4)
 set(GGML_CPU_ALL_VARIANTS ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_LLAMAFILE ON)
 add_compile_definitions(GGML_BUILD)
 add_compile_definitions(GGML_SHARED)
@ -24,9 +29,21 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
 function(set_target_output_directory _target)
    if(TARGET ${_target})
        set_target_properties(${_target} PROPERTIES
            RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
            LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
            ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
        )
    endif()
 endfunction()
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src EXCLUDE_FROM_ALL)
 set_target_output_directory(ggml-base)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
 set_target_output_directory(ggml-cpu)
 find_package(BLAS)
 if(NOT BLAS_VENDOR)
@ -36,14 +53,16 @@ else()
 endif()
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-blas)
-target_compile_features(ggml-blas PRIVATE cxx_std_11)
+set_target_output_directory(ggml-blas)
 check_language(CUDA)
 if(CMAKE_CUDA_COMPILER)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
    set_target_output_directory(ggml-cuda)
 endif()
 check_language(HIP)
 if(CMAKE_HIP_COMPILER)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
    set_target_output_directory(ggml-hip)
 endif()
--- a/66
+++ b/66
@ -0,0 +1,66 @@
 ARG CUDA_11_VERSION=11.3
 ARG CUDA_12_VERSION=12.4
 ARG ROCM_VERSION=6.1.2
 ARG JETPACK_5_VERSION=r35.4.1
 ARG JETPACK_6_VERSION=r36.2.0
 ARG CMAKE_VERSION=3.31.2
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base
 ARG CMAKE_VERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1
 RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
 # FROM --platform=linux/arm64 rockylinux:8 AS base
 # ARG CMAKE_VERSION
 # RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
 # RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
 FROM base AS amd64
 ARG CUDA_11_VERSION
 ARG CUDA_12_VERSION
 RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \
    && yum install -y cuda-toolkit-${CUDA_12_VERSION//./-}
 COPY CMakeLists.txt CMakeLists.txt
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 FROM --platform=linux/amd64 amd64 AS cuda_11
 ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH
 RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
 RUN cmake --build build --target ggml-cuda -j
 FROM --platform=linux/amd64 amd64 AS cuda_12
 ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH
 RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 RUN cmake --build build --target ggml-cuda -j
 FROM --platform=linux/amd64 amd64 AS rocm
 RUN cmake -S . -B build -DCMAKE_HIP_ARCHITECTURES="gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
 RUN cmake --build build --target ggml-hip -j
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5
 ARG CMAKE_VERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
 COPY CMakeLists.txt .
 COPY ml/backend/ggml/ggml .
 RUN cmake -S . -B build \
    -DCMAKE_CUDA_ARCHITECTURES="72;87"
 RUN cmake --build build --target ggml-cuda
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6
 ARG CMAKE_VERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
 COPY CMakeLists.txt .
 COPY ml/backend/ggml/ggml .
 RUN cmake -S . -B build \
    -DCMAKE_CUDA_ARCHITECTURES="87"
 RUN cmake --build build --target ggml-cuda
 FROM --platform=linux/amd64 golang:1.23
 COPY --from=cuda_11 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-11.so
 COPY --from=cuda_12 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-12.so
 COPY --from=rocm build/ml/backend/ggml/ggml/src/ggml-hip/libggml-hip.so libggml-hip.so
 # FROM --platform=linux/arm64 golang:1.23
 # COPY --from=jetpack_5 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-5.so
 # COPY --from=jetpack_6 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-6.so
--- a/48
+++ b/48
@ -0,0 +1,48 @@
 UPSTREAM=https://github.com/ggerganov/llama.cpp.git
 WORKDIR=llama/vendor
 FETCH_HEAD=40c6d79fb52f995f47507fedfeaae2ac05d9b35c
 all: sync
 .PHONY: sync
 sync: llama/llama.cpp ml/backend/ggml/ggml
 .PHONY: llama/llama.cpp
 llama/llama.cpp: llama/vendor apply_patches
 	rsync -arvzc --delete -f "merge $@/.rsync-filter" $< $@
 	rsync -arvzc --delete --include LICENSE --exclude $(WORKDIR) $@
 .PHONY: ml/backend/ggml/ggml apply_patches
 ml/backend/ggml/ggml: llama/vendor/ggml apply_patches
 	rsync -arvzc --delete -f "merge $@/.rsync-filter" $< $@
 	rsync -arvzc --delete --include LICENSE --exclude $(WORKDIR) $@
 PATCHES=$(wildcard llama/patches/*.patch)
 .PHONY: apply_patches
 .NOTPARALLEL:
 apply_patches: $(addsuffix ed, $(PATCHES))
 %.patched: %.patch
 	if git -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
 .PHONY: checkout
 checkout: $(WORKDIR)
 	git -C $(WORKDIR) fetch
 	git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
 $(WORKDIR):
 	git clone $(UPSTREAM) $(WORKDIR)
 .PHONE: format_patches
 format_patches: llama/patches
 	git -C $(WORKDIR) format-patch \
 		--no-signature \
 		--no-numbered \
 		--zero-commit \
 		-o $(realpath $<) \
 		$(FETCH_HEAD)
 .PHONE: clean
 clean: checkout
 	$(RM) $(addsuffix ed, $(PATCHES))
--- a/llama/json-schema-to-grammar.h
+++ b/llama/json-schema-to-grammar.h
@ -1,34 +0,0 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "ggml.h"
 // Change JSON_ASSERT from assert() to GGML_ASSERT:
 #define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
--- a/llama/llama.cpp/.rsync-filter
+++ b/llama/llama.cpp/.rsync-filter
@ -0,0 +1,24 @@
 protect **/*.go
 include common/
 include common/base64.*
 include common/common.*
 include common/json-schema-to-grammar.*
 include common/json.*
 include common/log.*
 include common/sampling.*
 include common/stb_image.*
 include include/
 include include/llama.*
 include examples/
 include examples/llava/
 include examples/llava/clip.*
 include examples/llava/llava.*
 include src/
 include src/llama-grammar.*
 include src/llama-impl.*
 include src/llama-sampling.*
 include src/llama-vocab.*
 include src/llama.*
 include src/unicode-data.*
 include src/unicode.*
 exclude *
--- a/llama/llama.cpp/LICENSE
+++ b/llama/llama.cpp/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2023-2024 The ggml authors
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/llama/llama.cpp/common/base64.hpp
+++ b/llama/llama.cpp/common/base64.hpp
--- a/llama/llama.cpp/common/common.cpp
+++ b/llama/llama.cpp/common/common.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #if defined(_MSC_VER)
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
--- a/llama/llama.cpp/common/common.go
+++ b/llama/llama.cpp/common/common.go
@ -0,0 +1,6 @@
 package common
 // #cgo CXXFLAGS: -std=c++11
 // #cgo CPPFLAGS: -I${SRCDIR}/../include
 // #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include
 import "C"
--- a/llama/llama.cpp/common/common.h
+++ b/llama/llama.cpp/common/common.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 // Various helper functions and utilities
 #pragma once
--- a/llama/llama.cpp/common/json-schema-to-grammar.cpp
+++ b/llama/llama.cpp/common/json-schema-to-grammar.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "json-schema-to-grammar.h"
 #include <algorithm>
 #include <fstream>
@ -417,7 +391,7 @@ class SchemaConverter {
 private:
    std::function<json(const std::string &)> _fetch_json;
    bool _dotall;
-    std::map<std::string, std::string> _rules;
+    std::unordered_map<std::string, std::string> _rules;
    std::unordered_map<std::string, json> _refs;
    std::unordered_set<std::string> _refs_being_resolved;
    std::vector<std::string> _errors;
--- a/llama/llama.cpp/common/json-schema-to-grammar.h
+++ b/llama/llama.cpp/common/json-schema-to-grammar.h
@ -0,0 +1,8 @@
 #pragma once
 #include "ggml.h"
 // Change JSON_ASSERT from assert() to GGML_ASSERT:
 #define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
--- a/llama/llama.cpp/common/json.hpp
+++ b/llama/llama.cpp/common/json.hpp
--- a/llama/llama.cpp/common/log.cpp
+++ b/llama/llama.cpp/common/log.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "log.h"
 #include <condition_variable>
--- a/llama/llama.cpp/common/log.h
+++ b/llama/llama.cpp/common/log.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "ggml.h" // for ggml_log_level
--- a/llama/llama.cpp/common/sampling.cpp
+++ b/llama/llama.cpp/common/sampling.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "sampling.h"
 #include "common.h"
--- a/llama/llama.cpp/common/sampling.h
+++ b/llama/llama.cpp/common/sampling.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "llama.h"
--- a/llama/llama.cpp/common/stb_image.h
+++ b/llama/llama.cpp/common/stb_image.h
--- a/llama/llama.cpp/examples/llava/clip.cpp
+++ b/llama/llama.cpp/examples/llava/clip.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 // NOTE: This is modified from clip.cpp only for LLaVA,
 // so there might be still unnecessary artifacts hanging around
 // I'll gradually clean and extend it
--- a/llama/llama.cpp/examples/llava/clip.h
+++ b/llama/llama.cpp/examples/llava/clip.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #ifndef CLIP_H
 #define CLIP_H
--- a/llama/llama.cpp/examples/llava/llava.cpp
+++ b/llama/llama.cpp/examples/llava/llava.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "clip.h"
 #include "llava.h"
--- a/llama/llama.cpp/examples/llava/llava.go
+++ b/llama/llama.cpp/examples/llava/llava.go
@ -0,0 +1,6 @@
 package llava
 // #cgo CXXFLAGS: -std=c++11
 // #cgo CPPFLAGS: -I${SRCDIR}/../../include -I${SRCDIR}/../../common
 // #cgo CPPFLAGS: -I${SRCDIR}/../../../../ml/backend/ggml/ggml/include
 import "C"
--- a/llama/llama.cpp/examples/llava/llava.h
+++ b/llama/llama.cpp/examples/llava/llava.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #ifndef LLAVA_H
 #define LLAVA_H
--- a/llama/llama.cpp/include/llama.h
+++ b/llama/llama.cpp/include/llama.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #ifndef LLAMA_H
 #define LLAMA_H
--- a/llama/llama.cpp/src/llama-grammar.cpp
+++ b/llama/llama.cpp/src/llama-grammar.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "llama-grammar.h"
 #include "llama-vocab.h"
--- a/llama/llama.cpp/src/llama-grammar.h
+++ b/llama/llama.cpp/src/llama-grammar.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "llama-impl.h"
--- a/llama/llama.cpp/src/llama-impl.h
+++ b/llama/llama.cpp/src/llama-impl.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "llama.h"
--- a/llama/llama.cpp/src/llama-sampling.cpp
+++ b/llama/llama.cpp/src/llama-sampling.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "llama-sampling.h"
 #include "llama-vocab.h"
--- a/llama/llama.cpp/src/llama-sampling.h
+++ b/llama/llama.cpp/src/llama-sampling.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 // TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ?
--- a/llama/llama.cpp/src/llama-vocab.cpp
+++ b/llama/llama.cpp/src/llama-vocab.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "llama-vocab.h"
 #include "unicode.h"
--- a/llama/llama.cpp/src/llama-vocab.h
+++ b/llama/llama.cpp/src/llama-vocab.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include "llama-impl.h"
--- a/llama/llama.cpp/src/llama.cpp
+++ b/llama/llama.cpp/src/llama.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "llama-impl.h"
 #include "llama-vocab.h"
 #include "llama-sampling.h"
--- a/llama/llama.cpp/src/llama.go
+++ b/llama/llama.cpp/src/llama.go
@ -0,0 +1,7 @@
 package llama
 // #cgo CXXFLAGS: -std=c++11
 // #cgo CPPFLAGS: -I${SRCDIR}/../include
 // #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include
 import "C"
 import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
--- a/llama/llama.cpp/src/unicode-data.cpp
+++ b/llama/llama.cpp/src/unicode-data.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 // generated with scripts/gen-unicode-data.py
 #include "unicode-data.h"
--- a/llama/llama.cpp/src/unicode-data.h
+++ b/llama/llama.cpp/src/unicode-data.h
@ -0,0 +1,20 @@
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <unordered_map>
 #include <unordered_set>
 struct range_nfd {
    uint32_t first;
    uint32_t last;
    uint32_t nfd;
 };
 static const uint32_t MAX_CODEPOINTS = 0x110000;
 extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
 extern const std::unordered_set<uint32_t> unicode_set_whitespace;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
 extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
--- a/llama/llama.cpp/src/unicode.cpp
+++ b/llama/llama.cpp/src/unicode.cpp
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #if defined(_MSC_VER)
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
--- a/llama/llama.cpp/src/unicode.h
+++ b/llama/llama.cpp/src/unicode.h
@ -1,29 +1,3 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include <cstdint>
--- a/llama/llama.go
+++ b/llama/llama.go
@ -1,18 +1,19 @@
 package llama
 //go:generate make -j 8
 /*
 #cgo CFLAGS: -std=c11
 #cgo CXXFLAGS: -std=c++11
 #cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include
 #cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common
 #cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/examples/llava
 #cgo CPPFLAGS: -I${SRCDIR}/../ml/backend/ggml/ggml/include
 #cgo darwin,arm64 CPPFLAGS: -DGGML_USE_METAL
 #include <stdlib.h>
 #include "ggml.h"
 #include "llama.h"
 #include "clip.h"
 #include "ggml.h"
 #include "llava.h"
 #include "mllama.h"
 #include "sampling_ext.h"
@ -47,10 +48,14 @@ import (
 	"sync/atomic"
 	"unsafe"
-	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
+	_ "github.com/ollama/ollama/llama/llama.cpp/common"
 	_ "github.com/ollama/ollama/llama/llama.cpp/examples/llava"
 	_ "github.com/ollama/ollama/llama/llama.cpp/src"
 	"github.com/ollama/ollama/ml/backend/ggml/ggml/src"
 )
 func BackendInit() {
 	ggml.OnceLoad()
 	C.llama_backend_init()
 }
--- a/llama/patches/0001-cuda.patch
+++ b/llama/patches/0001-cuda.patch
@ -1,7 +1,7 @@
-From 702ee500b229e910e3e6cd3c84d87763c51fb411 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Thu, 6 Jun 2024 23:55:47 -0700
-Subject: [PATCH 01/11] cuda
+Subject: [PATCH] cuda
 ---
 ggml/src/ggml-backend.cpp        | 2 +-
@ -53,6 +53,3 @@ index 093ae900..a0cf4ba4 100644
 }
 static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
 -- 
 2.46.0
--- a/llama/patches/0002-pretokenizer.patch
+++ b/llama/patches/0002-pretokenizer.patch
@ -1,7 +1,7 @@
-From 67eb186ccf062100835d413b1c3e2a0fc58e1c0f Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Mon, 16 Sep 2024 15:53:13 -0700
-Subject: [PATCH 02/11] pretokenizer
+Subject: [PATCH] pretokenizer
 ---
 src/llama.cpp | 14 +++-----------
@ -39,6 +39,3 @@ index 6a6f4c2a..fa09f3b3 100644
             }
         } else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
 -- 
 2.46.0
--- a/llama/patches/0003-embeddings.patch
+++ b/llama/patches/0003-embeddings.patch
@ -1,7 +1,7 @@
-From a9a7820ae111d70e24d4f7004378b5321e8a29c7 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Mon, 16 Sep 2024 15:53:14 -0700
-Subject: [PATCH 03/11] embeddings
+Subject: [PATCH] embeddings
 ---
 src/llama.cpp | 9 ++++++---
@ -45,6 +45,3 @@ index fa09f3b3..d1791af0 100644
         // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
         ggml_backend_sched_alloc_graph(lctx.sched.get(), gf);
 -- 
 2.46.0
--- a/llama/patches/0004-clip-unicode.patch
+++ b/llama/patches/0004-clip-unicode.patch
@ -1,7 +1,7 @@
-From aa5ad04094458943643df789c5b7fd7d4c68dafb Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Mon, 16 Sep 2024 15:53:15 -0700
-Subject: [PATCH 04/11] clip-unicode
+Subject: [PATCH] clip-unicode
 ---
 examples/llava/clip.cpp | 40 +++++++++++++++++++++++++++++++++++++++-
@ -74,6 +74,3 @@ index d7c94352..427d5e02 100644
     }
     // vision model
 -- 
 2.46.0
--- a/llama/patches/0005-solar-pro.patch
+++ b/llama/patches/0005-solar-pro.patch
@ -1,7 +1,7 @@
-From 226de4f71ce73a87a805dc83484b32f9f9d9c24d Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Mon, 16 Sep 2024 15:53:16 -0700
-Subject: [PATCH 05/11] solar-pro
+Subject: [PATCH] solar-pro
 solar-pro introduces block skip connections where blocks are connected
 to other, non-sequential blocks with a scale multiple
@ -404,6 +404,3 @@ index d1791af0..b01770d0 100644
             return LLAMA_ROPE_TYPE_NORM;
         // the pairs of head values are offset by n_rot/2
 -- 
 2.46.0
--- a/llama/patches/0006-conditional-fattn.patch
+++ b/llama/patches/0006-conditional-fattn.patch
@ -1,7 +1,7 @@
-From b9d893b5c7c3dcff42bce378ea26587a6c7d1113 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Wed, 9 Oct 2024 17:26:23 -0700
-Subject: [PATCH 06/11] conditional-fattn
+Subject: [PATCH] conditional-fattn
 ---
 ggml/src/ggml-cuda/ggml-cuda.cu | 2 ++
@ -23,6 +23,3 @@ index a2fcfe5d..5eed90da 100644
         case GGML_OP_CROSS_ENTROPY_LOSS:
             ggml_cuda_cross_entropy_loss(ctx, dst);
             break;
 -- 
 2.46.0
--- a/llama/patches/0007-add-mllama-support.patch
+++ b/llama/patches/0007-add-mllama-support.patch
@ -1,7 +1,7 @@
-From c2f0b1c0eda94eea785a1de9098df9eb29d64eb5 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Thu, 17 Oct 2024 15:18:22 -0700
-Subject: [PATCH 08/11] add mllama support
+Subject: [PATCH] add mllama support
 mllama adds cross-attention layers to the standard llama architecture
 it also requires a way to input a new tensor: cross_attention_state
@ -784,6 +784,3 @@ index b01770d0..46881642 100644
     } else {
         batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens_alloc);
     }
 -- 
 2.46.0
--- a/llama/patches/0007-blas.patch
+++ b/llama/patches/0007-blas.patch
@ -1,29 +0,0 @@
 From 9a5a9479d9cdf2032ff989fd297e50490f53e4c2 Mon Sep 17 00:00:00 2001
 From: Jesse Gross <jesse@ollama.com>
 Date: Mon, 30 Sep 2024 16:31:04 -0700
 Subject: [PATCH 07/11] blas
 ---
 ggml/src/ggml-blas/ggml-blas.cpp | 4 ++++
 1 file changed, 4 insertions(+)
 diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp
 index ec158dfa..b3ac1fa4 100644
 --- a/ggml/src/ggml-blas/ggml-blas.cpp
 +++ b/ggml/src/ggml-blas/ggml-blas.cpp
@@ -1,3 +1,5 @@
 +#ifdef GGML_USE_BLAS
 +
 #include "ggml-impl.h"
 #include "ggml-blas.h"
 #include "ggml-backend-impl.h"
@@ -515,3 +517,5 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) {
 }
 GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
 +
 +#endif // GGML_USE_BLAS
 \ No newline at end of file
 -- 
 2.46.0
--- a/llama/patches/0008-add-unpad-operator.patch
+++ b/llama/patches/0008-add-unpad-operator.patch
@ -1,7 +1,7 @@
-From 8e07a88fa87f31b6f2245c02a89a4a367ed6013c Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Thu, 17 Oct 2024 17:19:25 -0700
-Subject: [PATCH 09/11] add unpad operator
+Subject: [PATCH] add unpad operator
 ---
 ggml/include/ggml.h                  | 10 +++++
@ -394,6 +394,3 @@ index 1a9a7efa..ea2b259b 100644
 // ggml_arange
 struct ggml_tensor * ggml_arange(
 -- 
 2.46.0
--- a/llama/patches/0009-fix-deepseek-deseret-regex.patch
+++ b/llama/patches/0009-fix-deepseek-deseret-regex.patch
@ -1,7 +1,7 @@
-From 4236c07fc90fb758b89921fa7ef39dc0482c4bea Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Fri, 25 Oct 2024 16:25:18 -0700
-Subject: [PATCH 10/11] fix deepseek deseret regex
+Subject: [PATCH] fix deepseek deseret regex
 On windows compiled with gcc the c++ regex library failed to handle
 the characters
@ -70,6 +70,3 @@ index 3d459263..51dd81fb 100644
 }
 static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {
 -- 
 2.46.0
--- a/llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch
+++ b/llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch
@ -1,7 +1,7 @@
-From 7752556d7922e92b455ed92d22a3bfa9725f4458 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: ParthSareen <parth.sareen@ollama.com>
 Date: Wed, 11 Dec 2024 15:37:32 -0800
-Subject: [PATCH 11/11] Maintain ordering for rules for grammar
+Subject: [PATCH] Maintain ordering for rules for grammar
 ---
 common/json-schema-to-grammar.cpp | 2 +-
@ -20,6 +20,3 @@ index dadc18c8..2a8dbd22 100644
     std::unordered_map<std::string, json> _refs;
     std::unordered_set<std::string> _refs_being_resolved;
     std::vector<std::string> _errors;
 -- 
 2.46.0
--- a/llama/unicode-data.h
+++ b/llama/unicode-data.h
@ -1,46 +0,0 @@
 /**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <unordered_map>
 #include <unordered_set>
 struct range_nfd {
    uint32_t first;
    uint32_t last;
    uint32_t nfd;
 };
 static const uint32_t MAX_CODEPOINTS = 0x110000;
 extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
 extern const std::unordered_set<uint32_t> unicode_set_whitespace;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
 extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
 extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@ -15,18 +15,15 @@ import (
 	"io"
 	"log/slog"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"sync"
 	"unsafe"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/fs/ggml"
+	fs "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/ml"
 	"golang.org/x/sync/errgroup"
-	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
+	"github.com/ollama/ollama/ml/backend/ggml/ggml/src"
 )
 type device struct {
@ -67,45 +64,7 @@ func (d device) LogValue() slog.Value {
 }
 var devices = sync.OnceValue(func() []device {
-	var lib struct{ name, pattern, defaultValue string }
+	ggml.OnceLoad()
 	if runtime.GOOS == "windows" {
 		lib.name = "PATH"
 		lib.pattern = "ggml-*.dll"
 		lib.defaultValue = "."
 	} else if runtime.GOOS == "linux" {
 		lib.name = "LD_LIBRARY_PATH"
 		lib.pattern = "libggml-*.so"
 		lib.defaultValue = "/usr/local/lib:/usr/lib"
 	}
 	if lib.name != "" {
 		paths, ok := os.LookupEnv(lib.name)
 		if !ok {
 			paths = lib.defaultValue
 		}
 		for _, path := range filepath.SplitList(paths) {
 			matches, err := filepath.Glob(filepath.Join(path, lib.pattern))
 			if err != nil {
 				slog.Error("failed to glob", "path", path, "error", err)
 				continue
 			}
 			for _, match := range matches {
 				if base := filepath.Base(match); strings.HasPrefix(base, "ggml-base") ||
 					strings.HasPrefix(base, "libggml-base") {
 					continue
 				}
 				func() {
 					cmatch := C.CString(match)
 					defer C.free(unsafe.Pointer(cmatch))
 					C.ggml_backend_load(cmatch)
 				}()
 			}
 		}
 	}
 	s := make([]device, C.ggml_backend_dev_count())
 	for i := range s {
@ -116,13 +75,13 @@ var devices = sync.OnceValue(func() []device {
 })
 type Backend struct {
-	meta       *ggml.GGML
+	meta       *fs.GGML
 	cpus, gpus []Context
 	tensors    map[string]*Context
 }
 func New(r *os.File) (ml.Backend, error) {
-	meta, n, err := ggml.Decode(r, -1)
+	meta, n, err := fs.Decode(r, -1)
 	if err != nil {
 		return nil, err
 	}
@ -170,7 +129,7 @@ func New(r *os.File) (ml.Backend, error) {
 		return nil, fmt.Errorf("no devices available")
 	}
-	tensors := make(map[*ggml.Tensor]*Context, len(meta.Tensors().Items()))
+	tensors := make(map[*fs.Tensor]*Context, len(meta.Tensors().Items()))
 	for _, t := range meta.Tensors().Items() {
 		c, err := ctxFunc(append(gpus, cpus...))
 		if err != nil {
--- a/ml/backend/ggml/ggml/.rsync-filter
+++ b/ml/backend/ggml/ggml/.rsync-filter
@ -0,0 +1,21 @@
 include include/
 include src/
 include src/ggml-blas/
 include src/ggml-cpu/
 include src/ggml-cpu/amx/
 include src/ggml-cpu/llamafile/
 include src/ggml-cuda/
 include src/ggml-cuda/template-instances/
 include src/ggml-hip/
 include src/ggml-metal/
 protect **/*.go
 protect **/*-embed.*
 include **/CMakeLists.txt
 include **/*.c
 include **/*.h
 include **/*.cpp
 include **/*.cu
 include **/*.cuh
 include **/*.m
 include **/*.metal
 exclude *
--- a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp
@ -1,5 +1,3 @@
 #ifdef GGML_USE_BLAS
 #include "ggml-impl.h"
 #include "ggml-blas.h"
 #include "ggml-backend-impl.h"
@ -517,5 +515,3 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) {
 }
 GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
 #endif // GGML_USE_BLAS
--- a/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go
@ -1,8 +1,10 @@
 package cpu
 // #cgo CXXFLAGS: -std=c++11
-// #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/.. -I${SRCDIR}/../../include
+// #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/llamafile -I${SRCDIR}/.. -I${SRCDIR}/../../include
 // #cgo CPPFLAGS: -DGGML_USE_LLAMAFILE
 // #cgo linux CPPFLAGS: -D_GNU_SOURCE
 // #cgo darwin,arm64 CPPFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 // #cgo darwin,arm64 LDFLAGS: -framework Accelerate
 import "C"
 import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu/llamafile"
--- a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go
@ -0,0 +1,5 @@
 package llamafile
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../.. -I${SRCDIR}/../../../include
 import "C"
--- a/ml/backend/ggml/ggml/src/ggml.go
+++ b/ml/backend/ggml/ggml/src/ggml.go
@ -3,5 +3,64 @@ package ggml
 // #cgo CXXFLAGS: -std=c++17
 // #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
 // #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
 // #include <stdlib.h>
 // #include "ggml-backend.h"
 import "C"
-import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu"
+import (
 	"log/slog"
 	"os"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"sync"
 	"unsafe"
 	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu"
 )
 var OnceLoad = sync.OnceFunc(func() {
 	var lib struct{ name, pattern, defaultValue string }
 	switch runtime.GOOS {
 	case "darwin":
 		lib.name = "LD_LIBRARY_PATH"
 		lib.pattern = "libggml-*.dylib"
 		lib.defaultValue = "/usr/local/lib:/usr/lib"
 	case "linux":
 		lib.name = "LD_LIBRARY_PATH"
 		lib.pattern = "libggml-*.so"
 		lib.defaultValue = "/usr/local/lib:/usr/lib"
 	case "windows":
 		lib.name = "PATH"
 		lib.pattern = "ggml-*.dll"
 		lib.defaultValue = "."
 	default:
 		return
 	}
 	paths, ok := os.LookupEnv(lib.name)
 	if !ok {
 		paths = lib.defaultValue
 	}
 	for _, path := range filepath.SplitList(paths) {
 		matches, err := filepath.Glob(filepath.Join(path, lib.pattern))
 		if err != nil {
 			slog.Error("failed to glob", "path", path, "error", err)
 			continue
 		}
 		for _, match := range matches {
 			if base := filepath.Base(match); strings.HasPrefix(base, "ggml-base") ||
 				strings.HasPrefix(base, "libggml-base") {
 				continue
 			}
 			func() {
 				cmatch := C.CString(match)
 				defer C.free(unsafe.Pointer(cmatch))
 				C.ggml_backend_load(cmatch)
 			}()
 		}
 	}
 })