From bca6ed0ccc353de666412720f90ae747bc67c4f6 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 19 Dec 2024 11:36:57 -0800 Subject: [PATCH] llama.cpp subdirs --- CMakeLists.txt | 21 +++++- Dockerfile2 | 66 +++++++++++++++++++ Makefile2 | 48 ++++++++++++++ llama/json-schema-to-grammar.h | 34 ---------- llama/llama.cpp/.rsync-filter | 24 +++++++ llama/llama.cpp/LICENSE | 21 ++++++ llama/{ => llama.cpp/common}/base64.hpp | 0 llama/{ => llama.cpp/common}/common.cpp | 26 -------- llama/llama.cpp/common/common.go | 6 ++ llama/{ => llama.cpp/common}/common.h | 26 -------- .../common}/json-schema-to-grammar.cpp | 28 +------- .../llama.cpp/common/json-schema-to-grammar.h | 8 +++ llama/{ => llama.cpp/common}/json.hpp | 0 llama/{ => llama.cpp/common}/log.cpp | 26 -------- llama/{ => llama.cpp/common}/log.h | 26 -------- llama/{ => llama.cpp/common}/sampling.cpp | 26 -------- llama/{ => llama.cpp/common}/sampling.h | 26 -------- llama/{ => llama.cpp/common}/stb_image.h | 0 llama/{ => llama.cpp/examples/llava}/clip.cpp | 26 -------- llama/{ => llama.cpp/examples/llava}/clip.h | 26 -------- .../{ => llama.cpp/examples/llava}/llava.cpp | 26 -------- llama/llama.cpp/examples/llava/llava.go | 6 ++ llama/{ => llama.cpp/examples/llava}/llava.h | 26 -------- llama/{ => llama.cpp/include}/llama.h | 26 -------- llama/{ => llama.cpp/src}/llama-grammar.cpp | 26 -------- llama/{ => llama.cpp/src}/llama-grammar.h | 26 -------- llama/{ => llama.cpp/src}/llama-impl.h | 26 -------- llama/{ => llama.cpp/src}/llama-sampling.cpp | 26 -------- llama/{ => llama.cpp/src}/llama-sampling.h | 26 -------- llama/{ => llama.cpp/src}/llama-vocab.cpp | 26 -------- llama/{ => llama.cpp/src}/llama-vocab.h | 26 -------- llama/{ => llama.cpp/src}/llama.cpp | 26 -------- llama/llama.cpp/src/llama.go | 7 ++ llama/{ => llama.cpp/src}/unicode-data.cpp | 26 -------- llama/llama.cpp/src/unicode-data.h | 20 ++++++ llama/{ => llama.cpp/src}/unicode.cpp | 26 -------- llama/{ => llama.cpp/src}/unicode.h | 26 -------- llama/llama.go | 15 +++-- llama/patches/0001-cuda.patch | 7 +- llama/patches/0002-pretokenizer.patch | 7 +- llama/patches/0003-embeddings.patch | 7 +- llama/patches/0004-clip-unicode.patch | 7 +- llama/patches/0005-solar-pro.patch | 7 +- llama/patches/0006-conditional-fattn.patch | 7 +- ...rt.patch => 0007-add-mllama-support.patch} | 7 +- llama/patches/0007-blas.patch | 29 -------- ...or.patch => 0008-add-unpad-operator.patch} | 7 +- ... 
=> 0009-fix-deepseek-deseret-regex.patch} | 7 +- ...tain-ordering-for-rules-for-grammar.patch} | 7 +- llama/unicode-data.h | 46 ------------- ml/backend/ggml/ggml.go | 53 ++------------- ml/backend/ggml/ggml/.rsync-filter | 21 ++++++ .../ggml/ggml/src/ggml-blas/ggml-blas.cpp | 4 -- ml/backend/ggml/ggml/src/ggml-cpu/cpu.go | 4 +- .../ggml/src/ggml-cpu/llamafile/llamafile.go | 5 ++ ml/backend/ggml/ggml/src/ggml.go | 61 ++++++++++++++++- 56 files changed, 352 insertions(+), 817 deletions(-) create mode 100644 Dockerfile2 create mode 100644 Makefile2 delete mode 100644 llama/json-schema-to-grammar.h create mode 100644 llama/llama.cpp/.rsync-filter create mode 100644 llama/llama.cpp/LICENSE rename llama/{ => llama.cpp/common}/base64.hpp (100%) rename llama/{ => llama.cpp/common}/common.cpp (98%) create mode 100644 llama/llama.cpp/common/common.go rename llama/{ => llama.cpp/common}/common.h (95%) rename llama/{ => llama.cpp/common}/json-schema-to-grammar.cpp (97%) create mode 100644 llama/llama.cpp/common/json-schema-to-grammar.h rename llama/{ => llama.cpp/common}/json.hpp (100%) rename llama/{ => llama.cpp/common}/log.cpp (89%) rename llama/{ => llama.cpp/common}/log.h (77%) rename llama/{ => llama.cpp/common}/sampling.cpp (93%) rename llama/{ => llama.cpp/common}/sampling.h (78%) rename llama/{ => llama.cpp/common}/stb_image.h (100%) rename llama/{ => llama.cpp/examples/llava}/clip.cpp (98%) rename llama/{ => llama.cpp/examples/llava}/clip.h (72%) rename llama/{ => llama.cpp/examples/llava}/llava.cpp (95%) create mode 100644 llama/llama.cpp/examples/llava/llava.go rename llama/{ => llama.cpp/examples/llava}/llava.h (59%) rename llama/{ => llama.cpp/include}/llama.h (98%) rename llama/{ => llama.cpp/src}/llama-grammar.cpp (97%) rename llama/{ => llama.cpp/src}/llama-grammar.h (78%) rename llama/{ => llama.cpp/src}/llama-impl.h (79%) rename llama/{ => llama.cpp/src}/llama-sampling.cpp (98%) rename llama/{ => llama.cpp/src}/llama-sampling.h (54%) rename llama/{ => llama.cpp/src}/llama-vocab.cpp (98%) rename llama/{ => llama.cpp/src}/llama-vocab.h (83%) rename llama/{ => llama.cpp/src}/llama.cpp (99%) create mode 100644 llama/llama.cpp/src/llama.go rename llama/{ => llama.cpp/src}/unicode-data.cpp (99%) create mode 100644 llama/llama.cpp/src/unicode-data.h rename llama/{ => llama.cpp/src}/unicode.cpp (96%) rename llama/{ => llama.cpp/src}/unicode.h (63%) rename llama/patches/{0008-add-mllama-support.patch => 0007-add-mllama-support.patch} (99%) delete mode 100644 llama/patches/0007-blas.patch rename llama/patches/{0009-add-unpad-operator.patch => 0008-add-unpad-operator.patch} (99%) rename llama/patches/{0010-fix-deepseek-deseret-regex.patch => 0009-fix-deepseek-deseret-regex.patch} (96%) rename llama/patches/{0011-Maintain-ordering-for-rules-for-grammar.patch => 0010-Maintain-ordering-for-rules-for-grammar.patch} (84%) delete mode 100644 llama/unicode-data.h create mode 100644 ml/backend/ggml/ggml/.rsync-filter create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go diff --git a/CMakeLists.txt b/CMakeLists.txt index 21d687b81..d6369a55d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,10 +9,15 @@ find_package(Threads REQUIRED) set(CMAKE_BUILD_TYPE Release) set(BUILD_SHARED_LIBS ON) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + set(GGML_CCACHE ON) set(GGML_SCHED_MAX_COPIES 4) set(GGML_CPU_ALL_VARIANTS ON) set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128) +set(GGML_LLAMAFILE ON) add_compile_definitions(GGML_BUILD) 
add_compile_definitions(GGML_SHARED) @@ -24,9 +29,21 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx) +function(set_target_output_directory _target) + if(TARGET ${_target}) + set_target_properties(${_target} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib + ) + endif() +endfunction() + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src EXCLUDE_FROM_ALL) +set_target_output_directory(ggml-base) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu) +set_target_output_directory(ggml-cpu) find_package(BLAS) if(NOT BLAS_VENDOR) @@ -36,14 +53,16 @@ else() endif() add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-blas) -target_compile_features(ggml-blas PRIVATE cxx_std_11) +set_target_output_directory(ggml-blas) check_language(CUDA) if(CMAKE_CUDA_COMPILER) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda) + set_target_output_directory(ggml-cuda) endif() check_language(HIP) if(CMAKE_HIP_COMPILER) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip) + set_target_output_directory(ggml-hip) endif() diff --git a/Dockerfile2 b/Dockerfile2 new file mode 100644 index 000000000..1a584488d --- /dev/null +++ b/Dockerfile2 @@ -0,0 +1,66 @@ +ARG CUDA_11_VERSION=11.3 +ARG CUDA_12_VERSION=12.4 +ARG ROCM_VERSION=6.1.2 +ARG JETPACK_5_VERSION=r35.4.1 +ARG JETPACK_6_VERSION=r36.2.0 +ARG CMAKE_VERSION=3.31.2 + +FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base +ARG CMAKE_VERSION +RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1 +RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \ + && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo + +# FROM --platform=linux/arm64 rockylinux:8 AS base +# ARG CMAKE_VERSION +# RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1 +# RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo + +FROM base AS amd64 +ARG CUDA_11_VERSION +ARG CUDA_12_VERSION +RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \ + && yum install -y cuda-toolkit-${CUDA_12_VERSION//./-} +COPY CMakeLists.txt CMakeLists.txt +COPY ml/backend/ggml/ggml ml/backend/ggml/ggml + +FROM --platform=linux/amd64 amd64 AS cuda_11 +ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH +RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" +RUN cmake --build build --target ggml-cuda -j + +FROM --platform=linux/amd64 amd64 AS cuda_12 +ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH +RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" +RUN cmake --build build --target ggml-cuda -j + +FROM --platform=linux/amd64 amd64 AS rocm +RUN cmake -S . 
-B build -DCMAKE_HIP_ARCHITECTURES="gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" +RUN cmake --build build --target ggml-hip -j + +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5 +ARG CMAKE_VERSION +RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1 +COPY CMakeLists.txt . +COPY ml/backend/ggml/ggml . +RUN cmake -S . -B build \ + -DCMAKE_CUDA_ARCHITECTURES="72;87" +RUN cmake --build build --target ggml-cuda + +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6 +ARG CMAKE_VERSION +RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1 +COPY CMakeLists.txt . +COPY ml/backend/ggml/ggml . +RUN cmake -S . -B build \ + -DCMAKE_CUDA_ARCHITECTURES="87" +RUN cmake --build build --target ggml-cuda + +FROM --platform=linux/amd64 golang:1.23 +COPY --from=cuda_11 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-11.so +COPY --from=cuda_12 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-12.so +COPY --from=rocm build/ml/backend/ggml/ggml/src/ggml-hip/libggml-hip.so libggml-hip.so + +# FROM --platform=linux/arm64 golang:1.23 +# COPY --from=jetpack_5 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-5.so +# COPY --from=jetpack_6 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-6.so diff --git a/Makefile2 b/Makefile2 new file mode 100644 index 000000000..3fbff0e79 --- /dev/null +++ b/Makefile2 @@ -0,0 +1,48 @@ +UPSTREAM=https://github.com/ggerganov/llama.cpp.git +WORKDIR=llama/vendor +FETCH_HEAD=40c6d79fb52f995f47507fedfeaae2ac05d9b35c + +all: sync + +.PHONY: sync +sync: llama/llama.cpp ml/backend/ggml/ggml + +.PHONY: llama/llama.cpp +llama/llama.cpp: llama/vendor apply_patches + rsync -arvzc --delete -f "merge $@/.rsync-filter" $< $@ + rsync -arvzc --delete --include LICENSE --exclude $(WORKDIR) $@ + +.PHONY: ml/backend/ggml/ggml apply_patches +ml/backend/ggml/ggml: llama/vendor/ggml apply_patches + rsync -arvzc --delete -f "merge $@/.rsync-filter" $< $@ + rsync -arvzc --delete --include LICENSE --exclude $(WORKDIR) $@ + +PATCHES=$(wildcard llama/patches/*.patch) + +.PHONY: apply_patches +.NOTPARALLEL: +apply_patches: $(addsuffix ed, $(PATCHES)) + +%.patched: %.patch + if git -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi + +.PHONY: checkout +checkout: $(WORKDIR) + git -C $(WORKDIR) fetch + git -C $(WORKDIR) checkout -f $(FETCH_HEAD) + +$(WORKDIR): + git clone $(UPSTREAM) $(WORKDIR) + +.PHONE: format_patches +format_patches: llama/patches + git -C $(WORKDIR) format-patch \ + --no-signature \ + --no-numbered \ + --zero-commit \ + -o $(realpath $<) \ + $(FETCH_HEAD) + +.PHONE: clean +clean: checkout + $(RM) $(addsuffix ed, $(PATCHES)) diff --git a/llama/json-schema-to-grammar.h b/llama/json-schema-to-grammar.h deleted file mode 100644 index bfff9bc58..000000000 --- a/llama/json-schema-to-grammar.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the 
"Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include "ggml.h" -// Change JSON_ASSERT from assert() to GGML_ASSERT: -#define JSON_ASSERT GGML_ASSERT -#include "json.hpp" - -std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/llama/llama.cpp/.rsync-filter b/llama/llama.cpp/.rsync-filter new file mode 100644 index 000000000..bedb86972 --- /dev/null +++ b/llama/llama.cpp/.rsync-filter @@ -0,0 +1,24 @@ +protect **/*.go +include common/ +include common/base64.* +include common/common.* +include common/json-schema-to-grammar.* +include common/json.* +include common/log.* +include common/sampling.* +include common/stb_image.* +include include/ +include include/llama.* +include examples/ +include examples/llava/ +include examples/llava/clip.* +include examples/llava/llava.* +include src/ +include src/llama-grammar.* +include src/llama-impl.* +include src/llama-sampling.* +include src/llama-vocab.* +include src/llama.* +include src/unicode-data.* +include src/unicode.* +exclude * diff --git a/llama/llama.cpp/LICENSE b/llama/llama.cpp/LICENSE new file mode 100644 index 000000000..acb96ce78 --- /dev/null +++ b/llama/llama.cpp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/llama/base64.hpp b/llama/llama.cpp/common/base64.hpp similarity index 100% rename from llama/base64.hpp rename to llama/llama.cpp/common/base64.hpp diff --git a/llama/common.cpp b/llama/llama.cpp/common/common.cpp similarity index 98% rename from llama/common.cpp rename to llama/llama.cpp/common/common.cpp index 1b90fb445..6143516d2 100644 --- a/llama/common.cpp +++ b/llama/llama.cpp/common/common.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #if defined(_MSC_VER) #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif diff --git a/llama/llama.cpp/common/common.go b/llama/llama.cpp/common/common.go new file mode 100644 index 000000000..ebbb738f2 --- /dev/null +++ b/llama/llama.cpp/common/common.go @@ -0,0 +1,6 @@ +package common + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -I${SRCDIR}/../include +// #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include +import "C" diff --git a/llama/common.h b/llama/llama.cpp/common/common.h similarity index 95% rename from llama/common.h rename to llama/llama.cpp/common/common.h index 03d60aaff..0373fd3ea 100644 --- a/llama/common.h +++ b/llama/llama.cpp/common/common.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - // Various helper functions and utilities #pragma once diff --git a/llama/json-schema-to-grammar.cpp b/llama/llama.cpp/common/json-schema-to-grammar.cpp similarity index 97% rename from llama/json-schema-to-grammar.cpp rename to llama/llama.cpp/common/json-schema-to-grammar.cpp index e5191967b..2a8dbd22d 100644 --- a/llama/json-schema-to-grammar.cpp +++ b/llama/llama.cpp/common/json-schema-to-grammar.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "json-schema-to-grammar.h" #include #include @@ -417,7 +391,7 @@ class SchemaConverter { private: std::function _fetch_json; bool _dotall; - std::map _rules; + std::unordered_map _rules; std::unordered_map _refs; std::unordered_set _refs_being_resolved; std::vector _errors; diff --git a/llama/llama.cpp/common/json-schema-to-grammar.h b/llama/llama.cpp/common/json-schema-to-grammar.h new file mode 100644 index 000000000..41623b346 --- /dev/null +++ b/llama/llama.cpp/common/json-schema-to-grammar.h @@ -0,0 +1,8 @@ +#pragma once + +#include "ggml.h" +// Change JSON_ASSERT from assert() to GGML_ASSERT: +#define JSON_ASSERT GGML_ASSERT +#include "json.hpp" + +std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/llama/json.hpp b/llama/llama.cpp/common/json.hpp similarity index 100% rename from llama/json.hpp rename to llama/llama.cpp/common/json.hpp diff --git a/llama/log.cpp b/llama/llama.cpp/common/log.cpp similarity index 89% rename from llama/log.cpp rename to llama/llama.cpp/common/log.cpp index 1a98ff726..04c7c0ed1 100644 --- a/llama/log.cpp +++ b/llama/llama.cpp/common/log.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "log.h" #include diff --git a/llama/log.h b/llama/llama.cpp/common/log.h similarity index 77% rename from llama/log.h rename to llama/llama.cpp/common/log.h index 951d0c21d..66605cc69 100644 --- a/llama/log.h +++ b/llama/llama.cpp/common/log.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "ggml.h" // for ggml_log_level diff --git a/llama/sampling.cpp b/llama/llama.cpp/common/sampling.cpp similarity index 93% rename from llama/sampling.cpp rename to llama/llama.cpp/common/sampling.cpp index 616555f06..0c4699a89 100644 --- a/llama/sampling.cpp +++ b/llama/llama.cpp/common/sampling.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "sampling.h" #include "common.h" diff --git a/llama/sampling.h b/llama/llama.cpp/common/sampling.h similarity index 78% rename from llama/sampling.h rename to llama/llama.cpp/common/sampling.h index 38a5f2b22..348911b18 100644 --- a/llama/sampling.h +++ b/llama/llama.cpp/common/sampling.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/stb_image.h b/llama/llama.cpp/common/stb_image.h similarity index 100% rename from llama/stb_image.h rename to llama/llama.cpp/common/stb_image.h diff --git a/llama/clip.cpp b/llama/llama.cpp/examples/llava/clip.cpp similarity index 98% rename from llama/clip.cpp rename to llama/llama.cpp/examples/llava/clip.cpp index dd88a6e90..427d5e020 100644 --- a/llama/clip.cpp +++ b/llama/llama.cpp/examples/llava/clip.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - // NOTE: This is modified from clip.cpp only for LLaVA, // so there might be still unnecessary artifacts hanging around // I'll gradually clean and extend it diff --git a/llama/clip.h b/llama/llama.cpp/examples/llava/clip.h similarity index 72% rename from llama/clip.h rename to llama/llama.cpp/examples/llava/clip.h index 2af04bf53..78588bdf1 100644 --- a/llama/clip.h +++ b/llama/llama.cpp/examples/llava/clip.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef CLIP_H #define CLIP_H diff --git a/llama/llava.cpp b/llama/llama.cpp/examples/llava/llava.cpp similarity index 95% rename from llama/llava.cpp rename to llama/llama.cpp/examples/llava/llava.cpp index d1d00eab5..d56644a89 100644 --- a/llama/llava.cpp +++ b/llama/llama.cpp/examples/llava/llava.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "clip.h" #include "llava.h" diff --git a/llama/llama.cpp/examples/llava/llava.go b/llama/llama.cpp/examples/llava/llava.go new file mode 100644 index 000000000..37b031cb7 --- /dev/null +++ b/llama/llama.cpp/examples/llava/llava.go @@ -0,0 +1,6 @@ +package llava + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -I${SRCDIR}/../../include -I${SRCDIR}/../../common +// #cgo CPPFLAGS: -I${SRCDIR}/../../../../ml/backend/ggml/ggml/include +import "C" diff --git a/llama/llava.h b/llama/llama.cpp/examples/llava/llava.h similarity index 59% rename from llama/llava.h rename to llama/llama.cpp/examples/llava/llava.h index 3acd9f615..b6feb3027 100644 --- a/llama/llava.h +++ b/llama/llama.cpp/examples/llava/llava.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef LLAVA_H #define LLAVA_H diff --git a/llama/llama.h b/llama/llama.cpp/include/llama.h similarity index 98% rename from llama/llama.h rename to llama/llama.cpp/include/llama.h index 4ff8c8621..aba85f860 100644 --- a/llama/llama.h +++ b/llama/llama.cpp/include/llama.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef LLAMA_H #define LLAMA_H diff --git a/llama/llama-grammar.cpp b/llama/llama.cpp/src/llama-grammar.cpp similarity index 97% rename from llama/llama-grammar.cpp rename to llama/llama.cpp/src/llama-grammar.cpp index d9e4839f5..74e9f64b3 100644 --- a/llama/llama-grammar.cpp +++ b/llama/llama.cpp/src/llama-grammar.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-grammar.h" #include "llama-vocab.h" diff --git a/llama/llama-grammar.h b/llama/llama.cpp/src/llama-grammar.h similarity index 78% rename from llama/llama-grammar.h rename to llama/llama.cpp/src/llama-grammar.h index 9052dd2f9..f529ce351 100644 --- a/llama/llama-grammar.h +++ b/llama/llama.cpp/src/llama-grammar.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama-impl.h" diff --git a/llama/llama-impl.h b/llama/llama.cpp/src/llama-impl.h similarity index 79% rename from llama/llama-impl.h rename to llama/llama.cpp/src/llama-impl.h index 3a33cf331..70f16b61c 100644 --- a/llama/llama-impl.h +++ b/llama/llama.cpp/src/llama-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "llama.h" diff --git a/llama/llama-sampling.cpp b/llama/llama.cpp/src/llama-sampling.cpp similarity index 98% rename from llama/llama-sampling.cpp rename to llama/llama.cpp/src/llama-sampling.cpp index 154cc40eb..fd8ca8a9e 100644 --- a/llama/llama-sampling.cpp +++ b/llama/llama.cpp/src/llama-sampling.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-sampling.h" #include "llama-vocab.h" diff --git a/llama/llama-sampling.h b/llama/llama.cpp/src/llama-sampling.h similarity index 54% rename from llama/llama-sampling.h rename to llama/llama.cpp/src/llama-sampling.h index af63bb885..919f6fdfc 100644 --- a/llama/llama-sampling.h +++ b/llama/llama.cpp/src/llama-sampling.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once // TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ? 
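Besides relocating the sources under src/, the patch adds a cgo stub for that directory (src/llama.go, below) which blank-imports the ggml backend package, and BackendInit in llama/llama.go gains a ggml.OnceLoad() call so the ggml backend libraries are loaded before llama_backend_init. A short usage sketch under those assumptions — the main package here is hypothetical; only BackendInit and OnceLoad come from this patch:

// main.go: hypothetical consumer of the relocated packages.
package main

import (
	// Importing the llama package transitively pulls in the cgo stub
	// packages under llama/llama.cpp/... and the ggml sources they need.
	"github.com/ollama/ollama/llama"
)

func main() {
	// BackendInit (patched further below) calls ggml.OnceLoad to load the
	// ggml backend libraries before initializing llama.cpp itself.
	llama.BackendInit()
}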
diff --git a/llama/llama-vocab.cpp b/llama/llama.cpp/src/llama-vocab.cpp similarity index 98% rename from llama/llama-vocab.cpp rename to llama/llama.cpp/src/llama-vocab.cpp index 1a6c84fbf..05ef0e71f 100644 --- a/llama/llama-vocab.cpp +++ b/llama/llama.cpp/src/llama-vocab.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-vocab.h" #include "unicode.h" diff --git a/llama/llama-vocab.h b/llama/llama.cpp/src/llama-vocab.h similarity index 83% rename from llama/llama-vocab.h rename to llama/llama.cpp/src/llama-vocab.h index ec7329eb2..4bb16d2e4 100644 --- a/llama/llama-vocab.h +++ b/llama/llama.cpp/src/llama-vocab.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "llama-impl.h" diff --git a/llama/llama.cpp b/llama/llama.cpp/src/llama.cpp similarity index 99% rename from llama/llama.cpp rename to llama/llama.cpp/src/llama.cpp index 181525f4b..468816428 100644 --- a/llama/llama.cpp +++ b/llama/llama.cpp/src/llama.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-impl.h" #include "llama-vocab.h" #include "llama-sampling.h" diff --git a/llama/llama.cpp/src/llama.go b/llama/llama.cpp/src/llama.go new file mode 100644 index 000000000..29385ccc0 --- /dev/null +++ b/llama/llama.cpp/src/llama.go @@ -0,0 +1,7 @@ +package llama + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -I${SRCDIR}/../include +// #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include +import "C" +import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src" diff --git a/llama/unicode-data.cpp b/llama/llama.cpp/src/unicode-data.cpp similarity index 99% rename from llama/unicode-data.cpp rename to llama/llama.cpp/src/unicode-data.cpp index 4b3a8dec9..04dcd7fcf 100644 --- a/llama/unicode-data.cpp +++ b/llama/llama.cpp/src/unicode-data.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - // generated with scripts/gen-unicode-data.py #include "unicode-data.h" diff --git a/llama/llama.cpp/src/unicode-data.h b/llama/llama.cpp/src/unicode-data.h new file mode 100644 index 000000000..f6973ebd2 --- /dev/null +++ b/llama/llama.cpp/src/unicode-data.h @@ -0,0 +1,20 @@ +#pragma once + +#include <cstdint> +#include <vector> +#include <unordered_map> +#include <unordered_set> + +struct range_nfd { + uint32_t first; + uint32_t last; + uint32_t nfd; +}; + +static const uint32_t MAX_CODEPOINTS = 0x110000; + +extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags; +extern const std::unordered_set<uint32_t> unicode_set_whitespace; +extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase; +extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase; +extern const std::initializer_list<range_nfd> unicode_ranges_nfd; diff --git a/llama/unicode.cpp b/llama/llama.cpp/src/unicode.cpp similarity index 96% rename from llama/unicode.cpp rename to llama/llama.cpp/src/unicode.cpp index d9cedd420..51dd81fba 100644 --- a/llama/unicode.cpp +++ b/llama/llama.cpp/src/unicode.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #if defined(_MSC_VER) #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif diff --git a/llama/unicode.h b/llama/llama.cpp/src/unicode.h similarity index 63% rename from llama/unicode.h rename to llama/llama.cpp/src/unicode.h index c6752ee0f..008532a24 100644 --- a/llama/unicode.h +++ b/llama/llama.cpp/src/unicode.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include diff --git a/llama/llama.go b/llama/llama.go index ab6da37aa..c48f2708c 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -1,18 +1,19 @@ package llama -//go:generate make -j 8 - /* #cgo CFLAGS: -std=c11 #cgo CXXFLAGS: -std=c++11 +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/examples/llava #cgo CPPFLAGS: -I${SRCDIR}/../ml/backend/ggml/ggml/include -#cgo darwin,arm64 CPPFLAGS: -DGGML_USE_METAL #include +#include "ggml.h" #include "llama.h" #include "clip.h" -#include "ggml.h" #include "llava.h" + #include "mllama.h" #include "sampling_ext.h" @@ -47,10 +48,14 @@ import ( "sync/atomic" "unsafe" - _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src" + _ "github.com/ollama/ollama/llama/llama.cpp/common" + _ "github.com/ollama/ollama/llama/llama.cpp/examples/llava" + _ "github.com/ollama/ollama/llama/llama.cpp/src" + "github.com/ollama/ollama/ml/backend/ggml/ggml/src" ) func BackendInit() { + ggml.OnceLoad() C.llama_backend_init() } diff --git a/llama/patches/0001-cuda.patch b/llama/patches/0001-cuda.patch index c74885269..3f9ac4d33 100644 --- a/llama/patches/0001-cuda.patch +++ b/llama/patches/0001-cuda.patch @@ -1,7 +1,7 @@ -From 702ee500b229e910e3e6cd3c84d87763c51fb411 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: jmorganca Date: Thu, 6 Jun 2024 23:55:47 -0700 -Subject: [PATCH 01/11] cuda +Subject: [PATCH] cuda --- ggml/src/ggml-backend.cpp | 2 +- @@ -53,6 +53,3 @@ index 093ae900..a0cf4ba4 100644 } static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) { --- -2.46.0 - diff --git a/llama/patches/0002-pretokenizer.patch b/llama/patches/0002-pretokenizer.patch index 72e4b268c..c87d1e1a6 100644 --- a/llama/patches/0002-pretokenizer.patch +++ b/llama/patches/0002-pretokenizer.patch @@ -1,7 +1,7 @@ -From 
67eb186ccf062100835d413b1c3e2a0fc58e1c0f Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 16 Sep 2024 15:53:13 -0700 -Subject: [PATCH 02/11] pretokenizer +Subject: [PATCH] pretokenizer --- src/llama.cpp | 14 +++----------- @@ -39,6 +39,3 @@ index 6a6f4c2a..fa09f3b3 100644 } } else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; --- -2.46.0 - diff --git a/llama/patches/0003-embeddings.patch b/llama/patches/0003-embeddings.patch index 74832a2ec..996f8dbe6 100644 --- a/llama/patches/0003-embeddings.patch +++ b/llama/patches/0003-embeddings.patch @@ -1,7 +1,7 @@ -From a9a7820ae111d70e24d4f7004378b5321e8a29c7 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 16 Sep 2024 15:53:14 -0700 -Subject: [PATCH 03/11] embeddings +Subject: [PATCH] embeddings --- src/llama.cpp | 9 ++++++--- @@ -45,6 +45,3 @@ index fa09f3b3..d1791af0 100644 // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); ggml_backend_sched_alloc_graph(lctx.sched.get(), gf); --- -2.46.0 - diff --git a/llama/patches/0004-clip-unicode.patch b/llama/patches/0004-clip-unicode.patch index 73bde706b..13e945c37 100644 --- a/llama/patches/0004-clip-unicode.patch +++ b/llama/patches/0004-clip-unicode.patch @@ -1,7 +1,7 @@ -From aa5ad04094458943643df789c5b7fd7d4c68dafb Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 16 Sep 2024 15:53:15 -0700 -Subject: [PATCH 04/11] clip-unicode +Subject: [PATCH] clip-unicode --- examples/llava/clip.cpp | 40 +++++++++++++++++++++++++++++++++++++++- @@ -74,6 +74,3 @@ index d7c94352..427d5e02 100644 } // vision model --- -2.46.0 - diff --git a/llama/patches/0005-solar-pro.patch b/llama/patches/0005-solar-pro.patch index f69ed943d..35b8c55d8 100644 --- a/llama/patches/0005-solar-pro.patch +++ b/llama/patches/0005-solar-pro.patch @@ -1,7 +1,7 @@ -From 226de4f71ce73a87a805dc83484b32f9f9d9c24d Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 16 Sep 2024 15:53:16 -0700 -Subject: [PATCH 05/11] solar-pro +Subject: [PATCH] solar-pro solar-pro introduces block skip connections where blocks are connected to other, non-sequential blocks with a scale multiple @@ -404,6 +404,3 @@ index d1791af0..b01770d0 100644 return LLAMA_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 --- -2.46.0 - diff --git a/llama/patches/0006-conditional-fattn.patch b/llama/patches/0006-conditional-fattn.patch index c80864f17..9eb37d346 100644 --- a/llama/patches/0006-conditional-fattn.patch +++ b/llama/patches/0006-conditional-fattn.patch @@ -1,7 +1,7 @@ -From b9d893b5c7c3dcff42bce378ea26587a6c7d1113 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Wed, 9 Oct 2024 17:26:23 -0700 -Subject: [PATCH 06/11] conditional-fattn +Subject: [PATCH] conditional-fattn --- ggml/src/ggml-cuda/ggml-cuda.cu | 2 ++ @@ -23,6 +23,3 @@ index a2fcfe5d..5eed90da 100644 case GGML_OP_CROSS_ENTROPY_LOSS: ggml_cuda_cross_entropy_loss(ctx, dst); break; --- -2.46.0 - diff --git a/llama/patches/0008-add-mllama-support.patch b/llama/patches/0007-add-mllama-support.patch similarity index 99% rename from llama/patches/0008-add-mllama-support.patch rename to 
llama/patches/0007-add-mllama-support.patch index 4ed259fac..ae8b80177 100644 --- a/llama/patches/0008-add-mllama-support.patch +++ b/llama/patches/0007-add-mllama-support.patch @@ -1,7 +1,7 @@ -From c2f0b1c0eda94eea785a1de9098df9eb29d64eb5 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: jmorganca Date: Thu, 17 Oct 2024 15:18:22 -0700 -Subject: [PATCH 08/11] add mllama support +Subject: [PATCH] add mllama support mllama adds cross-attention layers to the standard llama architecture it also requires a way to input a new tensor: cross_attention_state @@ -784,6 +784,3 @@ index b01770d0..46881642 100644 } else { batch.token = (llama_token *) malloc(sizeof(llama_token) * n_tokens_alloc); } --- -2.46.0 - diff --git a/llama/patches/0007-blas.patch b/llama/patches/0007-blas.patch deleted file mode 100644 index d0c3eed22..000000000 --- a/llama/patches/0007-blas.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 9a5a9479d9cdf2032ff989fd297e50490f53e4c2 Mon Sep 17 00:00:00 2001 -From: Jesse Gross -Date: Mon, 30 Sep 2024 16:31:04 -0700 -Subject: [PATCH 07/11] blas - ---- - ggml/src/ggml-blas/ggml-blas.cpp | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp -index ec158dfa..b3ac1fa4 100644 ---- a/ggml/src/ggml-blas/ggml-blas.cpp -+++ b/ggml/src/ggml-blas/ggml-blas.cpp -@@ -1,3 +1,5 @@ -+#ifdef GGML_USE_BLAS -+ - #include "ggml-impl.h" - #include "ggml-blas.h" - #include "ggml-backend-impl.h" -@@ -515,3 +517,5 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) { - } - - GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) -+ -+#endif // GGML_USE_BLAS -\ No newline at end of file --- -2.46.0 - diff --git a/llama/patches/0009-add-unpad-operator.patch b/llama/patches/0008-add-unpad-operator.patch similarity index 99% rename from llama/patches/0009-add-unpad-operator.patch rename to llama/patches/0008-add-unpad-operator.patch index 470b8b427..d30b5e041 100644 --- a/llama/patches/0009-add-unpad-operator.patch +++ b/llama/patches/0008-add-unpad-operator.patch @@ -1,7 +1,7 @@ -From 8e07a88fa87f31b6f2245c02a89a4a367ed6013c Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Thu, 17 Oct 2024 17:19:25 -0700 -Subject: [PATCH 09/11] add unpad operator +Subject: [PATCH] add unpad operator --- ggml/include/ggml.h | 10 +++++ @@ -394,6 +394,3 @@ index 1a9a7efa..ea2b259b 100644 // ggml_arange struct ggml_tensor * ggml_arange( --- -2.46.0 - diff --git a/llama/patches/0010-fix-deepseek-deseret-regex.patch b/llama/patches/0009-fix-deepseek-deseret-regex.patch similarity index 96% rename from llama/patches/0010-fix-deepseek-deseret-regex.patch rename to llama/patches/0009-fix-deepseek-deseret-regex.patch index 5e8a2e216..9ea501d06 100644 --- a/llama/patches/0010-fix-deepseek-deseret-regex.patch +++ b/llama/patches/0009-fix-deepseek-deseret-regex.patch @@ -1,7 +1,7 @@ -From 4236c07fc90fb758b89921fa7ef39dc0482c4bea Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 25 Oct 2024 16:25:18 -0700 -Subject: [PATCH 10/11] fix deepseek deseret regex +Subject: [PATCH] fix deepseek deseret regex On windows compiled with gcc the c++ regex library failed to handle the characters @@ -70,6 +70,3 @@ index 3d459263..51dd81fb 100644 } static std::vector unicode_byte_encoding_process(const std::vector & bpe_words) { --- -2.46.0 - diff --git 
a/llama/patches/0011-Maintain-ordering-for-rules-for-grammar.patch b/llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch similarity index 84% rename from llama/patches/0011-Maintain-ordering-for-rules-for-grammar.patch rename to llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch index ccb6fce96..33b504ec1 100644 --- a/llama/patches/0011-Maintain-ordering-for-rules-for-grammar.patch +++ b/llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch @@ -1,7 +1,7 @@ -From 7752556d7922e92b455ed92d22a3bfa9725f4458 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: ParthSareen Date: Wed, 11 Dec 2024 15:37:32 -0800 -Subject: [PATCH 11/11] Maintain ordering for rules for grammar +Subject: [PATCH] Maintain ordering for rules for grammar --- common/json-schema-to-grammar.cpp | 2 +- @@ -20,6 +20,3 @@ index dadc18c8..2a8dbd22 100644 std::unordered_map _refs; std::unordered_set _refs_being_resolved; std::vector _errors; --- -2.46.0 - diff --git a/llama/unicode-data.h b/llama/unicode-data.h deleted file mode 100644 index 393ea0bd4..000000000 --- a/llama/unicode-data.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once - -#include <cstdint> -#include <vector> -#include <unordered_map> -#include <unordered_set> - -struct range_nfd { - uint32_t first; - uint32_t last; - uint32_t nfd; -}; - -static const uint32_t MAX_CODEPOINTS = 0x110000; - -extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags; -extern const std::unordered_set<uint32_t> unicode_set_whitespace; -extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase; -extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase; -extern const std::initializer_list<range_nfd> unicode_ranges_nfd; diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 1fb057eee..108631266 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -15,18 +15,15 @@ import ( "io" "log/slog" "os" - "path/filepath" - "runtime" - "strings" "sync" "unsafe" "github.com/ollama/ollama/format" - "github.com/ollama/ollama/fs/ggml" + fs "github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/ml" "golang.org/x/sync/errgroup" - _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src" + "github.com/ollama/ollama/ml/backend/ggml/ggml/src" ) type device struct { @@ -67,45 +64,7 @@ func (d device) LogValue() slog.Value { } var devices = sync.OnceValue(func() []device { - var lib struct{ name, pattern, defaultValue string } - if runtime.GOOS == "windows" { - lib.name = "PATH" - lib.pattern = "ggml-*.dll" - lib.defaultValue = "." - } else if runtime.GOOS == "linux" { - lib.name = "LD_LIBRARY_PATH" - lib.pattern = "libggml-*.so" - lib.defaultValue = "/usr/local/lib:/usr/lib" - } - - if lib.name != "" { - paths, ok := os.LookupEnv(lib.name) - if !ok { - paths = lib.defaultValue - } - - for _, path := range filepath.SplitList(paths) { - matches, err := filepath.Glob(filepath.Join(path, lib.pattern)) - if err != nil { - slog.Error("failed to glob", "path", path, "error", err) - continue - } - - for _, match := range matches { - if base := filepath.Base(match); strings.HasPrefix(base, "ggml-base") || - strings.HasPrefix(base, "libggml-base") { - continue - } - - func() { - cmatch := C.CString(match) - defer C.free(unsafe.Pointer(cmatch)) - - C.ggml_backend_load(cmatch) - }() - } - } - } + ggml.OnceLoad() s := make([]device, C.ggml_backend_dev_count()) for i := range s { @@ -116,13 +75,13 @@ var devices = sync.OnceValue(func() []device { }) type Backend struct { - meta *ggml.GGML + meta *fs.GGML cpus, gpus []Context tensors map[string]*Context } func New(r *os.File) (ml.Backend, error) { - meta, n, err := ggml.Decode(r, -1) + meta, n, err := fs.Decode(r, -1) if err != nil { return nil, err } @@ -170,7 +129,7 @@ func New(r *os.File) (ml.Backend, error) { return nil, fmt.Errorf("no devices available") } - tensors := make(map[*ggml.Tensor]*Context, len(meta.Tensors().Items())) + tensors := make(map[*fs.Tensor]*Context, len(meta.Tensors().Items())) for _, t := range meta.Tensors().Items() { c, err := ctxFunc(append(gpus, cpus...)) if err != nil { diff --git a/ml/backend/ggml/ggml/.rsync-filter b/ml/backend/ggml/ggml/.rsync-filter new file mode 100644 index 000000000..147398174 --- /dev/null +++ b/ml/backend/ggml/ggml/.rsync-filter @@ -0,0 +1,21 @@ +include include/ +include src/ +include src/ggml-blas/ +include src/ggml-cpu/ +include src/ggml-cpu/amx/ +include src/ggml-cpu/llamafile/ +include src/ggml-cuda/ +include src/ggml-cuda/template-instances/ +include src/ggml-hip/ +include src/ggml-metal/ +protect **/*.go +protect **/*-embed.* +include **/CMakeLists.txt +include **/*.c +include **/*.h +include **/*.cpp +include **/*.cu +include **/*.cuh +include **/*.m +include **/*.metal +exclude * diff --git 
a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp index b3ac1fa45..ec158dfac 100644 --- a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp @@ -1,5 +1,3 @@ -#ifdef GGML_USE_BLAS - #include "ggml-impl.h" #include "ggml-blas.h" #include "ggml-backend-impl.h" @@ -517,5 +515,3 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) { } GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) - -#endif // GGML_USE_BLAS \ No newline at end of file diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go b/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go index b0cd99780..55915df98 100644 --- a/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go +++ b/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go @@ -1,8 +1,10 @@ package cpu // #cgo CXXFLAGS: -std=c++11 -// #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/.. -I${SRCDIR}/../../include +// #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/llamafile -I${SRCDIR}/.. -I${SRCDIR}/../../include +// #cgo CPPFLAGS: -DGGML_USE_LLAMAFILE // #cgo linux CPPFLAGS: -D_GNU_SOURCE // #cgo darwin,arm64 CPPFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 // #cgo darwin,arm64 LDFLAGS: -framework Accelerate import "C" +import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu/llamafile" diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go new file mode 100644 index 000000000..09b002ce5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go @@ -0,0 +1,5 @@ +package llamafile + +// #cgo CXXFLAGS: -std=c++17 +// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../.. -I${SRCDIR}/../../../include +import "C" diff --git a/ml/backend/ggml/ggml/src/ggml.go b/ml/backend/ggml/ggml/src/ggml.go index f554b4550..f8f490484 100644 --- a/ml/backend/ggml/ggml/src/ggml.go +++ b/ml/backend/ggml/ggml/src/ggml.go @@ -3,5 +3,64 @@ package ggml // #cgo CXXFLAGS: -std=c++17 // #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU // #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu +// #include <stdlib.h> +// #include "ggml-backend.h" import "C" -import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu" +import ( + "log/slog" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "unsafe" + + _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu" +) + +var OnceLoad = sync.OnceFunc(func() { + var lib struct{ name, pattern, defaultValue string } + switch runtime.GOOS { + case "darwin": + lib.name = "LD_LIBRARY_PATH" + lib.pattern = "libggml-*.dylib" + lib.defaultValue = "/usr/local/lib:/usr/lib" + case "linux": + lib.name = "LD_LIBRARY_PATH" + lib.pattern = "libggml-*.so" + lib.defaultValue = "/usr/local/lib:/usr/lib" + case "windows": + lib.name = "PATH" + lib.pattern = "ggml-*.dll" + lib.defaultValue = "." + default: + return + } + + paths, ok := os.LookupEnv(lib.name) + if !ok { + paths = lib.defaultValue + } + + for _, path := range filepath.SplitList(paths) { + matches, err := filepath.Glob(filepath.Join(path, lib.pattern)) + if err != nil { + slog.Error("failed to glob", "path", path, "error", err) + continue + } + + for _, match := range matches { + if base := filepath.Base(match); strings.HasPrefix(base, "ggml-base") || + strings.HasPrefix(base, "libggml-base") { + continue + } + + func() { + cmatch := C.CString(match) + defer C.free(unsafe.Pointer(cmatch)) + + C.ggml_backend_load(cmatch) + }() + } + } +})
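
Usage sketch (hypothetical caller, not introduced by this patch): discovery and loading of the ggml-* shared libraries is now centralized in OnceLoad in ml/backend/ggml/ggml/src (package ggml), and both llama.BackendInit and the device enumeration in ml/backend/ggml call it, so sync.OnceFunc guarantees the libraries are registered exactly once regardless of which entry point runs first. A minimal Go caller, assuming the module layout above, could look like:

    package main

    import "github.com/ollama/ollama/llama"

    func main() {
        // BackendInit runs ggml.OnceLoad() before C.llama_backend_init(),
        // so any ggml-* libraries found on PATH / LD_LIBRARY_PATH are
        // registered via ggml_backend_load before the first backend call.
        llama.BackendInit()
    }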