From 144f63e2fbd349f529ec0c71aef0ee1636a05780 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 2 Dec 2024 11:45:23 -0800
Subject: [PATCH] next build

---
 .gitattributes | 9 +
 .github/workflows/release.yaml | 270 ++--
 .github/workflows/test.yaml | 313 +-------
 CMakeLists.txt | 54 ++
 CMakePresets.json | 109 +++
 Dockerfile | 316 +++---
 Makefile | 103 ---
 Makefile2 | 46 +
 go.mod | 3 +-
 go.sum | 2 +
 llama/README.md | 3 +-
 llama/amx.h | 34 -
 llama/ggml-blas.h | 51 --
 llama/ggml-cpu-aarch64.h | 34 -
 llama/ggml-cpu-traits.h | 64 --
 llama/ggml-cuda/acc.cuh | 31 -
 llama/ggml-cuda/arange.cu | 60 --
 llama/ggml-cuda/arange.cuh | 31 -
 llama/ggml-cuda/argmax.cuh | 29 -
 llama/ggml-cuda/argsort.cuh | 29 -
 llama/ggml-cuda/binbcast.cuh | 35 -
 llama/ggml-cuda/clamp.cu | 60 --
 llama/ggml-cuda/clamp.cuh | 31 -
 llama/ggml-cuda/concat.cuh | 31 -
 llama/ggml-cuda/conv-transpose-1d.cuh | 31 -
 llama/ggml-cuda/convert.cuh | 39 -
 llama/ggml-cuda/count-equal.cuh | 31 -
 llama/ggml-cuda/cpy.cuh | 35 -
 llama/ggml-cuda/cross-entropy-loss.cuh | 33 -
 llama/ggml-cuda/diagmask.cuh | 31 -
 llama/ggml-cuda/fattn-tile-f16.cuh | 29 -
 llama/ggml-cuda/fattn-tile-f32.cuh | 29 -
 llama/ggml-cuda/fattn.cuh | 29 -
 llama/ggml-cuda/getrows.cuh | 31 -
 llama/ggml-cuda/im2col.cuh | 31 -
 llama/ggml-cuda/mmv.cuh | 38 -
 llama/ggml-cuda/mmvq.cuh | 35 -
 llama/ggml-cuda/norm.cuh | 33 -
 llama/ggml-cuda/opt-step-adamw.cuh | 31 -
 llama/ggml-cuda/out-prod.cuh | 29 -
 llama/ggml-cuda/pad.cuh | 32 -
 llama/ggml-cuda/pool2d.cuh | 31 -
 llama/ggml-cuda/quantize.cuh | 50 -
 llama/ggml-cuda/rope.cuh | 31 -
 llama/ggml-cuda/scale.cu | 57 --
 llama/ggml-cuda/scale.cuh | 31 -
 llama/ggml-cuda/softmax.cuh | 31 -
 llama/ggml-cuda/sum.cuh | 31 -
 llama/ggml-cuda/sumrows.cu | 65 --
 llama/ggml-cuda/sumrows.cuh | 31 -
 .../fattn-vec-f16-instance-hs128-f16-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-f16-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-f16-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-f16-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-f16-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-f16-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_0-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q4_1-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_0-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q5_1-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs128-q8_0-q8_0.cu | 31 -
 .../fattn-vec-f16-instance-hs256-f16-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-f16.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-q4_0.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-q4_1.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-q5_0.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-q5_1.cu | 31 -
 .../fattn-vec-f16-instance-hs64-f16-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-f16-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_0-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q4_1-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_0-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q5_1-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs128-q8_0-q8_0.cu | 31 -
 .../fattn-vec-f32-instance-hs256-f16-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-f16.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-q4_0.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-q4_1.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-q5_0.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-q5_1.cu | 31 -
 .../fattn-vec-f32-instance-hs64-f16-q8_0.cu | 31 -
 .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 36 -
 .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 35 -
 .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 36 -
 .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 36 -
 .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 34 -
 .../template-instances/mmq-instance-iq1_s.cu | 31 -
 .../template-instances/mmq-instance-iq2_s.cu | 31 -
 .../template-instances/mmq-instance-iq2_xs.cu | 31 -
 .../mmq-instance-iq2_xxs.cu | 31 -
 .../template-instances/mmq-instance-iq3_s.cu | 31 -
 .../mmq-instance-iq3_xxs.cu | 31 -
 .../template-instances/mmq-instance-iq4_nl.cu | 31 -
 .../template-instances/mmq-instance-iq4_xs.cu | 31 -
 .../template-instances/mmq-instance-q2_k.cu | 31 -
 .../template-instances/mmq-instance-q3_k.cu | 31 -
 .../template-instances/mmq-instance-q4_0.cu | 31 -
 .../template-instances/mmq-instance-q4_1.cu | 31 -
 .../template-instances/mmq-instance-q4_k.cu | 31 -
 .../template-instances/mmq-instance-q5_0.cu | 31 -
 .../template-instances/mmq-instance-q5_1.cu | 31 -
 .../template-instances/mmq-instance-q5_k.cu | 31 -
 .../template-instances/mmq-instance-q6_k.cu | 31 -
 .../template-instances/mmq-instance-q8_0.cu | 31 -
 llama/ggml-cuda/tsembd.cuh | 31 -
 llama/ggml-cuda/upscale.cuh | 31 -
 llama/ggml-cuda/vendors/cuda.h | 41 -
 llama/ggml-cuda/wkv6.cuh | 31 -
 llama/ggml-threading.cpp | 38 -
 llama/ggml-threading.h | 40 -
 llama/json-schema-to-grammar.h | 34 -
 llama/llama-cparams.cpp | 27 -
 llama/llama-cparams.h | 64 --
 llama/llama-cpp.h | 56 --
 llama/llama-quant.h | 27 -
 llama/llama.cpp/.rsync-filter | 22 +
 llama/llama.cpp/LICENSE | 21 +
 llama/{ => llama.cpp/common}/base64.hpp | 0
 llama/{ => llama.cpp/common}/common.cpp | 26 -
 llama/llama.cpp/common/common.go | 6 +
 llama/{ => llama.cpp/common}/common.h | 26 -
 .../common}/json-schema-to-grammar.cpp | 26 -
 .../llama.cpp/common/json-schema-to-grammar.h | 8 +
 llama/{ => llama.cpp/common}/json.hpp | 0
 llama/{ => llama.cpp/common}/log.cpp | 26 -
 llama/{ => llama.cpp/common}/log.h | 26 -
 llama/{ => llama.cpp/common}/sampling.cpp | 26 -
 llama/{ => llama.cpp/common}/sampling.h | 26 -
 llama/{ => llama.cpp/common}/stb_image.h | 0
 llama/{ => llama.cpp/examples/llava}/clip.cpp | 26 -
 llama/{ => llama.cpp/examples/llava}/clip.h | 26 -
 .../{ => llama.cpp/examples/llava}/llava.cpp | 26 -
 llama/llama.cpp/examples/llava/llava.go | 6 +
 llama/{ => llama.cpp/examples/llava}/llava.h | 26 -
 llama/llama.cpp/include/llama-cpp.h | 30 +
 llama/{ => llama.cpp/include}/llama.h | 26 -
 llama/{ => llama.cpp/src}/llama-adapter.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-adapter.h | 26 -
 llama/{ => llama.cpp/src}/llama-arch.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-arch.h | 26 -
 llama/{ => llama.cpp/src}/llama-batch.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-batch.h | 26 -
 llama/{ => llama.cpp/src}/llama-chat.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-chat.h | 26 -
 llama/{ => llama.cpp/src}/llama-context.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-context.h | 26 -
 llama/llama.cpp/src/llama-cparams.cpp | 1 +
 llama/llama.cpp/src/llama-cparams.h | 38 +
 llama/{ => llama.cpp/src}/llama-grammar.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-grammar.h | 26 -
 llama/{ => llama.cpp/src}/llama-hparams.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-hparams.h | 26 -
 llama/{ => llama.cpp/src}/llama-impl.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-impl.h | 26 -
 llama/{ => llama.cpp/src}/llama-kv-cache.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-kv-cache.h | 26 -
 llama/{ => llama.cpp/src}/llama-mmap.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-mmap.h | 26 -
 .../src}/llama-model-loader.cpp | 26 -
 .../{ => llama.cpp/src}/llama-model-loader.h | 26 -
 llama/{ => llama.cpp/src}/llama-model.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-model.h | 26 -
 llama/{ => llama.cpp/src}/llama-quant.cpp | 26 -
 llama/llama.cpp/src/llama-quant.h | 1 +
 llama/{ => llama.cpp/src}/llama-sampling.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-sampling.h | 26 -
 llama/{ => llama.cpp/src}/llama-vocab.cpp | 26 -
 llama/{ => llama.cpp/src}/llama-vocab.h | 26 -
 llama/{ => llama.cpp/src}/llama.cpp | 26 -
 llama/llama.cpp/src/llama.go | 8 +
 llama/{ => llama.cpp/src}/unicode-data.cpp | 26 -
 llama/llama.cpp/src/unicode-data.h | 20 +
 llama/{ => llama.cpp/src}/unicode.cpp | 26 -
 llama/{ => llama.cpp/src}/unicode.h | 26 -
 llama/llama.go | 74 +-
 llama/mmq.h | 36 -
 llama/patches/0001-cuda.patch | 39 +-
 llama/patches/0006-conditional-fattn.patch | 4 +-
 ...rt.patch => 0007-add-mllama-support.patch} | 0
 llama/patches/0007-blas.patch | 26 -
 ...or.patch => 0008-add-unpad-operator.patch} | 12 +-
 ... => 0009-fix-deepseek-deseret-regex.patch} | 0
 ...tain-ordering-for-rules-for-grammar.patch} | 0
 ...ing-arg-in-static-assert-on-windows.patch} | 0
 .../patches/0011-relative-include-paths.patch | 51 --
 ...sure-KV-cache-is-fully-defragmented.patch} | 0
 ...atch => 0013-re-enable-gpu-for-clip.patch} | 0
 .../patches/0014-sort-devices-by-score.patch | 82 ++
 ...target-ggml-cpu-for-all-cpu-variants.patch | 29 +
 llama/sgemm.h | 14 -
 llama/unicode-data.h | 46 -
 make/Makefile.cpu | 40 -
 make/Makefile.cuda_v11 | 13 -
 make/Makefile.cuda_v12 | 13 -
 make/Makefile.ollama | 19 -
 make/Makefile.rocm | 119 ---
 make/Makefile.sync | 250 -----
 make/Makefile.test | 19 -
 make/common-defs.make | 91 --
 make/cuda-v11-defs.make | 17 -
 make/cuda-v12-defs.make | 17 -
 make/cuda.make | 56 --
 make/gpu.make | 89 --
 make/rocm-defs.make | 9 -
 ml/backend/ggml/ggml/.rsync-filter | 21 +
 ml/backend/ggml/ggml/CMakeLists.txt | 262 ++++++
 ml/backend/ggml/ggml/LICENSE | 21 +
 .../backend/ggml/ggml/include}/ggml-alloc.h | 26 -
 .../backend/ggml/ggml/include}/ggml-backend.h | 26 -
 ml/backend/ggml/ggml/include/ggml-blas.h | 25 +
 ml/backend/ggml/ggml/include/ggml-cann.h | 123 +++
 .../backend/ggml/ggml/include}/ggml-cpp.h | 26 -
 .../backend/ggml/ggml/include}/ggml-cpu.h | 26 -
 .../backend/ggml/ggml/include}/ggml-cuda.h | 26 -
 ml/backend/ggml/ggml/include/ggml-kompute.h | 50 +
 .../backend/ggml/ggml/include}/ggml-metal.h | 26 -
 ml/backend/ggml/ggml/include/ggml-opencl.h | 26 +
 ml/backend/ggml/ggml/include/ggml-opt.h | 216 +++++
 ml/backend/ggml/ggml/include/ggml-rpc.h | 28 +
 ml/backend/ggml/ggml/include/ggml-sycl.h | 49 +
 ml/backend/ggml/ggml/include/ggml-vulkan.h | 31 +
 .../backend/ggml/ggml/include}/ggml.h | 26 -
 ml/backend/ggml/ggml/src/CMakeLists.txt | 340 +++++++
 .../backend/ggml/ggml/src}/ggml-alloc.c | 26 -
 .../ggml/ggml/src}/ggml-backend-impl.h | 26 -
 .../ggml/ggml/src}/ggml-backend-reg.cpp | 47 +-
 .../backend/ggml/ggml/src}/ggml-backend.cpp | 32 -
 .../ggml/ggml/src/ggml-blas/CMakeLists.txt | 87 ++
 ml/backend/ggml/ggml/src/ggml-blas/blas.go | 10 +
 .../ggml/ggml/src/ggml-blas}/ggml-blas.cpp | 30 -
 .../backend/ggml/ggml/src}/ggml-common.h | 26 -
 .../ggml/ggml/src/ggml-cpu/CMakeLists.txt | 346 +++++++
 .../ggml/ggml/src/ggml-cpu/amx}/amx.cpp | 26 -
 ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.h | 8 +
 .../ggml/ggml/src/ggml-cpu/amx/common.h | 91 ++
 .../ggml/ggml/src/ggml-cpu/amx}/mmq.cpp | 26 -
 ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.h | 10 +
 .../ggml/ggml/src/ggml-cpu/cpu-feats-x86.cpp | 323 +++++++
 ml/backend/ggml/ggml/src/ggml-cpu/cpu.go | 10 +
 .../ggml/src/ggml-cpu}/ggml-cpu-aarch64.cpp | 26 -
 .../ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.h | 8 +
 .../ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp | 55 ++
 .../ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.h | 8 +
 .../ggml/ggml/src/ggml-cpu}/ggml-cpu-impl.h | 26 -
 .../ggml/ggml/src/ggml-cpu}/ggml-cpu-quants.c | 26 -
 .../ggml/ggml/src/ggml-cpu}/ggml-cpu-quants.h | 26 -
 .../ggml/src/ggml-cpu}/ggml-cpu-traits.cpp | 26 -
 .../ggml/ggml/src/ggml-cpu/ggml-cpu-traits.h | 38 +
 .../ggml/ggml/src/ggml-cpu}/ggml-cpu.c | 28 +-
 .../ggml/ggml/src/ggml-cpu}/ggml-cpu.cpp | 29 +-
 .../ggml/src/ggml-cpu/llamafile/llamafile.go | 5 +
 .../ggml/src/ggml-cpu/llamafile}/sgemm.cpp | 0
 .../ggml/ggml/src/ggml-cpu}/llamafile/sgemm.h | 0
 .../ggml/ggml/src/ggml-cuda/CMakeLists.txt | 152 ++++
 .../backend/ggml/ggml/src}/ggml-cuda/acc.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/acc.cuh | 5 +
 ml/backend/ggml/ggml/src/ggml-cuda/arange.cu | 34 +
 ml/backend/ggml/ggml/src/ggml-cuda/arange.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/argmax.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/argmax.cuh | 3 +
 .../ggml/ggml/src}/ggml-cuda/argsort.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/argsort.cuh | 3 +
 .../ggml/ggml/src}/ggml-cuda/binbcast.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/binbcast.cuh | 9 +
 ml/backend/ggml/ggml/src/ggml-cuda/clamp.cu | 34 +
 ml/backend/ggml/ggml/src/ggml-cuda/clamp.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/common.cuh | 26 -
 .../ggml/ggml/src}/ggml-cuda/concat.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/concat.cuh | 5 +
 .../ggml/src}/ggml-cuda/conv-transpose-1d.cu | 26 -
 .../ggml/src/ggml-cuda/conv-transpose-1d.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/convert.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/convert.cuh | 13 +
 .../ggml/ggml/src}/ggml-cuda/count-equal.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/count-equal.cuh | 5 +
 .../backend/ggml/ggml/src}/ggml-cuda/cpy.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/cpy.cuh | 9 +
 .../ggml/src}/ggml-cuda/cross-entropy-loss.cu | 26 -
 .../ggml/src/ggml-cuda/cross-entropy-loss.cuh | 7 +
 .../ggml/ggml/src}/ggml-cuda/dequantize.cuh | 26 -
 .../ggml/ggml/src}/ggml-cuda/diagmask.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/diagmask.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/fattn-common.cuh | 26 -
 .../ggml/src}/ggml-cuda/fattn-tile-f16.cu | 26 -
 .../ggml/src/ggml-cuda/fattn-tile-f16.cuh | 3 +
 .../ggml/src}/ggml-cuda/fattn-tile-f32.cu | 26 -
 .../ggml/src/ggml-cuda/fattn-tile-f32.cuh | 3 +
 .../ggml/src}/ggml-cuda/fattn-vec-f16.cuh | 26 -
 .../ggml/src}/ggml-cuda/fattn-vec-f32.cuh | 26 -
 .../ggml/src}/ggml-cuda/fattn-wmma-f16.cuh | 26 -
 .../backend/ggml/ggml/src}/ggml-cuda/fattn.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/fattn.cuh | 3 +
 .../ggml/ggml/src}/ggml-cuda/getrows.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/getrows.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/ggml-cuda.cu | 31 +-
 .../ggml/ggml/src}/ggml-cuda/im2col.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/im2col.cuh | 5 +
 .../backend/ggml/ggml/src}/ggml-cuda/mma.cuh | 26 -
 .../backend/ggml/ggml/src}/ggml-cuda/mmq.cu | 26 -
 .../backend/ggml/ggml/src}/ggml-cuda/mmq.cuh | 26 -
 .../backend/ggml/ggml/src}/ggml-cuda/mmv.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/mmv.cuh | 12 +
 .../backend/ggml/ggml/src}/ggml-cuda/mmvq.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh | 9 +
 .../backend/ggml/ggml/src}/ggml-cuda/norm.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/norm.cuh | 7 +
 .../ggml/src}/ggml-cuda/opt-step-adamw.cu | 26 -
 .../ggml/src/ggml-cuda/opt-step-adamw.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/out-prod.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/out-prod.cuh | 3 +
 .../backend/ggml/ggml/src}/ggml-cuda/pad.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/pad.cuh | 6 +
 .../ggml/ggml/src}/ggml-cuda/pool2d.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/quantize.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/quantize.cuh | 24 +
 .../backend/ggml/ggml/src}/ggml-cuda/rope.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/rope.cuh | 5 +
 ml/backend/ggml/ggml/src/ggml-cuda/scale.cu | 31 +
 ml/backend/ggml/ggml/src/ggml-cuda/scale.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/softmax.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/softmax.cuh | 5 +
 .../backend/ggml/ggml/src}/ggml-cuda/sum.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/sum.cuh | 5 +
 ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cu | 39 +
 .../ggml/ggml/src/ggml-cuda/sumrows.cuh | 5 +
 .../fattn-vec-f16-instance-hs128-f16-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-f16-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-f16-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-f16-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-f16-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-f16-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_0-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q4_1-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_0-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q5_1-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs128-q8_0-q8_0.cu | 5 +
 .../fattn-vec-f16-instance-hs256-f16-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-f16.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-q4_0.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-q4_1.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-q5_0.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-q5_1.cu | 5 +
 .../fattn-vec-f16-instance-hs64-f16-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-f16-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_0-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q4_1-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_0-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q5_1-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs128-q8_0-q8_0.cu | 5 +
 .../fattn-vec-f32-instance-hs256-f16-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-f16.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-q4_0.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-q4_1.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-q5_0.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-q5_1.cu | 5 +
 .../fattn-vec-f32-instance-hs64-f16-q8_0.cu | 5 +
 .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 10 +
 .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 9 +
 .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 10 +
 .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 10 +
 .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 8 +
 .../template-instances/generate_cu_files.py | 77 ++
 .../template-instances/mmq-instance-iq1_s.cu | 5 +
 .../template-instances/mmq-instance-iq2_s.cu | 5 +
 .../template-instances/mmq-instance-iq2_xs.cu | 5 +
 .../mmq-instance-iq2_xxs.cu | 5 +
 .../template-instances/mmq-instance-iq3_s.cu | 5 +
 .../mmq-instance-iq3_xxs.cu | 5 +
 .../template-instances/mmq-instance-iq4_nl.cu | 5 +
 .../template-instances/mmq-instance-iq4_xs.cu | 5 +
 .../template-instances/mmq-instance-q2_k.cu | 5 +
 .../template-instances/mmq-instance-q3_k.cu | 5 +
 .../template-instances/mmq-instance-q4_0.cu | 5 +
 .../template-instances/mmq-instance-q4_1.cu | 5 +
 .../template-instances/mmq-instance-q4_k.cu | 5 +
 .../template-instances/mmq-instance-q5_0.cu | 5 +
 .../template-instances/mmq-instance-q5_1.cu | 5 +
 .../template-instances/mmq-instance-q5_k.cu | 5 +
 .../template-instances/mmq-instance-q6_k.cu | 5 +
 .../template-instances/mmq-instance-q8_0.cu | 5 +
 .../ggml/ggml/src}/ggml-cuda/tsembd.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cuh | 5 +
 .../backend/ggml/ggml/src}/ggml-cuda/unary.cu | 26 -
 .../ggml/ggml/src}/ggml-cuda/unary.cuh | 26 -
 .../ggml/ggml/src}/ggml-cuda/upscale.cu | 26 -
 .../ggml/ggml/src/ggml-cuda/upscale.cuh | 5 +
 .../ggml/ggml/src}/ggml-cuda/vecdotq.cuh | 26 -
 .../ggml/ggml/src/ggml-cuda/vendors/cuda.h | 14 +
 .../ggml/ggml/src}/ggml-cuda/vendors/hip.h | 26 -
 .../ggml/ggml/src}/ggml-cuda/vendors/musa.h | 26 -
 .../backend/ggml/ggml/src}/ggml-cuda/wkv6.cu | 26 -
 ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cuh | 5 +
 .../ggml/ggml/src/ggml-hip/CMakeLists.txt | 104 +++
 .../backend/ggml/ggml/src}/ggml-impl.h | 26 -
 .../ggml/ggml/src/ggml-metal/CMakeLists.txt | 121 +++
 .../src/ggml-metal}/ggml-metal-embed.metal | 79 +-
 .../ggml/src/ggml-metal/ggml-metal-embed.s | 2 +-
 .../ggml/src/ggml-metal}/ggml-metal-impl.h | 26 -
 .../ggml/ggml/src/ggml-metal/ggml-metal.m | 27 +-
 .../ggml/src/ggml-metal}/ggml-metal.metal | 26 -
 ml/backend/ggml/ggml/src/ggml-metal/metal.go | 9 +
 ml/backend/ggml/ggml/src/ggml-opt.cpp | 854 ++++++++++++++++++
 .../backend/ggml/ggml/src}/ggml-quants.c | 28 +-
 .../backend/ggml/ggml/src}/ggml-quants.h | 26 -
 ml/backend/ggml/ggml/src/ggml-threading.cpp | 12 +
 ml/backend/ggml/ggml/src/ggml-threading.h | 14 +
 {llama => ml/backend/ggml/ggml/src}/ggml.c | 26 -
 ml/backend/ggml/ggml/src/ggml.go | 68 ++
 ml/backend/ggml/ggml/src/ggml_darwin_arm64.go | 10 +
 ml/backend/ggml/ggml_debug.go | 6 +
 scripts/build_darwin.sh | 4 +-
 scripts/build_linux.sh | 4 +-
 523 files changed, 5332 insertions(+), 10241 deletions(-)
 create mode 100644 CMakeLists.txt
 create mode 100644 CMakePresets.json
 delete mode 100644 Makefile
 create mode 100644 Makefile2
 delete mode 100644 llama/amx.h
 delete mode 100644 llama/ggml-blas.h
 delete mode 100644 llama/ggml-cpu-aarch64.h
 delete mode 100644 llama/ggml-cpu-traits.h
 delete mode 100644 llama/ggml-cuda/acc.cuh
 delete mode 100644 llama/ggml-cuda/arange.cu
 delete mode 100644 llama/ggml-cuda/arange.cuh
 delete mode 100644 llama/ggml-cuda/argmax.cuh
 delete mode 100644 llama/ggml-cuda/argsort.cuh
 delete mode 100644 llama/ggml-cuda/binbcast.cuh
 delete mode 100644 llama/ggml-cuda/clamp.cu
 delete mode 100644 llama/ggml-cuda/clamp.cuh
 delete mode 100644 llama/ggml-cuda/concat.cuh
 delete mode 100644 llama/ggml-cuda/conv-transpose-1d.cuh
 delete mode 100644 llama/ggml-cuda/convert.cuh
 delete mode 100644 llama/ggml-cuda/count-equal.cuh
 delete mode 100644 llama/ggml-cuda/cpy.cuh
 delete mode 100644 llama/ggml-cuda/cross-entropy-loss.cuh
 delete mode 100644 llama/ggml-cuda/diagmask.cuh
 delete mode 100644 llama/ggml-cuda/fattn-tile-f16.cuh
 delete mode 100644 llama/ggml-cuda/fattn-tile-f32.cuh
 delete mode 100644 llama/ggml-cuda/fattn.cuh
 delete mode 100644 llama/ggml-cuda/getrows.cuh
 delete mode 100644 llama/ggml-cuda/im2col.cuh
 delete mode 100644 llama/ggml-cuda/mmv.cuh
 delete mode 100644 llama/ggml-cuda/mmvq.cuh
 delete mode 100644 llama/ggml-cuda/norm.cuh
 delete mode 100644 llama/ggml-cuda/opt-step-adamw.cuh
 delete mode 100644 llama/ggml-cuda/out-prod.cuh
 delete mode 100644 llama/ggml-cuda/pad.cuh
 delete mode 100644 llama/ggml-cuda/pool2d.cuh
 delete mode 100644 llama/ggml-cuda/quantize.cuh
 delete mode 100644 llama/ggml-cuda/rope.cuh
 delete mode 100644 llama/ggml-cuda/scale.cu
 delete mode 100644 llama/ggml-cuda/scale.cuh
 delete mode 100644 llama/ggml-cuda/softmax.cuh
 delete mode 100644 llama/ggml-cuda/sum.cuh
 delete mode 100644 llama/ggml-cuda/sumrows.cu
 delete mode 100644 llama/ggml-cuda/sumrows.cuh
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
 delete mode 100644 llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu
 delete mode 100644 llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu
 delete mode 100644 llama/ggml-cuda/tsembd.cuh
 delete mode 100644 llama/ggml-cuda/upscale.cuh
 delete mode 100644 llama/ggml-cuda/vendors/cuda.h
 delete mode 100644 llama/ggml-cuda/wkv6.cuh
 delete mode 100644 llama/ggml-threading.cpp
 delete mode 100644 llama/ggml-threading.h
 delete mode 100644 llama/json-schema-to-grammar.h
 delete mode 100644 llama/llama-cparams.cpp
 delete mode 100644 llama/llama-cparams.h
 delete mode 100644 llama/llama-cpp.h
 delete mode 100644 llama/llama-quant.h
 create mode 100644 llama/llama.cpp/.rsync-filter
 create mode 100644 llama/llama.cpp/LICENSE
 rename llama/{ => llama.cpp/common}/base64.hpp (100%)
 rename llama/{ => llama.cpp/common}/common.cpp (98%)
 create mode 100644 llama/llama.cpp/common/common.go
 rename llama/{ => llama.cpp/common}/common.h (95%)
 rename llama/{ => llama.cpp/common}/json-schema-to-grammar.cpp (97%)
 create mode 100644 llama/llama.cpp/common/json-schema-to-grammar.h
 rename llama/{ => llama.cpp/common}/json.hpp (100%)
 rename llama/{ => llama.cpp/common}/log.cpp (89%)
 rename llama/{ => llama.cpp/common}/log.h (77%)
 rename llama/{ => llama.cpp/common}/sampling.cpp (93%)
 rename llama/{ => llama.cpp/common}/sampling.h (78%)
 rename llama/{ => llama.cpp/common}/stb_image.h (100%)
 rename llama/{ => llama.cpp/examples/llava}/clip.cpp (98%)
 rename llama/{ => llama.cpp/examples/llava}/clip.h (74%)
 rename llama/{ => llama.cpp/examples/llava}/llava.cpp (95%)
 create mode 100644 llama/llama.cpp/examples/llava/llava.go
 rename llama/{ => llama.cpp/examples/llava}/llava.h (59%)
 create mode 100644 llama/llama.cpp/include/llama-cpp.h
 rename llama/{ => llama.cpp/include}/llama.h (98%)
 rename llama/{ => llama.cpp/src}/llama-adapter.cpp (90%)
 rename llama/{ => llama.cpp/src}/llama-adapter.h (55%)
 rename llama/{ => llama.cpp/src}/llama-arch.cpp (98%)
 rename llama/{ => llama.cpp/src}/llama-arch.h (89%)
 rename llama/{ => llama.cpp/src}/llama-batch.cpp (91%)
 rename llama/{ => llama.cpp/src}/llama-batch.h (67%)
 rename llama/{ => llama.cpp/src}/llama-chat.cpp (95%)
 rename llama/{ => llama.cpp/src}/llama-chat.h (54%)
 rename llama/{ => llama.cpp/src}/llama-context.cpp (98%)
 rename llama/{ => llama.cpp/src}/llama-context.h (80%)
 create mode 100644 llama/llama.cpp/src/llama-cparams.cpp
 create mode 100644 llama/llama.cpp/src/llama-cparams.h
 rename llama/{ => llama.cpp/src}/llama-grammar.cpp (97%)
 rename llama/{ => llama.cpp/src}/llama-grammar.h (78%)
 rename llama/{ => llama.cpp/src}/llama-hparams.cpp (61%)
 rename llama/{ => llama.cpp/src}/llama-hparams.h (78%)
 rename llama/{ => llama.cpp/src}/llama-impl.cpp (82%)
 rename llama/{ => llama.cpp/src}/llama-impl.h (58%)
 rename llama/{ => llama.cpp/src}/llama-kv-cache.cpp (95%)
 rename llama/{ => llama.cpp/src}/llama-kv-cache.h (84%)
 rename llama/{ => llama.cpp/src}/llama-mmap.cpp (93%)
 rename llama/{ => llama.cpp/src}/llama-mmap.h (52%)
 rename llama/{ => llama.cpp/src}/llama-model-loader.cpp (97%)
 rename llama/{ => llama.cpp/src}/llama-model-loader.h (81%)
 rename llama/{ => llama.cpp/src}/llama-model.cpp (98%)
 rename llama/{ => llama.cpp/src}/llama-model.h (91%)
 rename llama/{ => llama.cpp/src}/llama-quant.cpp (97%)
 create mode 100644 llama/llama.cpp/src/llama-quant.h
 rename llama/{ => llama.cpp/src}/llama-sampling.cpp (98%)
 rename llama/{ => llama.cpp/src}/llama-sampling.h (54%)
 rename llama/{ => llama.cpp/src}/llama-vocab.cpp (98%)
 rename llama/{ => llama.cpp/src}/llama-vocab.h (84%)
 rename llama/{ => llama.cpp/src}/llama.cpp (99%)
 create mode 100644 llama/llama.cpp/src/llama.go
 rename llama/{ => llama.cpp/src}/unicode-data.cpp (99%)
 create mode 100644 llama/llama.cpp/src/unicode-data.h
 rename llama/{ => llama.cpp/src}/unicode.cpp (96%)
 rename llama/{ => llama.cpp/src}/unicode.h (63%)
 delete mode 100644 llama/mmq.h
 rename llama/patches/{0008-add-mllama-support.patch => 0007-add-mllama-support.patch} (100%)
 delete mode 100644 llama/patches/0007-blas.patch
 rename llama/patches/{0009-add-unpad-operator.patch => 0008-add-unpad-operator.patch} (97%)
 rename llama/patches/{0010-fix-deepseek-deseret-regex.patch => 0009-fix-deepseek-deseret-regex.patch} (100%)
 rename llama/patches/{0012-Maintain-ordering-for-rules-for-grammar.patch => 0010-Maintain-ordering-for-rules-for-grammar.patch} (100%)
 rename llama/patches/{0013-fix-missing-arg-in-static-assert-on-windows.patch => 0011-fix-missing-arg-in-static-assert-on-windows.patch} (100%)
 delete mode 100644 llama/patches/0011-relative-include-paths.patch
 rename llama/patches/{0014-llama-Ensure-KV-cache-is-fully-defragmented.patch => 0012-llama-Ensure-KV-cache-is-fully-defragmented.patch} (100%)
 rename llama/patches/{0015-re-enable-gpu-for-clip.patch => 0013-re-enable-gpu-for-clip.patch} (100%)
 create mode 100644 llama/patches/0014-sort-devices-by-score.patch
 create mode 100644 llama/patches/0015-add-phony-target-ggml-cpu-for-all-cpu-variants.patch
 delete mode 100644 llama/sgemm.h
 delete mode 100644 llama/unicode-data.h
 delete mode 100644 make/Makefile.cpu
 delete mode 100644 make/Makefile.cuda_v11
 delete mode 100644 make/Makefile.cuda_v12
 delete mode 100644 make/Makefile.ollama
 delete mode 100644 make/Makefile.rocm
 delete mode 100644 make/Makefile.sync
 delete mode 100644 make/Makefile.test
 delete mode 100644 make/common-defs.make
 delete mode 100644 make/cuda-v11-defs.make
 delete mode 100644 make/cuda-v12-defs.make
 delete mode 100644 make/cuda.make
 delete mode 100644 make/gpu.make
 delete mode 100644 make/rocm-defs.make
 create mode 100644 ml/backend/ggml/ggml/.rsync-filter
 create mode 100644 ml/backend/ggml/ggml/CMakeLists.txt
 create mode 100644 ml/backend/ggml/ggml/LICENSE
 rename {llama => ml/backend/ggml/ggml/include}/ggml-alloc.h (70%)
 rename {llama => ml/backend/ggml/ggml/include}/ggml-backend.h (94%)
 create mode 100644 ml/backend/ggml/ggml/include/ggml-blas.h
 create mode 100644 ml/backend/ggml/ggml/include/ggml-cann.h
 rename {llama => ml/backend/ggml/ggml/include}/ggml-cpp.h (56%)
 rename {llama => ml/backend/ggml/ggml/include}/ggml-cpu.h (84%)
 rename {llama => ml/backend/ggml/ggml/include}/ggml-cuda.h (56%)
 create mode 100644 ml/backend/ggml/ggml/include/ggml-kompute.h
 rename {llama => ml/backend/ggml/ggml/include}/ggml-metal.h (66%)
 create mode 100644 ml/backend/ggml/ggml/include/ggml-opencl.h
 create mode 100644 ml/backend/ggml/ggml/include/ggml-opt.h
 create mode 100644 ml/backend/ggml/ggml/include/ggml-rpc.h
 create mode 100644 ml/backend/ggml/ggml/include/ggml-sycl.h
 create mode 100644 ml/backend/ggml/ggml/include/ggml-vulkan.h
 rename {llama => ml/backend/ggml/ggml/include}/ggml.h (98%)
 create mode 100644 ml/backend/ggml/ggml/src/CMakeLists.txt
 rename {llama => ml/backend/ggml/ggml/src}/ggml-alloc.c (96%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-backend-impl.h (90%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-backend-reg.cpp (90%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-backend.cpp (98%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-blas/CMakeLists.txt
 create mode 100644 ml/backend/ggml/ggml/src/ggml-blas/blas.go
 rename {llama => ml/backend/ggml/ggml/src/ggml-blas}/ggml-blas.cpp (92%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-common.h (99%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/CMakeLists.txt
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu/amx}/amx.cpp (86%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.h
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/amx/common.h
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu/amx}/mmq.cpp (98%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.h
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/cpu-feats-x86.cpp
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/cpu.go
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu-aarch64.cpp (99%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.h
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.h
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu-impl.h (87%)
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu-quants.c (99%)
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu-quants.h (80%)
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu-traits.cpp (50%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.h
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu.c (99%)
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/ggml-cpu.cpp (94%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu/llamafile}/sgemm.cpp (100%)
 rename {llama => ml/backend/ggml/ggml/src/ggml-cpu}/llamafile/sgemm.h (100%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/acc.cu (61%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/acc.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/arange.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/arange.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/argmax.cu (69%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/argmax.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/argsort.cu (73%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/argsort.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/binbcast.cu (91%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/clamp.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/clamp.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/common.cuh (94%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/concat.cu (85%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/concat.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/conv-transpose-1d.cu (72%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/convert.cu (95%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/convert.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/count-equal.cu (62%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/cpy.cu (94%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/cpy.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/cross-entropy-loss.cu (82%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/dequantize.cuh (68%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/diagmask.cu (58%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-common.cuh (95%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-tile-f16.cu (91%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-tile-f32.cu (91%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-vec-f16.cuh (93%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-vec-f32.cuh (92%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn-wmma-f16.cuh (94%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/fattn.cu (92%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/fattn.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/getrows.cu (84%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/getrows.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/ggml-cuda.cu (98%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/im2col.cu (78%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/im2col.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/mma.cuh (86%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/mmq.cu (80%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/mmq.cuh (98%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/mmv.cu (89%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/mmv.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/mmvq.cu (93%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/norm.cu (85%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/norm.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/opt-step-adamw.cu (70%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/out-prod.cu (57%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/pad.cu (74%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/pad.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/pool2d.cu (72%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/quantize.cu (81%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/rope.cu (94%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/rope.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/scale.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/scale.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/softmax.cu (86%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/softmax.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/sum.cu (54%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/sum.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
 create mode 100755 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/tsembd.cu (59%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/unary.cu (92%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/unary.cuh (58%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/upscale.cu (63%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/upscale.cuh
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/vecdotq.cuh (96%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/vendors/cuda.h
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/vendors/hip.h (85%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/vendors/musa.h (83%)
 rename {llama => ml/backend/ggml/ggml/src}/ggml-cuda/wkv6.cu (71%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cuh
 create mode 100644 ml/backend/ggml/ggml/src/ggml-hip/CMakeLists.txt
 rename {llama => ml/backend/ggml/ggml/src}/ggml-impl.h (93%)
 create mode 100644 ml/backend/ggml/ggml/src/ggml-metal/CMakeLists.txt
 rename {llama => ml/backend/ggml/ggml/src/ggml-metal}/ggml-metal-embed.metal (99%)
ml/backend/ggml/ggml/src/ggml-metal}/ggml-metal-embed.metal (99%) rename llama/ggml-metal-embed_darwin_arm64.s => ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s (87%) rename {llama => ml/backend/ggml/ggml/src/ggml-metal}/ggml-metal-impl.h (81%) rename llama/ggml-metal_darwin_arm64.m => ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m (99%) rename {llama => ml/backend/ggml/ggml/src/ggml-metal}/ggml-metal.metal (99%) create mode 100644 ml/backend/ggml/ggml/src/ggml-metal/metal.go create mode 100644 ml/backend/ggml/ggml/src/ggml-opt.cpp rename {llama => ml/backend/ggml/ggml/src}/ggml-quants.c (99%) rename {llama => ml/backend/ggml/ggml/src}/ggml-quants.h (87%) create mode 100644 ml/backend/ggml/ggml/src/ggml-threading.cpp create mode 100644 ml/backend/ggml/ggml/src/ggml-threading.h rename {llama => ml/backend/ggml/ggml/src}/ggml.c (99%) create mode 100644 ml/backend/ggml/ggml/src/ggml.go create mode 100644 ml/backend/ggml/ggml/src/ggml_darwin_arm64.go create mode 100644 ml/backend/ggml/ggml_debug.go diff --git a/.gitattributes b/.gitattributes index 51635caa7..4bcd95b0b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored llama/**/*.m linguist-vendored llama/**/*.metal linguist-vendored +ml/backend/**/*.c linguist-vendored +ml/backend/**/*.h linguist-vendored +ml/backend/**/*.cpp linguist-vendored +ml/backend/**/*.hpp linguist-vendored +ml/backend/**/*.cu linguist-vendored +ml/backend/**/*.cuh linguist-vendored +ml/backend/**/*.m linguist-vendored +ml/backend/**/*.metal linguist-vendored + * text=auto *.go text eol=lf diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 50177050c..08abac1f9 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -478,243 +478,77 @@ jobs: dist/OllamaSetup.exe dist/ollama-windows-*.zip - # Linux x86 assets built using the container based build - build-linux-amd64: + build-linux: environment: release runs-on: linux - env: - PLATFORM: linux/amd64 - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Set Version - shell: bash - run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV - - run: | - ./scripts/build_linux.sh - - uses: actions/upload-artifact@v4 - with: - name: dist-linux-amd64 - path: | - dist/*linux* - !dist/*-cov - - # Linux ARM assets built using the container based build - # (at present, docker isn't pre-installed on arm ubunutu images) - build-linux-arm64: - environment: release - runs-on: linux-arm64 - env: - PLATFORM: linux/arm64 - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Set Version - shell: bash - run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV - - name: 'Install Docker' - run: | - # Add Docker's official GPG key: - env - uname -a - sudo apt-get update - sudo apt-get install -y ca-certificates curl - sudo install -m 0755 -d /etc/apt/keyrings - sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc - sudo chmod a+r /etc/apt/keyrings/docker.asc - - # Add the repository to Apt sources: - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ - $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get update - sudo apt-get install -y docker-ce docker-ce-cli containerd.io - sudo usermod -aG docker $USER - sudo apt-get install acl - sudo setfacl --modify user:$USER:rw /var/run/docker.sock - - run: | - ./scripts/build_linux.sh - - uses: actions/upload-artifact@v4 - with: - name: dist-linux-arm64 - path: | - dist/*linux* - !dist/*-cov - - # Container image build - build-container-image: - environment: release strategy: matrix: - runner: - - linux - - linux-arm64 - runs-on: ${{ matrix.runner }} - env: - FINAL_IMAGE_REPO: ollama/ollama + include: + - os: linux + arch: amd64 + targets: [archive, rocm] + - os: linux + arch: arm64 + targets: [archive] steps: - uses: actions/checkout@v4 + - uses: docker/setup-qemu-action@v3 + - uses: docker/setup-buildx-action@v3 + - run: | + apt-get update && apt-get install -y pigz + for TARGET in ${{ join(matrix.targets, ' ') }}; do docker buildx build --platform $PLATFORM --target $TARGET --output type=local,dest=dist/$PLATFORM .; done + tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tar.gz + env: + PLATFORM: ${{ matrix.os }}/${{ matrix.arch }} + - uses: actions/upload-artifact@v4 with: - submodules: recursive - - name: 'Install Docker' - if: ${{ startsWith(matrix.runner, 'linux-arm64') }} - run: | - sudo apt-get update - sudo apt-get install -y ca-certificates curl - sudo install -m 0755 -d /etc/apt/keyrings - sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc - sudo chmod a+r /etc/apt/keyrings/docker.asc - echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ - $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get update - sudo apt-get install -y docker-ce docker-ce-cli containerd.io - sudo usermod -aG docker $USER - sudo apt-get install acl - sudo setfacl --modify user:$USER:rw /var/run/docker.sock - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.FINAL_IMAGE_REPO }} - flavor: | - latest=false - tags: | - type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr - type=semver,pattern={{version}} - - name: Set Version - shell: bash - run: | - machine=$(uname -m) - case ${machine} in - x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;; - aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;; - esac >>$GITHUB_ENV - echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKER_USER }} - password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - context: "."
- platforms: linux/${{ env.ARCH }} - build-args: | - GOFLAGS - outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true - - name: Export digest - run: | - mkdir -p /tmp/digests - digest="${{ steps.build.outputs.digest }}" - touch "/tmp/digests/${digest#sha256:}" - - name: Upload digest - uses: actions/upload-artifact@v4 - with: - name: digests-${{ env.PLATFORM_PAIR }} - path: /tmp/digests/* - if-no-files-found: error - retention-days: 1 - merge: + name: dist-${{ matrix.os }}-${{ matrix.arch }} + path: | + dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.gz + + build-docker: environment: release runs-on: linux - needs: - - build-container-image - env: - FINAL_IMAGE_REPO: ollama/ollama + strategy: + matrix: + include: + - flavor: | + latest=auto + platforms: linux/amd64,linux/arm64 + build-args: [GOFLAGS] + - flavor: | + suffix=-rocm,onlatest=false + platforms: linux/amd64 + build-args: [GOFLAGS, FLAVOR=rocm] steps: - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Download digests - uses: actions/download-artifact@v4 - with: - path: /tmp/digests - pattern: digests-* - merge-multiple: true - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.FINAL_IMAGE_REPO }} - flavor: | - latest=false - tags: | - type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr - type=semver,pattern={{version}} - - name: Set Version - shell: bash - run: | - machine=$(uname -m) - case ${machine} in - x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;; - aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;; - esac >>$GITHUB_ENV - echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV - - name: Login to Docker Hub - uses: docker/login-action@v3 + - uses: docker/setup-qemu-action@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v3 with: username: ${{ vars.DOCKER_USER }} password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - - name: Create manifest list and push - working-directory: /tmp/digests - run: | - docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ - $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *) - - name: Inspect image - run: | - docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }} - build-container-image-rocm: - environment: release - runs-on: linux - env: - FINAL_IMAGE_REPO: ollama/ollama - ARCH: amd64 - PLATFORM_PAIR: linux-amd64 - steps: - - uses: actions/checkout@v4 + - id: metadata + uses: docker/metadata-action@v4 with: - submodules: recursive - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.FINAL_IMAGE_REPO }} - flavor: | - latest=false + flavor: ${{ matrix.flavor }} + images: | + ollama/ollama tags: | - type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr type=semver,pattern={{version}} - - name: Set Version - shell: bash - run: | - echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 + - uses: docker/build-push-action@v6 with: - username: ${{ vars.DOCKER_USER }} - password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - context: "." - target: runtime-rocm - build-args: | - GOFLAGS - tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm + context: . push: true + platforms: ${{ matrix.platforms }} + build-args: ${{ matrix.build-args }} + tags: ${{ steps.metadata.outputs.tags }} + labels: ${{ steps.metadata.outputs.labels }} + cache-from: type=registry,ref=ollama/ollama:latest + cache-to: type=inline + provenance: false + env: + GOFLAGS: "'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ steps.metadata.outputs.version }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" # Aggregate all the assets and ship a release release: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8dcc506ba..db036671c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,11 +1,5 @@ name: test -env: - ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe - MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe - CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe - CUDA_12_WINDOWS_VER: 12.4 - concurrency: # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR # cancels running CI jobs and starts all new ones.
@@ -27,7 +21,7 @@ jobs: changes: runs-on: ubuntu-latest outputs: - RUNNERS: ${{ steps.changes.outputs.RUNNERS }} + changed: ${{ steps.changes.outputs.changed }} steps: - uses: actions/checkout@v4 with: @@ -35,309 +29,66 @@ jobs: - id: changes run: | changed() { - git diff-tree -r --no-commit-id --name-only \ - $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \ - ${{ github.event.pull_request.head.sha }} \ + local BASE=${{ github.event.pull_request.base.sha }} + local HEAD=${{ github.event.pull_request.head.sha }} + local MERGE_BASE=$(git merge-base $BASE $HEAD) + git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \ | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))" } - { - echo RUNNERS=$(changed 'llama/**') - } >>$GITHUB_OUTPUT + echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT - runners-linux-cuda: + linux: needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} + if: ${{ needs.changes.outputs.changed == 'True' }} strategy: matrix: - cuda-version: - - '11.8.0' - runs-on: linux - container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04 + include: + - container: nvidia/cuda:11.8.0-devel-ubuntu22.04 + preset: CUDA + - container: rocm/dev-ubuntu-22.04:6.1.2 + preset: ROCm + extra-packages: rocm-libs + runs-on: ubuntu-latest + container: ${{ matrix.container }} steps: + - uses: actions/checkout@v4 - run: | - apt-get update && apt-get install -y git build-essential curl + apt-get update + apt-get install -y cmake pkg-config ccache ${{ matrix.extra-packages }} + ccache -o cache_dir=${{ github.workspace }}/.ccache env: DEBIAN_FRONTEND: noninteractive - - uses: actions/checkout@v4 - - uses: actions/setup-go@v4 + - uses: actions/cache@v4 with: - go-version-file: go.mod - cache: true - - run: go get ./... + path: ${{ github.workspace }}/.ccache + key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }} - run: | - git config --global --add safe.directory /__w/ollama/ollama - cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l) - make -j $cores cuda_v11 - runners-linux-rocm: - needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} - strategy: - matrix: - rocm-version: - - '6.1.2' - runs-on: linux - container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }} - steps: - - run: | - apt-get update && apt-get install -y git build-essential curl rocm-libs - env: - DEBIAN_FRONTEND: noninteractive - - uses: actions/checkout@v4 - - uses: actions/setup-go@v4 - with: - go-version-file: go.mod - cache: true - - run: go get ./...
- - run: | - git config --global --add safe.directory /__w/ollama/ollama - cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l) - make -j $cores rocm + cmake --preset ${{ matrix.preset }} + cmake --build --preset ${{ matrix.preset }} --parallel - # ROCm generation step - runners-windows-rocm: - needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} - runs-on: windows - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: true - - name: Set make jobs default - run: | - echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - # ROCM installation steps - - name: 'Cache ROCm installer' - id: cache-rocm - uses: actions/cache@v4 - with: - path: rocm-install.exe - key: ${{ env.ROCM_WINDOWS_URL }} - - name: 'Conditionally Download ROCm' - if: steps.cache-rocm.outputs.cache-hit != 'true' - run: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe" - - name: 'Install ROCm' - run: | - Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - - name: 'Verify ROCm' - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - - name: Add msys paths - run: | - echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Install msys2 tools - run: | - Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait - - - name: make rocm runner - run: | - import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo' - if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" } - make -C llama print-HIP_PATH print-HIP_LIB_DIR - make rocm - - # CUDA generation step - runners-windows-cuda: - needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} - runs-on: windows - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: true - - name: Set make jobs default - run: | - echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - # CUDA installation steps - - name: 'Cache CUDA installer' - id: cache-cuda - uses: actions/cache@v4 - with: - path: cuda-install.exe - key: ${{ env.CUDA_12_WINDOWS_URL }} - - name: 'Conditionally Download CUDA' - if: steps.cache-cuda.outputs.cache-hit != 'true' - run: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe" - - name: 'Install CUDA' - run: | - $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"} - Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait - - name: 'Verify CUDA' - run: | - & (resolve-path 
"c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version - $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path) - $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' - echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - - name: Add msys paths - run: | - echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Install msys2 tools - run: | - Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait - - name: make cuda runner - run: | - import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo' - if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" } - make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1') - - runners-cpu: - needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - arch: [amd64, arm64] - exclude: - - os: ubuntu-latest - arch: arm64 - - os: windows-2019 - arch: arm64 - runs-on: ${{ matrix.os }} - env: - GOARCH: ${{ matrix.arch }} - ARCH: ${{ matrix.arch }} - CGO_ENABLED: '1' - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: true - - name: Add msys paths - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Install msys2 tools - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait - - name: 'Build Windows Go Runners' - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - $gopath=(get-command go).source | split-path -parent - $gccpath=(get-command gcc).source | split-path -parent - import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo' - $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" - $env:PATH="$gopath;$gccpath;$env:PATH" - echo $env:PATH - if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" } - make -j 4 - - name: 'Build Unix Go Runners' - if: ${{ ! startsWith(matrix.os, 'windows-') }} - run: make -j 4 - - run: go build . 
- - lint: - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - arch: [amd64, arm64] - exclude: - - os: ubuntu-latest - arch: arm64 - - os: windows-2019 - arch: arm64 - - os: macos-latest - arch: amd64 - runs-on: ${{ matrix.os }} - env: - GOARCH: ${{ matrix.arch }} - CGO_ENABLED: '1' - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Add msys paths - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Install msys2 tools - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait - - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: false - - run: | - case ${{ matrix.arch }} in - amd64) echo ARCH=x86_64 ;; - arm64) echo ARCH=arm64 ;; - esac >>$GITHUB_ENV - shell: bash - - uses: golangci/golangci-lint-action@v6 - with: - args: --timeout 10m0s -v test: strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-2019] - arch: [amd64] - exclude: - - os: ubuntu-latest - arch: arm64 - - os: windows-2019 - arch: arm64 + os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} env: - GOARCH: ${{ matrix.arch }} CGO_ENABLED: '1' steps: - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Add msys paths - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Install msys2 tools - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait - uses: actions/setup-go@v5 with: go-version-file: go.mod - cache: true - - run: | - case ${{ matrix.arch }} in - amd64) echo ARCH=amd64 ;; - arm64) echo ARCH=arm64 ;; - esac >>$GITHUB_ENV - shell: bash + - uses: golangci/golangci-lint-action@v6 + with: + args: --timeout 10m0s -v - run: go test ./... 
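The rewritten `changes` job above gates the `linux` build on whether vendored code actually changed. For reference, the same filter can be exercised outside of Actions; this is a minimal local sketch of the job's `changed` helper, where the `origin/main`/`HEAD` refs are hypothetical stand-ins for the pull-request SHAs the workflow interpolates:

```shell
# Local sketch of the changes-job path filter (BASE/HEAD refs are illustrative).
changed() {
  local BASE=origin/main
  local HEAD=HEAD
  local MERGE_BASE=$(git merge-base $BASE $HEAD)
  git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
    | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
}
changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**'   # prints True or False
```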
patches: - needs: [changes] - if: ${{ needs.changes.outputs.RUNNERS == 'True' }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Verify patches carry all the changes + - name: Verify patches apply cleanly and do not change files run: | - make apply-patches sync && git diff --compact-summary --exit-code llama + make -f Makefile2 clean checkout sync + git diff --compact-summary --exit-code diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..4b3c3a968 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,54 @@ +cmake_minimum_required(VERSION 3.21) + +project(Ollama C CXX) + +include(CheckLanguage) + +find_package(Threads REQUIRED) + +set(CMAKE_BUILD_TYPE Release) +set(BUILD_SHARED_LIBS ON) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(GGML_BUILD ON) +set(GGML_SHARED ON) +set(GGML_CCACHE ON) +set(GGML_BACKEND_DL ON) +set(GGML_BACKEND_SHARED ON) +set(GGML_SCHED_MAX_COPIES 4) + +set(GGML_LLAMAFILE ON) +set(GGML_CPU_ALL_VARIANTS ON) +set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128) +set(GGML_CUDA_GRAPHS ON) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx) + +set(GGML_CPU ON) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src) +set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE) + +check_language(CUDA) +if(CMAKE_CUDA_COMPILER) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES "native") + endif() + + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda) +endif() + +check_language(HIP) +if(CMAKE_HIP_COMPILER) + set(HIP_PLATFORM "amd") + + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip) +endif() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 000000000..fccd237a4 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,109 @@ +{ + "version": 3, + "configurePresets": [ + { + "name": "Default", + "binaryDir": "${sourceDir}/build", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + }, + { + "name": "CPU", + "inherits": [ "Default" ] + }, + { + "name": "CUDA", + "inherits": [ "Default" ] + }, + { + "name": "CUDA 11", + "inherits": [ "CUDA" ], + "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86" + } + }, + { + "name": "CUDA 12", + "inherits": [ "CUDA" ], + "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a" + } + }, + { + "name": "JetPack 5", + "inherits": [ "CUDA" ], + "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "72;87" + } + }, + { + "name": "JetPack 6", + "inherits": [ "CUDA" ], + "cacheVariables": { + "CMAKE_CUDA_ARCHITECTURES": "87" + } + }, + { + "name": "ROCm", + "inherits": [ "Default" ], + "cacheVariables": { + "CMAKE_HIP_PLATFORM": "amd" + } + }, + { + "name": "ROCm 6", + "inherits": [ "ROCm" ], + "cacheVariables": { + "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + } + } + ], + "buildPresets": [ + { + "name": "Default", + "configurePreset": "Default", + "configuration": 
"Release" + }, + { + "name": "CPU", + "configurePreset": "Default", + "targets": [ "ggml-cpu" ] + }, + { + "name": "CUDA", + "configurePreset": "CUDA", + "targets": [ "ggml-cuda" ] + }, + { + "name": "CUDA 11", + "inherits": [ "CUDA" ], + "configurePreset": "CUDA 11" + }, + { + "name": "CUDA 12", + "inherits": [ "CUDA" ], + "configurePreset": "CUDA 12" + }, + { + "name": "JetPack 5", + "inherits": [ "CUDA" ], + "configurePreset": "JetPack 5" + }, + { + "name": "JetPack 6", + "inherits": [ "CUDA" ], + "configurePreset": "JetPack 6" + }, + { + "name": "ROCm", + "configurePreset": "ROCm", + "targets": [ "ggml-hip" ] + }, + { + "name": "ROCm 6", + "inherits": [ "ROCm" ], + "configurePreset": "ROCm 6" + } + ] +} diff --git a/Dockerfile b/Dockerfile index 47228df61..9b09585fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,201 +1,161 @@ -ARG GOLANG_VERSION=1.22.8 -ARG CUDA_VERSION_11=11.3.1 -ARG CUDA_VERSION_12=12.4.0 -ARG ROCM_VERSION=6.1.2 -ARG JETPACK_6=r36.2.0 -ARG JETPACK_5=r35.4.1 +# vim: filetype=dockerfile -### To create a local image for building linux binaries on mac or windows with efficient incremental builds -# -# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 . -# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64 -# -### Then incremental builds will be much faster in this container -# -# make -j 10 dist -# -FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64 -ARG GOLANG_VERSION -ARG CUDA_VERSION_11 -ARG CUDA_VERSION_12 -COPY ./scripts/rh_linux_deps.sh / -ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 -RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh -RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ - dnf clean all && \ - dnf install -y \ - zsh \ - cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \ - cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g") -# TODO intel oneapi goes here... -ENV GOARCH amd64 -ENV CGO_ENABLED 1 -WORKDIR /go/src/github.com/ollama/ollama/ -ENTRYPOINT [ "zsh" ] +ARG FLAVOR=${TARGETARCH} -### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds -# Note: this does not contain jetson variants -# -# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 . -# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64 -# -FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64 -ARG GOLANG_VERSION -ARG CUDA_VERSION_11 -ARG CUDA_VERSION_12 -COPY ./scripts/rh_linux_deps.sh / -RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh -RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \ - dnf config-manager --set-enabled appstream && \ - dnf clean all && \ - dnf install -y \ - zsh \ - cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \ - cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. 
| sed -e "s/\./-/g") -ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 -ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64 -ENV GOARCH arm64 -ENV CGO_ENABLED 1 -WORKDIR /go/src/github.com/ollama/ollama/ -ENTRYPOINT [ "zsh" ] +ARG ROCMVERSION=6.1.2 +ARG JETPACK5VERSION=r35.4.1 +ARG JETPACK6VERSION=r36.2.0 +ARG CMAKEVERSION=3.31.2 -FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64 -COPY . . -ARG OLLAMA_SKIP_CUDA_GENERATE -ARG OLLAMA_SKIP_ROCM_GENERATE -ARG OLLAMA_FAST_BUILD -ARG VERSION -ARG CUSTOM_CPU_FLAGS +FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64 +RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \ + && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \ + && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \ + && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1 +ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH + +FROM --platform=linux/arm64 rockylinux:8 AS base-arm64 +# install epel-release for ccache +RUN yum install -y yum-utils epel-release \ + && yum install -y clang ccache \ + && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo +ENV CC=clang CXX=clang++ + +FROM base-${TARGETARCH} AS base +ARG CMAKEVERSION +RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . +COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +ENV LDFLAGS=-s + +FROM base AS cpu +# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang +RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi +ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH RUN --mount=type=cache,target=/root/.ccache \ - if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \ - make -j $(nproc) dist ; \ - else \ - make -j 5 dist ; \ - fi -RUN cd dist/linux-$GOARCH && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz -RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \ - cd dist/linux-$GOARCH-rocm && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\ - fi + cmake --preset 'CPU' && cmake --build --parallel --preset 'CPU' -# Jetsons need to be built in discrete stages -FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64 -ARG GOLANG_VERSION -RUN apt-get update && apt-get install -y git curl ccache && \ - curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ - ln -s /usr/local/go/bin/go /usr/local/bin/go && \ - ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -WORKDIR /go/src/github.com/ollama/ollama/ -COPY . . 
-ARG CGO_CFLAGS -ENV GOARCH arm64 -ARG VERSION +FROM base AS cuda-11 +ARG CUDA11VERSION=11.3 +RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-} +ENV PATH=/usr/local/cuda-11/bin:$PATH RUN --mount=type=cache,target=/root/.ccache \ - make -j 5 dist_cuda_v11 \ - CUDA_ARCHITECTURES="72;87" \ - GPU_RUNNER_VARIANT=_jetpack5 \ - DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \ - DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5 + cmake --preset 'CUDA 11' && cmake --build --parallel --preset 'CUDA 11' -FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64 -ARG GOLANG_VERSION -RUN apt-get update && apt-get install -y git curl ccache && \ - curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \ - ln -s /usr/local/go/bin/go /usr/local/bin/go && \ - ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -WORKDIR /go/src/github.com/ollama/ollama/ -COPY . . -ARG CGO_CFLAGS -ENV GOARCH arm64 -ARG VERSION +FROM base AS cuda-12 +ARG CUDA12VERSION=12.4 +RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-} +ENV PATH=/usr/local/cuda-12/bin:$PATH RUN --mount=type=cache,target=/root/.ccache \ - make -j 5 dist_cuda_v12 \ - CUDA_ARCHITECTURES="87" \ - GPU_RUNNER_VARIANT=_jetpack6 \ - DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \ - DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6 + cmake --preset 'CUDA 12' && cmake --build --parallel --preset 'CUDA 12' -FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64 -COPY . . -ARG OLLAMA_SKIP_CUDA_GENERATE -ARG OLLAMA_FAST_BUILD -ARG VERSION +FROM base AS rocm-6 RUN --mount=type=cache,target=/root/.ccache \ - make -j 5 dist -COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -RUN cd dist/linux-$GOARCH && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz -RUN cd dist/linux-$GOARCH-jetpack5 && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz -RUN cd dist/linux-$GOARCH-jetpack6 && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz + cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6' -FROM --platform=linux/amd64 scratch AS dist-amd64 -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / -FROM --platform=linux/arm64 scratch AS dist-arm64 -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / -FROM dist-$TARGETARCH AS dist +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5 +ARG CMAKEVERSION +RUN apt-get update && apt-get install -y curl ccache \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . 
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'JetPack 5' && cmake --build --parallel --preset 'JetPack 5' +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6 +ARG CMAKEVERSION +RUN apt-get update && apt-get install -y curl ccache \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . +COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'JetPack 6' && cmake --build --parallel --preset 'JetPack 6' -# For amd64 container images, filter out cuda/rocm to minimize size -FROM build-amd64 AS runners-cuda-amd64 -RUN rm -rf \ - ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \ - ./dist/linux-amd64/lib/ollama/runners/rocm* +FROM base AS build +ARG GOVERSION=1.23.4 +RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local +ENV PATH=/usr/local/go/bin:$PATH +WORKDIR /go/src/github.com/ollama/ollama +COPY . . +ARG GOFLAGS="'-ldflags=-w -s'" +ENV CGO_ENABLED=1 +RUN --mount=type=cache,target=/root/.cache/go-build \ + go build -trimpath -buildmode=pie -o /bin/ollama . -FROM build-amd64 AS runners-rocm-amd64 -RUN rm -rf \ - ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \ - ./dist/linux-amd64/lib/ollama/libcu*.so* \ - ./dist/linux-amd64/lib/ollama/runners/cuda* +FROM --platform=linux/amd64 scratch AS amd64 +COPY --from=cuda-11 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.11 \ + /usr/local/cuda/lib64/libcublasLt.so.11 \ + /usr/local/cuda/lib64/libcudart.so.11.0 \ + /lib/ollama/cuda_v11/ +COPY --from=cuda-12 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.12 \ + /usr/local/cuda/lib64/libcublasLt.so.12 \ + /usr/local/cuda/lib64/libcudart.so.12 \ + /lib/ollama/cuda_v12/ -FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 -RUN apt-get update && \ - apt-get install -y ca-certificates && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ -COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +FROM --platform=linux/arm64 scratch AS arm64 +COPY --from=cuda-11 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.11 \ + /usr/local/cuda/lib64/libcublasLt.so.11 \ + /usr/local/cuda/lib64/libcudart.so.11.0 \ + /lib/ollama/cuda_v11/ +COPY --from=cuda-12 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.12 \ + /usr/local/cuda/lib64/libcublasLt.so.12 \ + /usr/local/cuda/lib64/libcudart.so.12 \ + /lib/ollama/cuda_v12/ +COPY --from=jetpack-5 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.11 \ + /usr/local/cuda/lib64/libcublasLt.so.11 \ + /usr/local/cuda/lib64/libcudart.so.11.0 \ + /lib/ollama/cuda_jetpack5/ +COPY --from=jetpack-6 --chmod=644 \ + build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.12 \ + /usr/local/cuda/lib64/libcublasLt.so.12 \ + /usr/local/cuda/lib64/libcudart.so.12 \ + /lib/ollama/cuda_jetpack6/ -FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64 -RUN apt-get update && \ - apt-get install -y ca-certificates && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-arm64 
/go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ -COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/ -COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/ +FROM --platform=linux/amd64 scratch AS rocm +COPY --from=rocm-6 --chmod=644 \ + build/lib/libggml-hip.so \ + /opt/rocm/lib/libamdhip64.so.6 \ + /opt/rocm/lib/libhipblas.so.2 \ + /opt/rocm/lib/librocblas.so.4 \ + /opt/rocm/lib/libamd_comgr.so.2 \ + /opt/rocm/lib/libhsa-runtime64.so.1 \ + /opt/rocm/lib/librocprofiler-register.so.0 \ + /opt/amdgpu/lib64/libdrm_amdgpu.so.1 \ + /opt/amdgpu/lib64/libdrm.so.2 \ + /usr/lib64/libnuma.so.1 \ + /lib/ollama/rocm/ +COPY --from=rocm-6 /opt/rocm/lib/rocblas/ /lib/ollama/rocm/rocblas/ FROM ${FLAVOR} AS archive +COPY --from=cpu --chmod=644 \ + build/lib/libggml-base.so \ + build/lib/libggml-cpu-*.so \ + /lib/ollama/ +COPY --from=build /bin/ollama /bin/ollama -# ROCm libraries larger so we keep it distinct from the CPU/CUDA image -FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm -# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer -# across releases -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/ -RUN apt-get update && \ - apt-get install -y ca-certificates && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ -COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ - -EXPOSE 11434 -ENV OLLAMA_HOST 0.0.0.0 - -ENTRYPOINT ["/bin/ollama"] -CMD ["serve"] - -FROM runtime-$TARGETARCH -EXPOSE 11434 -ENV OLLAMA_HOST 0.0.0.0 +FROM ubuntu:20.04 +RUN apt-get update \ + && apt-get install -y ca-certificates \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +COPY --from=archive /bin/ /usr/bin/ ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +COPY --from=archive /lib/ollama/ /usr/lib/ollama/ -ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 +ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_VISIBLE_DEVICES=all - +ENV OLLAMA_HOST=0.0.0.0:11434 +EXPOSE 11434 ENTRYPOINT ["/bin/ollama"] CMD ["serve"] diff --git a/Makefile b/Makefile deleted file mode 100644 index 383354ee9..000000000 --- a/Makefile +++ /dev/null @@ -1,103 +0,0 @@ -# top level makefile for Ollama -include make/common-defs.make - - -# Determine which if any GPU runners we should build -include make/cuda-v11-defs.make -include make/cuda-v12-defs.make -include make/rocm-defs.make - -ifeq ($(CUSTOM_CPU_FLAGS),) -ifeq ($(ARCH),amd64) - RUNNER_TARGETS=cpu -endif -# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present -ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),) -ifneq ($(CUDA_11_COMPILER),) - RUNNER_TARGETS += cuda_v11 -endif -ifneq ($(CUDA_12_COMPILER),) - RUNNER_TARGETS += cuda_v12 -endif -endif -else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected -ifneq ($(CUDA_12_COMPILER),) - RUNNER_TARGETS += cuda_v12 -else ifneq ($(CUDA_11_COMPILER),) - RUNNER_TARGETS += cuda_v11 -endif -endif - -ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),) -ifneq ($(HIP_COMPILER),) - RUNNER_TARGETS += rocm -endif -endif - - -all: runners exe - -dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe -
-dist_%: - @$(MAKE) --no-print-directory -f make/Makefile.$* dist - -runners: $(RUNNER_TARGETS) - -$(RUNNER_TARGETS): - @$(MAKE) --no-print-directory -f make/Makefile.$@ - -exe dist_exe: - @$(MAKE) --no-print-directory -f make/Makefile.ollama $@ - -help-sync apply-patches create-patches sync sync-clean: - @$(MAKE) --no-print-directory -f make/Makefile.sync $@ - -test integration lint: - @$(MAKE) --no-print-directory -f make/Makefile.test $@ - -clean: - rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE) - go clean -cache - -help: - @echo "The following make targets will help you build Ollama" - @echo "" - @echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable" - @echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable" - @echo " make # Build specific runners. Enabled: '$(RUNNER_TARGETS)'" - @echo " make dist # Build the runners and primary ollama executable for distribution" - @echo " make help-sync # Help information on vendor update targets" - @echo " make help-runners # Help information on runner targets" - @echo "" - @echo "The following make targets will help you test Ollama" - @echo "" - @echo " make test # Run unit tests" - @echo " make integration # Run integration tests. You must 'make all' first" - @echo " make lint # Run lint and style tests" - @echo "" - @echo "For more information see 'docs/development.md'" - @echo "" - - -help-runners: - @echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'" - @echo "" - @echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)" - @echo "" - @echo "# CUDA_PATH sets the location where CUDA toolkits are present" - @echo "CUDA_PATH=$(CUDA_PATH)" - @echo " CUDA_11_PATH=$(CUDA_11_PATH)" - @echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)" - @echo " CUDA_12_PATH=$(CUDA_12_PATH)" - @echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)" - @echo "" - @echo "# HIP_PATH sets the location where the ROCm toolkit is present" - @echo "HIP_PATH=$(HIP_PATH)" - @echo " HIP_COMPILER=$(HIP_COMPILER)" - -.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS) - -# Handy debugging for make variables -print-%: - @echo '$*=$($*)' diff --git a/Makefile2 b/Makefile2 new file mode 100644 index 000000000..b4610e590 --- /dev/null +++ b/Makefile2 @@ -0,0 +1,46 @@ +UPSTREAM=https://github.com/ggerganov/llama.cpp.git +WORKDIR=llama/vendor +FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c + +all: sync + +.PHONY: sync +sync: llama/llama.cpp ml/backend/ggml/ggml + +.PHONY: llama/llama.cpp +llama/llama.cpp: llama/vendor/ apply_patches + rsync -arvzc -f "merge $@/.rsync-filter" $< $@ + +.PHONY: ml/backend/ggml/ggml apply_patches +ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches + rsync -arvzc -f "merge $@/.rsync-filter" $< $@ + +PATCHES=$(wildcard llama/patches/*.patch) + +.PHONY: apply_patches +.NOTPARALLEL: +apply_patches: $(addsuffix ed, $(PATCHES)) + +%.patched: %.patch + @if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi + +.PHONY: checkout +checkout: $(WORKDIR) + git -C $(WORKDIR) fetch + git -C $(WORKDIR) checkout -f $(FETCH_HEAD) + +$(WORKDIR): + git clone $(UPSTREAM) $(WORKDIR) + +.PHONY: format_patches +format_patches: llama/patches + git -C $(WORKDIR) format-patch \ + --no-signature \ + --no-numbered \ + --zero-commit \
-o $(realpath $<) \ + $(FETCH_HEAD) + +.PHONY: clean +clean: checkout + $(RM) $(addsuffix ed, $(PATCHES)) diff --git a/go.mod b/go.mod index 1a1fdb408..1c99c0946 100644 --- a/go.mod +++ b/go.mod @@ -17,12 +17,14 @@ require ( require ( github.com/agnivade/levenshtein v1.1.1 github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 + github.com/dlclark/regexp2 v1.11.4 github.com/emirpasic/gods/v2 v2.0.0-alpha github.com/google/go-cmp v0.6.0 github.com/mattn/go-runewidth v0.0.14 github.com/nlpodyssey/gopickle v0.3.0 github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c golang.org/x/image v0.22.0 + gonum.org/v1/gonum v0.15.0 ) require ( @@ -42,7 +44,6 @@ require ( github.com/xtgo/set v1.0.0 // indirect go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect - gonum.org/v1/gonum v0.15.0 // indirect gorgonia.org/vecf32 v0.9.0 // indirect gorgonia.org/vecf64 v0.9.0 // indirect ) diff --git a/go.sum b/go.sum index 6a2c91896..8eb8d84ab 100644 --- a/go.sum +++ b/go.sum @@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g= github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= +github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= +github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU= github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= diff --git a/llama/README.md b/llama/README.md index 3b6b20672..a1fef48c2 100644 --- a/llama/README.md +++ b/llama/README.md @@ -37,8 +37,7 @@ go build -tags avx . ```shell # go doesn't recognize `-mfma` as a valid compiler flag # see https://github.com/golang/go/issues/17895 -go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c" -go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c" +go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c" go build -tags=avx,avx2 . ``` diff --git a/llama/amx.h b/llama/amx.h deleted file mode 100644 index 5b64b8bd8..000000000 --- a/llama/amx.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ggml-backend.h" -#include "ggml-cpu-impl.h" - -// GGML internal header - -#if defined(__AMX_INT8__) && defined(__AVX512VNNI__) -ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); -#endif diff --git a/llama/ggml-blas.h b/llama/ggml-blas.h deleted file mode 100644 index f5fb9de21..000000000 --- a/llama/ggml-blas.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include "ggml.h" -#include "ggml-backend.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -// backend API -GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); - -GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); - -// number of threads used for conversion to float -// for openblas and blis, this will also set the number of threads used for blas operations -GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); - -GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); - - -#ifdef __cplusplus -} -#endif diff --git a/llama/ggml-cpu-aarch64.h b/llama/ggml-cpu-aarch64.h deleted file mode 100644 index 14320735c..000000000 --- a/llama/ggml-cpu-aarch64.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "ggml-cpu-traits.h"
-#include "ggml.h"
-
-// GGML internal header
-
-ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
diff --git a/llama/ggml-cpu-traits.h b/llama/ggml-cpu-traits.h
deleted file mode 100644
index dcd7855fd..000000000
--- a/llama/ggml-cpu-traits.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "ggml-backend-impl.h"
-#include "ggml-cpu-impl.h"
-#include "ggml.h"
-
-#ifdef __cplusplus
-#    include <vector>
-extern "C" {
-#endif
-
-// return true if op part of extra "accelerator"
-bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
-bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
-
-#ifdef __cplusplus
-}
-
-namespace ggml::cpu {
-// register in tensor->extra
-class tensor_traits {
-  public:
-    virtual ~tensor_traits();
-    virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
-    virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
-};
-
-class extra_buffer_type {
-  public:
-    virtual ~extra_buffer_type();
-    virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
-    virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
-};
-} // namespace ggml::cpu
-
-// implemented in ggml-cpu.cpp.
-std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
-
-#endif
diff --git a/llama/ggml-cuda/acc.cuh b/llama/ggml-cuda/acc.cuh
deleted file mode 100644
index 5c12d9066..000000000
--- a/llama/ggml-cuda/acc.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_ACC_BLOCK_SIZE 256
-
-void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/arange.cu b/llama/ggml-cuda/arange.cu
deleted file mode 100644
index 3b67b3b5f..000000000
--- a/llama/ggml-cuda/arange.cu
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arange.cuh"
-
-static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
-    // blockIdx.x: idx of ne0 / BLOCK_SIZE
-    int nidx = threadIdx.x + blockIdx.x * blockDim.x;
-    if (nidx >= ne0) {
-        return;
-    }
-    dst[nidx] = start + step * nidx;
-}
-
-static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
-    int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
-    arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
-}
-
-void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-    float start;
-    float stop;
-    float step;
-    memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
-    memcpy(&stop,  (float *)dst->op_params + 1, sizeof(float));
-    memcpy(&step,  (float *)dst->op_params + 2, sizeof(float));
-
-    int64_t steps = (int64_t)ceil((stop - start) / step);
-    GGML_ASSERT(ggml_nelements(dst) == steps);
-
-    arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
-}
diff --git a/llama/ggml-cuda/arange.cuh b/llama/ggml-cuda/arange.cuh
deleted file mode 100644
index 16201546b..000000000
--- a/llama/ggml-cuda/arange.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
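For reference, the launch arithmetic in `arange_f32_cuda` above is the standard 1-D pattern used throughout these kernels: `(ne0 + BLOCK - 1) / BLOCK` blocks of `BLOCK` threads, with an in-kernel bounds check covering the final partial block. A minimal host-only sketch of that ceil division (hypothetical toy code, not part of the patch):

```cpp
// Toy illustration of the 1-D grid sizing used by the deleted CUDA kernels:
// ceil(n / block_size) blocks guarantee every element gets a thread slot,
// and the in-kernel `if (idx >= n) return;` discards the overshoot.
#include <cstdio>
#include <initializer_list>

int main() {
    const int block_size = 256;  // mirrors CUDA_ARANGE_BLOCK_SIZE
    for (int n : {1, 255, 256, 257, 1000}) {
        int num_blocks = (n + block_size - 1) / block_size;  // ceil division
        printf("n=%4d -> %2d block(s), %4d thread slots\n", n, num_blocks, num_blocks * block_size);
    }
    return 0;
}
```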
- */ - -#include "common.cuh" - -#define CUDA_ARANGE_BLOCK_SIZE 256 - -void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/argmax.cuh b/llama/ggml-cuda/argmax.cuh deleted file mode 100644 index 805a90d8c..000000000 --- a/llama/ggml-cuda/argmax.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/argsort.cuh b/llama/ggml-cuda/argsort.cuh deleted file mode 100644 index 0d8427bb1..000000000 --- a/llama/ggml-cuda/argsort.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/binbcast.cuh b/llama/ggml-cuda/binbcast.cuh deleted file mode 100644 index 3acee0d07..000000000 --- a/llama/ggml-cuda/binbcast.cuh +++ /dev/null @@ -1,35 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/clamp.cu b/llama/ggml-cuda/clamp.cu deleted file mode 100644 index 2df1076ca..000000000 --- a/llama/ggml-cuda/clamp.cu +++ /dev/null @@ -1,60 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-
-#include "clamp.cuh"
-
-static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
-    const int i = blockDim.x*blockIdx.x + threadIdx.x;
-
-    if (i >= k) {
-        return;
-    }
-
-    dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
-}
-
-static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
-    const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
-    clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
-}
-
-
-void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    float min;
-    float max;
-    memcpy(&min, dst->op_params, sizeof(float));
-    memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
-
-    clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
-}
diff --git a/llama/ggml-cuda/clamp.cuh b/llama/ggml-cuda/clamp.cuh
deleted file mode 100644
index 3f74a8807..000000000
--- a/llama/ggml-cuda/clamp.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
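The `memcpy` calls in `ggml_cuda_op_clamp` above are how ggml ops recover `float` parameters from the tensor's raw `op_params` words; copying bytes instead of casting the pointer avoids strict-aliasing problems. A host-only sketch of the pack/unpack round trip (toy code with hypothetical values, not from the patch):

```cpp
// ggml stores op parameters as raw 32-bit words; floats are packed and
// recovered with memcpy, mirroring the min/max handling in the clamp op.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    int32_t op_params[2];
    float min_in = -1.0f, max_in = 1.0f;
    memcpy(&op_params[0], &min_in, sizeof(float));  // pack, as ggml_clamp would
    memcpy(&op_params[1], &max_in, sizeof(float));

    float min_out, max_out;
    memcpy(&min_out, op_params, sizeof(float));               // unpack, as the CUDA op does
    memcpy(&max_out, (float *) op_params + 1, sizeof(float));
    printf("min=%g max=%g\n", min_out, max_out);
    return 0;
}
```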
- */ - -#include "common.cuh" - -#define CUDA_CLAMP_BLOCK_SIZE 256 - -void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/concat.cuh b/llama/ggml-cuda/concat.cuh deleted file mode 100644 index ba2b67ec1..000000000 --- a/llama/ggml-cuda/concat.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_CONCAT_BLOCK_SIZE 256 - -void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/conv-transpose-1d.cuh b/llama/ggml-cuda/conv-transpose-1d.cuh deleted file mode 100644 index 53c3beefd..000000000 --- a/llama/ggml-cuda/conv-transpose-1d.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-
-#include "common.cuh"
-
-#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256
-
-void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/convert.cuh b/llama/ggml-cuda/convert.cuh
deleted file mode 100644
index 27f949e2a..000000000
--- a/llama/ggml-cuda/convert.cuh
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
-
-template<typename T>
-using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
-
-typedef to_t_cuda_t<float> to_fp32_cuda_t;
-typedef to_t_cuda_t<half>  to_fp16_cuda_t;
-
-to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
-
-to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
diff --git a/llama/ggml-cuda/count-equal.cuh b/llama/ggml-cuda/count-equal.cuh
deleted file mode 100644
index 922c6288c..000000000
--- a/llama/ggml-cuda/count-equal.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
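`to_fp32_cuda_t` and `to_fp16_cuda_t` above are function-pointer aliases: `ggml_get_to_fp32_cuda` hands back a conversion launcher chosen per `ggml_type`. A simplified, host-only sketch of that dispatch-table pattern (hypothetical names and types, not the real API):

```cpp
#include <cstdint>
#include <cstdio>

enum toy_type { TOY_Q4, TOY_Q8 };

// function-pointer alias, analogous to to_t_cuda_t<T> in convert.cuh
template <typename T>
using to_t_fn = void (*)(const void * x, T * y, int64_t k);

static void dequant_q4(const void *, float *, int64_t k) { printf("q4 path, k=%lld\n", (long long) k); }
static void dequant_q8(const void *, float *, int64_t k) { printf("q8 path, k=%lld\n", (long long) k); }

// analogous to ggml_get_to_fp32_cuda: pick the launcher for a given type
static to_t_fn<float> get_to_fp32(toy_type t) {
    switch (t) {
        case TOY_Q4: return dequant_q4;
        case TOY_Q8: return dequant_q8;
    }
    return nullptr;
}

int main() {
    get_to_fp32(TOY_Q4)(nullptr, nullptr, 32);  // selects the q4 path
    return 0;
}
```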
- */ - -#include "common.cuh" - -#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128 - -void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/cpy.cuh b/llama/ggml-cuda/cpy.cuh deleted file mode 100644 index 79496c4cc..000000000 --- a/llama/ggml-cuda/cpy.cuh +++ /dev/null @@ -1,35 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_CPY_BLOCK_SIZE 64 - -void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1); - -void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1); diff --git a/llama/ggml-cuda/cross-entropy-loss.cuh b/llama/ggml-cuda/cross-entropy-loss.cuh deleted file mode 100644 index e816b8df6..000000000 --- a/llama/ggml-cuda/cross-entropy-loss.cuh +++ /dev/null @@ -1,33 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256 - -void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/diagmask.cuh b/llama/ggml-cuda/diagmask.cuh deleted file mode 100644 index 76162837f..000000000 --- a/llama/ggml-cuda/diagmask.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 - -void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn-tile-f16.cuh b/llama/ggml-cuda/fattn-tile-f16.cuh deleted file mode 100644 index 4a3965ed3..000000000 --- a/llama/ggml-cuda/fattn-tile-f16.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn-tile-f32.cuh b/llama/ggml-cuda/fattn-tile-f32.cuh deleted file mode 100644 index 8a5eef471..000000000 --- a/llama/ggml-cuda/fattn-tile-f32.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn.cuh b/llama/ggml-cuda/fattn.cuh deleted file mode 100644 index 6947118e1..000000000 --- a/llama/ggml-cuda/fattn.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/getrows.cuh b/llama/ggml-cuda/getrows.cuh deleted file mode 100644 index bbbf482d3..000000000 --- a/llama/ggml-cuda/getrows.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_GET_ROWS_BLOCK_SIZE 256 - -void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/im2col.cuh b/llama/ggml-cuda/im2col.cuh deleted file mode 100644 index 2c64c16b1..000000000 --- a/llama/ggml-cuda/im2col.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -#define CUDA_IM2COL_BLOCK_SIZE 256 - -void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/mmv.cuh b/llama/ggml-cuda/mmv.cuh deleted file mode 100644 index fcfc8ea46..000000000 --- a/llama/ggml-cuda/mmv.cuh +++ /dev/null @@ -1,38 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available -#define MMV_MAX_ROWS 512 - -void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); - -void ggml_cuda_op_mul_mat_vec( - ggml_backend_cuda_context & ctx, - const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, - const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, - const int64_t src1_padded_row_size, cudaStream_t stream); diff --git a/llama/ggml-cuda/mmvq.cuh b/llama/ggml-cuda/mmvq.cuh deleted file mode 100644 index ae18ae315..000000000 --- a/llama/ggml-cuda/mmvq.cuh +++ /dev/null @@ -1,35 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels. - -void ggml_cuda_op_mul_mat_vec_q( - ggml_backend_cuda_context & ctx, - const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, - const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, - const int64_t src1_padded_row_size, cudaStream_t stream); diff --git a/llama/ggml-cuda/norm.cuh b/llama/ggml-cuda/norm.cuh deleted file mode 100644 index 0902f23ac..000000000 --- a/llama/ggml-cuda/norm.cuh +++ /dev/null @@ -1,33 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); - -void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/opt-step-adamw.cuh b/llama/ggml-cuda/opt-step-adamw.cuh deleted file mode 100644 index b956bf93a..000000000 --- a/llama/ggml-cuda/opt-step-adamw.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256 - -void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/out-prod.cuh b/llama/ggml-cuda/out-prod.cuh deleted file mode 100644 index 4631cd65a..000000000 --- a/llama/ggml-cuda/out-prod.cuh +++ /dev/null @@ -1,29 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/pad.cuh b/llama/ggml-cuda/pad.cuh deleted file mode 100644 index 9c23680dc..000000000 --- a/llama/ggml-cuda/pad.cuh +++ /dev/null @@ -1,32 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_PAD_BLOCK_SIZE 256 - -void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); -void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/pool2d.cuh b/llama/ggml-cuda/pool2d.cuh deleted file mode 100644 index 9c0045f87..000000000 --- a/llama/ggml-cuda/pool2d.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_POOL2D_BLOCK_SIZE 256 - -void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/quantize.cuh b/llama/ggml-cuda/quantize.cuh deleted file mode 100644 index ee8e2a52b..000000000 --- a/llama/ggml-cuda/quantize.cuh +++ /dev/null @@ -1,50 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-
-#pragma once
-
-#include "common.cuh"
-#include "mmq.cuh"
-
-#include <cstdint>
-
-#define CUDA_QUANTIZE_BLOCK_SIZE     256
-#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
-
-static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
-static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
-
-typedef void (*quantize_cuda_t)(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);
-
-void quantize_row_q8_1_cuda(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);
-
-void quantize_mmq_q8_1_cuda(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);
diff --git a/llama/ggml-cuda/rope.cuh b/llama/ggml-cuda/rope.cuh
deleted file mode 100644
index cd5140ce0..000000000
--- a/llama/ggml-cuda/rope.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_ROPE_BLOCK_SIZE 256
-
-void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/scale.cu b/llama/ggml-cuda/scale.cu
deleted file mode 100644
index b3b38cdf3..000000000
--- a/llama/ggml-cuda/scale.cu
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "scale.cuh"
-
-static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
-    const int i = blockDim.x*blockIdx.x + threadIdx.x;
-
-    if (i >= k) {
-        return;
-    }
-
-    dst[i] = scale * x[i];
-}
-
-static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
-    const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
-    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
-}
-
-void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    float scale;
-    memcpy(&scale, dst->op_params, sizeof(float));
-
-    scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
-}
diff --git a/llama/ggml-cuda/scale.cuh b/llama/ggml-cuda/scale.cuh
deleted file mode 100644
index ae2ec5af5..000000000
--- a/llama/ggml-cuda/scale.cuh
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
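Because `scale` is just `dst[i] = scale * x[i]`, a CPU reference is a one-line loop; a sketch like this (toy code, not part of the patch) is a convenient baseline when validating the CUDA kernel:

```cpp
#include <cstdio>

// CPU reference for the scale op: y[i] = scale * x[i]
static void scale_f32_ref(const float * x, float * dst, float scale, int k) {
    for (int i = 0; i < k; ++i) {
        dst[i] = scale * x[i];
    }
}

int main() {
    const float x[4] = {1.0f, -2.0f, 0.5f, 4.0f};
    float y[4];
    scale_f32_ref(x, y, 0.25f, 4);
    for (float v : y) printf("%g ", v);  // 0.25 -0.5 0.125 1
    printf("\n");
    return 0;
}
```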
- */ - -#include "common.cuh" - -#define CUDA_SCALE_BLOCK_SIZE 256 - -void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/softmax.cuh b/llama/ggml-cuda/softmax.cuh deleted file mode 100644 index 85459e24e..000000000 --- a/llama/ggml-cuda/softmax.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_SOFT_MAX_BLOCK_SIZE 1024 - -void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/sum.cuh b/llama/ggml-cuda/sum.cuh deleted file mode 100644 index 6883be872..000000000 --- a/llama/ggml-cuda/sum.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream); - -void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/sumrows.cu b/llama/ggml-cuda/sumrows.cu deleted file mode 100644 index fbd3cd874..000000000 --- a/llama/ggml-cuda/sumrows.cu +++ /dev/null @@ -1,65 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "sumrows.cuh" - -static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { - const int row = blockIdx.x; - const int col = threadIdx.x; - - float sum = 0.0f; - for (int i = col; i < ncols; i += blockDim.x) { - sum += x[row * ncols + i]; - } - - sum = warp_reduce_sum(sum); - - if (col == 0) { - dst[row] = sum; - } -} - -void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { - const dim3 block_dims(WARP_SIZE, 1, 1); - const dim3 block_nums(nrows, 1, 1); - k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols); -} - -void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; - const float * src0_d = (const float *)src0->data; - float * dst_d = (float *)dst->data; - cudaStream_t stream = ctx.stream(); - - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); - GGML_ASSERT(ggml_is_contiguous(src0)); - - const int64_t ncols = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); - - sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream); -} diff --git a/llama/ggml-cuda/sumrows.cuh b/llama/ggml-cuda/sumrows.cuh deleted file mode 100644 index 204384f51..000000000 --- a/llama/ggml-cuda/sumrows.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to
do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream); - -void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu deleted file mode 100644 index 48cdc8f40..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu deleted file mode 100644 index 6aeab0bac..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu deleted file mode 100644 index 2d98ef1a9..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu deleted file mode 100644 index 7fe280e05..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu deleted file mode 100644 index 9835cbfaa..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu deleted file mode 100644 index 45ffa2a8f..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu deleted file mode 100644 index 592287a86..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu deleted file mode 100644 index fe080a734..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu deleted file mode 100644 index 0580444e6..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu deleted file mode 100644 index 5b2650d8a..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu deleted file mode 100644 index 886ba3956..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu deleted file mode 100644 index 789757a80..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu deleted file mode 100644 index a4bfe23ff..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu deleted file mode 100644 index eab22f0d9..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu deleted file mode 100644 index 3301160fb..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu deleted file mode 100644 index aa37c412e..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu deleted file mode 100644 index a2dd8d866..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu deleted file mode 100644 index 709c2de08..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu deleted file mode 100644 index 3279dad9c..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu deleted file mode 100644 index 4e112e133..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu deleted file mode 100644 index 8662359bd..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu deleted file mode 100644 index bc3c70614..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu deleted file mode 100644 index 027c6d944..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu deleted file mode 100644 index 543346290..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu deleted file mode 100644 index 9cdcd1b39..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu deleted file mode 100644 index 258e08b29..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu deleted file mode 100644 index 7c41007a3..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu deleted file mode 100644 index 0296737fe..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu deleted file mode 100644 index f9fdc1976..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu deleted file mode 100644 index 518c67255..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu deleted file mode 100644 index dfb36938d..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu deleted file mode 100644 index 4ae015114..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu deleted file mode 100644 index a69a7acb5..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu deleted file mode 100644 index a46aab8a8..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu deleted file mode 100644 index 3fe4f970a..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu deleted file mode 100644 index 933a5dd7a..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu deleted file mode 100644 index b051c7d1a..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu deleted file mode 100644 index 3a90aba7e..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu deleted file mode 100644 index 3ddad858a..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu deleted file mode 100644 index df3ce0a3c..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu deleted file mode 100644 index 49d2666a4..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu deleted file mode 100644 index 531c87c22..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu deleted file mode 100644 index e747f6e7d..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f16.cuh" - -DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu deleted file mode 100644 index d6097d1cb..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu deleted file mode 100644 index a6bda11fd..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu deleted file mode 100644 index 800ea14f6..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu deleted file mode 100644 index b3bad6b01..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu deleted file mode 100644 index 6a7127ddf..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu deleted file mode 100644 index 62351c23b..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu deleted file mode 100644 index 1b35f1688..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu deleted file mode 100644 index 5c6256810..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu deleted file mode 100644 index 6f70b7404..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu deleted file mode 100644 index d91c6f920..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu deleted file mode 100644 index d206889db..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu deleted file mode 100644 index ae104a61e..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu deleted file mode 100644 index ab2c66bec..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu deleted file mode 100644 index 4b55d39f3..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu deleted file mode 100644 index 1c1065ff3..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu deleted file mode 100644 index b973d1617..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu deleted file mode 100644 index 9b3999e89..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu deleted file mode 100644 index fc7fde303..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu deleted file mode 100644 index b1f482722..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu deleted file mode 100644 index b854659a3..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu deleted file mode 100644 index 35db0d6d6..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu deleted file mode 100644 index cc76b0fb9..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu deleted file mode 100644 index ff9e76dd6..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu deleted file mode 100644 index 4b031d981..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu deleted file mode 100644 index b99bab1e7..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu deleted file mode 100644 index 22e2e6db2..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu deleted file mode 100644 index 95c1984e5..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu deleted file mode 100644 index 65307d393..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu deleted file mode 100644 index ae0ec146b..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu deleted file mode 100644 index 1f420c1d9..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu deleted file mode 100644 index 1d445af3d..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu deleted file mode 100644 index b3a951dc7..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu deleted file mode 100644 index 804c30b20..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu deleted file mode 100644 index 432928a20..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu deleted file mode 100644 index 409f81b06..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu deleted file mode 100644 index 032dab7ff..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu deleted file mode 100644 index 00014a4f3..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu deleted file mode 100644 index 324572636..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu deleted file mode 100644 index e7d49c270..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu deleted file mode 100644 index 8d732548b..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu deleted file mode 100644 index a8e257641..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu deleted file mode 100644 index dabbcd237..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu deleted file mode 100644 index cfbae911d..000000000 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-vec-f32.cuh" - -DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu deleted file mode 100644 index b1bdc1e95..000000000 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ /dev/null @@ -1,36 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, float); -DECL_FATTN_WMMA_F16_CASE(80, 16, float); -DECL_FATTN_WMMA_F16_CASE(96, 16, float); -DECL_FATTN_WMMA_F16_CASE(112, 16, float); -DECL_FATTN_WMMA_F16_CASE(128, 16, float); -DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu deleted file mode 100644 index 3151d9d67..000000000 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ /dev/null @@ -1,35 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, float); -DECL_FATTN_WMMA_F16_CASE(80, 32, float); -DECL_FATTN_WMMA_F16_CASE(96, 32, float); -DECL_FATTN_WMMA_F16_CASE(112, 32, float); -DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu deleted file mode 100644 index eea23df92..000000000 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ /dev/null @@ -1,36 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, half); -DECL_FATTN_WMMA_F16_CASE(80, 16, half); -DECL_FATTN_WMMA_F16_CASE(96, 16, half); -DECL_FATTN_WMMA_F16_CASE(112, 16, half); -DECL_FATTN_WMMA_F16_CASE(128, 16, half); -DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu deleted file mode 100644 index 70ba3a53b..000000000 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ /dev/null @@ -1,36 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, half); -DECL_FATTN_WMMA_F16_CASE(80, 32, half); -DECL_FATTN_WMMA_F16_CASE(96, 32, half); -DECL_FATTN_WMMA_F16_CASE(112, 32, half); -DECL_FATTN_WMMA_F16_CASE(128, 32, half); -DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu deleted file mode 100644 index 3a8261ab0..000000000 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ /dev/null @@ -1,34 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 8, half); -DECL_FATTN_WMMA_F16_CASE(96, 8, half); -DECL_FATTN_WMMA_F16_CASE(128, 8, half); -DECL_FATTN_WMMA_F16_CASE(256, 8, half); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu deleted file mode 100644 index f39436687..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ1_S); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu deleted file mode 100644 index 086ab539f..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ2_S); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu deleted file mode 100644 index 6af7aa320..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ2_XS); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu deleted file mode 100644 index fc771442a..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu deleted file mode 100644 index 5ba22c06d..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ3_S); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu deleted file mode 100644 index 647be438b..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu deleted file mode 100644 index b8263fa36..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ4_NL); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu deleted file mode 100644 index 41986b9d6..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_IQ4_XS); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu deleted file mode 100644 index 023aec760..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q2_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu deleted file mode 100644 index f8bba904d..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q3_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu deleted file mode 100644 index 425d7a613..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q4_0); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu deleted file mode 100644 index 91bafb73f..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q4_1); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu deleted file mode 100644 index a0ad396c1..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q4_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu deleted file mode 100644 index dc1cbd434..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q5_0); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu deleted file mode 100644 index cc70a445c..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q5_1); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu deleted file mode 100644 index 3ff67b9f1..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q5_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu deleted file mode 100644 index 1d1ffee9f..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q6_K); diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu deleted file mode 100644 index 1a7e0865d..000000000 --- a/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../mmq.cuh" - -DECL_MMQ_CASE(GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/tsembd.cuh b/llama/ggml-cuda/tsembd.cuh deleted file mode 100644 index 629586504..000000000 --- a/llama/ggml-cuda/tsembd.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "common.cuh" - -#define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 - -void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/upscale.cuh b/llama/ggml-cuda/upscale.cuh deleted file mode 100644 index d8bb2ec8d..000000000 --- a/llama/ggml-cuda/upscale.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.cuh" - -#define CUDA_UPSCALE_BLOCK_SIZE 256 - -void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/vendors/cuda.h b/llama/ggml-cuda/vendors/cuda.h deleted file mode 100644 index e309dd3f1..000000000 --- a/llama/ggml-cuda/vendors/cuda.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include <cuda_runtime.h> -#include <cuda.h> -#include <cublas_v2.h> -#include <cuda_fp16.h> -#include <cuda_bf16.h> - -#if CUDART_VERSION < 11020 -#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED -#define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH -#define CUBLAS_COMPUTE_16F CUDA_R_16F -#define CUBLAS_COMPUTE_32F CUDA_R_32F -#define cublasComputeType_t cudaDataType_t -#endif // CUDART_VERSION < 11020 diff --git a/llama/ggml-cuda/wkv6.cuh b/llama/ggml-cuda/wkv6.cuh deleted file mode 100644 index 270272878..000000000 --- a/llama/ggml-cuda/wkv6.cuh +++ /dev/null @@ -1,31 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - -#include "common.cuh" - -#define CUDA_WKV_BLOCK_SIZE 64 - -void ggml_cuda_op_rwkv_wkv6(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-threading.cpp b/llama/ggml-threading.cpp deleted file mode 100644 index 7559b3366..000000000 --- a/llama/ggml-threading.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ggml-threading.h" -#include <mutex> - -std::mutex ggml_critical_section_mutex; - -void ggml_critical_section_start() { - ggml_critical_section_mutex.lock(); -} - -void ggml_critical_section_end(void) { - ggml_critical_section_mutex.unlock(); -} diff --git a/llama/ggml-threading.h b/llama/ggml-threading.h deleted file mode 100644 index fe2ce3679..000000000 --- a/llama/ggml-threading.h +++ /dev/null @@ -1,40 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - -#pragma once - -#include "ggml.h" - -#ifdef __cplusplus -extern "C" { -#endif - -GGML_API void ggml_critical_section_start(void); -GGML_API void ggml_critical_section_end(void); - -#ifdef __cplusplus -} -#endif diff --git a/llama/json-schema-to-grammar.h b/llama/json-schema-to-grammar.h deleted file mode 100644 index 39b451cab..000000000 --- a/llama/json-schema-to-grammar.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include "ggml.h" -// Change JSON_ASSERT from assert() to GGML_ASSERT: -#define JSON_ASSERT GGML_ASSERT -#include "json.hpp" - -std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/llama/llama-cparams.cpp b/llama/llama-cparams.cpp deleted file mode 100644 index 5a5d14cb0..000000000 --- a/llama/llama-cparams.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "llama-cparams.h" diff --git a/llama/llama-cparams.h b/llama/llama-cparams.h deleted file mode 100644 index 74fdb5c54..000000000 --- a/llama/llama-cparams.h +++ /dev/null @@ -1,64 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include "llama.h" - -#include <cstdint> - -struct llama_cparams { - uint32_t n_ctx; // context size used during inference - uint32_t n_batch; - uint32_t n_ubatch; - uint32_t n_seq_max; - int n_threads; // number of threads to use for generation - int n_threads_batch; // number of threads to use for batch processing - - float rope_freq_base; - float rope_freq_scale; - - uint32_t n_ctx_orig_yarn; - // These hyperparameters are not exposed in GGUF, because all - // existing YaRN models use the same values for them. - float yarn_ext_factor; - float yarn_attn_factor; - float yarn_beta_fast; - float yarn_beta_slow; - float defrag_thold; - - bool embeddings; - bool causal_attn; - bool offload_kqv; - bool flash_attn; - bool no_perf; - bool cross_attn; - - enum llama_pooling_type pooling_type; - - ggml_backend_sched_eval_callback cb_eval; - void * cb_eval_user_data; -}; diff --git a/llama/llama-cpp.h b/llama/llama-cpp.h deleted file mode 100644 index a0b7beb43..000000000 --- a/llama/llama-cpp.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#ifndef __cplusplus -#error "This header is for C++ only" -#endif - -#include <memory> - -#include "llama.h" - -struct llama_model_deleter { - void operator()(llama_model * model) { llama_free_model(model); } -}; - -struct llama_context_deleter { - void operator()(llama_context * context) { llama_free(context); } -}; - -struct llama_sampler_deleter { - void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); } -}; - -struct llama_lora_adapter_deleter { - void operator()(llama_lora_adapter * lora_adapter) { llama_lora_adapter_free(lora_adapter); } -}; - -typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr; -typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr; -typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr; -typedef std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter> llama_lora_adapter_ptr; diff --git a/llama/llama-quant.h b/llama/llama-quant.h deleted file mode 100644 index e60fc6278..000000000 --- a/llama/llama-quant.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - -#pragma once diff --git a/llama/llama.cpp/.rsync-filter b/llama/llama.cpp/.rsync-filter new file mode 100644 index 000000000..186e1c12e --- /dev/null +++ b/llama/llama.cpp/.rsync-filter @@ -0,0 +1,22 @@ +protect **/*.go +include common/ +include common/base64.* +include common/common.* +include common/json-schema-to-grammar.* +include common/json.* +include common/log.* +include common/sampling.* +include common/stb_image.* +include include/ +include include/llama.* +include include/llama-*.* +include examples/ +include examples/llava/ +include examples/llava/clip.* +include examples/llava/llava.* +include src/ +include src/llama.* +include src/llama-*.* +include src/unicode-data.* +include src/unicode.* +exclude * diff --git a/llama/llama.cpp/LICENSE b/llama/llama.cpp/LICENSE new file mode 100644 index 000000000..acb96ce78 --- /dev/null +++ b/llama/llama.cpp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/llama/base64.hpp b/llama/llama.cpp/common/base64.hpp similarity index 100% rename from llama/base64.hpp rename to llama/llama.cpp/common/base64.hpp diff --git a/llama/common.cpp b/llama/llama.cpp/common/common.cpp similarity index 98% rename from llama/common.cpp rename to llama/llama.cpp/common/common.cpp index 132de88aa..4bb140ee2 100644 --- a/llama/common.cpp +++ b/llama/llama.cpp/common/common.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #if defined(_MSC_VER) #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif diff --git a/llama/llama.cpp/common/common.go b/llama/llama.cpp/common/common.go new file mode 100644 index 000000000..ebbb738f2 --- /dev/null +++ b/llama/llama.cpp/common/common.go @@ -0,0 +1,6 @@ +package common + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -I${SRCDIR}/../include +// #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include +import "C" diff --git a/llama/common.h b/llama/llama.cpp/common/common.h similarity index 95% rename from llama/common.h rename to llama/llama.cpp/common/common.h index db931490c..0d452cf0f 100644 --- a/llama/common.h +++ b/llama/llama.cpp/common/common.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - // Various helper functions and utilities #pragma once diff --git a/llama/json-schema-to-grammar.cpp b/llama/llama.cpp/common/json-schema-to-grammar.cpp similarity index 97% rename from llama/json-schema-to-grammar.cpp rename to llama/llama.cpp/common/json-schema-to-grammar.cpp index cc870f9f3..2a8dbd22d 100644 --- a/llama/json-schema-to-grammar.cpp +++ b/llama/llama.cpp/common/json-schema-to-grammar.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "json-schema-to-grammar.h" -#include <algorithm> -#include <fstream> diff --git a/llama/llama.cpp/common/json-schema-to-grammar.h b/llama/llama.cpp/common/json-schema-to-grammar.h new file mode 100644 index 000000000..41623b346 --- /dev/null +++ b/llama/llama.cpp/common/json-schema-to-grammar.h @@ -0,0 +1,8 @@ +#pragma once + +#include "ggml.h" +// Change JSON_ASSERT from assert() to GGML_ASSERT: +#define JSON_ASSERT GGML_ASSERT +#include "json.hpp" + +std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/llama/json.hpp b/llama/llama.cpp/common/json.hpp similarity index 100% rename from llama/json.hpp rename to llama/llama.cpp/common/json.hpp diff --git a/llama/log.cpp b/llama/llama.cpp/common/log.cpp similarity index 89% rename from llama/log.cpp rename to llama/llama.cpp/common/log.cpp index 959f353ad..04c7c0ed1 100644 --- a/llama/log.cpp +++ b/llama/llama.cpp/common/log.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - -#include "log.h" - -#include <chrono> diff --git a/llama/log.h b/llama/llama.cpp/common/log.h similarity index 77% rename from llama/log.h rename to llama/llama.cpp/common/log.h index 14deeb15c..66605cc69 100644 --- a/llama/log.h +++ b/llama/llama.cpp/common/log.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once #include "ggml.h" // for ggml_log_level diff --git a/llama/sampling.cpp b/llama/llama.cpp/common/sampling.cpp similarity index 93% rename from llama/sampling.cpp rename to llama/llama.cpp/common/sampling.cpp index b4b72e281..e83a971c7 100644 --- a/llama/sampling.cpp +++ b/llama/llama.cpp/common/sampling.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #include "sampling.h" #include "common.h" diff --git a/llama/sampling.h b/llama/llama.cpp/common/sampling.h similarity index 78% rename from llama/sampling.h rename to llama/llama.cpp/common/sampling.h index 58f409036..348911b18 100644 --- a/llama/sampling.h +++ b/llama/llama.cpp/common/sampling.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/stb_image.h b/llama/llama.cpp/common/stb_image.h similarity index 100% rename from llama/stb_image.h rename to llama/llama.cpp/common/stb_image.h diff --git a/llama/clip.cpp b/llama/llama.cpp/examples/llava/clip.cpp similarity index 98% rename from llama/clip.cpp rename to llama/llama.cpp/examples/llava/clip.cpp index d8cb50938..718052e16 100644 --- a/llama/clip.cpp +++ b/llama/llama.cpp/examples/llava/clip.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - // NOTE: This is modified from clip.cpp only for LLaVA, // so there might be still unnecessary artifacts hanging around // I'll gradually clean and extend it diff --git a/llama/clip.h b/llama/llama.cpp/examples/llava/clip.h similarity index 74% rename from llama/clip.h rename to llama/llama.cpp/examples/llava/clip.h index 42f24bd6c..1603edd26 100644 --- a/llama/clip.h +++ b/llama/llama.cpp/examples/llava/clip.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef CLIP_H #define CLIP_H diff --git a/llama/llava.cpp b/llama/llama.cpp/examples/llava/llava.cpp similarity index 95% rename from llama/llava.cpp rename to llama/llama.cpp/examples/llava/llava.cpp index 15393e2d9..0f0f3f623 100644 --- a/llama/llava.cpp +++ b/llama/llama.cpp/examples/llava/llava.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "clip.h" #include "llava.h" diff --git a/llama/llama.cpp/examples/llava/llava.go b/llama/llama.cpp/examples/llava/llava.go new file mode 100644 index 000000000..37b031cb7 --- /dev/null +++ b/llama/llama.cpp/examples/llava/llava.go @@ -0,0 +1,6 @@ +package llava + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -I${SRCDIR}/../../include -I${SRCDIR}/../../common +// #cgo CPPFLAGS: -I${SRCDIR}/../../../../ml/backend/ggml/ggml/include +import "C" diff --git a/llama/llava.h b/llama/llama.cpp/examples/llava/llava.h similarity index 59% rename from llama/llava.h rename to llama/llama.cpp/examples/llava/llava.h index 7e8e501f0..b6feb3027 100644 --- a/llama/llava.h +++ b/llama/llama.cpp/examples/llava/llava.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #ifndef LLAVA_H #define LLAVA_H diff --git a/llama/llama.cpp/include/llama-cpp.h b/llama/llama.cpp/include/llama-cpp.h new file mode 100644 index 000000000..1500cb2fc --- /dev/null +++ b/llama/llama.cpp/include/llama-cpp.h @@ -0,0 +1,30 @@ +#pragma once + +#ifndef __cplusplus +#error "This header is for C++ only" +#endif + +#include <memory> + +#include "llama.h" + +struct llama_model_deleter { + void operator()(llama_model * model) { llama_free_model(model); } +}; + +struct llama_context_deleter { + void operator()(llama_context * context) { llama_free(context); } +}; + +struct llama_sampler_deleter { + void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); } +}; + +struct llama_lora_adapter_deleter { + void operator()(llama_lora_adapter * lora_adapter) { llama_lora_adapter_free(lora_adapter); } +}; + +typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr; +typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr; +typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr; +typedef std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter> llama_lora_adapter_ptr; diff --git a/llama/llama.h b/llama/llama.cpp/include/llama.h similarity index 98% rename from llama/llama.h rename to llama/llama.cpp/include/llama.h index 164d3b6fe..9f411960e 100644 --- a/llama/llama.h +++ b/llama/llama.cpp/include/llama.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #ifndef LLAMA_H #define LLAMA_H diff --git a/llama/llama-adapter.cpp b/llama/llama.cpp/src/llama-adapter.cpp similarity index 90% rename from llama/llama-adapter.cpp rename to llama/llama.cpp/src/llama-adapter.cpp index 02a48f3fc..9fd7edea3 100644 --- a/llama/llama-adapter.cpp +++ b/llama/llama.cpp/src/llama-adapter.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-adapter.h" #include "llama-model.h" diff --git a/llama/llama-adapter.h b/llama/llama.cpp/src/llama-adapter.h similarity index 55% rename from llama/llama-adapter.h rename to llama/llama.cpp/src/llama-adapter.h index 1bf860d7f..5f1870cc8 100644 --- a/llama/llama-adapter.h +++ b/llama/llama.cpp/src/llama-adapter.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "llama-impl.h" diff --git a/llama/llama-arch.cpp b/llama/llama.cpp/src/llama-arch.cpp similarity index 98% rename from llama/llama-arch.cpp rename to llama/llama.cpp/src/llama-arch.cpp index a6cc790e4..b35aeb315 100644 --- a/llama/llama-arch.cpp +++ b/llama/llama.cpp/src/llama-arch.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-arch.h" #include "llama-impl.h" diff --git a/llama/llama-arch.h b/llama/llama.cpp/src/llama-arch.h similarity index 89% rename from llama/llama-arch.h rename to llama/llama.cpp/src/llama-arch.h index fa8422a89..e8235ae00 100644 --- a/llama/llama-arch.h +++ b/llama/llama.cpp/src/llama-arch.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "ggml.h" // ggml_op diff --git a/llama/llama-batch.cpp b/llama/llama.cpp/src/llama-batch.cpp similarity index 91% rename from llama/llama-batch.cpp rename to llama/llama.cpp/src/llama-batch.cpp index 0e0488c3c..8682b0e68 100644 --- a/llama/llama-batch.cpp +++ b/llama/llama.cpp/src/llama-batch.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-batch.h" #include <cstring> diff --git a/llama/llama-batch.h b/llama/llama.cpp/src/llama-batch.h similarity index 67% rename from llama/llama-batch.h rename to llama/llama.cpp/src/llama-batch.h index eb439c3d0..773c3808b 100644 --- a/llama/llama-batch.h +++ b/llama/llama.cpp/src/llama-batch.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #pragma once #include "llama.h" diff --git a/llama/llama-chat.cpp b/llama/llama.cpp/src/llama-chat.cpp similarity index 95% rename from llama/llama-chat.cpp rename to llama/llama.cpp/src/llama-chat.cpp index 099b33422..44670d3d8 100644 --- a/llama/llama-chat.cpp +++ b/llama/llama.cpp/src/llama-chat.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-chat.h" #include "llama.h" diff --git a/llama/llama-chat.h b/llama/llama.cpp/src/llama-chat.h similarity index 54% rename from llama/llama-chat.h rename to llama/llama.cpp/src/llama-chat.h index deabed71d..b8e94d9ef 100644 --- a/llama/llama-chat.h +++ b/llama/llama.cpp/src/llama-chat.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include <string> diff --git a/llama/llama-context.cpp b/llama/llama.cpp/src/llama-context.cpp similarity index 98% rename from llama/llama-context.cpp rename to llama/llama.cpp/src/llama-context.cpp index 91bfd13f7..9d0e7ca36 100644 --- a/llama/llama-context.cpp +++ b/llama/llama.cpp/src/llama-context.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-context.h" #include <cassert> diff --git a/llama/llama-context.h b/llama/llama.cpp/src/llama-context.h similarity index 80% rename from llama/llama-context.h rename to llama/llama.cpp/src/llama-context.h index 643033946..4980a60e8 100644 --- a/llama/llama-context.h +++ b/llama/llama.cpp/src/llama-context.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #pragma once #include "llama.h" diff --git a/llama/llama.cpp/src/llama-cparams.cpp b/llama/llama.cpp/src/llama-cparams.cpp new file mode 100644 index 000000000..28369be36 --- /dev/null +++ b/llama/llama.cpp/src/llama-cparams.cpp @@ -0,0 +1 @@ +#include "llama-cparams.h" diff --git a/llama/llama.cpp/src/llama-cparams.h b/llama/llama.cpp/src/llama-cparams.h new file mode 100644 index 000000000..9681e5a08 --- /dev/null +++ b/llama/llama.cpp/src/llama-cparams.h @@ -0,0 +1,38 @@ +#pragma once + +#include "llama.h" + +#include <cstdint> + +struct llama_cparams { + uint32_t n_ctx; // context size used during inference + uint32_t n_batch; + uint32_t n_ubatch; + uint32_t n_seq_max; + int n_threads; // number of threads to use for generation + int n_threads_batch; // number of threads to use for batch processing + + float rope_freq_base; + float rope_freq_scale; + + uint32_t n_ctx_orig_yarn; + // These hyperparameters are not exposed in GGUF, because all + // existing YaRN models use the same values for them. + float yarn_ext_factor; + float yarn_attn_factor; + float yarn_beta_fast; + float yarn_beta_slow; + float defrag_thold; + + bool embeddings; + bool causal_attn; + bool offload_kqv; + bool flash_attn; + bool no_perf; + bool cross_attn; + + enum llama_pooling_type pooling_type; + + ggml_backend_sched_eval_callback cb_eval; + void * cb_eval_user_data; +}; diff --git a/llama/llama-grammar.cpp b/llama/llama.cpp/src/llama-grammar.cpp similarity index 97% rename from llama/llama-grammar.cpp rename to llama/llama.cpp/src/llama-grammar.cpp index 243cb452c..186dc9a25 100644 --- a/llama/llama-grammar.cpp +++ b/llama/llama.cpp/src/llama-grammar.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #include "llama-grammar.h" #include "llama-impl.h" diff --git a/llama/llama-grammar.h b/llama/llama.cpp/src/llama-grammar.h similarity index 78% rename from llama/llama-grammar.h rename to llama/llama.cpp/src/llama-grammar.h index 41811c742..f8b40c651 100644 --- a/llama/llama-grammar.h +++ b/llama/llama.cpp/src/llama-grammar.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/llama-hparams.cpp b/llama/llama.cpp/src/llama-hparams.cpp similarity index 61% rename from llama/llama-hparams.cpp rename to llama/llama.cpp/src/llama-hparams.cpp index d47225e76..42f8a58ff 100644 --- a/llama/llama-hparams.cpp +++ b/llama/llama.cpp/src/llama-hparams.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-hparams.h" #include "ggml.h" diff --git a/llama/llama-hparams.h b/llama/llama.cpp/src/llama-hparams.h similarity index 78% rename from llama/llama-hparams.h rename to llama/llama.cpp/src/llama-hparams.h index b2d4bd614..f826cd9ac 100644 --- a/llama/llama-hparams.h +++ b/llama/llama.cpp/src/llama-hparams.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/llama-impl.cpp b/llama/llama.cpp/src/llama-impl.cpp similarity index 82% rename from llama/llama-impl.cpp rename to llama/llama.cpp/src/llama-impl.cpp index de726cb21..a05ba4f63 100644 --- a/llama/llama-impl.cpp +++ b/llama/llama.cpp/src/llama-impl.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-impl.h" #include "llama.h" diff --git a/llama/llama-impl.h b/llama/llama.cpp/src/llama-impl.h similarity index 58% rename from llama/llama-impl.h rename to llama/llama.cpp/src/llama-impl.h index c9ae33f4a..12d1fb082 100644 --- a/llama/llama-impl.h +++ b/llama/llama.cpp/src/llama-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "ggml.h" // for ggml_log_level diff --git a/llama/llama-kv-cache.cpp b/llama/llama.cpp/src/llama-kv-cache.cpp similarity index 95% rename from llama/llama-kv-cache.cpp rename to llama/llama.cpp/src/llama-kv-cache.cpp index aa555e652..cf814dbe5 100644 --- a/llama/llama-kv-cache.cpp +++ b/llama/llama.cpp/src/llama-kv-cache.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-kv-cache.h" #include "llama-impl.h" diff --git a/llama/llama-kv-cache.h b/llama/llama.cpp/src/llama-kv-cache.h similarity index 84% rename from llama/llama-kv-cache.h rename to llama/llama.cpp/src/llama-kv-cache.h index a4d65611a..dca6f3998 100644 --- a/llama/llama-kv-cache.h +++ b/llama/llama.cpp/src/llama-kv-cache.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/llama-mmap.cpp b/llama/llama.cpp/src/llama-mmap.cpp similarity index 93% rename from llama/llama-mmap.cpp rename to llama/llama.cpp/src/llama-mmap.cpp index 3868e9dd8..a99326335 100644 --- a/llama/llama-mmap.cpp +++ b/llama/llama.cpp/src/llama-mmap.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-mmap.h" #include "llama-impl.h" diff --git a/llama/llama-mmap.h b/llama/llama.cpp/src/llama-mmap.h similarity index 52% rename from llama/llama-mmap.h rename to llama/llama.cpp/src/llama-mmap.h index ebd7dc16e..6bcddee8c 100644 --- a/llama/llama-mmap.h +++ b/llama/llama.cpp/src/llama-mmap.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include <memory> diff --git a/llama/llama-model-loader.cpp b/llama/llama.cpp/src/llama-model-loader.cpp similarity index 97% rename from llama/llama-model-loader.cpp rename to llama/llama.cpp/src/llama-model-loader.cpp index ebb369e49..b12d65666 100644 --- a/llama/llama-model-loader.cpp +++ b/llama/llama.cpp/src/llama-model-loader.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - #include "llama-model-loader.h" #include "ggml.h" diff --git a/llama/llama-model-loader.h b/llama/llama.cpp/src/llama-model-loader.h similarity index 81% rename from llama/llama-model-loader.h rename to llama/llama.cpp/src/llama-model-loader.h index 873d4c0c5..1ec478195 100644 --- a/llama/llama-model-loader.h +++ b/llama/llama.cpp/src/llama-model-loader.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/llama-model.cpp b/llama/llama.cpp/src/llama-model.cpp similarity index 98% rename from llama/llama-model.cpp rename to llama/llama.cpp/src/llama-model.cpp index 2482f98a3..4f9bbf90e 100644 --- a/llama/llama-model.cpp +++ b/llama/llama.cpp/src/llama-model.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-model.h" #include "llama-impl.h" diff --git a/llama/llama-model.h b/llama/llama.cpp/src/llama-model.h similarity index 91% rename from llama/llama-model.h rename to llama/llama.cpp/src/llama-model.h index 756b09f42..5b23e2baf 100644 --- a/llama/llama-model.h +++ b/llama/llama.cpp/src/llama-model.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "llama.h" diff --git a/llama/llama-quant.cpp b/llama/llama.cpp/src/llama-quant.cpp similarity index 97% rename from llama/llama-quant.cpp rename to llama/llama.cpp/src/llama-quant.cpp index 6b4d288b0..27def6fd9 100644 --- a/llama/llama-quant.cpp +++ b/llama/llama.cpp/src/llama-quant.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "llama-quant.h" #include "llama-impl.h" diff --git a/llama/llama.cpp/src/llama-quant.h b/llama/llama.cpp/src/llama-quant.h new file mode 100644 index 000000000..6f70f09be --- /dev/null +++ b/llama/llama.cpp/src/llama-quant.h @@ -0,0 +1 @@ +#pragma once diff --git a/llama/llama-sampling.cpp b/llama/llama.cpp/src/llama-sampling.cpp similarity index 98% rename from llama/llama-sampling.cpp rename to llama/llama.cpp/src/llama-sampling.cpp index 1071efdca..69cea2f14 100644 --- a/llama/llama-sampling.cpp +++ b/llama/llama.cpp/src/llama-sampling.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-sampling.h" #include "llama-impl.h" diff --git a/llama/llama-sampling.h b/llama/llama.cpp/src/llama-sampling.h similarity index 54% rename from llama/llama-sampling.h rename to llama/llama.cpp/src/llama-sampling.h index 10a7878f3..919f6fdfc 100644 --- a/llama/llama-sampling.h +++ b/llama/llama.cpp/src/llama-sampling.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once // TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ? 
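The llama-cpp.h header added earlier in this patch wraps the library's raw handles in std::unique_ptr typedefs whose deleters call llama_free_model, llama_free, llama_sampler_free, and llama_lora_adapter_free. A minimal usage sketch, assuming the llama.h entry points present at the vendored commit (llama_load_model_from_file, llama_new_context_with_model, and the *_default_params helpers); the model path here is a placeholder:

#include "llama-cpp.h" // pulls in llama.h plus the smart-pointer typedefs

int main() {
    // llama_model_ptr invokes llama_free_model when it goes out of scope.
    llama_model_ptr model(llama_load_model_from_file("model.gguf", llama_model_default_params()));
    if (!model) {
        return 1;
    }
    // Likewise, llama_context_ptr invokes llama_free on scope exit.
    llama_context_ptr ctx(llama_new_context_with_model(model.get(), llama_context_default_params()));
    return ctx ? 0 : 1;
}

Because each deleter is part of the pointer type, returning a llama_model_ptr or llama_context_ptr from a helper transfers ownership together with the correct cleanup.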
diff --git a/llama/llama-vocab.cpp b/llama/llama.cpp/src/llama-vocab.cpp similarity index 98% rename from llama/llama-vocab.cpp rename to llama/llama.cpp/src/llama-vocab.cpp index 7f9f699ac..8f44705ab 100644 --- a/llama/llama-vocab.cpp +++ b/llama/llama.cpp/src/llama-vocab.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-vocab.h" #include "llama-impl.h" diff --git a/llama/llama-vocab.h b/llama/llama.cpp/src/llama-vocab.h similarity index 84% rename from llama/llama-vocab.h rename to llama/llama.cpp/src/llama-vocab.h index 81b14fff4..0d00086da 100644 --- a/llama/llama-vocab.h +++ b/llama/llama.cpp/src/llama-vocab.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "llama.h" diff --git a/llama/llama.cpp b/llama/llama.cpp/src/llama.cpp similarity index 99% rename from llama/llama.cpp rename to llama/llama.cpp/src/llama.cpp index 9b123fce8..c95da45d3 100644 --- a/llama/llama.cpp +++ b/llama/llama.cpp/src/llama.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "llama-impl.h" #include "llama-chat.h" diff --git a/llama/llama.cpp/src/llama.go b/llama/llama.cpp/src/llama.go new file mode 100644 index 000000000..ddbd53785 --- /dev/null +++ b/llama/llama.cpp/src/llama.go @@ -0,0 +1,8 @@ +package llama + +// #cgo CXXFLAGS: -std=c++17 +// #cgo CPPFLAGS: -I${SRCDIR}/../include +// #cgo CPPFLAGS: -I${SRCDIR}/../../../ml/backend/ggml/ggml/include +// #cgo windows CPPFLAGS: -D_WIN32_WINNT=0x0602 +import "C" +import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src" diff --git a/llama/unicode-data.cpp b/llama/llama.cpp/src/unicode-data.cpp similarity index 99% rename from llama/unicode-data.cpp rename to llama/llama.cpp/src/unicode-data.cpp index 393cd273b..04dcd7fcf 100644 --- a/llama/unicode-data.cpp +++ b/llama/llama.cpp/src/unicode-data.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 // generated with scripts/gen-unicode-data.py
 
 #include "unicode-data.h"
diff --git a/llama/llama.cpp/src/unicode-data.h b/llama/llama.cpp/src/unicode-data.h
new file mode 100644
index 000000000..f6973ebd2
--- /dev/null
+++ b/llama/llama.cpp/src/unicode-data.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <cstdint>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+struct range_nfd {
+    uint32_t first;
+    uint32_t last;
+    uint32_t nfd;
+};
+
+static const uint32_t MAX_CODEPOINTS = 0x110000;
+
+extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
+extern const std::unordered_set<uint32_t> unicode_set_whitespace;
+extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
+extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
+extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
diff --git a/llama/unicode.cpp b/llama/llama.cpp/src/unicode.cpp
similarity index 96%
rename from llama/unicode.cpp
rename to llama/llama.cpp/src/unicode.cpp
index 5dcb2e985..6155da800 100644
--- a/llama/unicode.cpp
+++ b/llama/llama.cpp/src/unicode.cpp
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */ - #if defined(_MSC_VER) #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif diff --git a/llama/unicode.h b/llama/llama.cpp/src/unicode.h similarity index 63% rename from llama/unicode.h rename to llama/llama.cpp/src/unicode.h index b6a99568b..c27098df7 100644 --- a/llama/unicode.h +++ b/llama/llama.cpp/src/unicode.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include diff --git a/llama/llama.go b/llama/llama.go index 18790a95d..1d4513e31 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -1,70 +1,20 @@ package llama -//go:generate make -j 8 - /* -#cgo CFLAGS: -O3 -std=c17 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64 -#cgo CXXFLAGS: -O3 -std=c++17 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64 -#cgo amd64,avx CFLAGS: -mavx -#cgo amd64,avx CXXFLAGS: -mavx -#cgo amd64,avx2 CFLAGS: -mavx2 -mfma -mf16c -#cgo amd64,avx2 CXXFLAGS: -mavx2 -mfma -mf16c -#cgo amd64,avx512 CFLAGS: -mavx512f -mavx512dq -mavx512bw -#cgo amd64,avx512 CXXFLAGS: -mavx512f -mavx512dq -mavx512bw -#cgo amd64,avx512bf16 CFLAGS: -mavx512bf16 -D__AVX512BF16__ -#cgo amd64,avx512bf16 CXXFLAGS: -mavx512bf16 -D__AVX512BF16__ -#cgo amd64,avx512vbmi CFLAGS: -mavx512vbmi -D__AVX512VBMI__ -#cgo amd64,avx512vbmi CXXFLAGS: -mavx512vbmi -D__AVX512VBMI__ -#cgo amd64,avx512vnni CFLAGS: -mavx512vnni -D__AVX512VNNI__ -#cgo amd64,avx512vnni CXXFLAGS: -mavx512vnni -D__AVX512VNNI__ -#cgo amd64,f16c CFLAGS: -mf16c -#cgo amd64,f16c CXXFLAGS: -mf16c -#cgo amd64,fma CFLAGS: -mfma -#cgo amd64,fma CXXFLAGS: -mfma -#cgo cuda CFLAGS: -fPIE -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 -#cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 -#cgo cuda_jetpack5 LDFLAGS: -lggml_cuda_jetpack5 -#cgo cuda_jetpack6 LDFLAGS: -lggml_cuda_jetpack6 -#cgo cuda_v11 LDFLAGS: -lggml_cuda_v11 -#cgo cuda_v12 LDFLAGS: -lggml_cuda_v12 -#cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -#cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -#cgo darwin,amd64 LDFLAGS: -framework Foundation -#cgo darwin,amd64,avx2 CFLAGS: 
-DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -#cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -#cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate -#cgo darwin,arm64 CFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE -#cgo darwin,arm64 CXXFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE -#cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate -#cgo linux CFLAGS: -D_GNU_SOURCE -#cgo linux CXXFLAGS: -D_GNU_SOURCE -#cgo linux LDFLAGS: -ldl -#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux-amd64 -#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux-arm64 -#cgo linux,arm64,sve CFLAGS: -march=armv8.6-a+sve -#cgo linux,arm64,sve CXXFLAGS: -march=armv8.6-a+sve -#cgo linux,cuda LDFLAGS: -lcuda -lcudart -lcublas -lcublasLt -lpthread -lrt -lresolv -#cgo linux,rocm LDFLAGS: -lpthread -lrt -lresolv -#cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIP -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 -#cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIP -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1 -#cgo rocm LDFLAGS: -L${SRCDIR} -lggml_rocm -lhipblas -lamdhip64 -lrocblas -#cgo windows CFLAGS: -Wno-discarded-qualifiers -D_WIN32_WINNT=0x602 -#cgo windows CXXFLAGS: -D_WIN32_WINNT=0x602 -#cgo windows LDFLAGS: -lmsvcrt -static-libstdc++ -static-libgcc -static -#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/windows-amd64 -#cgo windows,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -#cgo windows,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA -#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/windows-arm64 -#cgo windows,cuda LDFLAGS: -lcuda -lcudart -lcublas -lcublasLt -#cgo windows,rocm LDFLAGS: -lggml_rocm -lhipblas -lamdhip64 -lrocblas +#cgo CFLAGS: -std=c11 +#cgo CXXFLAGS: -std=c++17 +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/include +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/common +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/examples/llava +#cgo CPPFLAGS: -I${SRCDIR}/llama.cpp/src +#cgo CPPFLAGS: -I${SRCDIR}/../ml/backend/ggml/ggml/include #include +#include "ggml.h" #include "llama.h" #include "clip.h" -#include "ggml.h" #include "llava.h" + #include "mllama.h" #include "sampling_ext.h" @@ -96,9 +46,15 @@ import ( "strings" "sync/atomic" "unsafe" + + _ "github.com/ollama/ollama/llama/llama.cpp/common" + _ "github.com/ollama/ollama/llama/llama.cpp/examples/llava" + _ "github.com/ollama/ollama/llama/llama.cpp/src" + "github.com/ollama/ollama/ml/backend/ggml/ggml/src" ) func BackendInit() { + ggml.OnceLoad() C.llama_backend_init() } diff --git a/llama/mmq.h b/llama/mmq.h deleted file mode 100644 index c78d3a1c1..000000000 --- a/llama/mmq.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the 
Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include "common.h" - -size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst); - -size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor); - -void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); - -void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst); diff --git a/llama/patches/0001-cuda.patch b/llama/patches/0001-cuda.patch index 574654b5f..0bf338f2b 100644 --- a/llama/patches/0001-cuda.patch +++ b/llama/patches/0001-cuda.patch @@ -4,39 +4,44 @@ Date: Thu, 6 Jun 2024 23:55:47 -0700 Subject: [PATCH] cuda --- - ggml/src/ggml-backend.cpp | 5 +++++ - ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++++ - 2 files changed, 9 insertions(+) + ggml/src/ggml-backend.cpp | 1 - + ggml/src/ggml-cuda/ggml-cuda.cu | 1 + + ggml/src/ggml-metal/ggml-metal.m | 1 + + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp -index e2d6c405..1b62c056 100644 +index e2d6c405..a12172dc 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp -@@ -106,7 +106,12 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) { +@@ -106,7 +106,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) { if (buffer->iface.free_buffer != NULL) { buffer->iface.free_buffer(buffer); } -+ -+// TODO: this needs to be freed in cuda and hip backends because -+// the cuda backend implementation compiled with msvc -+#if !defined(GGML_USE_CUDA) && !defined(GGML_USE_HIP) - delete buffer; -+#endif +- delete buffer; } size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) { diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index 0b06be72..0a6ae325 100644 +index 0b06be72..be29e979 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu -@@ -424,6 +424,10 @@ struct ggml_backend_cuda_buffer_context { +@@ -424,6 +424,7 @@ struct ggml_backend_cuda_buffer_context { static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) { ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; delete ctx; -+ -+ // TODO: this needs to be freed in cuda and hipblas backends because -+ // the cuda backend implementation compiled with msvc -+ free(buffer); ++ delete buffer; } static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) { +diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m +index a85502ee..cd8ef741 100644 +--- a/ggml/src/ggml-metal/ggml-metal.m ++++ 
b/ggml/src/ggml-metal/ggml-metal.m +@@ -4187,6 +4187,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer) + } + + free(ctx); ++ free(buffer); + } + + static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) { diff --git a/llama/patches/0006-conditional-fattn.patch b/llama/patches/0006-conditional-fattn.patch index 62c248074..739905780 100644 --- a/llama/patches/0006-conditional-fattn.patch +++ b/llama/patches/0006-conditional-fattn.patch @@ -8,10 +8,10 @@ Subject: [PATCH] conditional-fattn 1 file changed, 2 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index 0a6ae325..bb425ee8 100644 +index be29e979..aaa79ea4 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu -@@ -2162,9 +2162,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg +@@ -2159,9 +2159,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_OP_ARGSORT: ggml_cuda_op_argsort(ctx, dst); break; diff --git a/llama/patches/0008-add-mllama-support.patch b/llama/patches/0007-add-mllama-support.patch similarity index 100% rename from llama/patches/0008-add-mllama-support.patch rename to llama/patches/0007-add-mllama-support.patch diff --git a/llama/patches/0007-blas.patch b/llama/patches/0007-blas.patch deleted file mode 100644 index 121a1cd95..000000000 --- a/llama/patches/0007-blas.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jesse Gross -Date: Mon, 30 Sep 2024 16:31:04 -0700 -Subject: [PATCH] blas - ---- - ggml/src/ggml-blas/ggml-blas.cpp | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp -index ec158dfa..b3ac1fa4 100644 ---- a/ggml/src/ggml-blas/ggml-blas.cpp -+++ b/ggml/src/ggml-blas/ggml-blas.cpp -@@ -1,3 +1,5 @@ -+#ifdef GGML_USE_BLAS -+ - #include "ggml-impl.h" - #include "ggml-blas.h" - #include "ggml-backend-impl.h" -@@ -515,3 +517,5 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) { - } - - GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) -+ -+#endif // GGML_USE_BLAS -\ No newline at end of file diff --git a/llama/patches/0009-add-unpad-operator.patch b/llama/patches/0008-add-unpad-operator.patch similarity index 97% rename from llama/patches/0009-add-unpad-operator.patch rename to llama/patches/0008-add-unpad-operator.patch index ba857ef0c..fd070df9c 100644 --- a/llama/patches/0009-add-unpad-operator.patch +++ b/llama/patches/0008-add-unpad-operator.patch @@ -126,10 +126,10 @@ index b7fefb9d..b307d554 100644 case GGML_OP_TIMESTEP_EMBEDDING: case GGML_OP_ARGSORT: diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index bb425ee8..1e7c2a22 100644 +index aaa79ea4..9286f866 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu -@@ -2085,6 +2085,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg +@@ -2082,6 +2082,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_OP_PAD: ggml_cuda_op_pad(ctx, dst); break; @@ -139,7 +139,7 @@ index bb425ee8..1e7c2a22 100644 case GGML_OP_ARANGE: ggml_cuda_op_arange(ctx, dst); break; -@@ -3013,6 +3016,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g +@@ -3010,6 +3013,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_GROUP_NORM: case 
GGML_OP_UPSCALE: case GGML_OP_PAD: @@ -211,10 +211,10 @@ index 8fd386b0..e2ededc3 100644 void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m -index a85502ee..84e027eb 100644 +index cd8ef741..318addec 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m -@@ -311,6 +311,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte +@@ -311,6 +311,7 @@ enum ggml_metal_kernel_type { GGML_METAL_KERNEL_TYPE_UPSCALE_F32, GGML_METAL_KERNEL_TYPE_PAD_F32, GGML_METAL_KERNEL_TYPE_PAD_REFLECT_1D_F32, @@ -222,7 +222,7 @@ index a85502ee..84e027eb 100644 GGML_METAL_KERNEL_TYPE_ARANGE_F32, GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32, GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, -@@ -910,6 +911,7 @@ @implementation GGMLMetalClass +@@ -910,6 +911,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_REFLECT_1D_F32, pad_reflect_1d_f32, true); diff --git a/llama/patches/0010-fix-deepseek-deseret-regex.patch b/llama/patches/0009-fix-deepseek-deseret-regex.patch similarity index 100% rename from llama/patches/0010-fix-deepseek-deseret-regex.patch rename to llama/patches/0009-fix-deepseek-deseret-regex.patch diff --git a/llama/patches/0012-Maintain-ordering-for-rules-for-grammar.patch b/llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch similarity index 100% rename from llama/patches/0012-Maintain-ordering-for-rules-for-grammar.patch rename to llama/patches/0010-Maintain-ordering-for-rules-for-grammar.patch diff --git a/llama/patches/0013-fix-missing-arg-in-static-assert-on-windows.patch b/llama/patches/0011-fix-missing-arg-in-static-assert-on-windows.patch similarity index 100% rename from llama/patches/0013-fix-missing-arg-in-static-assert-on-windows.patch rename to llama/patches/0011-fix-missing-arg-in-static-assert-on-windows.patch diff --git a/llama/patches/0011-relative-include-paths.patch b/llama/patches/0011-relative-include-paths.patch deleted file mode 100644 index c1e56b9cf..000000000 --- a/llama/patches/0011-relative-include-paths.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: jmorganca -Date: Tue, 3 Dec 2024 21:30:51 -0800 -Subject: [PATCH] relative include paths - ---- - ggml/src/ggml-cpu/ggml-cpu.c | 2 +- - ggml/src/ggml-cpu/ggml-cpu.cpp | 3 +-- - ggml/src/ggml-quants.c | 2 +- - 3 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c -index b307d554..4eb39c52 100644 ---- a/ggml/src/ggml-cpu/ggml-cpu.c -+++ b/ggml/src/ggml-cpu/ggml-cpu.c -@@ -10,7 +10,7 @@ - #include "ggml-quants.h" - #include "ggml-cpu-quants.h" - #include "ggml-threading.h" --#include "amx/amx.h" -+#include "amx.h" - #include "ggml.h" - - #if defined(_MSC_VER) || defined(__MINGW32__) -diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp -index f11399cc..2a8b40ce 100644 ---- a/ggml/src/ggml-cpu/ggml-cpu.cpp -+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp -@@ -4,8 +4,7 @@ - #include "ggml-cpu-aarch64.h" - #include "ggml-cpu-traits.h" - #include "ggml-impl.h" --#include "amx/amx.h" -- -+#include "amx.h" - 
#include <cctype>
- #include <string>
- #include <vector>
-diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
-index 7918388a..e2ed84e4 100644
---- a/ggml/src/ggml-quants.c
-+++ b/ggml/src/ggml-quants.c
-@@ -3,7 +3,7 @@
- 
- #include "ggml-quants.h"
- #include "ggml-impl.h"
---#include "ggml-cpu/ggml-cpu-impl.h"
-+#include "ggml-cpu-impl.h"
- #include "ggml-cpu.h"
- 
- #include <math.h>
diff --git a/llama/patches/0014-llama-Ensure-KV-cache-is-fully-defragmented.patch b/llama/patches/0012-llama-Ensure-KV-cache-is-fully-defragmented.patch
similarity index 100%
rename from llama/patches/0014-llama-Ensure-KV-cache-is-fully-defragmented.patch
rename to llama/patches/0012-llama-Ensure-KV-cache-is-fully-defragmented.patch
diff --git a/llama/patches/0015-re-enable-gpu-for-clip.patch b/llama/patches/0013-re-enable-gpu-for-clip.patch
similarity index 100%
rename from llama/patches/0015-re-enable-gpu-for-clip.patch
rename to llama/patches/0013-re-enable-gpu-for-clip.patch
diff --git a/llama/patches/0014-sort-devices-by-score.patch b/llama/patches/0014-sort-devices-by-score.patch
new file mode 100644
index 000000000..67c2127a4
--- /dev/null
+++ b/llama/patches/0014-sort-devices-by-score.patch
@@ -0,0 +1,82 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Tue, 14 Jan 2025 12:01:24 -0800
+Subject: [PATCH] sort devices by score
+
+---
+ ggml/src/ggml-backend-reg.cpp | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
+index 899d16f2..ac5cda07 100644
+--- a/ggml/src/ggml-backend-reg.cpp
++++ b/ggml/src/ggml-backend-reg.cpp
+@@ -150,7 +150,7 @@ struct ggml_backend_reg_entry {
+ 
+ struct ggml_backend_registry {
+     std::vector<ggml_backend_reg_entry> backends;
+-    std::vector<ggml_backend_dev_t> devices;
++    std::vector<std::pair<ggml_backend_dev_t, int>> devices;
+ 
+     ggml_backend_registry() {
+ #ifdef GGML_USE_CUDA
+@@ -195,7 +195,7 @@ struct ggml_backend_registry {
+         }
+     }
+ 
+-    void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
++    void register_backend(ggml_backend_reg_t reg, int score = -1, dl_handle_ptr handle = nullptr) {
+         if (!reg) {
+             return;
+         }
+@@ -206,15 +206,15 @@ struct ggml_backend_registry {
+ #endif
+         backends.push_back({ reg, std::move(handle) });
+         for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
+-            register_device(ggml_backend_reg_dev_get(reg, i));
++            register_device(ggml_backend_reg_dev_get(reg, i), score);
+         }
+     }
+ 
+-    void register_device(ggml_backend_dev_t device) {
++    void register_device(ggml_backend_dev_t device, int score = -1) {
+ #ifndef NDEBUG
+         GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
+ #endif
+-        devices.push_back(device);
++        devices.push_back({device, score});
+     }
+ 
+     ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
+@@ -257,7 +257,7 @@ struct ggml_backend_registry {
+ 
+         GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
+ 
+-        register_backend(reg, std::move(handle));
++        register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
+ 
+         return reg;
+     }
+@@ -280,7 +280,7 @@ struct ggml_backend_registry {
+         // remove devices
+         devices.erase(
+             std::remove_if(devices.begin(), devices.end(),
+-                [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
++                [reg](std::pair<ggml_backend_dev_t, int> dev) { return ggml_backend_dev_backend_reg(dev.first) == reg; }),
+             devices.end());
+ 
+         // remove backend
+@@ -338,7 +338,12 @@ size_t ggml_backend_dev_count() {
+ 
+ ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
+     GGML_ASSERT(index < ggml_backend_dev_count());
+-    return get_reg().devices[index];
++    auto devices = get_reg().devices;
++    if (!std::is_heap(devices.begin(), devices.end())) {
++        std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
++    }
++
++    return devices[index].first;
+ }
+ 
+ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
diff --git a/llama/patches/0015-add-phony-target-ggml-cpu-for-all-cpu-variants.patch b/llama/patches/0015-add-phony-target-ggml-cpu-for-all-cpu-variants.patch
new file mode 100644
index 000000000..e68950a57
--- /dev/null
+++ b/llama/patches/0015-add-phony-target-ggml-cpu-for-all-cpu-variants.patch
@@ -0,0 +1,29 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Tue, 14 Jan 2025 15:59:04 -0800
+Subject: [PATCH] add phony target ggml-cpu for all cpu variants
+
+---
+ ggml/src/CMakeLists.txt | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
+index 84101c32..72b488dd 100644
+--- a/ggml/src/CMakeLists.txt
++++ b/ggml/src/CMakeLists.txt
+@@ -278,6 +278,7 @@ function(ggml_add_cpu_backend_variant tag_name)
+     endforeach()
+ 
+     ggml_add_cpu_backend_variant_impl(${tag_name})
++    add_dependencies(ggml-cpu ggml-cpu-${tag_name})
+ endfunction()
+ 
+ ggml_add_backend(CPU)
+@@ -286,6 +287,7 @@ if (GGML_CPU_ALL_VARIANTS)
+     if (NOT GGML_BACKEND_DL)
+         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
+     endif()
++    add_custom_target(ggml-cpu)
+     ggml_add_cpu_backend_variant(sandybridge AVX)
+     ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
+     ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
diff --git a/llama/sgemm.h b/llama/sgemm.h
deleted file mode 100644
index 3d2909515..000000000
--- a/llama/sgemm.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-#include <stdint.h>
-#include <stdbool.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-bool llamafile_sgemm(const struct ggml_compute_params * params, int64_t, int64_t, int64_t,
-                     const void *, int64_t, const void *, int64_t, void *, int64_t,
-                     int, int, int);
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/llama/unicode-data.h b/llama/unicode-data.h
deleted file mode 100644
index 4bd020f9a..000000000
--- a/llama/unicode-data.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <cstdint>
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-
-struct range_nfd {
-    uint32_t first;
-    uint32_t last;
-    uint32_t nfd;
-};
-
-static const uint32_t MAX_CODEPOINTS = 0x110000;
-
-extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
-extern const std::unordered_set<uint32_t> unicode_set_whitespace;
-extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
-extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
-extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
diff --git a/make/Makefile.cpu b/make/Makefile.cpu
deleted file mode 100644
index 968ae9347..000000000
--- a/make/Makefile.cpu
+++ /dev/null
@@ -1,40 +0,0 @@
-# Build the discrete cpu runner(s) for the platform which do not rely on 3rd party GPU libraries
-
-include make/common-defs.make
-
-CPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" $(TARGET_LDFLAGS)"
-ifeq ($(ARCH),amd64)
-ifeq ($(origin CUSTOM_CPU_FLAGS),undefined)
-	RUNNERS = cpu_avx cpu_avx2
-endif
-endif
-
-DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(RUNNERS)))
-BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(RUNNERS)))
-
-cpu: $(BUILD_RUNNERS)
-
-dist: $(DIST_RUNNERS)
-
-$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx"
-$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): ./llama/*.go ./llama/runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
-	@-mkdir -p $(dir $@)
-	GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./cmd/runner
-
-$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2"
-$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): ./llama/*.go ./llama/runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
-	@-mkdir -p $(dir $@)
-	GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./cmd/runner
-
-$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
-	@-mkdir -p $(dir $@)
-	cp $< $@
-
-clean:
-	rm -f $(BUILD_RUNNERS) $(DIST_RUNNERS)
-
-.PHONY: clean cpu dist
-
-# Handy debugging for make variables
-print-%:
-	@echo '$*=$($*)'
diff --git a/make/Makefile.cuda_v11 b/make/Makefile.cuda_v11
deleted file mode 100644
index a6a81823e..000000000
--- a/make/Makefile.cuda_v11
+++ /dev/null
@@ -1,13 +0,0 @@
-# Build rules for CUDA v11 runner
-
-include make/common-defs.make
-include make/cuda-v11-defs.make
-
-GPU_RUNNER_VARIANT := _v11
-GPU_COMPILER=$(CUDA_11_COMPILER)
-CUDA_ARCHITECTURES?=50;52;53;60;61;62;70;72;75;80;86
-GPU_LIB_DIR = $(CUDA_11_LIB_DIR)
-CGO_EXTRA_LDFLAGS = $(CUDA_11_CGO_EXTRA_LDFLAGS)
-
-include make/cuda.make
-include make/gpu.make
\ No newline at end of file
diff --git a/make/Makefile.cuda_v12 b/make/Makefile.cuda_v12
deleted file mode 100644
index 7c50b27b5..000000000
--- a/make/Makefile.cuda_v12
+++ /dev/null
@@ -1,13 +0,0 @@
-# Build rules for
CUDA v12 runner - -include make/common-defs.make -include make/cuda-v12-defs.make - -GPU_RUNNER_VARIANT := _v12 -GPU_COMPILER=$(CUDA_12_COMPILER) -CUDA_ARCHITECTURES?=60;61;62;70;72;75;80;86;87;89;90;90a -GPU_LIB_DIR = $(CUDA_12_LIB_DIR) -CGO_EXTRA_LDFLAGS = $(CUDA_12_CGO_EXTRA_LDFLAGS) - -include make/cuda.make -include make/gpu.make \ No newline at end of file diff --git a/make/Makefile.ollama b/make/Makefile.ollama deleted file mode 100644 index a7349a252..000000000 --- a/make/Makefile.ollama +++ /dev/null @@ -1,19 +0,0 @@ -# Makefile for building top-level ollama binary - -include make/common-defs.make - -exe: $(OLLAMA_EXE) -dist_exe dist_ollama: $(DIST_OLLAMA_EXE) - -GO_DEPS=$(foreach dir,$(shell go list -deps -f '{{.Dir}}' . ),$(wildcard $(dir)/*.go)) -CPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" $(EXTRA_GOLDFLAGS) $(TARGET_LDFLAGS)" - -$(OLLAMA_EXE) $(DIST_OLLAMA_EXE): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS) -$(OLLAMA_EXE) $(DIST_OLLAMA_EXE): $(COMMON_SRCS) $(COMMON_HDRS) $(GO_DEPS) - GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ . - -.PHONY: ollama dist_ollama exe dist_exe - -# Handy debugging for make variables -print-%: - @echo '$*=$($*)' diff --git a/make/Makefile.rocm b/make/Makefile.rocm deleted file mode 100644 index 26ac6cf35..000000000 --- a/make/Makefile.rocm +++ /dev/null @@ -1,119 +0,0 @@ -# Build rules for ROCm runner -# -# Note: at present we only support a single ROCm version (whichever is default on the build system) -# unlike CUDA where we'll build both a v11 and v12 variant. - -include make/common-defs.make -include make/rocm-defs.make - -HIP_ARCHS_COMMON := gfx900 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102 -HIP_ARCHS_LINUX := gfx906:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- - -ifeq ($(OS),windows) - GPU_LIB_DIR := $(shell cygpath -m -s "$(HIP_PATH)/bin") - CGO_EXTRA_LDFLAGS := -L$(shell cygpath -m -s "$(HIP_PATH)/lib") - HIP_ARCHS?=$(HIP_ARCHS_COMMON) - GPU_COMPILER_CFLAGS = $(CFLAGS) -D_WIN32_WINNT=0x602 - GPU_COMPILER_CXXFLAGS = $(CXXFLAGS) -D_WIN32_WINNT=0x602 -else ifeq ($(OS),linux) - GPU_LIB_DIR := $(strip $(shell ls -d $(HIP_PATH)/lib64 2>/dev/null || ls -d $(HIP_PATH)/lib 2>/dev/null)) - CGO_EXTRA_LDFLAGS := -L$(GPU_LIB_DIR) - HIP_ARCHS?=$(HIP_ARCHS_COMMON) $(HIP_ARCHS_LINUX) - GPU_COMPILER_CFLAGS = $(CFLAGS) -fPIC -D_GNU_SOURCE - GPU_COMPILER_CXXFLAGS = $(CXXFLAGS) -fPIC -D_GNU_SOURCE -endif -GPU_COMPILER=$(HIP_COMPILER) - -# TODO future multi-variant support for ROCm -# ROCM_VERSION = $(subst $(space),.,$(wordlist 1,2,$(subst .,$(space),$(word 3,$(subst -,$(space),$(filter HIP version: %,$(shell $(GPU_COMPILER) --version))))))) -# ifneq (,$(ROCM_VERSION)) -# GPU_RUNNER_VARIANT = _v$(ROCM_VERSION) -# endif - -GPU_RUNNER_GO_TAGS := rocm -GPU_RUNNER_NAME := rocm$(GPU_RUNNER_VARIANT) -GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64 -GPU_RUNNER_LIBS_SHORT := hipblas rocblas - -# Note: ROCm requires an extra step of discovering and copying the transitive dependencies on linux -ifeq ($(OS),windows) - ROCM_DIST_DEPS_DIR = ./dist/$(OS)-$(ARCH)/lib/ollama - GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))) -else ifeq ($(OS),linux) - ROCM_DIST_DEPS_DIR = ./dist/$(OS)-$(ARCH)-rocm/lib/ollama - GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))) 
- ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf)) - GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL)) - FILTERED_GPU_TRANSITIVE_LIBS=$(sort $(filter-out $(addprefix %,$(notdir $(GPU_LIBS))), $(GPU_TRANSITIVE_LIBS))) - GPU_DIST_TRANSITIVE_LIB_DEPS = $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(FILTERED_GPU_TRANSITIVE_LIBS)))) -endif -GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS)))) -ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt - -ifeq ($(OS),linux) - GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++17 -else ifeq ($(OS),windows) - GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt -endif -GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(HIP_ARCHS)),--offload-arch=$(arch)) - -# HIPCC uses clang which requires avx512 -> -mavx512f -mavx512dq -mavx512bw -GPU_VECTOR_FLAGS=$(if $(filter avx512,$(GPU_RUNNER_CPU_FLAGS)),avx512f avx512dq avx512bw) $(filter-out avx512,$(GPU_RUNNER_CPU_FLAGS)) - -GPU_COMPILER_CUFLAGS = \ - $(GPU_COMPILER_FPIC) \ - $(addprefix -m,$(GPU_VECTOR_FLAGS)) \ - -mf16c \ - -mfma \ - -c \ - -O3 \ - -DGGML_USE_CUDA \ - -DGGML_BUILD=1 \ - -DGGML_BACKEND_BUILD=1 \ - -DGGML_SHARED=1 \ - -DGGML_BACKEND_SHARED=1 \ - -DGGML_CUDA_DMMV_X=32 \ - -DGGML_CUDA_MMV_Y=1 \ - -DGGML_SCHED_MAX_COPIES=4 \ - -DGGML_USE_HIP \ - -DGGML_USE_LLAMAFILE \ - -DHIP_FAST_MATH \ - -D__HIP_PLATFORM_AMD__=1 \ - -D__HIP_ROCclr__=1 \ - -DNDEBUG \ - -DK_QUANTS_PER_ITERATION=2 \ - -D_CRT_SECURE_NO_WARNINGS \ - -D_GNU_SOURCE \ - -D_XOPEN_SOURCE=600 \ - -DUSE_PROF_API=1 \ - -std=gnu++17 \ - -x hip \ - -mllvm=-amdgpu-early-inline-all=true \ - -mllvm=-amdgpu-function-calls=false \ - -Wno-expansion-to-defined \ - -Wno-invalid-noreturn \ - -Wno-ignored-attributes \ - -Wno-pass-failed \ - -Wno-deprecated-declarations \ - -Wno-unused-result \ - -I./llama/ - -# Workaround buggy P2P copy on some windows multi-GPU setups -# This workaround breaks linux systems with small system RAM, so only enable on windows -ifeq ($(OS),windows) - GPU_COMPILER_CUFLAGS += -DGGML_CUDA_NO_PEER_COPY=1 -endif - -include make/gpu.make - -# Adjust the rules from gpu.make to handle the ROCm dependencies properly -$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST) $(GPU_DIST_TRANSITIVE_LIB_DEPS) -$(ROCBLAS_DIST_DEP_MANIFEST): - @-mkdir -p $(dir $@) - @echo "Copying rocblas library..." - (cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . 
) | (cd $(dir $@) && tar xf - ) - @echo "rocblas library copy complete" - -$(GPU_DIST_TRANSITIVE_LIB_DEPS): - @-mkdir -p $(dir $@) - $(CP) $(dir $(filter %$(notdir $@),$(GPU_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@) diff --git a/make/Makefile.sync b/make/Makefile.sync deleted file mode 100644 index 628d30e0b..000000000 --- a/make/Makefile.sync +++ /dev/null @@ -1,250 +0,0 @@ -# Helpers for managing our vendored llama.cpp repo and patch set - -REPO_ROOT:=./ -DEST_DIR:=./llama/ - -include $(DEST_DIR)vendoring - -LLAMACPP_REPO := ./llama/vendor/ - -# Relative to the vendor dir -VENDOR_RELATIVE_PATCH_DIR := ../patches/ - - -help-sync: - @echo "The following make targets will help you update llama.cpp to a new base commit, or work on new features/fixes" - @echo "" - @echo " make apply-patches # Establish the tracking repo if not already present, reset to the base commit, and apply our patch set" - @echo " make sync # Vendor llama.cpp and ggml from the tracking repo working tree" - @echo " make sync-clean # Remove all vendored files" - @echo " make create-patches # Generate the patch set based on the current commits in the tracking repo since the base commit" - @echo "" - @echo "For more details on the workflow, see the Vendoring section in 'docs/development.md'" - -apply-patches: $(LLAMACPP_REPO) - @if ! git -C $(LLAMACPP_REPO) --no-pager diff --exit-code ; then \ - echo "ERROR: Your llama.cpp repo is dirty. The apply-patches target requires a clean working tree"; \ - echo "To clobber: git -C $(LLAMACPP_REPO) reset --hard HEAD" ; \ - exit 1; \ - fi - @echo "Checking out $(LLAMACPP_BASE_COMMIT)" - @git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT) || \ - git -C $(LLAMACPP_REPO) fetch --all && git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT) - @echo "Applying ollama patches..." - @cd $(LLAMACPP_REPO) && git -c 'user.name=nobody' -c 'user.email=<>' am -3 $(VENDOR_RELATIVE_PATCH_DIR)*.patch || \ - echo "Please resolve the conflicts in $(LLAMACPP_REPO), and run 'git am --continue' to continue applying subsequent patches" - @echo "" - @echo "The tracking repo $(LLAMACPP_REPO) is now in a detached state with all patches applied." - @echo "Don't forget to commit any changes you make and run 'make create-patches' " - -$(LLAMACPP_REPO): - @echo "Cloning llama.cpp to $(LLAMACPP_REPO)" - git clone https://github.com/ggerganov/llama.cpp.git $@ - -create-patches: $(LLAMACPP_REPO) - @if ! git -C $(LLAMACPP_REPO) --no-pager diff --exit-code ; then \ - echo "ERROR: Your llama.cpp repo is dirty. 
You must commit any pending changes for format-patch to generate patches"; \ - exit 1; \ - fi - @cd $(LLAMACPP_REPO) && git format-patch --no-signature --no-numbered --zero-commit -o $(VENDOR_RELATIVE_PATCH_DIR) $(LLAMACPP_BASE_COMMIT) - -# Vendoring template logic -EXCLUDED_FILES=sgemm.cpp sgemm.h sampling_ext.cpp sampling_ext.h stb_image.h json.hpp llama_darwin.c base64.hpp -OLLAMA_NATIVE_FILES=mllama.cpp mllama.h llama_darwin.c sampling_ext.cpp sampling_ext.h -define vendor_file -$(strip $(addprefix $(2),$(notdir $1))) : $(addprefix $(LLAMACPP_REPO),$(1)) -ifneq ($$(filter-out $(EXCLUDED_FILES),$(notdir $1)),) - @echo "vendoring $1"; \ - mkdir -p $$(dir $$@) && \ - echo "/**" > $$@ && \ - echo " * llama.cpp - commit $$(LLAMACPP_BASE_COMMIT) - do not edit this file" >> $$@ && \ - echo " *" >> $$@ && \ - sed 's/^/ * /' <$(LLAMACPP_REPO)/LICENSE | sed 's/ *$$$$//' >> $$@ && \ - echo " */" >> $$@ && \ - echo "" >> $$@ && \ - cat $$< >> $$@ -else - @echo "vendoring $1"; \ - mkdir -p $$(dir $$@) && \ - cat $$< > $$@ -endif -VENDORED_FILES += $(strip $(addprefix $(2),$(notdir $1))) -endef - -# llama.cpp files -> llama/ -LLAMACPP_FILES=\ - src/unicode.cpp \ - src/unicode.h \ - src/unicode-data.cpp \ - src/unicode-data.h \ - src/llama.cpp \ - src/llama-adapter.cpp \ - src/llama-adapter.h \ - src/llama-arch.cpp \ - src/llama-arch.h \ - src/llama-batch.cpp \ - src/llama-batch.h \ - src/llama-chat.cpp \ - src/llama-chat.h \ - src/llama-context.cpp \ - src/llama-context.h \ - src/llama-cparams.cpp \ - src/llama-cparams.h \ - src/llama-grammar.cpp \ - src/llama-grammar.h \ - src/llama-hparams.cpp \ - src/llama-hparams.h \ - src/llama-impl.cpp \ - src/llama-impl.h \ - src/llama-kv-cache.cpp \ - src/llama-kv-cache.h \ - src/llama-mmap.cpp \ - src/llama-mmap.h \ - src/llama-model-loader.cpp \ - src/llama-model-loader.h \ - src/llama-model.cpp \ - src/llama-model.h \ - src/llama-quant.cpp \ - src/llama-quant.h \ - src/llama-sampling.cpp \ - src/llama-sampling.h \ - src/llama-vocab.cpp \ - src/llama-vocab.h \ - include/llama.h \ - include/llama-cpp.h \ - ggml/include/ggml-cpu.h \ - ggml/src/ggml-cpu/llamafile/sgemm.cpp \ - ggml/src/ggml-cpu/llamafile/sgemm.h -$(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)))) - -# llama.cpp files -> llama/llamafile -LLAMAFILE_FILES= \ - ggml/src/ggml-cpu/llamafile/sgemm.h -$(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)llamafile/))) - -# ggml files -> llama/ -GGML_FILES= \ - ggml/src/ggml.c \ - ggml/include/ggml.h \ - ggml/src/ggml-quants.c \ - ggml/src/ggml-quants.h \ - ggml/src/ggml-metal/ggml-metal.metal \ - ggml/include/ggml-metal.h \ - ggml/src/ggml-impl.h \ - ggml/src/ggml-threading.h \ - ggml/include/ggml-cuda.h \ - ggml/src/ggml-backend-reg.cpp \ - ggml/src/ggml-metal/ggml-metal-impl.h \ - ggml/src/ggml-common.h \ - ggml/include/ggml-backend.h \ - ggml/src/ggml-backend.cpp \ - ggml/src/ggml-backend-impl.h \ - ggml/include/ggml-alloc.h \ - ggml/src/ggml-alloc.c \ - ggml/include/ggml-blas.h \ - ggml/include/ggml-cpp.h \ - ggml/src/ggml-threading.cpp \ - ggml/src/ggml-blas/ggml-blas.cpp \ - ggml/src/ggml-cpu/ggml-cpu.c \ - ggml/src/ggml-cpu/ggml-cpu.cpp \ - ggml/src/ggml-cpu/ggml-cpu-aarch64.h \ - ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp \ - ggml/src/ggml-cpu/ggml-cpu-quants.h \ - ggml/src/ggml-cpu/ggml-cpu-quants.c \ - ggml/src/ggml-cpu/ggml-cpu-impl.h \ - ggml/src/ggml-cpu/ggml-cpu-traits.h \ - ggml/src/ggml-cpu/ggml-cpu-traits.cpp \ - ggml/src/ggml-cpu/amx/amx.h \ - ggml/src/ggml-cpu/amx/amx.cpp \ - 
ggml/src/ggml-cpu/amx/mmq.cpp \ - ggml/src/ggml-cpu/amx/mmq.h -$(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)))) - -$(DEST_DIR)ggml-metal-embed.metal: $(DEST_DIR)ggml-common.h $(DEST_DIR)ggml-metal-impl.h - @sed -e '/__embed_ggml-common.h__/r $(DEST_DIR)/ggml-common.h' \ - -e '/__embed_ggml-common.h__/d' \ - < $(DEST_DIR)/ggml-metal.metal \ - > $(DEST_DIR)/ggml-metal-embed.metal.tmp - @sed -e '/#include "ggml-metal-impl.h"/r $(DEST_DIR)/ggml-metal-impl.h' \ - -e '/#include "ggml-metal-impl.h"/d' \ - < $(DEST_DIR)/ggml-metal-embed.metal.tmp \ - > $(DEST_DIR)/ggml-metal-embed.metal - @rm $(DEST_DIR)/ggml-metal-embed.metal.tmp - -VENDORED_FILES += $(DEST_DIR)ggml-metal-embed.metal - -# TODO generalize renaming pattern if we have more of these -$(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal/ggml-metal.m - @echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"; \ - mkdir -p $(dir $@) && \ - echo "/**" > $@ && \ - echo " * llama.cpp - commit $(LLAMACPP_BASE_COMMIT) - do not edit this file" >> $@ && \ - echo " *" >> $@ && \ - sed 's/^/ * /' <$(LLAMACPP_REPO)/LICENSE | sed 's/ *$$//' >> $@ && \ - echo " */" >> $@ && \ - echo "" >> $@ && \ - cat $< >> $@ -VENDORED_FILES += $(DEST_DIR)ggml-metal_darwin_arm64.m - -# ggml-cuda -> llama/ggml-cuda/ -GGML_CUDA_FILES= ggml/src/ggml-cuda/*.cu ggml/src/ggml-cuda/*.cuh -GGML_CUDA_FILES_EXPANDED = $(addprefix ggml/src/ggml-cuda/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_CUDA_FILES))))) -$(foreach name,$(GGML_CUDA_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/))) - -GGML_TEMPLATE_FILES= ggml/src/ggml-cuda/template-instances/*.cu -GGML_TEMPLATE_FILES_EXPANDED = $(addprefix ggml/src/ggml-cuda/template-instances/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_TEMPLATE_FILES))))) -$(foreach name,$(GGML_TEMPLATE_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/template-instances/))) - -GGML_VENDOR_FILES= ggml/src/ggml-cuda/vendors/*.h -GGML_VENDOR_FILES_EXPANDED=$(addprefix ggml/src/ggml-cuda/vendors/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_VENDOR_FILES))))) -$(foreach name,$(GGML_VENDOR_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/vendors/))) - -# llava -> llama/ -LAVA_FILES= \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - common/log.h \ - common/log.cpp \ - common/stb_image.h -# These files are mostly used by the llava code -# and shouldn't be necessary once we use clip.cpp directly -LAVA_FILES+= \ - common/common.cpp \ - common/common.h \ - common/sampling.cpp \ - common/sampling.h \ - common/json.hpp \ - common/json-schema-to-grammar.cpp \ - common/json-schema-to-grammar.h \ - common/base64.hpp -$(foreach name,$(LAVA_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)))) - -$(DEST_DIR)build-info.cpp: - @echo "Generating $@" - @echo "int LLAMA_BUILD_NUMBER = 0;" > $@ - @echo "char const *LLAMA_COMMIT = \"$(LLAMACPP_BASE_COMMIT)\";" >> $@ - @echo "char const *LLAMA_COMPILER = \"\";" >> $@ - @echo "char const *LLAMA_BUILD_TARGET = \"\";" >> $@ -VENDORED_FILES += $(DEST_DIR)build-info.cpp - - -sync: $(LLAMACPP_REPO) .WAIT $(VENDORED_FILES) .WAIT remove-stale-files - -sync-clean: - rm -f $(VENDORED_FILES) $(EXTRA_NATIVE_FILES) - -PATS=*.c *.h *.cpp *.m *.metal *.cu *.cuh -NATIVE_DIRS=$(DEST_DIR) $(DEST_DIR)llamafile/ $(DEST_DIR)ggml-cuda/ $(DEST_DIR)ggml-cuda/template-instances/ $(DEST_DIR)ggml-cuda/vendors/ -ALL_NATIVE_FILES=$(foreach 
dir,$(NATIVE_DIRS),$(wildcard $(addprefix $(dir),$(PATS)))) -EXTRA_NATIVE_FILES=$(filter-out $(VENDORED_FILES) $(addprefix $(DEST_DIR),$(OLLAMA_NATIVE_FILES)), $(ALL_NATIVE_FILES)) -remove-stale-files: - @rm -f $(EXTRA_NATIVE_FILES) - -.PHONY: help-sync apply-patches sync create-patches remove-stale-fails .WAIT - - -# Handy debugging for make variables -print-%: - @echo '$*=$($*)' diff --git a/make/Makefile.test b/make/Makefile.test deleted file mode 100644 index 3b27d0dbe..000000000 --- a/make/Makefile.test +++ /dev/null @@ -1,19 +0,0 @@ -# Targets to assist in running tests - -include make/common-defs.make - -test: - cd .. && go test ./... - -integration: $(OLLAMA_EXE) - cd .. && go test --tags=integration ./integration -v - -lint: - cd .. && golangci-lint run -v - -# Note: in this makefile we error instead of building to allow more fine-grain control of testing flows -$(OLLAMA_EXE): - @echo "" - @echo "ERROR: You must build ollama first - use 'make all' to build the ollama binaries" - @echo "" - @exit 1 \ No newline at end of file diff --git a/make/common-defs.make b/make/common-defs.make deleted file mode 100644 index 03504a690..000000000 --- a/make/common-defs.make +++ /dev/null @@ -1,91 +0,0 @@ -# Common definitions for the various Makefiles -# No rules are defined here so this is safe to include at the beginning of other makefiles - -OS := $(shell uname -s) -ARCH ?= $(subst aarch64,arm64,$(subst x86_64,amd64,$(shell uname -m))) -ifneq (,$(findstring MINGW,$(OS))$(findstring MSYS,$(OS))) - OS := windows - ARCH := $(shell systeminfo 2>/dev/null | grep "System Type" | grep ARM64 > /dev/null && echo "arm64" || echo "amd64" ) -else ifeq ($(OS),Linux) - OS := linux -else ifeq ($(OS),Darwin) - OS := darwin -endif -comma:= , -empty:= -space:= $(empty) $(empty) -uc = $(subst a,A,$(subst b,B,$(subst c,C,$(subst d,D,$(subst e,E,$(subst f,F,$(subst g,G,$(subst h,H,$(subst i,I,$(subst j,J,$(subst k,K,$(subst l,L,$(subst m,M,$(subst n,N,$(subst o,O,$(subst p,P,$(subst q,Q,$(subst r,R,$(subst s,S,$(subst t,T,$(subst u,U,$(subst v,V,$(subst w,W,$(subst x,X,$(subst y,Y,$(subst z,Z,$1)))))))))))))))))))))))))) - -export CGO_CFLAGS_ALLOW = -mfma|-mf16c -export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c -export HIP_PLATFORM = amd -export CGO_ENABLED=1 - -BUILD_DIR = ./llama/build/$(OS)-$(ARCH) -DIST_BASE = ./dist/$(OS)-$(ARCH) - -ifeq ($(OS),windows) - # Absolute paths with cygpath to convert to 8.3 without spaces - PWD="$(shell pwd)" - DIST_OLLAMA_EXE=$(DIST_BASE)/ollama$(EXE_EXT) -else - CCACHE:=$(shell command -v ccache 2>/dev/null || echo "") - DIST_OLLAMA_EXE=$(DIST_BASE)/bin/ollama$(EXE_EXT) -endif -DIST_LIB_DIR = $(DIST_BASE)/lib/ollama -RUNNERS_DIST_DIR = $(DIST_LIB_DIR)/runners -RUNNERS_BUILD_DIR = $(BUILD_DIR)/runners -VERSION?=$(shell git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g") - -# Conditionally enable ccache for cgo builds too -ifneq ($(CCACHE),) - CC?=$(CCACHE) gcc - CXX?=$(CCACHE) g++ - export CC - export CXX -endif - - -# Override in environment to tune CPU vector flags -ifeq ($(ARCH),amd64) -ifeq ($(origin CUSTOM_CPU_FLAGS),undefined) - GPU_RUNNER_CPU_FLAGS=avx - GPU_RUNNER_EXTRA_VARIANT=_avx -else - GPU_RUNNER_CPU_FLAGS=$(subst $(comma),$(space),$(CUSTOM_CPU_FLAGS)) -endif -endif - -ifeq ($(OS),windows) - CP := cp - OBJ_EXT := obj - SHARED_EXT := dll - EXE_EXT := .exe - SHARED_PREFIX := - CPU_FLAG_PREFIX := /arch: -ifneq ($(HIP_PATH),) - # If HIP_PATH has spaces, hipcc trips over them when subprocessing - HIP_PATH := $(shell cygpath -m -s 
"$(patsubst %\,%,$(HIP_PATH))") - export HIP_PATH -endif -else ifeq ($(OS),linux) - CP := cp -df - OBJ_EXT := o - SHARED_EXT := so - SHARED_PREFIX := lib - CPU_FLAG_PREFIX := -m -else - OBJ_EXT := o - SHARED_EXT := so - CPU_FLAG_PREFIX := -m - CP := cp -df -endif - -COMMON_SRCS := \ - $(wildcard ./llama/*.c) \ - $(wildcard ./llama/*.cpp) -COMMON_HDRS := \ - $(wildcard ./llama/*.h) \ - $(wildcard ./llama/*.hpp) - -OLLAMA_EXE=./ollama$(EXE_EXT) \ No newline at end of file diff --git a/make/cuda-v11-defs.make b/make/cuda-v11-defs.make deleted file mode 100644 index 264407ddc..000000000 --- a/make/cuda-v11-defs.make +++ /dev/null @@ -1,17 +0,0 @@ -# Common definitions for the various Makefiles which set cuda settings -# No rules are defined here so this is safe to include at the beginning of other makefiles - -ifeq ($(OS),windows) - CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown - CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null)) - CUDA_11_PATH:=$(shell ls -d $(CUDA_BASE_DIR)/v11.? 2>/dev/null) - CUDA_11_COMPILER:=$(wildcard $(CUDA_11_PATH)/bin/nvcc.exe) - CUDA_11_LIB_DIR = $(strip $(shell ls -d $(CUDA_11_PATH)/bin 2>/dev/null)) - CUDA_11_CGO_EXTRA_LDFLAGS = -L"$(CUDA_11_PATH)/lib/x64" -else ifeq ($(OS),linux) - CUDA_PATH?=/usr/local/cuda - CUDA_11_PATH:=$(shell ls -d $(CUDA_PATH)-11 2>/dev/null) - CUDA_11_COMPILER:=$(wildcard $(CUDA_11_PATH)/bin/nvcc) - CUDA_11_LIB_DIR=$(strip $(shell ls -d $(CUDA_11_PATH)/lib64 2>/dev/null || ls -d $(CUDA_11_PATH)/lib 2>/dev/null)) - CUDA_11_CGO_EXTRA_LDFLAGS = -L"$(CUDA_11_LIB_DIR)" -L"$(CUDA_11_LIB_DIR)/stubs" -endif diff --git a/make/cuda-v12-defs.make b/make/cuda-v12-defs.make deleted file mode 100644 index f7c182b6f..000000000 --- a/make/cuda-v12-defs.make +++ /dev/null @@ -1,17 +0,0 @@ -# Common definitions for the various Makefiles which set cuda settings -# No rules are defined here so this is safe to include at the beginning of other makefiles - -ifeq ($(OS),windows) - CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown - CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null)) - CUDA_12_PATH:=$(shell ls -d $(CUDA_BASE_DIR)/v12.? 
2>/dev/null) - CUDA_12_COMPILER:=$(wildcard $(CUDA_12_PATH)/bin/nvcc.exe) - CUDA_12_LIB_DIR = $(strip $(shell ls -d $(CUDA_12_PATH)/bin 2>/dev/null)) - CUDA_12_CGO_EXTRA_LDFLAGS = -L"$(CUDA_12_PATH)/lib/x64" -else ifeq ($(OS),linux) - CUDA_PATH?=/usr/local/cuda - CUDA_12_PATH:=$(shell ls -d $(CUDA_PATH)-12 2>/dev/null) - CUDA_12_COMPILER:=$(wildcard $(CUDA_12_PATH)/bin/nvcc) - CUDA_12_LIB_DIR=$(strip $(shell ls -d $(CUDA_12_PATH)/lib64 2>/dev/null || ls -d $(CUDA_12_PATH)/lib 2>/dev/null)) - CUDA_12_CGO_EXTRA_LDFLAGS = -L"$(CUDA_12_LIB_DIR)" -L"$(CUDA_12_LIB_DIR)/stubs" -endif diff --git a/make/cuda.make b/make/cuda.make deleted file mode 100644 index 095663f5d..000000000 --- a/make/cuda.make +++ /dev/null @@ -1,56 +0,0 @@ -# Common definitions for all cuda versions - -ifndef GPU_RUNNER_VARIANT -dummy: - $(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables) -endif - - -GPU_RUNNER_NAME := cuda$(GPU_RUNNER_VARIANT) -GPU_RUNNER_GO_TAGS := cuda cuda$(GPU_RUNNER_VARIANT) -GPU_RUNNER_DRIVER_LIB_LINK := -lcuda -GPU_RUNNER_LIBS_SHORT := cublas cudart cublasLt - -ifeq ($(OS),windows) - # On windows, nvcc uses msvc which does not support avx512vbmi avx512vnni avx512bf16, but macros can turn them on - GPU_VECTOR_FLAGS=$(call uc,$(filter-out avx512bf16,$(filter-out avx512vnni,$(filter-out avx512vbmi,$(GPU_RUNNER_CPU_FLAGS))))) - GPU_COMPILER_EXTRA_FLAGS=$(if $(filter avx512vbmi,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VBMI__) - GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512vnni,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VNNI__) - GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512bf16,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512BF16__) - GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))) - GPU_COMPILER_CFLAGS = $(CFLAGS) -D_WIN32_WINNT=0x602 - GPU_COMPILER_CXXFLAGS = $(CXXFLAGS) -D_WIN32_WINNT=0x602 -else ifeq ($(OS),linux) - # On linux, nvcc requires avx512 -> -mavx512f -mavx512dq -mavx512bw - GPU_VECTOR_FLAGS=$(if $(filter avx512,$(GPU_RUNNER_CPU_FLAGS)),avx512f avx512dq avx512bw) $(filter-out avx512,$(GPU_RUNNER_CPU_FLAGS)) - GPU_COMPILER_EXTRA_FLAGS = -fPIC -Wno-unused-function -std=c++17 - GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))) - GPU_COMPILER_CFLAGS = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE - GPU_COMPILER_CXXFLAGS = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE -endif -GPU_DIST_LIB_DEPS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS)))) - -GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(CUDA_ARCHITECTURES)),--generate-code=arch=compute_$(arch)$(comma)code=[compute_$(arch)$(comma)sm_$(arch)]) \ - -DGGML_CUDA_USE_GRAPHS=1 -GPU_COMPILER_CUFLAGS = \ - $(GPU_COMPILER_EXTRA_FLAGS) \ - -Xcompiler "$(addprefix $(CPU_FLAG_PREFIX),$(GPU_VECTOR_FLAGS))" \ - -t2 \ - -DGGML_CUDA_DMMV_X=32 \ - -DGGML_CUDA_MMV_Y=1 \ - -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \ - -DGGML_USE_CUDA=1 \ - -DGGML_SHARED=1 \ - -DGGML_BACKEND_SHARED=1 \ - -DGGML_BUILD=1 \ - -DGGML_BACKEND_BUILD=1 \ - -DGGML_USE_LLAMAFILE \ - -DK_QUANTS_PER_ITERATION=2 \ - -DNDEBUG \ - -D_GNU_SOURCE \ - -D_XOPEN_SOURCE=600 \ - -Wno-deprecated-gpu-targets \ - --forward-unknown-to-host-compiler \ - -use_fast_math \ - -I./llama/ \ - -O3 diff --git a/make/gpu.make b/make/gpu.make deleted file mode 100644 index 96e1ad224..000000000 --- a/make/gpu.make +++ /dev/null @@ -1,89 +0,0 @@ -# Generalized GPU runner build - 
-ifndef GPU_RUNNER_NAME -dummy: - $(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables) -endif - -GPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" $(EXTRA_GOLDFLAGS) $(TARGET_LDFLAGS)" - -# TODO Unify how we handle dependencies in the dist/packaging and install flow -# today, cuda is bundled, but rocm is split out. Should split them each out by runner -DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR) - - -GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))) - -GPU_RUNNER_SRCS := \ - $(filter-out $(wildcard llama/ggml-cuda/fattn*.cu),$(wildcard llama/ggml-cuda/*.cu)) \ - $(wildcard llama/ggml-cuda/template-instances/mmq*.cu) \ - llama/ggml.c llama/ggml-backend.cpp llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-threading.cpp -GPU_RUNNER_HDRS := \ - $(wildcard llama/ggml-cuda/*.cuh) - - -# Conditional flags and components to speed up developer builds -ifneq ($(OLLAMA_FAST_BUILD),) - GPU_COMPILER_CUFLAGS += \ - -DGGML_DISABLE_FLASH_ATTN -else - GPU_RUNNER_SRCS += \ - $(wildcard llama/ggml-cuda/fattn*.cu) \ - $(wildcard llama/ggml-cuda/template-instances/fattn-wmma*.cu) \ - $(wildcard llama/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) \ - $(wildcard llama/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) \ - $(wildcard llama/ggml-cuda/template-instances/fattn-vec*f16-f16.cu) -endif - -GPU_RUNNER_OBJS := $(GPU_RUNNER_SRCS:.cu=.$(GPU_RUNNER_NAME).$(OBJ_EXT)) -GPU_RUNNER_OBJS := $(GPU_RUNNER_OBJS:.c=.$(GPU_RUNNER_NAME).$(OBJ_EXT)) -GPU_RUNNER_OBJS := $(addprefix $(BUILD_DIR)/,$(GPU_RUNNER_OBJS:.cpp=.$(GPU_RUNNER_NAME).$(OBJ_EXT))) - -DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT))) -BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT))) - - -$(GPU_RUNNER_NAME): $(BUILD_RUNNERS) - -dist: $(DIST_RUNNERS) - -# Build targets -$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu - @-mkdir -p $(dir $@) - $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $< -$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c - @-mkdir -p $(dir $@) - $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $< -$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp - @-mkdir -p $(dir $@) - $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CXXFLAGS) -o $@ $< -$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = $(CGO_EXTRA_LDFLAGS) -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/" -$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) ./llama/*.go ./llama/runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) - @-mkdir -p $(dir $@) - GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./cmd/runner -$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS) - @-mkdir -p $(dir $@) - $(CCACHE) $(GPU_COMPILER) --shared 
-L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@ - -# Distribution targets -$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/% - @-mkdir -p $(dir $@) - $(CP) $< $@ -$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_LIB_DEPS) -$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) - @-mkdir -p $(dir $@) - $(CP) $< $@ -$(GPU_DIST_LIB_DEPS): - @-mkdir -p $(dir $@) - $(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@) - -clean: - rm -f $(GPU_RUNNER_OBJS) $(BUILD_RUNNERS) $(DIST_RUNNERS) - -.PHONY: clean $(GPU_RUNNER_NAME) - - -# Handy debugging for make variables -print-%: - @echo '$*=$($*)' - diff --git a/make/rocm-defs.make b/make/rocm-defs.make deleted file mode 100644 index 76a11f296..000000000 --- a/make/rocm-defs.make +++ /dev/null @@ -1,9 +0,0 @@ -# Common definitions for the various Makefiles which set cuda settings -# No rules are defined here so this is safe to include at the beginning of other makefiles - -ifeq ($(OS),windows) - HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc.bin.exe) -else ifeq ($(OS),linux) - HIP_PATH?=$(shell ls -d /opt/rocm 2>/dev/null) - HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc) -endif diff --git a/ml/backend/ggml/ggml/.rsync-filter b/ml/backend/ggml/ggml/.rsync-filter new file mode 100644 index 000000000..562effe31 --- /dev/null +++ b/ml/backend/ggml/ggml/.rsync-filter @@ -0,0 +1,21 @@ +protect **/*.go +protect **/*-embed.* +include include/ +include src/ +include src/ggml-blas/ +include src/ggml-cpu/ +include src/ggml-cpu/amx/ +include src/ggml-cpu/llamafile/ +include src/ggml-cuda/ +include src/ggml-cuda/template-instances/ +include src/ggml-hip/ +include src/ggml-metal/ +include **/CMakeLists.txt +include **/*.c +include **/*.h +include **/*.cpp +include **/*.cu +include **/*.cuh +include **/*.m +include **/*.metal +exclude * diff --git a/ml/backend/ggml/ggml/CMakeLists.txt b/ml/backend/ggml/ggml/CMakeLists.txt new file mode 100644 index 000000000..393506533 --- /dev/null +++ b/ml/backend/ggml/ggml/CMakeLists.txt @@ -0,0 +1,262 @@ +cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. 
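# A note on the .rsync-filter added above: its rules take effect when the
# vendoring sync passes the file to rsync explicitly; a hypothetical
# invocation (the exact sync command is not part of this patch) would look like
#   rsync -arv --delete --filter="merge ml/backend/ggml/ggml/.rsync-filter" \
#       <upstream-ggml-checkout>/ ml/backend/ggml/ggml/
# "protect" shields the local *.go and *-embed.* files from --delete, the
# "include" rules whitelist the wanted directories and file types, and the
# final "exclude *" drops everything else.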
+project("ggml" C CXX) +include(CheckIncludeFileCXX) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(GGML_STANDALONE ON) + + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + + # configure project version + # TODO +else() + set(GGML_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + +# remove the lib prefix on win32 mingw +if (WIN32) + set(CMAKE_STATIC_LIBRARY_PREFIX "") + set(CMAKE_SHARED_LIBRARY_PREFIX "") + set(CMAKE_SHARED_MODULE_PREFIX "") +endif() + +option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) +option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF) + +# +# option list +# + +# TODO: mark all options as advanced when not GGML_STANDALONE + +if (APPLE) + set(GGML_METAL_DEFAULT ON) + set(GGML_BLAS_DEFAULT ON) + set(GGML_BLAS_VENDOR_DEFAULT "Apple") +else() + set(GGML_METAL_DEFAULT OFF) + set(GGML_BLAS_DEFAULT OFF) + set(GGML_BLAS_VENDOR_DEFAULT "Generic") +endif() + +if (CMAKE_CROSSCOMPILING) + set(GGML_NATIVE_DEFAULT OFF) +else() + set(GGML_NATIVE_DEFAULT ON) +endif() + +# defaults +if (NOT GGML_LLAMAFILE_DEFAULT) + set(GGML_LLAMAFILE_DEFAULT OFF) +endif() + +if (NOT GGML_CUDA_GRAPHS_DEFAULT) + set(GGML_CUDA_GRAPHS_DEFAULT OFF) +endif() + +# general +option(GGML_STATIC "ggml: static link libraries" OFF) +option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT}) +option(GGML_LTO "ggml: enable link time optimization" OFF) +option(GGML_CCACHE "ggml: use ccache if available" ON) + +# debug +option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) +option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF) +option(GGML_GPROF "ggml: enable gprof" OFF) + +# build +option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF) + +# sanitizers +option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF) +option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF) +option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF) + +# instruction set specific +if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT) + set(INS_ENB OFF) +else() + set(INS_ENB ON) +endif() + +option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) +option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) +option(GGML_AVX "ggml: enable AVX" ${INS_ENB}) +option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF) +option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB}) +option(GGML_AVX512 "ggml: enable AVX512F" OFF) +option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF) +option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF) +option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF) +if (NOT MSVC) + # in MSVC F16C and FMA is implied with AVX2/AVX512 + option(GGML_FMA "ggml: enable FMA" ${INS_ENB}) + option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) + # MSVC does not seem to support AMX + option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF) + option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF) + option(GGML_AMX_BF16 
"ggml: enable AMX-BF16" OFF) +endif() +option(GGML_LASX "ggml: enable lasx" ON) +option(GGML_LSX "ggml: enable lsx" ON) +option(GGML_RVV "ggml: enable rvv" ON) + +option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) +set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") + + +if (WIN32) + set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version") +endif() + +# ggml core +set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism") +option(GGML_CPU "ggml: enable CPU backend" ON) + +# 3rd party libs / backends +option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON) +option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) +set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING + "ggml: BLAS library vendor") +option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT}) + +option(GGML_CUDA "ggml: use CUDA" OFF) +option(GGML_MUSA "ggml: use MUSA" OFF) +option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF) +option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF) +option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF) +set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "ggml: max. batch size for using peer access") +option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF) +option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF) +option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF) +option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT}) + +option(GGML_HIP "ggml: use HIP" OFF) +option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF) +option(GGML_VULKAN "ggml: use Vulkan" OFF) +option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF) +option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF) +option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF) +option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) +option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF) +option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) +option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) +option(GGML_KOMPUTE "ggml: use Kompute" OFF) +option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) +option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF) +option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) +option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) +option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL}) +set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING + "ggml: metal minimum macOS version") +set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)") +option(GGML_OPENMP "ggml: use OpenMP" ON) +option(GGML_RPC "ggml: use RPC" OFF) +option(GGML_SYCL "ggml: use SYCL" OFF) +option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF) +set (GGML_SYCL_TARGET "INTEL" CACHE STRING + "ggml: sycl target device") +set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING + "ggml: sycl device architecture") + +option(GGML_OPENCL "ggml: use OpenCL" OFF) +option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF) +option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON) +option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON) + +# extra artifacts +option(GGML_BUILD_TESTS "ggml: build tests" 
${GGML_STANDALONE}) +option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) + +# +# dependencies +# + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED true) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED true) + +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(Threads REQUIRED) + +# +# build the library +# + +add_subdirectory(src) + +# +# tests and examples +# + +if (GGML_BUILD_TESTS) + enable_testing() + add_subdirectory(tests) +endif () + +if (GGML_BUILD_EXAMPLES) + add_subdirectory(examples) +endif () + +# +# install +# + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# all public headers +set(GGML_PUBLIC_HEADERS + include/ggml.h + include/ggml-cpu.h + include/ggml-alloc.h + include/ggml-backend.h + include/ggml-blas.h + include/ggml-cann.h + include/ggml-cuda.h + include/ggml-kompute.h + include/ggml-opt.h + include/ggml-metal.h + include/ggml-rpc.h + include/ggml-sycl.h + include/ggml-vulkan.h) + +set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") +#if (GGML_METAL) +# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") +#endif() +install(TARGETS ggml LIBRARY PUBLIC_HEADER) +install(TARGETS ggml-base LIBRARY) + +if (GGML_STANDALONE) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc + @ONLY) + + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc + DESTINATION share/pkgconfig) +endif() diff --git a/ml/backend/ggml/ggml/LICENSE b/ml/backend/ggml/ggml/LICENSE new file mode 100644 index 000000000..acb96ce78 --- /dev/null +++ b/ml/backend/ggml/ggml/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
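To make the install rules above concrete: once the targets and public headers are installed (plus, in a standalone build, the generated ggml.pc), a downstream C program can compile against the library. A minimal sketch, assuming the install prefix is visible to pkg-config and using only calls from the public ggml.h moved later in this patch:

    // build (hypothetical): cc demo.c $(pkg-config --cflags --libs ggml)
    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        // let ggml manage a small scratch arena for tensor metadata and data
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);
        struct ggml_tensor  * t   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
        printf("tensor: %lld elements, %zu bytes\n",
               (long long) ggml_nelements(t), ggml_nbytes(t));
        ggml_free(ctx);
        return 0;
    }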
diff --git a/llama/ggml-alloc.h b/ml/backend/ggml/ggml/include/ggml-alloc.h similarity index 70% rename from llama/ggml-alloc.h rename to ml/backend/ggml/ggml/include/ggml-alloc.h index 960ebf301..23600eea9 100644 --- a/llama/ggml-alloc.h +++ b/ml/backend/ggml/ggml/include/ggml-alloc.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "ggml.h" diff --git a/llama/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h similarity index 94% rename from llama/ggml-backend.h rename to ml/backend/ggml/ggml/include/ggml-backend.h index b67a183f5..7221a0830 100644 --- a/llama/ggml-backend.h +++ b/ml/backend/ggml/ggml/include/ggml-backend.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "ggml.h" diff --git a/ml/backend/ggml/ggml/include/ggml-blas.h b/ml/backend/ggml/ggml/include/ggml-blas.h new file mode 100644 index 000000000..87a81b363 --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-blas.h @@ -0,0 +1,25 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +// backend API +GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); + +GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); + +// number of threads used for conversion to float +// for openblas and blis, this will also set the number of threads used for blas operations +GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); + + +#ifdef __cplusplus +} +#endif diff --git a/ml/backend/ggml/ggml/include/ggml-cann.h b/ml/backend/ggml/ggml/include/ggml-cann.h new file mode 100644 index 000000000..b469e228d --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-cann.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2023-2024 The ggml authors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "ggml-backend.h" +#include "ggml.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Maximum number of CANN devices supported. + */ +#define GGML_CANN_MAX_DEVICES 16 + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void); + +/** + * @brief Initializes the CANN backend for a specified device. + * + * This function initializes the CANN backend for the given device. + * It verifies the device index, allocates a context, and creates a backend + * instance. + * + * @param device The index of the device to initialize. + * @return A pointer to the initialized backend instance, or nullptr on failure. + */ +GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device); + +/** + * @brief Checks if a given backend is a CANN backend. + * + * This function verifies if the provided backend is a CANN backend by comparing + * its GUID with the CANN backend's GUID. + * + * @param backend The backend instance to check. + * @return True if the backend is a CANN backend, false otherwise. + */ +GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend); + +/** + * @brief Retrieves the CANN buffer type for a specified device. + * + * This function initializes and returns the buffer type interface associated + * with the given device. 
It ensures thread-safe access using a mutex. + * + * @param device The device index for which to retrieve the buffer type. + * @return A pointer to the buffer type interface for the specified device, or + * nullptr if the device index is out of range. + */ +GGML_BACKEND_API ggml_backend_buffer_type_t +ggml_backend_cann_buffer_type(int32_t device); + +/** + * @brief Retrieves the number of CANN devices available. + * + * This function returns the number of CANN devices available based on + * information obtained from `ggml_cann_info()`. + * + * @return The number of CANN devices available. + */ +GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void); + +/** + * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU. + * + * @return A pointer to the host buffer type interface. + */ +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void); + +/** + * @brief Retrieves the description of a specific CANN device. + * + * This function sets the specified device, retrieves the SoC name, + * and writes it into the provided description buffer. + * + * @param device The device index to retrieve the description for. + * @param description Pointer to a buffer where the description will be written. + * @param description_size Size of the description buffer. + */ +GGML_BACKEND_API void ggml_backend_cann_get_device_description( + int32_t device, char* description, size_t description_size); + +/** + * @brief Retrieves the memory information of a specific CANN device. + * + * This function sets the specified device, retrieves the free and total + * memory information of the specified type (ACL_HBM_MEM), and stores them + * in the provided pointers. + * + * @param device The device index to retrieve memory information for. + * @param free Pointer to a variable where the free memory size will be stored. + * @param total Pointer to a variable where the total memory size will be + * stored. + */ +GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device, + size_t* free, + size_t* total); + +#ifdef __cplusplus +} +#endif diff --git a/llama/ggml-cpp.h b/ml/backend/ggml/ggml/include/ggml-cpp.h similarity index 56% rename from llama/ggml-cpp.h rename to ml/backend/ggml/ggml/include/ggml-cpp.h index ceb54875d..219361af4 100644 --- a/llama/ggml-cpp.h +++ b/ml/backend/ggml/ggml/include/ggml-cpp.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #ifndef __cplusplus diff --git a/llama/ggml-cpu.h b/ml/backend/ggml/ggml/include/ggml-cpu.h similarity index 84% rename from llama/ggml-cpu.h rename to ml/backend/ggml/ggml/include/ggml-cpu.h index c2b64e662..3aa71badb 100644 --- a/llama/ggml-cpu.h +++ b/ml/backend/ggml/ggml/include/ggml-cpu.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include "ggml.h" diff --git a/llama/ggml-cuda.h b/ml/backend/ggml/ggml/include/ggml-cuda.h similarity index 56% rename from llama/ggml-cuda.h rename to ml/backend/ggml/ggml/include/ggml-cuda.h index c0fb681e9..22ad2c009 100644 --- a/llama/ggml-cuda.h +++ b/ml/backend/ggml/ggml/include/ggml-cuda.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #pragma once #include "ggml.h" diff --git a/ml/backend/ggml/ggml/include/ggml-kompute.h b/ml/backend/ggml/ggml/include/ggml-kompute.h new file mode 100644 index 000000000..154aa56a7 --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-kompute.h @@ -0,0 +1,50 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus extern "C" { +#endif + +#define GGML_KOMPUTE_MAX_DEVICES 16 + +struct ggml_vk_device { + int index; + int type; // same as VkPhysicalDeviceType + size_t heapSize; + const char * name; + const char * vendor; + int subgroupSize; + uint64_t bufferAlignment; + uint64_t maxAlloc; +}; + +struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count); +bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name); +bool ggml_vk_has_vulkan(void); +bool ggml_vk_has_device(void); +struct ggml_vk_device ggml_vk_current_device(void); + +// +// backend API +// + +// forward declaration +typedef struct ggml_backend * ggml_backend_t; + +GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device); + +GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend); + +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void); + +#ifdef __cplusplus } +#endif diff --git a/llama/ggml-metal.h b/ml/backend/ggml/ggml/include/ggml-metal.h similarity index 66% rename from llama/ggml-metal.h rename to ml/backend/ggml/ggml/include/ggml-metal.h index c3e7023e4..669c1f84a 100644 --- a/llama/ggml-metal.h +++ b/ml/backend/ggml/ggml/include/ggml-metal.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ - // Note: this description is outdated // // An interface allowing to compute ggml_cgraph with Metal diff --git a/ml/backend/ggml/ggml/include/ggml-opencl.h b/ml/backend/ggml/ggml/include/ggml-opencl.h new file mode 100644 index 000000000..6b6177135 --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-opencl.h @@ -0,0 +1,26 @@ +#ifndef GGML_OPENCL_H +#define GGML_OPENCL_H + +#include "ggml.h" +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// +// backend API +// +GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void); +GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend); + +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void); +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void); + +#ifdef __cplusplus +} +#endif + +#endif // GGML_OPENCL_H diff --git a/ml/backend/ggml/ggml/include/ggml-opt.h b/ml/backend/ggml/ggml/include/ggml-opt.h new file mode 100644 index 000000000..eb5eab9de --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-opt.h @@ -0,0 +1,216 @@ +// This file contains functionality for training models using GGML. +// It is not strictly needed vs. just vanilla GGML but it provides a more high-level interface for common needs such as datasets. +// At the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code. +// +// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de) + +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + + struct ggml_opt_dataset; + struct ggml_opt_context; + struct ggml_opt_result; + + typedef struct ggml_opt_dataset * ggml_opt_dataset_t; + typedef struct ggml_opt_context * ggml_opt_context_t; + typedef struct ggml_opt_result * ggml_opt_result_t; + + // ====== Loss ====== + + // built-in loss types, i.e.
the built-in quantities minimized by the optimizer + // custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value + enum ggml_opt_loss_type { + GGML_OPT_LOSS_TYPE_MEAN, + GGML_OPT_LOSS_TYPE_SUM, + GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, + GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, + }; + + // ====== Dataset ====== + + GGML_API ggml_opt_dataset_t ggml_opt_dataset_init( + int64_t ne_datapoint, // number of elements per datapoint + int64_t ne_label, // number of elements per label + int64_t ndata, // total number of datapoints/labels + int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied) + GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset); + + // get underlying tensors that store the data + GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata] + GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata] + + // shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative + GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata); + + // get batch at position ibatch from dataset and copy the data to data_batch and labels_batch + GGML_API void ggml_opt_dataset_get_batch( + ggml_opt_dataset_t dataset, + struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch] + struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch] + int64_t ibatch); + + // ====== Model / Context ====== + + enum ggml_opt_build_type { + GGML_OPT_BUILD_TYPE_FORWARD, + GGML_OPT_BUILD_TYPE_GRAD, + GGML_OPT_BUILD_TYPE_OPT, + }; + + // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss + struct ggml_opt_optimizer_params { + // AdamW optimizer parameters + struct { + float alpha; // learning rate + float beta1; + float beta2; + float eps; // epsilon for numerical stability + float wd; // weight decay for AdamW, use 0.0f to disable + } adamw; + }; + + // callback to calculate optimizer parameters prior to a backward pass + // userdata can be used to pass arbitrary data + typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata); + + // returns the default optimizer params (constant) + // userdata is not used + GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata); + + // parameters for initializing a new optimization context + struct ggml_opt_params { + ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs + + struct ggml_context * ctx_compute; // created in user code, holds non-static tensors + + // the forward graph is defined by inputs and outputs + // those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts + struct ggml_tensor * inputs; + struct ggml_tensor * outputs; + + enum ggml_opt_loss_type loss_type; + enum ggml_opt_build_type build_type; + + int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done + + ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters + void * get_opt_pars_ud; // userdata for calculating optimizer parameters + }; + + // get parameters for an optimization context with defaults set where possible + // parameters for which no sensible defaults
exist are supplied as arguments to this function + GGML_API ggml_opt_params ggml_opt_default_params( + ggml_backend_sched_t backend_sched, + struct ggml_context * ctx_compute, + struct ggml_tensor * inputs, + struct ggml_tensor * outputs, + enum ggml_opt_loss_type loss_type); + + GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); + GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); + + // set gradients to zero, initialize loss, and optionally reset the optimizer + GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); + + // get underlying tensors that store data + GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor + GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor + GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against + GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss + GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs + GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels + + GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node); + + // ====== Optimization Result ====== + + GGML_API ggml_opt_result_t ggml_opt_result_init(); + GGML_API void ggml_opt_result_free(ggml_opt_result_t result); + GGML_API void ggml_opt_result_reset(ggml_opt_result_t result); + + // get data from result, uncertainties are optional and can be ignored by passing NULL + GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints + GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value + GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values + GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value + + // ====== Computation ====== + + // do forward pass, increment result if not NULL + GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); + + // do forward pass, increment result if not NULL, do backward pass + GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result); + + // ############################################################################ + // ## The high-level functions start here. They do not depend on any private ## + // ## functions or structs and can be copied to and adapted for user code. ## + // ############################################################################ + + // ====== Intended Usage ====== + // + // 1. Select the appropriate loss for your problem. + // 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them. + // Setting the shard size to 1 will be fine, it's the granularity with which data is shuffled/loaded (bigger values are faster). + // 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors. + // The first context should contain the model parameters and inputs and be allocated statically in user code. + // The second context should contain all other tensors and will be (re)allocated automatically.
+ // Due to this automated allocation the data of the second context is not defined when accessed in user code. + // Note that the second dimension of the inputs/outputs is interpreted as the number of datapoints in those tensors. + // 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead. + + // signature for a callback while evaluating opt_ctx on dataset, called after an evaluation + typedef void (*ggml_opt_epoch_callback)( + bool train, // true after training evaluation, false after validation evaluation + ggml_opt_context_t opt_ctx, + ggml_opt_dataset_t dataset, + ggml_opt_result_t result, // result associated with the dataset subsection + int64_t ibatch, // number of batches that have been evaluated so far + int64_t ibatch_max, // total number of batches in this dataset subsection + int64_t t_start_us); // time at which the evaluation on the dataset subsection was started + + // do training on front of dataset, do evaluation only on back of dataset + GGML_API void ggml_opt_epoch( + ggml_opt_context_t opt_ctx, + ggml_opt_dataset_t dataset, + ggml_opt_result_t result_train, // result to increment during training, ignored if NULL + ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL + int64_t idata_split, // data index at which to split training and evaluation + ggml_opt_epoch_callback callback_train, + ggml_opt_epoch_callback callback_eval); + + // callback that prints a progress bar on stderr + GGML_API void ggml_opt_epoch_callback_progress_bar( + bool train, + ggml_opt_context_t opt_ctx, + ggml_opt_dataset_t dataset, + ggml_opt_result_t result, + int64_t ibatch, + int64_t ibatch_max, + int64_t t_start_us); + + // fit model defined by inputs and outputs to dataset + GGML_API void ggml_opt_fit( + ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs + ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs + ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch] + ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used + ggml_opt_dataset_t dataset, // dataset with data and optionally also labels + enum ggml_opt_loss_type loss_type, // loss to minimize + ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t) + int64_t nepoch, // how many times the dataset should be iterated over + int64_t nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs + float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f) + bool silent); // whether or not info prints to stderr should be suppressed + +#ifdef __cplusplus +} +#endif diff --git a/ml/backend/ggml/ggml/include/ggml-rpc.h b/ml/backend/ggml/ggml/include/ggml-rpc.h new file mode 100644 index 000000000..ade6c3b0e --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-rpc.h @@ -0,0 +1,28 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GGML_RPC_MAX_SERVERS 16 + +// backend API +GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); +GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); + +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); + +GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t *
total); + +GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); + +GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); + +#ifdef __cplusplus +} +#endif diff --git a/ml/backend/ggml/ggml/include/ggml-sycl.h b/ml/backend/ggml/ggml/include/ggml-sycl.h new file mode 100644 index 000000000..5ce349a88 --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-sycl.h @@ -0,0 +1,49 @@ +// +// MIT license +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#define GGML_SYCL_NAME "SYCL" +#define GGML_SYCL_MAX_DEVICES 48 + +#ifdef __cplusplus +extern "C" { +#endif + +// backend API +GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device); + +GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend); + +// device buffer +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device); + +// split tensor buffer that splits matrices by rows across multiple devices +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split); + +// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void); + +GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void); +GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len); +GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device, + char *description, + size_t description_size); +GGML_BACKEND_API int ggml_backend_sycl_get_device_count(); +GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total); + +// SYCL doesn't support registering host memory, keep here for reference +// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size); +// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void); + +#ifdef __cplusplus +} +#endif diff --git a/ml/backend/ggml/ggml/include/ggml-vulkan.h b/ml/backend/ggml/ggml/include/ggml-vulkan.h new file mode 100644 index 000000000..53cdba072 --- /dev/null +++ b/ml/backend/ggml/ggml/include/ggml-vulkan.h @@ -0,0 +1,31 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GGML_VK_NAME "Vulkan" +#define GGML_VK_MAX_DEVICES 16 + +GGML_BACKEND_API void ggml_vk_instance_init(void); + +// backend API +GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num); + +GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend); +GGML_BACKEND_API int ggml_backend_vk_get_device_count(void); +GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); +GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); + +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); +// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); +
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void); +
#ifdef __cplusplus +} +#endif diff --git a/llama/ggml.h
b/ml/backend/ggml/ggml/include/ggml.h similarity index 98% rename from llama/ggml.h rename to ml/backend/ggml/ggml/include/ggml.h index 621362c83..1bc50fcae 100644 --- a/llama/ggml.h +++ b/ml/backend/ggml/ggml/include/ggml.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once // diff --git a/ml/backend/ggml/ggml/src/CMakeLists.txt b/ml/backend/ggml/ggml/src/CMakeLists.txt new file mode 100644 index 000000000..72b488dd8 --- /dev/null +++ b/ml/backend/ggml/ggml/src/CMakeLists.txt @@ -0,0 +1,340 @@ +include(CheckCXXCompilerFlag) + +add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) + +# enable libstdc++ assertions for debug builds +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) +endif() + +if (NOT MSVC) + if (GGML_SANITIZE_THREAD) + add_compile_options(-fsanitize=thread) + link_libraries (-fsanitize=thread) + endif() + + if (GGML_SANITIZE_ADDRESS) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries (-fsanitize=address) + endif() + + if (GGML_SANITIZE_UNDEFINED) + add_compile_options(-fsanitize=undefined) + link_libraries (-fsanitize=undefined) + endif() +endif() + +function(ggml_get_flags CCID CCVER) + set(C_FLAGS "") + set(CXX_FLAGS "") + + if (CCID MATCHES "Clang") + set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) + set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) + + if ( + (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR + (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) + ) + list(APPEND C_FLAGS -Wdouble-promotion) + endif() + elseif (CCID STREQUAL "GNU") + set(C_FLAGS -Wdouble-promotion) + set(CXX_FLAGS -Wno-array-bounds) + + if (CCVER VERSION_GREATER_EQUAL 8.1.0) + list(APPEND CXX_FLAGS -Wextra-semi) + endif() + endif() + + set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) + set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) +endfunction() + +if (GGML_FATAL_WARNINGS) + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND C_FLAGS -Werror) + list(APPEND CXX_FLAGS -Werror) + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/WX) + endif() +endif() + +if (GGML_ALL_WARNINGS) + if (NOT MSVC) + list(APPEND WARNING_FLAGS -Wall -Wextra
-Wpedantic -Wcast-qual -Wno-unused-function) + list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes + -Werror=implicit-int -Werror=implicit-function-declaration) + list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) + + list(APPEND C_FLAGS ${WARNING_FLAGS}) + list(APPEND CXX_FLAGS ${WARNING_FLAGS}) + + ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) + + add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" + "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") + else() + # todo : msvc + set(C_FLAGS "") + set(CXX_FLAGS "") + endif() +endif() + +if (GGML_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(WARNING "IPO is not supported: ${output}") + endif() +endif() + +if (GGML_CCACHE) + find_program(GGML_CCACHE_FOUND ccache) + + if (GGML_CCACHE_FOUND) + # TODO: should not be set globally + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set(ENV{CCACHE_SLOPPINESS} time_macros) + message(STATUS "ccache found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") + else() + message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF") + endif () +endif() + +# this version of Apple ld64 is buggy +execute_process( + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v + ERROR_VARIABLE output + OUTPUT_QUIET +) + +if (output MATCHES "dyld-1015\.7") + add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) +endif() + +# architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (MSVC) + string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) + message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") +else () + set(CMAKE_GENERATOR_PLATFORM_LWR "") +endif () + +if (NOT MSVC) + if (GGML_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (GGML_GPROF) + add_compile_options(-pg) + endif() +endif() + +if (MINGW) + # Target Windows 8 for PrefetchVirtualMemory + add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER}) +endif() + +# +# POSIX conformance +# + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + add_compile_definitions(_XOPEN_SOURCE=700) +else() + add_compile_definitions(_XOPEN_SOURCE=600) +endif() + +# Data types, macros and functions related to controlling CPU affinity and +# some memory allocation are available on Linux through GNU extensions in libc +if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") + add_compile_definitions(_GNU_SOURCE) +endif() + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +if ( + CMAKE_SYSTEM_NAME MATCHES "Darwin" OR + CMAKE_SYSTEM_NAME
MATCHES "iOS" OR + CMAKE_SYSTEM_NAME MATCHES "tvOS" OR + CMAKE_SYSTEM_NAME MATCHES "DragonFly" +) + add_compile_definitions(_DARWIN_C_SOURCE) +endif() + +# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + add_compile_definitions(__BSD_VISIBLE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") + add_compile_definitions(_NETBSD_SOURCE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + add_compile_definitions(_BSD_SOURCE) +endif() + +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) +endif() + +# ggml + +if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS) + message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS") +endif() + +add_library(ggml-base + ../include/ggml.h + ../include/ggml-alloc.h + ../include/ggml-backend.h + ../include/ggml-cpp.h + ../include/ggml-opt.h + ggml.c + ggml-alloc.c + ggml-backend.cpp + ggml-opt.cpp + ggml-threading.cpp + ggml-threading.h + ggml-quants.c + ggml-quants.h) + +target_include_directories(ggml-base PRIVATE .) + +add_library(ggml + ggml-backend-reg.cpp) + +target_link_libraries(ggml PUBLIC ggml-base) + +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + target_link_libraries(ggml PRIVATE dl) +endif() + +function(ggml_add_backend_library backend) + if (GGML_BACKEND_DL) + add_library(${backend} MODULE ${ARGN}) + # write the shared library to the output directory + set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) + target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL) + add_dependencies(ggml ${backend}) + else() + add_library(${backend} ${ARGN}) + target_link_libraries(ggml PUBLIC ${backend}) + install(TARGETS ${backend} LIBRARY) + endif() + + target_link_libraries(${backend} PRIVATE ggml-base) + target_include_directories(${backend} PRIVATE ..) 
) + + if (${BUILD_SHARED_LIBS}) + target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD) + target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED) + endif() +endfunction() + +function(ggml_add_backend backend) + string(TOUPPER "GGML_${backend}" backend_id) + if (${backend_id}) + string(TOLOWER "ggml-${backend}" backend_target) + add_subdirectory(${backend_target}) + message(STATUS "Including ${backend} backend") + if (NOT GGML_BACKEND_DL) + string(TOUPPER "GGML_USE_${backend}" backend_use) + target_compile_definitions(ggml PUBLIC ${backend_use}) + endif() + endif() +endfunction() + +function(ggml_add_cpu_backend_variant tag_name) + set(GGML_CPU_TAG_NAME ${tag_name}) + # other: OPENMP LLAMAFILE CPU_HBM + foreach (feat NATIVE + AVX AVX2 AVX_VNNI FMA F16C + AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 + AMX_TILE AMX_INT8 AMX_BF16) + set(GGML_${feat} OFF) + endforeach() + + foreach (feat ${ARGN}) + set(GGML_${feat} ON) + endforeach() + + ggml_add_cpu_backend_variant_impl(${tag_name}) + add_dependencies(ggml-cpu ggml-cpu-${tag_name}) +endfunction() + +ggml_add_backend(CPU) + +if (GGML_CPU_ALL_VARIANTS) + if (NOT GGML_BACKEND_DL) + message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") + endif() + add_custom_target(ggml-cpu) + ggml_add_cpu_backend_variant(sandybridge AVX) + ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA) + ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512) + ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI) + ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI) + if (NOT MSVC) + # MSVC doesn't support AMX + ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) + endif() +else () + ggml_add_cpu_backend_variant_impl("") +endif() + +ggml_add_backend(BLAS) +ggml_add_backend(CANN) +ggml_add_backend(CUDA) +ggml_add_backend(HIP) +ggml_add_backend(Kompute) +ggml_add_backend(METAL) +ggml_add_backend(MUSA) +ggml_add_backend(RPC) +ggml_add_backend(SYCL) +ggml_add_backend(Vulkan) +ggml_add_backend(OpenCL) + +foreach (target ggml-base ggml) + target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>) + target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump +endforeach() + +target_link_libraries(ggml-base PRIVATE Threads::Threads) + +find_library(MATH_LIBRARY m) +if (MATH_LIBRARY) + if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT}) + target_link_libraries(ggml-base PRIVATE m) + endif() +endif() + +if (CMAKE_SYSTEM_NAME MATCHES "Android") + target_link_libraries(ggml-base PRIVATE dl) +endif() + +if (BUILD_SHARED_LIBS) + foreach (target ggml-base ggml) + set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${target} PRIVATE GGML_BUILD) + target_compile_definitions(${target} PUBLIC GGML_SHARED) + endforeach() +endif() diff --git a/llama/ggml-alloc.c b/ml/backend/ggml/ggml/src/ggml-alloc.c similarity index 96% rename from llama/ggml-alloc.c rename to ml/backend/ggml/ggml/src/ggml-alloc.c index 6ea83a901..8dc8226ac 100644 --- a/llama/ggml-alloc.c +++ b/ml/backend/ggml/ggml/src/ggml-alloc.c @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, 
including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "ggml-alloc.h" #include "ggml-backend-impl.h" #include "ggml.h" diff --git a/llama/ggml-backend-impl.h b/ml/backend/ggml/ggml/src/ggml-backend-impl.h similarity index 90% rename from llama/ggml-backend-impl.h rename to ml/backend/ggml/ggml/src/ggml-backend-impl.h index 37b592077..36d72e95f 100644 --- a/llama/ggml-backend-impl.h +++ b/ml/backend/ggml/ggml/src/ggml-backend-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * SOFTWARE. - */ - #pragma once // ggml-backend internal header diff --git a/llama/ggml-backend-reg.cpp b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp similarity index 90% rename from llama/ggml-backend-reg.cpp rename to ml/backend/ggml/ggml/src/ggml-backend-reg.cpp index 2ebc34398..ac5cda072 100644 --- a/llama/ggml-backend-reg.cpp +++ b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -176,7 +150,7 @@ struct ggml_backend_reg_entry { struct ggml_backend_registry { std::vector<ggml_backend_reg_entry> backends; - std::vector<ggml_backend_dev_t> devices; + std::vector<std::pair<ggml_backend_dev_t, int>> devices; ggml_backend_registry() { #ifdef GGML_USE_CUDA @@ -221,7 +195,7 @@ struct ggml_backend_registry { } } - void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) { + void register_backend(ggml_backend_reg_t reg, int score = -1, dl_handle_ptr handle = nullptr) { if (!reg) { return; } @@ -232,15 +206,15 @@ struct ggml_backend_registry { #endif backends.push_back({ reg, std::move(handle) }); for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) { - register_device(ggml_backend_reg_dev_get(reg, i)); + register_device(ggml_backend_reg_dev_get(reg, i), score); } } - void register_device(ggml_backend_dev_t device) { + void register_device(ggml_backend_dev_t device, int score = -1) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device)); #endif - devices.push_back(device); + devices.push_back({device, score}); } ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) { @@ -283,7 +257,7 @@ struct ggml_backend_registry { GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str()); - register_backend(reg, std::move(handle)); + register_backend(reg, score_fn ? 
score_fn() : -1, std::move(handle)); return reg; } @@ -306,7 +280,7 @@ struct ggml_backend_registry { // remove devices devices.erase( std::remove_if(devices.begin(), devices.end(), - [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }), + [reg](std::pair<ggml_backend_dev_t, int> dev) { return ggml_backend_dev_backend_reg(dev.first) == reg; }), devices.end()); // remove backend @@ -364,7 +338,12 @@ size_t ggml_backend_dev_count() { ggml_backend_dev_t ggml_backend_dev_get(size_t index) { GGML_ASSERT(index < ggml_backend_dev_count()); - return get_reg().devices[index]; + auto devices = get_reg().devices; + if (!std::is_heap(devices.begin(), devices.end())) { + std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; }); + } + + return devices[index].first; } ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) { diff --git a/llama/ggml-backend.cpp b/ml/backend/ggml/ggml/src/ggml-backend.cpp similarity index 98% rename from llama/ggml-backend.cpp rename to ml/backend/ggml/ggml/src/ggml-backend.cpp index 3e11d73fc..a12172dcd 100644 --- a/llama/ggml-backend.cpp +++ b/ml/backend/ggml/ggml/src/ggml-backend.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
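The ggml-backend-reg.cpp hunk above is the core of this change: every registered device now carries an integer score, and `ggml_backend_dev_get()` heap-orders the list so that index 0 is the highest-scoring device. A minimal standalone sketch of that ordering, assuming string/score pairs as stand-ins for the real opaque `ggml_backend_dev_t` handles:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
    // stand-in for the registry's std::vector<std::pair<ggml_backend_dev_t, int>>
    std::vector<std::pair<std::string, int>> devices = {
        {"cpu-sandybridge", 1 << 4},              // scores are bit-packed feature masks
        {"cpu-haswell",     (1 << 4) | (1 << 5)},
        {"cpu-baseline",    -1},                  // -1 = registered without a score function
    };

    // same comparator as the hunk above: arrange as a max-heap by score,
    // so the best-scoring device ends up at index 0
    std::make_heap(devices.begin(), devices.end(),
                   [](const auto & a, const auto & b) { return a.second < b.second; });

    std::printf("preferred: %s\n", devices[0].first.c_str()); // prints "cpu-haswell"
    return 0;
}
```

Note that `std::make_heap` only guarantees the maximum at index 0; the remaining entries stay in heap order rather than fully sorted order.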
- */ - // Note: porting this file to C++ is a work in progress #ifdef _WIN32 @@ -132,12 +106,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) { if (buffer->iface.free_buffer != NULL) { buffer->iface.free_buffer(buffer); } - -// TODO: this needs to be freed in cuda and hip backends because -// the cuda backend implementation compiled with msvc -#if !defined(GGML_USE_CUDA) && !defined(GGML_USE_HIP) - delete buffer; -#endif } size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) { diff --git a/ml/backend/ggml/ggml/src/ggml-blas/CMakeLists.txt b/ml/backend/ggml/ggml/src/ggml-blas/CMakeLists.txt new file mode 100644 index 000000000..0bf3c05d9 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-blas/CMakeLists.txt @@ -0,0 +1,87 @@ +if (GGML_STATIC) + set(BLA_STATIC ON) +endif() +#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) +# set(BLA_SIZEOF_INTEGER 8) +#endif() + +set(BLA_VENDOR ${GGML_BLAS_VENDOR}) +find_package(BLAS) + +if (BLAS_FOUND) + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + + ggml_add_backend_library(ggml-blas + ggml-blas.cpp + ) + + if (${GGML_BLAS_VENDOR} MATCHES "Apple") + add_compile_definitions(ACCELERATE_NEW_LAPACK) + add_compile_definitions(ACCELERATE_LAPACK_ILP64) + add_compile_definitions(GGML_BLAS_USE_ACCELERATE) + elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "") + # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. + # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 + find_package(PkgConfig REQUIRED) + if (${GGML_BLAS_VENDOR} MATCHES "Generic") + pkg_check_modules(DepBLAS blas) + elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS") + # As of openblas v0.3.22, the 64-bit is named openblas64.pc + pkg_check_modules(DepBLAS openblas64) + if (NOT DepBLAS_FOUND) + pkg_check_modules(DepBLAS openblas) + endif() + elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME") + add_compile_definitions(GGML_BLAS_USE_BLIS) + pkg_check_modules(DepBLAS blis) + elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS") + pkg_check_modules(DepBLAS blas-atlas) + elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS") + pkg_check_modules(DepBLAS flexiblas_api) + elseif (${GGML_BLAS_VENDOR} MATCHES "Intel") + add_compile_definitions(GGML_BLAS_USE_MKL) + # all Intel* libraries share the same include path + pkg_check_modules(DepBLAS mkl-sdl) + elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC") + # this doesn't provide pkg-config + # suggest to assign BLAS_INCLUDE_DIRS on your own + if ("${NVHPC_VERSION}" STREQUAL "") + message(WARNING "Better to set NVHPC_VERSION") + else() + set(DepBLAS_FOUND ON) + set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") + endif() + endif() + if (DepBLAS_FOUND) + set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" + " detected by pkgconfig, trying to find cblas.h from possible paths...") + find_path(BLAS_INCLUDE_DIRS + NAMES cblas.h + HINTS + /usr/include + /usr/local/include + /usr/include/openblas + /opt/homebrew/opt/openblas/include + /usr/local/opt/openblas/include + /usr/include/x86_64-linux-gnu/openblas/include + ) + endif() + endif() + + message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") + + target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS}) + + if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel")) + add_compile_definitions(GGML_BLAS_USE_MKL) + endif() + + target_link_libraries (ggml-blas PRIVATE 
${BLAS_LIBRARIES}) + target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS}) +else() + message(ERROR "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct GGML_BLAS_VENDOR") +endif() diff --git a/ml/backend/ggml/ggml/src/ggml-blas/blas.go b/ml/backend/ggml/ggml/src/ggml-blas/blas.go new file mode 100644 index 000000000..b29c9f140 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-blas/blas.go @@ -0,0 +1,10 @@ +//go:build darwin && arm64 + +package blas + +// #cgo CXXFLAGS: -std=c++11 +// #cgo CPPFLAGS: -DGGML_USE_BLAS +// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../../include +// #cgo darwin,arm64 CPPFLAGS: -DGGML_BLAS_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 +// #cgo darwin,arm64 LDFLAGS: -framework Accelerate +import "C" diff --git a/llama/ggml-blas.cpp b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp similarity index 92% rename from llama/ggml-blas.cpp rename to ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp index 44acf0bd4..ec158dfac 100644 --- a/llama/ggml-blas.cpp +++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp @@ -1,31 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
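With BLAS split out into its own backend library (the CMakeLists.txt and blas.go hunks here), it is registered like any other device instead of being compiled in behind an `#ifdef`. A short sketch of how a client might list what ended up in the registry, using only public functions that appear elsewhere in this patch (`ggml_backend_dev_count`, `ggml_backend_dev_get`, `ggml_backend_dev_name`, `ggml_backend_dev_description`); this is an illustrative usage sketch, not code from the patch:

```cpp
#include "ggml-backend.h"

#include <cstdio>

int main() {
    // devices come back in score order, so index 0 is the preferred one
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        std::printf("%zu: %s (%s)\n", i,
                    ggml_backend_dev_name(dev),
                    ggml_backend_dev_description(dev));
    }
    return 0;
}
```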
- */ - -#ifdef GGML_USE_BLAS - #include "ggml-impl.h" #include "ggml-blas.h" #include "ggml-backend-impl.h" @@ -543,5 +515,3 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) { } GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) - -#endif // GGML_USE_BLAS \ No newline at end of file diff --git a/llama/ggml-common.h b/ml/backend/ggml/ggml/src/ggml-common.h similarity index 99% rename from llama/ggml-common.h rename to ml/backend/ggml/ggml/src/ggml-common.h index e227c13fb..f13fd4dea 100644 --- a/llama/ggml-common.h +++ b/ml/backend/ggml/ggml/src/ggml-common.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef GGML_COMMON_DECL #if defined(GGML_COMMON_DECL_C) diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/CMakeLists.txt b/ml/backend/ggml/ggml/src/ggml-cpu/CMakeLists.txt new file mode 100644 index 000000000..6b3641c42 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/CMakeLists.txt @@ -0,0 +1,346 @@ +function(ggml_add_cpu_backend_variant_impl tag_name) + if (tag_name) + set(GGML_CPU_NAME ggml-cpu-${tag_name}) + else() + set(GGML_CPU_NAME ggml-cpu) + endif() + + ggml_add_backend_library(${GGML_CPU_NAME}) + + list (APPEND GGML_CPU_SOURCES + ggml-cpu/ggml-cpu.c + ggml-cpu/ggml-cpu.cpp + ggml-cpu/ggml-cpu-aarch64.cpp + ggml-cpu/ggml-cpu-aarch64.h + ggml-cpu/ggml-cpu-hbm.cpp + ggml-cpu/ggml-cpu-hbm.h + ggml-cpu/ggml-cpu-quants.c + ggml-cpu/ggml-cpu-quants.h + ggml-cpu/ggml-cpu-traits.cpp + ggml-cpu/ggml-cpu-traits.h + ggml-cpu/amx/amx.cpp + ggml-cpu/amx/amx.h + ggml-cpu/amx/mmq.cpp + ggml-cpu/amx/mmq.h + ggml-cpu/ggml-cpu-impl.h + ) + + target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17) + target_include_directories(${GGML_CPU_NAME} PRIVATE . 
ggml-cpu) + + if (APPLE AND GGML_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") + + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64) + + target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK}) + else() + message(WARNING "Accelerate framework not found") + endif() + endif() + + if (GGML_OPENMP) + find_package(OpenMP) + if (OpenMP_FOUND) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP) + + target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + else() + message(WARNING "OpenMP not found") + endif() + endif() + + if (GGML_LLAMAFILE) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE) + + list(APPEND GGML_CPU_SOURCES + ggml-cpu/llamafile/sgemm.cpp + ggml-cpu/llamafile/sgemm.h) + endif() + + if (GGML_CPU_HBM) + find_library(memkind memkind REQUIRED) + + message(STATUS "Using memkind for CPU HBM") + + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM) + + target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind) + endif() + + if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR + CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) + + message(STATUS "ARM detected") + + if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang") + message(FATAL_ERROR "MSVC is not supported for ARM, use clang") + else() + check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E) + if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") + list(APPEND ARCH_FLAGS -mfp16-format=ieee) + endif() + + if (GGML_NATIVE) + # -mcpu=native does not always enable all the features in some compilers, + # so we check for them manually and enable them if available + + execute_process( + COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v - + INPUT_FILE "/dev/null" + OUTPUT_QUIET + ERROR_VARIABLE ARM_MCPU + RESULT_VARIABLE ARM_MCPU_RESULT + ) + if (NOT ARM_MCPU_RESULT) + string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}") + endif() + if ("${ARM_MCPU_FLAG}" STREQUAL "") + set(ARM_MCPU_FLAG -mcpu=native) + message(STATUS "ARM -mcpu not found, -mcpu=native will be used") + endif() + + include(CheckCXXSourceRuns) + + function(check_arm_feature tag code) + set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}") + check_cxx_source_runs( + "${code}" + GGML_MACHINE_SUPPORTS_${tag} + ) + if (GGML_MACHINE_SUPPORTS_${tag}) + set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE) + else() + set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE) + endif() + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) + endfunction() + + check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }") + check_arm_feature(i8mm "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }") + check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }") + + list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}") + else() + if (GGML_CPU_ARM_ARCH) + list(APPEND 
ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH}) + endif() + endif() + + # show enabled features + if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set(FEAT_INPUT_FILE "NUL") + else() + set(FEAT_INPUT_FILE "/dev/null") + endif() + + execute_process( + COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E - + INPUT_FILE ${FEAT_INPUT_FILE} + OUTPUT_VARIABLE ARM_FEATURE + RESULT_VARIABLE ARM_FEATURE_RESULT + ) + if (ARM_FEATURE_RESULT) + message(WARNING "Failed to get ARM features") + else() + foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC) + string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos) + if (NOT ${feature_pos} EQUAL -1) + message(STATUS "ARM feature ${feature} enabled") + endif() + endforeach() + endif() + endif() + elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$")) + + message(STATUS "x86 detected") + + if (MSVC) + # instruction set detection for MSVC only + if (GGML_NATIVE) + include(ggml-cpu/cmake/FindSIMD.cmake) + endif () + if (GGML_AVX512) + list(APPEND ARCH_FLAGS /arch:AVX512) + # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__ + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + list(APPEND ARCH_DEFINITIONS GGML_AVX512) + if (GGML_AVX512_VBMI) + list(APPEND ARCH_DEFINITIONS __AVX512VBMI__) + if (CMAKE_C_COMPILER_ID STREQUAL "Clang") + list(APPEND ARCH_FLAGS -mavx512vbmi) + endif() + endif() + if (GGML_AVX512_VNNI) + list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI) + if (CMAKE_C_COMPILER_ID STREQUAL "Clang") + list(APPEND ARCH_FLAGS -mavx512vnni) + endif() + endif() + if (GGML_AVX512_BF16) + list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16) + if (CMAKE_C_COMPILER_ID STREQUAL "Clang") + list(APPEND ARCH_FLAGS -mavx512bf16) + endif() + endif() + if (GGML_AMX_TILE) + list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE) + endif() + if (GGML_AMX_INT8) + list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8) + endif() + if (GGML_AMX_BF16) + list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16) + endif() + elseif (GGML_AVX2) + list(APPEND ARCH_FLAGS /arch:AVX2) + list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C) + elseif (GGML_AVX) + list(APPEND ARCH_FLAGS /arch:AVX) + list(APPEND ARCH_DEFINITIONS GGML_AVX) + else () + list(APPEND ARCH_FLAGS /arch:SSE4.2) + list(APPEND ARCH_DEFINITIONS GGML_SSE42) + endif() + if (GGML_AVX_VNNI) + list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI) + endif() + else () + if (GGML_NATIVE) + list(APPEND ARCH_FLAGS -march=native) + else () + list(APPEND ARCH_FLAGS -msse4.2) + list(APPEND ARCH_DEFINITIONS GGML_SSE42) + if (GGML_F16C) + list(APPEND ARCH_FLAGS -mf16c) + list(APPEND ARCH_DEFINITIONS GGML_F16C) + endif() + if (GGML_FMA) + list(APPEND ARCH_FLAGS -mfma) + list(APPEND ARCH_DEFINITIONS GGML_FMA) + endif() + if (GGML_AVX) + list(APPEND ARCH_FLAGS -mavx) + list(APPEND ARCH_DEFINITIONS GGML_AVX) + endif() + if (GGML_AVX2) + list(APPEND ARCH_FLAGS -mavx2) + list(APPEND ARCH_DEFINITIONS GGML_AVX2) + endif() + if (GGML_AVX_VNNI) + list(APPEND ARCH_FLAGS -mavxvnni) + list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI) + endif() + if (GGML_AVX512) + list(APPEND ARCH_FLAGS -mavx512f) + list(APPEND ARCH_FLAGS -mavx512cd) + 
list(APPEND ARCH_FLAGS -mavx512vl) + list(APPEND ARCH_FLAGS -mavx512dq) + list(APPEND ARCH_FLAGS -mavx512bw) + list(APPEND ARCH_DEFINITIONS GGML_AVX512) + endif() + if (GGML_AVX512_VBMI) + list(APPEND ARCH_FLAGS -mavx512vbmi) + list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI) + endif() + if (GGML_AVX512_VNNI) + list(APPEND ARCH_FLAGS -mavx512vnni) + list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI) + endif() + if (GGML_AVX512_BF16) + list(APPEND ARCH_FLAGS -mavx512bf16) + list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16) + endif() + if (GGML_AMX_TILE) + list(APPEND ARCH_FLAGS -mamx-tile) + list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE) + endif() + if (GGML_AMX_INT8) + list(APPEND ARCH_FLAGS -mamx-int8) + list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8) + endif() + if (GGML_AMX_BF16) + list(APPEND ARCH_FLAGS -mamx-bf16) + list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16) + endif() + endif() + endif() + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") + message(STATUS "PowerPC detected") + execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M) + string(FIND "${POWER10_M}" "POWER10" substring_index) + if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "") + set(substring_index -1) + endif() + + if (${substring_index} GREATER_EQUAL 0) + list(APPEND ARCH_FLAGS -mcpu=power10) + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") + list(APPEND ARCH_FLAGS -mcpu=powerpc64le) + else() + list(APPEND ARCH_FLAGS -mcpu=native -mtune=native) + # TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) + endif() + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") + message(STATUS "loongarch64 detected") + + list(APPEND ARCH_FLAGS -march=loongarch64) + if (GGML_LASX) + list(APPEND ARCH_FLAGS -mlasx) + endif() + if (GGML_LSX) + list(APPEND ARCH_FLAGS -mlsx) + endif() + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64") + message(STATUS "RISC-V detected") + if (GGML_RVV) + list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d) + endif() + else() + message(STATUS "Unknown architecture") + endif() + + if (GGML_CPU_AARCH64) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64) + endif() + + message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}") + target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES}) + target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS}) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS}) + + if (GGML_BACKEND_DL) + if (GGML_NATIVE) + # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE + message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS") + endif() + + # The feature detection code is compiled as a separate target so that + # it can be built without the architecture flags + # Since multiple variants of the CPU backend may be included in the same + # build, using set_source_files_properties() to set the arch flags is not possible + set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats) + add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp) + target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. 
../include) + target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS}) + target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED) + set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME}) + endif() + + if (EMSCRIPTEN) + set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128") + endif() +endfunction() diff --git a/llama/amx.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.cpp similarity index 86% rename from llama/amx.cpp rename to ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.cpp index a2c7e8e5a..5ec5263ce 100644 --- a/llama/amx.cpp +++ b/ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
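The amx/common.h header added just below parallelizes its matrix kernels with a simple contiguous split: `balance211(n, nth, ith, ...)` gives thread `ith` of `nth` a chunk of at most ceil(n/nth) items (the `#else` branch, labeled the "pytorch aten partition pattern", is the one compiled in). A self-contained sketch of that scheme, with an extra clamp on the start index added for safety:

```cpp
#include <algorithm>
#include <cstdio>

// ceil-division, as div_up() in the header below
static int div_up(int x, int y) { return (x + y - 1) / y; }

// contiguous split: thread `ith` of `nth` gets the half-open range [begin, end)
static void balance211(int n, int nth, int ith, int & begin, int & end) {
    int chunk = div_up(n, nth);
    begin = std::min(ith * chunk, n); // clamped here; the original leaves begin unclamped
    end   = std::min(begin + chunk, n);
}

int main() {
    // 10 items over 4 threads -> chunks of 3, 3, 3, 1
    for (int ith = 0; ith < 4; ith++) {
        int b, e;
        balance211(10, 4, ith, b, e);
        std::printf("thread %d: [%d, %d)\n", ith, b, e);
    }
    return 0;
}
```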
- * SOFTWARE. - */ - #include "amx.h" #include "common.h" #include "mmq.h" diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.h b/ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.h new file mode 100644 index 000000000..5b65d76bd --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/amx/amx.h @@ -0,0 +1,8 @@ +#include "ggml-backend.h" +#include "ggml-cpu-impl.h" + +// GGML internal header + +#if defined(__AMX_INT8__) && defined(__AVX512VNNI__) +ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); +#endif diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/amx/common.h b/ml/backend/ggml/ggml/src/ggml-cpu/amx/common.h new file mode 100644 index 000000000..f392e8985 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/amx/common.h @@ -0,0 +1,91 @@ +#pragma once + +#include "ggml.h" +#include "ggml-cpu-impl.h" + +#include <algorithm> +#include <memory> +#include <type_traits> + +#if defined(GGML_USE_OPENMP) +#include <omp.h> +#endif + +#define TILE_M 16 +#define TILE_N 16 +#define TILE_K 32 +#define VNNI_BLK 4 + +#define AMX_BLK_SIZE 32 + +#define TMM0 0 +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 +#define TMM4 4 +#define TMM5 5 +#define TMM6 6 +#define TMM7 7 + +// parallel routines +template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0> +inline T div_up(T x, T y) { return (x + y - 1) / y; } + +template <typename T> +inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) { +#if 0 + // onednn partition pattern + T& n_my = n_end; + if (nth <= 1 || n == 0) { + n_start = 0; + n_my = n; + } else { + T n1 = div_up(n, nth); + T n2 = n1 - 1; + T T1 = n - n2 * nth; + n_my = ith < T1 ? n1 : n2; + n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2; + } + n_end += n_start; +#else + // pytorch aten partition pattern + T n_my = div_up(n, nth); + n_start = ith * n_my; + n_end = std::min(n_start + n_my, n); +#endif +} + +template <typename func_t> +inline void parallel_for(int n, const func_t& f) { +#if defined(GGML_USE_OPENMP) +#pragma omp parallel +{ + int nth = omp_get_num_threads(); + int ith = omp_get_thread_num(); + int tbegin, tend; + balance211(n, nth, ith, tbegin, tend); + f(tbegin, tend); +} +#else + f(0, n); +#endif +} + +template <typename func_t> +inline void parallel_for_ggml(const ggml_compute_params * params, int n, const func_t & f) { + int tbegin, tend; + balance211(n, params->nth, params->ith, tbegin, tend); + f(tbegin, tend); +} + +// quantized types that have AMX support +inline bool qtype_has_amx_kernels(const enum ggml_type type) { + // TODO: fix padding for vnni format + return (type == GGML_TYPE_Q4_0) || + (type == GGML_TYPE_Q4_1) || + (type == GGML_TYPE_Q8_0) || + (type == GGML_TYPE_Q4_K) || + (type == GGML_TYPE_Q5_K) || + (type == GGML_TYPE_Q6_K) || + (type == GGML_TYPE_IQ4_XS); +} diff --git a/llama/mmq.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.cpp similarity index 98% rename from llama/mmq.cpp rename to ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.cpp index bb20e999c..0ea91596b 100644 --- a/llama/mmq.cpp +++ b/ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following 
conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wpedantic" diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.h b/ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.h new file mode 100644 index 000000000..baf768477 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/amx/mmq.h @@ -0,0 +1,10 @@ +#pragma once +#include "common.h" + +size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst); + +size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor); + +void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + +void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst); diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/cpu-feats-x86.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/cpu-feats-x86.cpp new file mode 100644 index 000000000..e8133d411 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/cpu-feats-x86.cpp @@ -0,0 +1,323 @@ +#include "ggml-backend-impl.h" + +#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include <cstring> +#include <vector> +#include <bitset> +#include <array> +#include <string> + +// ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf +struct cpuid_x86 { + bool SSE3(void) { return f_1_ecx[0]; } + bool PCLMULQDQ(void) { return f_1_ecx[1]; } + bool MONITOR(void) { return f_1_ecx[3]; } + bool SSSE3(void) { return f_1_ecx[9]; } + bool FMA(void) { return f_1_ecx[12]; } + bool CMPXCHG16B(void) { return f_1_ecx[13]; } + bool SSE41(void) { return f_1_ecx[19]; } + bool SSE42(void) { return f_1_ecx[20]; } + bool MOVBE(void) { return f_1_ecx[22]; } + bool POPCNT(void) { return f_1_ecx[23]; } + bool AES(void) { return f_1_ecx[25]; } + bool XSAVE(void) { return f_1_ecx[26]; } + bool OSXSAVE(void) { return f_1_ecx[27]; } + bool AVX(void) { return f_1_ecx[28]; } + bool F16C(void) { return f_1_ecx[29]; } + bool RDRAND(void) { return f_1_ecx[30]; } + + bool MSR(void) { return f_1_edx[5]; } + bool CX8(void) { return f_1_edx[8]; } + bool SEP(void) { return f_1_edx[11]; } + bool CMOV(void) { return f_1_edx[15]; } + bool CLFSH(void) { return f_1_edx[19]; } + bool MMX(void) { return f_1_edx[23]; } + bool FXSR(void) { return f_1_edx[24]; } + bool SSE(void) { return f_1_edx[25]; } + bool SSE2(void) { return f_1_edx[26]; } + + bool FSGSBASE(void) { return f_7_ebx[0]; } + bool BMI1(void) { return f_7_ebx[3]; } + bool HLE(void) { return is_intel && f_7_ebx[4]; } + bool AVX2(void) { return f_7_ebx[5]; } + bool BMI2(void) { return f_7_ebx[8]; } + bool ERMS(void) { return f_7_ebx[9]; } + bool INVPCID(void) { return f_7_ebx[10]; } + bool RTM(void) { return is_intel && f_7_ebx[11]; } + bool AVX512F(void) { return f_7_ebx[16]; } + bool AVX512DQ(void) { return f_7_ebx[17]; } + bool RDSEED(void) { return f_7_ebx[18]; } + bool ADX(void) { return f_7_ebx[19]; } + bool AVX512PF(void) { return f_7_ebx[26]; } + bool AVX512ER(void) { return f_7_ebx[27]; } + bool AVX512CD(void) { return f_7_ebx[28]; } + bool AVX512BW(void) { return f_7_ebx[30]; } + bool AVX512VL(void) { return f_7_ebx[31]; } + + bool SHA(void) { return f_7_ebx[29]; } + + bool PREFETCHWT1(void) { return f_7_ecx[0]; } + + bool LAHF(void) { return f_81_ecx[0]; } + bool LZCNT(void) { return is_intel && f_81_ecx[5]; } + bool ABM(void) { return is_amd && f_81_ecx[5]; } + bool SSE4a(void) { return is_amd && f_81_ecx[6]; } + bool XOP(void) { return is_amd && f_81_ecx[11]; } + bool TBM(void) { return is_amd && f_81_ecx[21]; } + + bool SYSCALL(void) { return is_intel && f_81_edx[11]; } + bool MMXEXT(void) { return is_amd && f_81_edx[22]; } + bool RDTSCP(void) { return is_intel && f_81_edx[27]; } + bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; } + bool _3DNOW(void) { return is_amd && f_81_edx[31]; } + + bool AVX512_VBMI(void) { return f_7_ecx[1]; } + bool AVX512_VNNI(void) { return f_7_ecx[11]; } + bool AVX512_FP16(void) { return f_7_edx[23]; } + bool AVX512_BF16(void) { return f_7_1_eax[5]; } + bool AVX_VNNI(void) { return f_7_1_eax[4]; } + + bool AMX_TILE(void) { return f_7_edx[24]; } + bool AMX_INT8(void) { return f_7_edx[25]; } + bool AMX_FP16(void) { return f_7_1_eax[21]; } + bool AMX_BF16(void) { return f_7_edx[22]; } + +#ifdef _MSC_VER + static void cpuid(int cpu_info[4], int eax) { + __cpuid(cpu_info, eax); + } + static void cpuidex(int cpu_info[4], int eax, int ecx) { + __cpuidex(cpu_info, eax, ecx); + } +#else + static void cpuid(int cpu_info[4], int eax) { + __asm__ __volatile__( + "cpuid" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(eax), "c"(0)); + } + static void cpuidex(int cpu_info[4], int eax, int ecx) { + __asm__ __volatile__( + "cpuid" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(eax), "c"(ecx)); + } +#endif + + cpuid_x86() { + std::array<int, 4> cpui; + std::vector<std::array<int, 4>> data; + + // calling __cpuid with 0x0 as the function_id argument + // gets the number of the highest valid function ID. + cpuid(cpui.data(), 0); + int n_ids = cpui[0]; + + for (int i = 0; i <= n_ids; ++i) { + cpuidex(cpui.data(), i, 0); + data.push_back(cpui); + } + + // capture vendor string + char vendor[0x20] = {}; + *reinterpret_cast<int *>(vendor) = data[0][1]; + *reinterpret_cast<int *>(vendor + 4) = data[0][3]; + *reinterpret_cast<int *>(vendor + 8) = data[0][2]; + this->vendor = vendor; + if (this->vendor == "GenuineIntel") { + is_intel = true; + } else if (this->vendor == "AuthenticAMD") { + is_amd = true; + } + + // load bitset with flags for function 0x00000001 + if (n_ids >= 1) { + f_1_ecx = data[1][2]; + f_1_edx = data[1][3]; + } + + // load bitset with flags for function 0x00000007 + if (n_ids >= 7) { + f_7_ebx = data[7][1]; + f_7_ecx = data[7][2]; + f_7_edx = data[7][3]; + cpuidex(cpui.data(), 7, 1); + f_7_1_eax = cpui[0]; + } + + // calling __cpuid with 0x80000000 as the function_id argument + // gets the number of the highest valid extended ID. 
+ cpuid(cpui.data(), 0x80000000); + unsigned int n_ex_ids = cpui[0]; + + std::vector<std::array<int, 4>> ext_data; + for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) { + cpuidex(cpui.data(), i, 0); + ext_data.push_back(cpui); + } + + // load bitset with flags for function 0x80000001 + if (n_ex_ids >= 0x80000001) { + f_81_ecx = ext_data[1][2]; + f_81_edx = ext_data[1][3]; + } + + // interpret CPU brand string if reported + char brand[0x40] = {}; + if (n_ex_ids >= 0x80000004) { + std::memcpy(brand, ext_data[2].data(), sizeof(cpui)); + std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui)); + std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui)); + this->brand = brand; + } + } + + bool is_intel = false; + bool is_amd = false; + std::string vendor; + std::string brand; + std::bitset<32> f_1_ecx; + std::bitset<32> f_1_edx; + std::bitset<32> f_7_ebx; + std::bitset<32> f_7_ecx; + std::bitset<32> f_7_edx; + std::bitset<32> f_7_1_eax; + std::bitset<32> f_81_ecx; + std::bitset<32> f_81_edx; +}; + +#if 0 +void test_x86_is() { + cpuid_x86 is; + printf("CPU Vendor: %s\n", is.vendor.c_str()); + printf("Brand: %s\n", is.brand.c_str()); + printf("is_intel: %d\n", is.is_intel); + printf("is_amd: %d\n", is.is_amd); + printf("sse3: %d\n", is.SSE3()); + printf("pclmulqdq: %d\n", is.PCLMULQDQ()); + printf("ssse3: %d\n", is.SSSE3()); + printf("fma: %d\n", is.FMA()); + printf("cmpxchg16b: %d\n", is.CMPXCHG16B()); + printf("sse41: %d\n", is.SSE41()); + printf("sse42: %d\n", is.SSE42()); + printf("movbe: %d\n", is.MOVBE()); + printf("popcnt: %d\n", is.POPCNT()); + printf("aes: %d\n", is.AES()); + printf("xsave: %d\n", is.XSAVE()); + printf("osxsave: %d\n", is.OSXSAVE()); + printf("avx: %d\n", is.AVX()); + printf("f16c: %d\n", is.F16C()); + printf("rdrand: %d\n", is.RDRAND()); + printf("msr: %d\n", is.MSR()); + printf("cx8: %d\n", is.CX8()); + printf("sep: %d\n", is.SEP()); + printf("cmov: %d\n", is.CMOV()); + printf("clflush: %d\n", is.CLFSH()); + printf("mmx: %d\n", is.MMX()); + printf("fxsr: %d\n", is.FXSR()); + printf("sse: %d\n", is.SSE()); + printf("sse2: %d\n", is.SSE2()); + printf("fsgsbase: %d\n", is.FSGSBASE()); + printf("bmi1: %d\n", is.BMI1()); + printf("hle: %d\n", is.HLE()); + printf("avx2: %d\n", is.AVX2()); + printf("bmi2: %d\n", is.BMI2()); + printf("erms: %d\n", is.ERMS()); + printf("invpcid: %d\n", is.INVPCID()); + printf("rtm: %d\n", is.RTM()); + printf("avx512f: %d\n", is.AVX512F()); + printf("rdseed: %d\n", is.RDSEED()); + printf("adx: %d\n", is.ADX()); + printf("avx512pf: %d\n", is.AVX512PF()); + printf("avx512er: %d\n", is.AVX512ER()); + printf("avx512cd: %d\n", is.AVX512CD()); + printf("sha: %d\n", is.SHA()); + printf("prefetchwt1: %d\n", is.PREFETCHWT1()); + printf("lahf: %d\n", is.LAHF()); + printf("lzcnt: %d\n", is.LZCNT()); + printf("abm: %d\n", is.ABM()); + printf("sse4a: %d\n", is.SSE4a()); + printf("xop: %d\n", is.XOP()); + printf("tbm: %d\n", is.TBM()); + printf("syscall: %d\n", is.SYSCALL()); + printf("mmxext: %d\n", is.MMXEXT()); + printf("rdtscp: %d\n", is.RDTSCP()); + printf("3dnowext: %d\n", is._3DNOWEXT()); + printf("3dnow: %d\n", is._3DNOW()); + printf("avx512_vbmi: %d\n", is.AVX512_VBMI()); + printf("avx512_vnni: %d\n", is.AVX512_VNNI()); + printf("avx512_fp16: %d\n", is.AVX512_FP16()); + printf("avx512_bf16: %d\n", is.AVX512_BF16()); + printf("amx_tile: %d\n", is.AMX_TILE()); + printf("amx_int8: %d\n", is.AMX_INT8()); + printf("amx_fp16: %d\n", is.AMX_FP16()); + printf("amx_bf16: %d\n", is.AMX_BF16()); +} +#endif + +static int ggml_backend_cpu_x86_score() { + // FIXME: this does 
not check for OS support + + int score = 0; + cpuid_x86 is; + +#ifdef GGML_FMA + if (!is.FMA()) { return 0; } + score += 1; +#endif +#ifdef GGML_F16C + if (!is.F16C()) { return 0; } + score += 1<<1; +#endif +#ifdef GGML_SSE42 + if (!is.SSE42()) { return 0; } + score += 1<<2; +#endif +#ifdef GGML_AVX + if (!is.AVX()) { return 0; } + score += 1<<4; +#endif +#ifdef GGML_AVX2 + if (!is.AVX2()) { return 0; } + score += 1<<5; +#endif +#ifdef GGML_AVX_VNNI + if (!is.AVX_VNNI()) { return 0; } + score += 1<<6; +#endif +#ifdef GGML_AVX512 + if (!is.AVX512F()) { return 0; } + if (!is.AVX512CD()) { return 0; } + if (!is.AVX512VL()) { return 0; } + if (!is.AVX512DQ()) { return 0; } + if (!is.AVX512BW()) { return 0; } + score += 1<<7; +#endif +#ifdef GGML_AVX512_VBMI + if (!is.AVX512_VBMI()) { return 0; } + score += 1<<8; +#endif +#ifdef GGML_AVX512_BF16 + if (!is.AVX512_BF16()) { return 0; } + score += 1<<9; +#endif +#ifdef GGML_AVX512_VNNI + if (!is.AVX512_VNNI()) { return 0; } + score += 1<<10; +#endif +#ifdef GGML_AMX_INT8 + if (!is.AMX_INT8()) { return 0; } + score += 1<<11; +#endif + + return score; +} + +GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score) + +#endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go b/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go new file mode 100644 index 000000000..b05256288 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/cpu.go @@ -0,0 +1,10 @@ +package cpu + +// #cgo CXXFLAGS: -std=c++17 +// #cgo CPPFLAGS: -I${SRCDIR}/amx -I${SRCDIR}/llamafile -I${SRCDIR}/.. -I${SRCDIR}/../../include +// #cgo CPPFLAGS: -DGGML_USE_LLAMAFILE +// #cgo linux CPPFLAGS: -D_GNU_SOURCE +// #cgo darwin,arm64 CPPFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 +// #cgo darwin,arm64 LDFLAGS: -framework Accelerate +import "C" +import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu/llamafile" diff --git a/llama/ggml-cpu-aarch64.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp similarity index 99% rename from llama/ggml-cpu-aarch64.cpp rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index 0989fb203..622c63f1f 100644 --- a/llama/ggml-cpu-aarch64.cpp +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
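`ggml_backend_cpu_x86_score()` above is what `GGML_BACKEND_DL_SCORE_IMPL` exposes to the loader: a variant returns 0 (unloadable) if the CPU lacks any feature it was compiled for, and otherwise accumulates one bit per feature, with more advanced features in higher bits, so among the runnable `GGML_CPU_ALL_VARIANTS` builds the most capable one wins. A toy model of that selection rule; the masks and variant names here are illustrative, not the real build flags:

```cpp
#include <cstdio>

// reject the variant if the CPU lacks any feature it was built with;
// otherwise score it by its own feature mask so supersets outrank subsets
static int variant_score(unsigned built, unsigned cpu) {
    if ((built & cpu) != built) return 0; // missing feature -> unloadable
    return (int) built + 1;               // +1 keeps a baseline build above 0
}

int main() {
    const unsigned AVX = 1u << 4, AVX2 = 1u << 5, AVX512 = 1u << 7;
    const unsigned cpu = AVX | AVX2; // an AVX2 machine without AVX-512

    std::printf("sandybridge: %d\n", variant_score(AVX, cpu));                 // loadable
    std::printf("haswell:     %d\n", variant_score(AVX | AVX2, cpu));          // loadable, preferred
    std::printf("skylakex:    %d\n", variant_score(AVX | AVX2 | AVX512, cpu)); // 0: rejected
    return 0;
}
```

As the FIXME above notes, the real function checks only CPUID bits, not operating-system support (for example, XSAVE state enablement for AVX-512), so a score can overstate what the kernel will actually allow.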
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #define GGML_COMMON_IMPL_CPP #define GGML_COMMON_DECL_CPP #include "ggml-common.h" diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.h b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.h new file mode 100644 index 000000000..6e84c826b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-aarch64.h @@ -0,0 +1,8 @@ +#pragma once + +#include "ggml-cpu-traits.h" +#include "ggml.h" + +// GGML internal header + +ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void); diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp new file mode 100644 index 000000000..fa8dea2af --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp @@ -0,0 +1,55 @@ +#ifdef GGML_USE_CPU_HBM + +#include "ggml-backend.h" +#include "ggml-backend-impl.h" +#include "ggml-cpu.h" +#include "ggml-impl.h" + +#include "ggml-cpu-hbm.h" + +// buffer type HBM + +#include <hbwmalloc.h> + +static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + return "CPU_HBM"; + + GGML_UNUSED(buft); +} + +static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) { + hbw_free(buffer->context); +} + +static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, + size_t size) { + void * ptr; + int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size); + if (result != 0) { + GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size); + return NULL; + } + + ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size); + buffer->buft = buft; + buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer; + + return buffer; +} + +ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) { + static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = { + /* .iface = */ { + /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, + /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, // defaults to SIZE_MAX + /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes + /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, + }, + /* .context = */ nullptr, + }; + + return &ggml_backend_cpu_buffer_type_hbm; +} +#endif diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.h b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.h new file mode 100644 index 000000000..09a1f09d7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-hbm.h @@ -0,0 +1,8 @@ +#pragma once + +#include "ggml-backend.h" +#include "ggml.h" + +// GGML CPU internal header + +ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void); diff --git a/llama/ggml-cpu-impl.h b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-impl.h similarity index 87% rename from llama/ggml-cpu-impl.h rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-impl.h index 54dc108cc..d71076ad1 100644 --- a/llama/ggml-cpu-impl.h +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * 
- * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once // GGML CPU internal header diff --git a/llama/ggml-cpu-quants.c b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.c similarity index 99% rename from llama/ggml-cpu-quants.c rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.c index a8288deca..8e1472266 100644 --- a/llama/ggml-cpu-quants.c +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.c @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #define GGML_COMMON_IMPL_C #include "ggml-common.h" diff --git a/llama/ggml-cpu-quants.h b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.h similarity index 80% rename from llama/ggml-cpu-quants.h rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.h index e2cdf03ed..e33d9d473 100644 --- a/llama/ggml-cpu-quants.h +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-quants.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #define GGML_COMMON_DECL_C diff --git a/llama/ggml-cpu-traits.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.cpp similarity index 50% rename from llama/ggml-cpu-traits.cpp rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.cpp index 6d7ca0246..62a0712da 100644 --- a/llama/ggml-cpu-traits.cpp +++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.cpp @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-
 #include "ggml-cpu-traits.h"
 
 #include "ggml-backend-impl.h"
diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.h b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.h
new file mode 100644
index 000000000..99a6186b1
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu-traits.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "ggml-backend-impl.h"
+#include "ggml-cpu-impl.h"
+#include "ggml.h"
+
+#ifdef __cplusplus
+#    include <vector>
+extern "C" {
+#endif
+
+// return true if op part of extra "accelerator"
+bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
+bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
+
+#ifdef __cplusplus
+}
+
+namespace ggml::cpu {
+// register in tensor->extra
+class tensor_traits {
+  public:
+    virtual ~tensor_traits();
+    virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
+    virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
+};
+
+class extra_buffer_type {
+  public:
+    virtual ~extra_buffer_type();
+    virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
+    virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
+};
+}  // namespace ggml::cpu
+
+// implemented in ggml-cpu.cpp.
+std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
+
+#endif
diff --git a/llama/ggml-cpu.c b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c
similarity index 99%
rename from llama/ggml-cpu.c
rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c
index 272f03e31..b307d5542 100644
--- a/llama/ggml-cpu.c
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
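Note on the new header above: ggml-cpu-traits.h is the extension point that lets an "extra" CPU buffer type (the AMX and aarch64-repack paths elsewhere in this patch) hang per-tensor dispatch hooks off tensor->extra. A minimal sketch of a conforming trait, using only the virtuals the header declares; the class name and no-op behavior are illustrative, not part of the patch:

// Illustrative only -- a do-nothing tensor_traits implementation.
#include "ggml-cpu-traits.h"

namespace ggml::cpu {
class noop_tensor_traits : public tensor_traits {
  public:
    // No extra scratch space is needed for this (hypothetical) op.
    bool work_size(int /*n_threads*/, const struct ggml_tensor * /*op*/, size_t & size) override {
        size = 0;
        return true;
    }
    // Returning false hands the op back to the stock CPU implementation.
    bool compute_forward(struct ggml_compute_params * /*params*/, struct ggml_tensor * /*op*/) override {
        return false;
    }
};
}  // namespace ggml::cpu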
- */
-
 #define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
@@ -36,7 +10,7 @@
 #include "ggml-quants.h"
 #include "ggml-cpu-quants.h"
 #include "ggml-threading.h"
-#include "amx.h"
+#include "amx/amx.h"
 #include "ggml.h"
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
diff --git a/llama/ggml-cpu.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.cpp
similarity index 94%
rename from llama/ggml-cpu.cpp
rename to ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.cpp
index 38395101f..f11399cc6 100644
--- a/llama/ggml-cpu.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -1,36 +1,11 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "ggml-backend.h"
 #include "ggml-backend-impl.h"
 #include "ggml-cpu.h"
 #include "ggml-cpu-aarch64.h"
 #include "ggml-cpu-traits.h"
 #include "ggml-impl.h"
-#include "amx.h"
+#include "amx/amx.h"
+
 #include <cctype>
 #include <cstring>
 #include <vector>
diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go
new file mode 100644
index 000000000..09b002ce5
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/llamafile.go
@@ -0,0 +1,5 @@
+package llamafile
+
+// #cgo CXXFLAGS: -std=c++17
+// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../..
-I${SRCDIR}/../../../include +import "C" diff --git a/llama/sgemm.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp similarity index 100% rename from llama/sgemm.cpp rename to ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp diff --git a/llama/llamafile/sgemm.h b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.h similarity index 100% rename from llama/llamafile/sgemm.h rename to ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.h diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt b/ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt new file mode 100644 index 000000000..14761650f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt @@ -0,0 +1,152 @@ +cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES + +find_package(CUDAToolkit) + +if (CUDAToolkit_FOUND) + message(STATUS "CUDA Toolkit found") + + if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + # native == GPUs available at build time + # 52 == Maxwell, lowest CUDA 12 standard + # 60 == P100, FP16 CUDA intrinsics + # 61 == Pascal, __dp4a instruction (per-byte integer dot product) + # 70 == V100, FP16 tensor cores + # 75 == Turing, int8 tensor cores + if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") + set(CMAKE_CUDA_ARCHITECTURES "native") + elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) + set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") + else() + set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") + endif() + endif() + message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") + + enable_language(CUDA) + + file(GLOB GGML_HEADERS_CUDA "*.cuh") + list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h") + + file(GLOB GGML_SOURCES_CUDA "*.cu") + file(GLOB SRCS "template-instances/fattn-wmma*.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + file(GLOB SRCS "template-instances/mmq*.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + + if (GGML_CUDA_FA_ALL_QUANTS) + file(GLOB SRCS "template-instances/fattn-vec*.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) + else() + file(GLOB SRCS "template-instances/fattn-vec*q4_0-q4_0.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + file(GLOB SRCS "template-instances/fattn-vec*q8_0-q8_0.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + file(GLOB SRCS "template-instances/fattn-vec*f16-f16.cu") + list(APPEND GGML_SOURCES_CUDA ${SRCS}) + endif() + + ggml_add_backend_library(ggml-cuda + ${GGML_HEADERS_CUDA} + ${GGML_SOURCES_CUDA} + ) + + add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE}) + + if (GGML_CUDA_GRAPHS) + add_compile_definitions(GGML_CUDA_USE_GRAPHS) + endif() + + if (GGML_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) + endif() + + if (GGML_CUDA_FORCE_CUBLAS) + add_compile_definitions(GGML_CUDA_FORCE_CUBLAS) + endif() + + if (GGML_CUDA_NO_VMM) + add_compile_definitions(GGML_CUDA_NO_VMM) + endif() + + if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) + add_compile_definitions(GGML_CUDA_F16) + endif() + + if (GGML_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) + endif() + + if (GGML_STATIC) + if (WIN32) + # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library + target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt) + else () + target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + endif() + else() + target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas 
CUDA::cublasLt)
+    endif()
+
+    if (GGML_CUDA_NO_VMM)
+        # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
+    else()
+        target_link_libraries(ggml-cuda PRIVATE CUDA::cuda_driver)
+    endif()
+
+    set(CUDA_CXX_FLAGS "")
+
+    set(CUDA_FLAGS -use_fast_math)
+
+    if (GGML_FATAL_WARNINGS)
+        list(APPEND CUDA_FLAGS -Werror all-warnings)
+    endif()
+
+    if (GGML_ALL_WARNINGS AND NOT MSVC)
+        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
+        if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
+            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+        endif()
+
+        execute_process(
+            COMMAND ${NVCC_CMD} -Xcompiler --version
+            OUTPUT_VARIABLE CUDA_CCFULLVER
+            ERROR_QUIET
+        )
+
+        if (NOT CUDA_CCFULLVER MATCHES clang)
+            set(CUDA_CCID "GNU")
+            execute_process(
+                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
+                OUTPUT_VARIABLE CUDA_CCVER
+                ERROR_QUIET
+            )
+        else()
+            if (CUDA_CCFULLVER MATCHES Apple)
+                set(CUDA_CCID "AppleClang")
+            else()
+                set(CUDA_CCID "Clang")
+            endif()
+            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
+        endif()
+
+        message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
+
+        ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
+        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
+    endif()
+
+    if (NOT MSVC)
+        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
+    endif()
+
+    list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument
+
+    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
+        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
+    endif()
+
+    target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
+else()
+    message(FATAL_ERROR "CUDA Toolkit not found")
+endif()
diff --git a/llama/ggml-cuda/acc.cu b/ml/backend/ggml/ggml/src/ggml-cuda/acc.cu
similarity index 61%
rename from llama/ggml-cuda/acc.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/acc.cu
index 9ce47e60d..96bfe1c9d 100644
--- a/llama/ggml-cuda/acc.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/acc.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
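For readers unfamiliar with the architecture list at the top of this CMakeLists: the 61/Pascal entry is what unlocks __dp4a, the per-byte integer dot product the quantized matmul kernels lean on. A device-side guard for it conventionally looks like the sketch below (illustrative; ggml's own wrappers in common.cuh differ in detail):

// Illustrative guard for __dp4a, available from compute capability 6.1.
#include <cstdint>

__device__ int dot4_i8(int a, int b, int acc) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 610
    return __dp4a(a, b, acc);  // 4x int8 multiply-accumulate in one instruction
#else
    // Pre-Pascal fallback: unpack the four signed bytes and accumulate manually.
    int sum = acc;
#pragma unroll
    for (int i = 0; i < 4; ++i) {
        const int8_t ai = (int8_t)(a >> (8 * i));
        const int8_t bi = (int8_t)(b >> (8 * i));
        sum += ai * bi;
    }
    return sum;
#endif
}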
- */
-
 #include "acc.cuh"
 
 static __global__ void acc_f32(const float * x, const float * y, float * dst, const int ne,
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/acc.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/acc.cuh
new file mode 100644
index 000000000..1168ea1b2
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/acc.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_ACC_BLOCK_SIZE 256
+
+void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/arange.cu b/ml/backend/ggml/ggml/src/ggml-cuda/arange.cu
new file mode 100644
index 000000000..b5e495a24
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/arange.cu
@@ -0,0 +1,34 @@
+#include "arange.cuh"
+
+static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
+    // blockIDx.x: idx of ne0 / BLOCK_SIZE
+    int nidx = threadIdx.x + blockIdx.x * blockDim.x;
+    if (nidx >= ne0) {
+        return;
+    }
+    dst[nidx] = start + step * nidx;
+}
+
+static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
+    int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
+    arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
+}
+
+void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    float * dst_d = (float *)dst->data;
+    cudaStream_t stream = ctx.stream();
+
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+
+    float start;
+    float stop;
+    float step;
+    memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
+    memcpy(&stop,  (float *)dst->op_params + 1, sizeof(float));
+    memcpy(&step,  (float *)dst->op_params + 2, sizeof(float));
+
+    int64_t steps = (int64_t)ceil((stop - start) / step);
+    GGML_ASSERT(ggml_nelements(dst) == steps);
+
+    arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
+}
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/arange.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/arange.cuh
new file mode 100644
index 000000000..41e74fdfc
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/arange.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_ARANGE_BLOCK_SIZE 256
+
+void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/argmax.cu b/ml/backend/ggml/ggml/src/ggml-cuda/argmax.cu
similarity index 69%
rename from llama/ggml-cuda/argmax.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/argmax.cu
index 8bbfd7c05..5340eedc0 100644
--- a/llama/ggml-cuda/argmax.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/argmax.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include <algorithm>
 #include <cstdint>
 
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/argmax.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/argmax.cuh
new file mode 100644
index 000000000..5b7223adc
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/argmax.cuh
@@ -0,0 +1,3 @@
+#include "common.cuh"
+
+void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/argsort.cu b/ml/backend/ggml/ggml/src/ggml-cuda/argsort.cu
similarity index 73%
rename from llama/ggml-cuda/argsort.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/argsort.cu
index d9aaaa13c..607ded855 100644
--- a/llama/ggml-cuda/argsort.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/argsort.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
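arange.cu above shows the convention these ops use for scalar arguments: they ride along in dst->op_params and are read back with memcpy rather than a float cast, since op_params is int32 storage that merely aliases float bits. The same idiom, isolated (hypothetical helper, assuming only the ggml_tensor layout from ggml.h):

// Hypothetical helper illustrating the op_params idiom used by arange/clamp.
#include <cstring>
#include "ggml.h"

static void read_arange_params(const struct ggml_tensor * dst, float * start, float * stop, float * step) {
    // memcpy sidesteps the strict-aliasing trouble a direct *(float *) cast invites.
    memcpy(start, (const float *) dst->op_params + 0, sizeof(float));
    memcpy(stop,  (const float *) dst->op_params + 1, sizeof(float));
    memcpy(step,  (const float *) dst->op_params + 2, sizeof(float));
}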
- */ - #include "argsort.cuh" template diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/argsort.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/argsort.cuh new file mode 100644 index 000000000..68a001547 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/argsort.cuh @@ -0,0 +1,3 @@ +#include "common.cuh" + +void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/binbcast.cu b/ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cu similarity index 91% rename from llama/ggml-cuda/binbcast.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cu index 40b9fcbe1..c7b6be4e2 100644 --- a/llama/ggml-cuda/binbcast.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "binbcast.cuh" #include diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cuh new file mode 100644 index 000000000..3ac1c9b03 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/binbcast.cuh @@ -0,0 +1,9 @@ +#include "common.cuh" + +void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cu b/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cu new file mode 100644 index 000000000..8009a3e3d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cu @@ -0,0 +1,34 @@ +#include "clamp.cuh" + +static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + + dst[i] = x[i] < min ? min : (x[i] > max ? 
max : x[i]);
+}
+
+static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
+    const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
+    clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
+}
+
+
+void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const float * src0_d = (const float *)src0->data;
+    float * dst_d = (float *)dst->data;
+    cudaStream_t stream = ctx.stream();
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    float min;
+    float max;
+    memcpy(&min, dst->op_params, sizeof(float));
+    memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
+
+    clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
+}
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cuh
new file mode 100644
index 000000000..7f9559dd1
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/clamp.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_CLAMP_BLOCK_SIZE 256
+
+void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/common.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
similarity index 94%
rename from llama/ggml-cuda/common.cuh
rename to ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
index 2a40b8499..2c0a56226 100644
--- a/llama/ggml-cuda/common.cuh
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
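clamp.cu above uses the launch shape shared by virtually every elementwise op in this directory: one thread per element, with the block count rounded up so a partially filled final block still covers the tail. Reduced to a standalone sketch (illustrative names, not the patch's code):

// Illustrative elementwise launch shape: ceil-divide the element count into blocks.
#define EXAMPLE_BLOCK_SIZE 256

static __global__ void scale_f32(const float * x, float * dst, const float s, const int k) {
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= k) {
        return;  // guard: threads past the end of the tensor do nothing
    }
    dst[i] = s * x[i];
}

static void scale_f32_cuda(const float * x, float * dst, const float s, const int k, cudaStream_t stream) {
    const int num_blocks = (k + EXAMPLE_BLOCK_SIZE - 1) / EXAMPLE_BLOCK_SIZE;  // ceil(k / block size)
    scale_f32<<<num_blocks, EXAMPLE_BLOCK_SIZE, 0, stream>>>(x, dst, s, k);
}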
- */ - #pragma once #include "ggml.h" diff --git a/llama/ggml-cuda/concat.cu b/ml/backend/ggml/ggml/src/ggml-cuda/concat.cu similarity index 85% rename from llama/ggml-cuda/concat.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/concat.cu index d8c473913..5eb9f08da 100644 --- a/llama/ggml-cuda/concat.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/concat.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "concat.cuh" // contiguous kernels diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/concat.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/concat.cuh new file mode 100644 index 000000000..aa506a05f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/concat.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_CONCAT_BLOCK_SIZE 256 + +void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/conv-transpose-1d.cu b/ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cu similarity index 72% rename from llama/ggml-cuda/conv-transpose-1d.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cu index da53e9469..b1e94d6f7 100644 --- a/llama/ggml-cuda/conv-transpose-1d.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "conv-transpose-1d.cuh" static __global__ void conv_transpose_1d_kernel( diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cuh new file mode 100644 index 000000000..6c2cf666b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/conv-transpose-1d.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256 + +void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/convert.cu b/ml/backend/ggml/ggml/src/ggml-cuda/convert.cu similarity index 95% rename from llama/ggml-cuda/convert.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/convert.cu index 6ddb87fc3..5b0dfacef 100644 --- a/llama/ggml-cuda/convert.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/convert.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-
 #include "convert.cuh"
 #include "dequantize.cuh"
 
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/convert.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/convert.cuh
new file mode 100644
index 000000000..5394be9f1
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/convert.cuh
@@ -0,0 +1,13 @@
+#include "common.cuh"
+
+#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
+
+template<typename T>
+using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
+
+typedef to_t_cuda_t<float> to_fp32_cuda_t;
+typedef to_t_cuda_t<half>  to_fp16_cuda_t;
+
+to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
+
+to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
diff --git a/llama/ggml-cuda/count-equal.cu b/ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cu
similarity index 62%
rename from llama/ggml-cuda/count-equal.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cu
index e4496fc1b..08898115d 100644
--- a/llama/ggml-cuda/count-equal.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
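convert.cuh above turns dequantization into data: ggml_get_to_fp32_cuda / ggml_get_to_fp16_cuda return a function pointer for the tensor's quantization type, so a caller resolves the converter once and launches it without a per-call switch. A hedged usage sketch against just those declarations (the wrapper itself, and the assumption that the lookup yields null for unsupported types, are hypothetical):

// Hypothetical wrapper around the converter lookup in convert.cuh.
#include "convert.cuh"

static void dequantize_to_f32(const void * x_q, float * y, int64_t k, ggml_type type, cudaStream_t stream) {
    const to_fp32_cuda_t to_fp32 = ggml_get_to_fp32_cuda(type);
    GGML_ASSERT(to_fp32 != nullptr);  // assumed: null means no converter registered for this type
    to_fp32(x_q, y, k, stream);       // enqueues the dequantize kernel on the given stream
}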
- */ - #include "common.cuh" #include "count-equal.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cuh new file mode 100644 index 000000000..8467da79e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/count-equal.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128 + +void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/cpy.cu b/ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu similarity index 94% rename from llama/ggml-cuda/cpy.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu index ffdef8c43..54c0f66d2 100644 --- a/llama/ggml-cuda/cpy.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "cpy.cuh" typedef void (*cpy_kernel_t)(const char * cx, char * cdst); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/cpy.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/cpy.cuh new file mode 100644 index 000000000..28b06cdda --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/cpy.cuh @@ -0,0 +1,9 @@ +#include "common.cuh" + +#define CUDA_CPY_BLOCK_SIZE 64 + +void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1); + +void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1); diff --git a/llama/ggml-cuda/cross-entropy-loss.cu b/ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cu similarity index 82% rename from llama/ggml-cuda/cross-entropy-loss.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cu index 5bfddc79b..ed09406a8 100644 --- a/llama/ggml-cuda/cross-entropy-loss.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "common.cuh" #include "cross-entropy-loss.cuh" #include "sum.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cuh new file mode 100644 index 000000000..9ec7152ff --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/cross-entropy-loss.cuh @@ -0,0 +1,7 @@ +#include "common.cuh" + +#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256 + +void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/dequantize.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/dequantize.cuh similarity index 68% rename from llama/ggml-cuda/dequantize.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/dequantize.cuh index 016de0db6..bd3c2d9db 100644 --- a/llama/ggml-cuda/dequantize.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/dequantize.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" static __device__ __forceinline__ void dequantize_q4_0(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){ diff --git a/llama/ggml-cuda/diagmask.cu b/ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cu similarity index 58% rename from llama/ggml-cuda/diagmask.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cu index e80a953ae..4b713ba22 100644 --- a/llama/ggml-cuda/diagmask.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "diagmask.cuh" static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, const int rows_per_channel, const int n_past) { diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cuh new file mode 100644 index 000000000..6cdbef17e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/diagmask.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 + +void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn-common.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-common.cuh similarity index 95% rename from llama/ggml-cuda/fattn-common.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-common.cuh index 011654d31..ee9752da6 100644 --- a/llama/ggml-cuda/fattn-common.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-common.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
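The diag_mask_inf_f32 signature above (ncols, rows_per_channel, n_past) implements the causal mask applied before softmax: within each channel, any column to the right of n_past plus the row position is forced to -INFINITY. The predicate in scalar form (a sketch; the kernel itself additionally folds rows into channels via rows_per_channel):

// Scalar form of the causal-mask predicate (illustrative).
static inline bool diag_masked(int col, int row_in_channel, int n_past) {
    return col > n_past + row_in_channel;  // strictly right of the shifted diagonal
}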
- */ - #pragma once #include "common.cuh" diff --git a/llama/ggml-cuda/fattn-tile-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cu similarity index 91% rename from llama/ggml-cuda/fattn-tile-f16.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cu index 72d265ef2..4d314dacb 100644 --- a/llama/ggml-cuda/fattn-tile-f16.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #include "fattn-common.cuh" #include "fattn-tile-f16.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cuh new file mode 100644 index 000000000..ffc587842 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f16.cuh @@ -0,0 +1,3 @@ +#include "common.cuh" + +void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn-tile-f32.cu b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cu similarity index 91% rename from llama/ggml-cuda/fattn-tile-f32.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cu index 3be1c7a62..bb3360447 100644 --- a/llama/ggml-cuda/fattn-tile-f32.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #include "fattn-common.cuh" #include "fattn-tile-f32.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cuh new file mode 100644 index 000000000..b1c546c80 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile-f32.cuh @@ -0,0 +1,3 @@ +#include "common.cuh" + +void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/fattn-vec-f16.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f16.cuh similarity index 93% rename from llama/ggml-cuda/fattn-vec-f16.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f16.cuh index 334a05c37..34a2992c7 100644 --- a/llama/ggml-cuda/fattn-vec-f16.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f16.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "common.cuh" #include "fattn-common.cuh" diff --git a/llama/ggml-cuda/fattn-vec-f32.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f32.cuh similarity index 92% rename from llama/ggml-cuda/fattn-vec-f32.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f32.cuh index 0bb230004..a28fc8b7f 100644 --- a/llama/ggml-cuda/fattn-vec-f32.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-vec-f32.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #include "fattn-common.cuh" diff --git a/llama/ggml-cuda/fattn-wmma-f16.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cuh similarity index 94% rename from llama/ggml-cuda/fattn-wmma-f16.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cuh index d82984f40..860d0e6dc 100644 --- a/llama/ggml-cuda/fattn-wmma-f16.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-wmma-f16.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
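The fattn-vec headers above hold the kernel templates behind the template-instances/*.cu files that the CMakeLists globs selectively: each generated .cu pins exactly one head-size/K-type/V-type combination, so builds without GGML_CUDA_FA_ALL_QUANTS compile only the q4_0-q4_0, q8_0-q8_0, and f16-f16 variants. The mechanism, boiled down (simplified stub, not the real kernel; the int parameters stand in for ggml_type values):

// Simplified stand-in for the explicit-instantiation scheme.
#include <cstdio>

template <int head_size, int type_k, int type_v>
void flash_attn_vec_case(float * /*dst*/) {
    // The real function configures and launches the templated kernel.
    printf("instantiated for head size %d\n", head_size);
}

// Each template-instances/*.cu file contains essentially one such line,
// pinning a single combination into its own translation unit:
template void flash_attn_vec_case<128, 2, 2>(float *);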
- */ - #include "common.cuh" #include "fattn-common.cuh" diff --git a/llama/ggml-cuda/fattn.cu b/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cu similarity index 92% rename from llama/ggml-cuda/fattn.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/fattn.cu index 4828e9d84..0b26b0f8e 100644 --- a/llama/ggml-cuda/fattn.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #include "fattn-common.cuh" #include "fattn-tile-f16.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cuh new file mode 100644 index 000000000..ad3ca7a8d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cuh @@ -0,0 +1,3 @@ +#include "common.cuh" + +void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/getrows.cu b/ml/backend/ggml/ggml/src/ggml-cuda/getrows.cu similarity index 84% rename from llama/ggml-cuda/getrows.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/getrows.cu index 6cf1e516e..4c3703238 100644 --- a/llama/ggml-cuda/getrows.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/getrows.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "getrows.cuh" #include "dequantize.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/getrows.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/getrows.cuh new file mode 100644 index 000000000..bbf130232 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/getrows.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_GET_ROWS_BLOCK_SIZE 256 + +void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu similarity index 98% rename from llama/ggml-cuda/ggml-cuda.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index 0894fdad7..9286f8665 100644 --- a/llama/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "ggml-cuda.h" #include "ggml-impl.h" #include "ggml-backend-impl.h" @@ -450,10 +424,7 @@ struct ggml_backend_cuda_buffer_context { static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) { ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; delete ctx; - - // TODO: this needs to be freed in cuda and hipblas backends because - // the cuda backend implementation compiled with msvc - free(buffer); + delete buffer; } static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) { diff --git a/llama/ggml-cuda/im2col.cu b/ml/backend/ggml/ggml/src/ggml-cuda/im2col.cu similarity index 78% rename from llama/ggml-cuda/im2col.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/im2col.cu index 0ceaa02c9..86a54e42b 100644 --- a/llama/ggml-cuda/im2col.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/im2col.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
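// Note on the ggml-cuda.cu hunk above: the old code free()d the buffer, with a TODO
// about objects compiled by MSVC; the new code uses `delete buffer`, which implies the
// buffer is now allocated with `new` on the other side. Pairing the two families is
// required, as this minimal C++ sketch (hypothetical type) illustrates:
struct example_buffer {
    ~example_buffer() { /* release resources */ }   // a destructor free() would silently skip
};

void release(example_buffer * b) {
    delete b;       // pairs with `new example_buffer`, runs the destructor
    // free(b);     // wrong: bypasses the destructor and mixes allocators (undefined behavior)
}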
- * SOFTWARE.
- */
-
 #include "im2col.cuh"
 
 template <typename T>
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/im2col.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/im2col.cuh
new file mode 100644
index 000000000..1ce8fae4d
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/im2col.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_IM2COL_BLOCK_SIZE 256
+
+void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/mma.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/mma.cuh
similarity index 86%
rename from llama/ggml-cuda/mma.cuh
rename to ml/backend/ggml/ggml/src/ggml-cuda/mma.cuh
index 557cdcd1e..7d11540af 100644
--- a/llama/ggml-cuda/mma.cuh
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mma.cuh
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "common.cuh"
 
 struct mma_int_A_I16K4 {
diff --git a/llama/ggml-cuda/mmq.cu b/ml/backend/ggml/ggml/src/ggml-cuda/mmq.cu
similarity index 80%
rename from llama/ggml-cuda/mmq.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/mmq.cu
index 0dc63b31b..270251df4 100644
--- a/llama/ggml-cuda/mmq.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmq.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "mmq.cuh"
 
 void ggml_cuda_op_mul_mat_q(
diff --git a/llama/ggml-cuda/mmq.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/mmq.cuh
similarity index 98%
rename from llama/ggml-cuda/mmq.cuh
rename to ml/backend/ggml/ggml/src/ggml-cuda/mmq.cuh
index 1da4680a2..3cd508a1d 100644
--- a/llama/ggml-cuda/mmq.cuh
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmq.cuh
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #pragma once
 
 #include "common.cuh"
diff --git a/llama/ggml-cuda/mmv.cu b/ml/backend/ggml/ggml/src/ggml-cuda/mmv.cu
similarity index 89%
rename from llama/ggml-cuda/mmv.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/mmv.cu
index 37559c742..ac45f2d17 100644
--- a/llama/ggml-cuda/mmv.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmv.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
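// The mmv.cu rename above carries the mul_mat_vec kernels. At heart they are one warp
// per output row with a shuffle reduction; a hedged f32 sketch (the real code also
// handles f16 src0 and strided/batched layouts), assuming a 32-thread launch:
static __global__ void mat_vec_f32_sketch(const float * A, const float * x, float * y, const int ncols) {
    const int row = blockIdx.x;
    float sum = 0.0f;
    for (int col = threadIdx.x; col < ncols; col += warpSize) {
        sum += A[(int64_t)row*ncols + col] * x[col];        // each lane accumulates a strided slice
    }
    for (int offset = warpSize/2; offset > 0; offset >>= 1) {
        sum += __shfl_xor_sync(0xffffffff, sum, offset);    // butterfly-reduce across the warp
    }
    if (threadIdx.x == 0) {
        y[row] = sum;
    }
}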
- */
-
 #include "common.cuh"
 #include "mmv.cuh"
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/mmv.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/mmv.cuh
new file mode 100644
index 000000000..78a1cd4a6
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmv.cuh
@@ -0,0 +1,12 @@
+#include "common.cuh"
+
+// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
+#define MMV_MAX_ROWS 512
+
+void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+
+void ggml_cuda_op_mul_mat_vec(
+    ggml_backend_cuda_context & ctx,
+    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
+    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
+    const int64_t src1_padded_row_size, cudaStream_t stream);
diff --git a/llama/ggml-cuda/mmvq.cu b/ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cu
similarity index 93%
rename from llama/ggml-cuda/mmvq.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cu
index 19ea9aa96..e3b912d87 100644
--- a/llama/ggml-cuda/mmvq.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "mmvq.cuh"
 #include "vecdotq.cuh"
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
new file mode 100644
index 000000000..d9e42fdd6
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
@@ -0,0 +1,9 @@
+#include "common.cuh"
+
+#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
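// MMVQ_MAX_BATCH_SIZE (8) above is the cutoff below which the quantized mat-vec
// kernels are preferred over the full matrix-matrix paths. The dispatch reduces to
// a comparison like this (sketch; the real decision in ggml-cuda.cu also weighs
// hardware and type support):
static bool prefer_mmvq_sketch(const int64_t src1_ncols) {
    return src1_ncols <= MMVQ_MAX_BATCH_SIZE;   // narrow batches: vector kernels win
}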
+
+void ggml_cuda_op_mul_mat_vec_q(
+    ggml_backend_cuda_context & ctx,
+    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
+    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
+    const int64_t src1_padded_row_size, cudaStream_t stream);
diff --git a/llama/ggml-cuda/norm.cu b/ml/backend/ggml/ggml/src/ggml-cuda/norm.cu
similarity index 85%
rename from llama/ggml-cuda/norm.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/norm.cu
index 6bc05ff70..133e219f0 100644
--- a/llama/ggml-cuda/norm.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/norm.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
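// A recurring pattern worth spelling out once: the fixed *_BLOCK_SIZE constants these
// headers introduce (256 for im2col, pad, rope, pool2d, and others below) become
// launch grids by ceil-division, one thread per element. A sketch of that idiom:
static inline int num_blocks_for(const int64_t nelements, const int block_size) {
    return (int)((nelements + block_size - 1) / block_size);   // round up so every element is covered
}
// e.g. kernel<<<num_blocks_for(n, CUDA_IM2COL_BLOCK_SIZE), CUDA_IM2COL_BLOCK_SIZE, 0, stream>>>(...)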
- * SOFTWARE.
- */
-
 #include "norm.cuh"
 
 template <int block_size>
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/norm.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/norm.cuh
new file mode 100644
index 000000000..431a8f74d
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/norm.cuh
@@ -0,0 +1,7 @@
+#include "common.cuh"
+
+void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/opt-step-adamw.cu b/ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cu
similarity index 70%
rename from llama/ggml-cuda/opt-step-adamw.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cu
index 4bde5c599..35154f299 100644
--- a/llama/ggml-cuda/opt-step-adamw.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
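// Of the three ops norm.cuh declares, RMS norm is the simplest to sketch: scale each
// row by 1/sqrt(mean(x^2) + eps). One row per 32-thread block for illustration; the
// real norm.cu kernels are templated on block size (hence the `template <int block_size>`
// context line above) and use warp_reduce_sum from common.cuh:
static __global__ void rms_norm_f32_sketch(const float * x, float * dst, const int ncols, const float eps) {
    const int64_t row = blockIdx.x;
    float sumsq = 0.0f;
    for (int col = threadIdx.x; col < ncols; col += warpSize) {
        const float v = x[row*ncols + col];
        sumsq += v*v;
    }
    for (int offset = warpSize/2; offset > 0; offset >>= 1) {
        sumsq += __shfl_xor_sync(0xffffffff, sumsq, offset);   // every lane ends with the row's sum of squares
    }
    const float scale = rsqrtf(sumsq/ncols + eps);
    for (int col = threadIdx.x; col < ncols; col += warpSize) {
        dst[row*ncols + col] = x[row*ncols + col] * scale;
    }
}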
- */
-
 #include "ggml-impl.h"
 #include "opt-step-adamw.cuh"
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cuh
new file mode 100644
index 000000000..58d6f6e5d
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/opt-step-adamw.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256
+
+void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/out-prod.cu b/ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cu
similarity index 57%
rename from llama/ggml-cuda/out-prod.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cu
index fb2cc3838..619cfdcb5 100644
--- a/llama/ggml-cuda/out-prod.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "out-prod.cuh"
 
 #include <cstdint>
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cuh
new file mode 100644
index 000000000..a0046f5f8
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/out-prod.cuh
@@ -0,0 +1,3 @@
+#include "common.cuh"
+
+void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/pad.cu b/ml/backend/ggml/ggml/src/ggml-cuda/pad.cu
similarity index 74%
rename from llama/ggml-cuda/pad.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/pad.cu
index aa61c0ada..39fd4b165 100644
--- a/llama/ggml-cuda/pad.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/pad.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
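// The AdamW step that opt-step-adamw.cu implements, one parameter per thread with
// CUDA_OPT_STEP_ADAMW_BLOCK_SIZE-wide blocks. A sketch of the per-element math
// (parameter names here are illustrative; the real kernel reads its hyperparameters
// from a small device-side buffer):
static __global__ void adamw_step_sketch(float * w, const float * g, float * m, float * v,
                                         const int64_t n, const float alpha, const float beta1,
                                         const float beta2, const float eps, const float wd,
                                         const float beta1h, const float beta2h) {
    const int64_t i = (int64_t)blockIdx.x*blockDim.x + threadIdx.x;
    if (i >= n) {
        return;
    }
    m[i] = beta1*m[i] + (1.0f - beta1)*g[i];        // first-moment EMA of the gradient
    v[i] = beta2*v[i] + (1.0f - beta2)*g[i]*g[i];   // second-moment EMA
    const float mh =       m[i]*beta1h;             // bias correction, beta1h = 1/(1 - beta1^t)
    const float vh = sqrtf(v[i]*beta2h) + eps;      //                  beta2h = 1/(1 - beta2^t)
    w[i] = w[i]*(1.0f - alpha*wd) - alpha*mh/vh;    // decoupled weight decay, then the Adam step
}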
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "pad.cuh"
 
 static __global__ void pad_f32(const float * x, float * dst, const int ne0, const int ne00, const int ne01, const int ne02, const int ne03) {
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/pad.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/pad.cuh
new file mode 100644
index 000000000..e2ededc3c
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/pad.cuh
@@ -0,0 +1,6 @@
+#include "common.cuh"
+
+#define CUDA_PAD_BLOCK_SIZE 256
+
+void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/pool2d.cu b/ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cu
similarity index 72%
rename from llama/ggml-cuda/pool2d.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cu
index adbf1b551..c6d51e4d6 100644
--- a/llama/ggml-cuda/pool2d.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "pool2d.cuh"
 
 template <typename Ti, typename To>
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cuh
new file mode 100644
index 000000000..7841292bc
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/pool2d.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_POOL2D_BLOCK_SIZE 256
+
+void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/quantize.cu b/ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
similarity index 81%
rename from llama/ggml-cuda/quantize.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
index 60341bee8..1702e4ce2 100644
--- a/llama/ggml-cuda/quantize.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "quantize.cuh"
 
 #include <cstdint>
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
new file mode 100644
index 000000000..03bf322b9
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "common.cuh"
+#include "mmq.cuh"
+
+#include <cstdint>
+
+#define CUDA_QUANTIZE_BLOCK_SIZE 256
+#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
+
+static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
+static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
+
+typedef void (*quantize_cuda_t)(
+    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
+    const ggml_type type_x, cudaStream_t stream);
+
+void quantize_row_q8_1_cuda(
+    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
+    const ggml_type type_x, cudaStream_t stream);
+
+void quantize_mmq_q8_1_cuda(
+    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
+    const ggml_type type_x, cudaStream_t stream);
diff --git a/llama/ggml-cuda/rope.cu b/ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
similarity index 94%
rename from llama/ggml-cuda/rope.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
index fc9f6f2f4..2c84778d2 100644
--- a/llama/ggml-cuda/rope.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
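// quantize.cuh above wires the q8_1 quantizers into the matmul path; the static_asserts
// guarantee that padded rows split evenly into quantization blocks. What
// quantize_row_q8_1_cuda computes per 32-value block, sketched on the host for clarity
// (the device kernel does this in parallel and packs the scale and block sum together):
#include <cmath>
#include <cstdint>

void quantize_block_q8_1_sketch(const float * x, int8_t * q, float & d, float & s) {
    float amax = 0.0f, sum = 0.0f;
    for (int i = 0; i < 32; ++i) {
        amax = fmaxf(amax, fabsf(x[i]));   // largest magnitude sets the scale
        sum += x[i];
    }
    d = amax / 127.0f;
    const float id = d != 0.0f ? 1.0f/d : 0.0f;
    for (int i = 0; i < 32; ++i) {
        q[i] = (int8_t)roundf(x[i]*id);    // one signed byte per value
    }
    s = sum;                               // block sum, used by the q*_1 dot products
}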
- */
-
 #include "rope.cuh"
 
 struct rope_corr_dims {
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/rope.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/rope.cuh
new file mode 100644
index 000000000..0f787a0b2
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/rope.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_ROPE_BLOCK_SIZE 256
+
+void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/scale.cu b/ml/backend/ggml/ggml/src/ggml-cuda/scale.cu
new file mode 100644
index 000000000..1405e066e
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/scale.cu
@@ -0,0 +1,31 @@
+#include "scale.cuh"
+
+static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
+    const int i = blockDim.x*blockIdx.x + threadIdx.x;
+
+    if (i >= k) {
+        return;
+    }
+
+    dst[i] = scale * x[i];
+}
+
+static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
+    const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
+    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
+}
+
+void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const float * src0_d = (const float *)src0->data;
+    float * dst_d = (float *)dst->data;
+    cudaStream_t stream = ctx.stream();
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    float scale;
+    memcpy(&scale, dst->op_params, sizeof(float));
+
+    scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
+}
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/scale.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/scale.cuh
new file mode 100644
index 000000000..8ff75c829
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/scale.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_SCALE_BLOCK_SIZE 256
+
+void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/softmax.cu b/ml/backend/ggml/ggml/src/ggml-cuda/softmax.cu
similarity index 86%
rename from llama/ggml-cuda/softmax.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/softmax.cu
index 52aad62f6..c24abae1f 100644
--- a/llama/ggml-cuda/softmax.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/softmax.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
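// One detail worth noting in scale.cu above: the scale factor travels in
// dst->op_params as raw bytes and is recovered with memcpy rather than
// *(const float *)dst->op_params, since a pointer cast would be a
// strict-aliasing violation. The pattern in isolation:
#include <cstdint>
#include <cstring>

static float read_f32_param_sketch(const int32_t * op_params) {
    float value;
    std::memcpy(&value, op_params, sizeof(float));   // well-defined type pun
    return value;
}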
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #include "common.cuh"
 #include "softmax.cuh"
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/softmax.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/softmax.cuh
new file mode 100644
index 000000000..4ef4ff86c
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/softmax.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
+
+void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/llama/ggml-cuda/sum.cu b/ml/backend/ggml/ggml/src/ggml-cuda/sum.cu
similarity index 54%
rename from llama/ggml-cuda/sum.cu
rename to ml/backend/ggml/ggml/src/ggml-cuda/sum.cu
index e1f0b86e8..e0dafc1d2 100644
--- a/llama/ggml-cuda/sum.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/sum.cu
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
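// CUDA_SOFT_MAX_BLOCK_SIZE is pinned to 1024, the CUDA per-block thread limit, so a
// whole row can be handled by one block whenever possible. Per row the kernel computes
// the usual max-subtracted (numerically stable) soft-max; a single-threaded reference
// of that math, for orientation only:
#include <cmath>

void soft_max_row_ref(const float * x, float * y, const int n) {
    float max_val = -INFINITY;
    for (int i = 0; i < n; ++i) max_val = fmaxf(max_val, x[i]);    // row max, subtracted for stability
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) { y[i] = expf(x[i] - max_val); sum += y[i]; }
    for (int i = 0; i < n; ++i) y[i] /= sum;                       // normalize so the row sums to 1
}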
- */
-
 #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
 #define USE_CUB
 #endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/sum.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/sum.cuh
new file mode 100644
index 000000000..8cadc3736
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/sum.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);
+
+void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cu b/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cu
new file mode 100644
index 000000000..38dbf1b5e
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cu
@@ -0,0 +1,39 @@
+#include "sumrows.cuh"
+
+static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
+    const int row = blockIdx.x;
+    const int col = threadIdx.x;
+
+    float sum = 0.0f;
+    for (int i = col; i < ncols; i += blockDim.x) {
+        sum += x[row * ncols + i];
+    }
+
+    sum = warp_reduce_sum(sum);
+
+    if (col == 0) {
+        dst[row] = sum;
+    }
+}
+
+void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
+    const dim3 block_dims(WARP_SIZE, 1, 1);
+    const dim3 block_nums(nrows, 1, 1);
+    k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+}
+
+void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor * src0 = dst->src[0];
+    const float * src0_d = (const float *)src0->data;
+    float * dst_d = (float *)dst->data;
+    cudaStream_t stream = ctx.stream();
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
+    const int64_t ncols = src0->ne[0];
+    const int64_t nrows = ggml_nrows(src0);
+
+    sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
+}
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cuh
new file mode 100644
index 000000000..191db1c13
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/sumrows.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
+
+void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
new file mode 100644
index 000000000..6696a2384
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../fattn-vec-f16.cuh"
+
+DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
new file mode 100644
index 000000000..dd070db28
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
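// k_sum_rows_f32 above delegates to warp_reduce_sum from common.cuh. A typical
// shuffle-based definition of that helper looks like this (sketch, assuming the
// usual 32-lane warps and the WARP_SIZE-thread blocks used by the launcher above):
static __device__ __forceinline__ float warp_reduce_sum_sketch(float x) {
#pragma unroll
    for (int offset = 16; offset > 0; offset >>= 1) {
        x += __shfl_xor_sync(0xffffffff, x, offset, 32);   // after 5 butterfly steps every lane holds the total
    }
    return x;
}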
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu new file mode 100644 index 000000000..54dcde6f5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu new file mode 100644 index 000000000..4ec22f791 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu new file mode 100644 index 000000000..3c15bf7f0 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu new file mode 100644 index 000000000..7e61b5fdc --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu new file mode 100644 index 000000000..fdb15b580 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu new file mode 100644 index 000000000..0f7c417d2 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu new file mode 100644 index 000000000..851f33c43 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu new file mode 100644 index 000000000..763809cbe --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu new file mode 100644 index 000000000..f2a276e50 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu new file mode 100644 index 000000000..cb227f6f5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu new file mode 100644 index 000000000..97ac0520c --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu new file mode 100644 index 000000000..c772b4263 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu new file mode 100644 index 000000000..5cb743081 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu new file mode 100644 index 000000000..98a709d17 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu new file mode 100644 index 000000000..4f2f947ae --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu new file mode 100644 index 000000000..11f96b6f6 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu new file mode 100644 index 000000000..b39bdc061 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu new file mode 100644 index 000000000..bbd6a2c7f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu new file mode 100644 index 000000000..9d84ff2b1 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu new file mode 100644 index 000000000..bc8a5bff6 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu new file mode 100644 index 000000000..a679100c8 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu new file mode 100644 index 000000000..8f21bccf7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu new file mode 100644 index 000000000..858b00fd7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu new file mode 100644 index 000000000..0fc8011fa --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu new file mode 100644 index 000000000..261fdf623 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu new file mode 100644 index 000000000..0fb824738 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu new file mode 100644 index 000000000..a9d9d089b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu new file mode 100644 index 000000000..7d7b27920 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu new file mode 100644 index 000000000..a092ee2d5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu new file mode 100644 index 000000000..db55927a1 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu new file mode 100644 index 000000000..c3c21cefa --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu new file mode 100644 index 000000000..35dd9f520 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu new file mode 100644 index 000000000..050c22ac7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu new file mode 100644 index 000000000..de4866c5e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu new file mode 100644 index 000000000..57a10bc4b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu new file mode 100644 index 000000000..e0f08b46a --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
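// These one-line template-instance files exist purely to fan the flash-attention
// kernel instantiations out across many translation units, which keeps per-file
// compile times and object sizes manageable. The DECL_FATTN_VEC_F16_CASE macro they
// invoke plausibly expands to an explicit template instantiation along these lines
// (an assumption for illustration; the real definition lives in fattn-vec-f16.cuh):
#define DECL_FATTN_VEC_F16_CASE_SKETCH(D, type_K, type_V)                     \
    template void ggml_cuda_flash_attn_ext_vec_f16_case<D, type_K, type_V>(   \
        ggml_backend_cuda_context & ctx, ggml_tensor * dst)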
+ +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu new file mode 100644 index 000000000..1c8e8a467 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu new file mode 100644 index 000000000..cefed83fb --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu new file mode 100644 index 000000000..aede6e358 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu new file mode 100644 index 000000000..1a1a92c78 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu new file mode 100644 index 000000000..ad667473d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f16.cuh" + +DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu new file mode 100644 index 000000000..c499f455d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu new file mode 100644 index 000000000..8286ebf37 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu new file mode 100644 index 000000000..458786882 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu new file mode 100644 index 000000000..d89103ce0 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu new file mode 100644 index 000000000..bb75fd42f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu new file mode 100644 index 000000000..b1629817e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu new file mode 100644 index 000000000..d8657604d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu new file mode 100644 index 000000000..2e5bd2f1a --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu new file mode 100644 index 000000000..be5f302d9 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu new file mode 100644 index 000000000..8dd91cd72 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu new file mode 100644 index 000000000..4cb791502 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu new file mode 100644 index 000000000..09dea4267 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu new file mode 100644 index 000000000..0fbb60769 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu new file mode 100644 index 000000000..2aeab83b2 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu new file mode 100644 index 000000000..599415b49 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu new file mode 100644 index 000000000..e4f8e3083 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu new file mode 100644 index 000000000..34d166527 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu new file mode 100644 index 000000000..4bebef45a --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu new file mode 100644 index 000000000..326468da2 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu new file mode 100644 index 000000000..511b58f4e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu new file mode 100644 index 000000000..d9906d142 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu new file mode 100644 index 000000000..f61c183ab --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu new file mode 100644 index 000000000..c10450fd2 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu new file mode 100644 index 000000000..2d5cb195c --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu new file mode 100644 index 000000000..b384f34d7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu new file mode 100644 index 000000000..446e293b1 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu new file mode 100644 index 000000000..6f4302988 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu new file mode 100644 index 000000000..1cd8ba88f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu new file mode 100644 index 000000000..1ee2eab65 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu new file mode 100644 index 000000000..2bc77816a --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu new file mode 100644 index 000000000..d55ced08b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu new file mode 100644 index 000000000..8361e99c4 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu new file mode 100644 index 000000000..7507a67c4 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu new file mode 100644 index 000000000..61f050b23 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu new file mode 100644 index 000000000..d4a49d9c9 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu new file mode 100644 index 000000000..d14627897 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu new file mode 100644 index 000000000..e73f917a1 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu new file mode 100644 index 000000000..d40825dfc --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu new file mode 100644 index 000000000..b5c6869f4 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu new file mode 100644 index 000000000..4e21b0cca --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu new file mode 100644 index 000000000..2eac321b3 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu new file mode 100644 index 000000000..f7d2c3b4e --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu new file mode 100644 index 000000000..a013f400b --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-vec-f32.cuh" + +DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu new file mode 100644 index 000000000..2d94e65c2 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 16, float); +DECL_FATTN_WMMA_F16_CASE(80, 16, float); +DECL_FATTN_WMMA_F16_CASE(96, 16, float); +DECL_FATTN_WMMA_F16_CASE(112, 16, float); +DECL_FATTN_WMMA_F16_CASE(128, 16, float); +DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu new file mode 100644 index 000000000..c3d9df3c4 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu @@ -0,0 +1,9 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 32, float); +DECL_FATTN_WMMA_F16_CASE(80, 32, float); +DECL_FATTN_WMMA_F16_CASE(96, 32, float); +DECL_FATTN_WMMA_F16_CASE(112, 32, float); +DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu new file mode 100644 index 000000000..bb680e401 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 16, half); +DECL_FATTN_WMMA_F16_CASE(80, 16, half); +DECL_FATTN_WMMA_F16_CASE(96, 16, half); +DECL_FATTN_WMMA_F16_CASE(112, 16, half); +DECL_FATTN_WMMA_F16_CASE(128, 16, half); +DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu new file mode 100644 index 000000000..073f71b1f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 32, half); +DECL_FATTN_WMMA_F16_CASE(80, 32, half); +DECL_FATTN_WMMA_F16_CASE(96, 32, half); +DECL_FATTN_WMMA_F16_CASE(112, 32, half); +DECL_FATTN_WMMA_F16_CASE(128, 32, half); +DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu new file mode 100644 index 000000000..d30710c5f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu @@ -0,0 +1,8 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-wmma-f16.cuh" + +DECL_FATTN_WMMA_F16_CASE(64, 8, half); +DECL_FATTN_WMMA_F16_CASE(96, 8, half); +DECL_FATTN_WMMA_F16_CASE(128, 8, half); +DECL_FATTN_WMMA_F16_CASE(256, 8, half); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py new file mode 100755 index 000000000..d7874e6ea --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/generate_cu_files.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +from glob import glob +import os + +TYPES_KV = ["GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", "GGML_TYPE_F16"] + +SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-vec-f{vkq_size}.cuh" + +DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v}); +""" + +SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-wmma-f16.cuh" + +""" + +SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n" + +TYPES_MMQ = [ + "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", + "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K", + "GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S", + "GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS" +] + +SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE({type}); +""" + + +def get_short_name(long_quant_name): + return long_quant_name.replace("GGML_TYPE_", "").lower() + + +def get_head_sizes(type_k, type_v): + if type_k == "GGML_TYPE_F16" and type_v == "GGML_TYPE_F16": + return [64, 128, 256] + if type_k == "GGML_TYPE_F16": + return [64, 128] + return [128] + + +for filename in glob("*.cu"): + os.remove(filename) + +for vkq_size in [16, 32]: + for type_k in TYPES_KV: + for type_v in TYPES_KV: + for head_size in get_head_sizes(type_k, type_v): + with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f: + f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v)) + +for kq_acc_t in ["half", "float"]: + for cols_per_block in [8, 16, 32]: + if kq_acc_t == "float" and cols_per_block == 8: + continue + + with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f: + f.write(SOURCE_FATTN_WMMA_START) + + for head_size in [64, 80, 96, 112, 128, 256]: + if cols_per_block == 8 and head_size % 32 != 0: # wmma fragment is 8x32 + continue + if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance + continue + f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size)) + +for type in TYPES_MMQ: + with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f: + f.write(SOURCE_MMQ.format(type=type)) diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu new file mode 100644 index 000000000..84ec85029 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by 
generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ1_S); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu new file mode 100644 index 000000000..583c4e5a5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_S); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu new file mode 100644 index 000000000..edaf1560d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_XS); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu new file mode 100644 index 000000000..233d9342c --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu new file mode 100644 index 000000000..6092dc713 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ3_S); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu new file mode 100644 index 000000000..1d5bd201f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu new file mode 100644 index 000000000..eb02fab00 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ4_NL); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu new file mode 100644 index 000000000..1eb3b7430 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
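The WMMA half of generate_cu_files.py above prunes its grid in three places: the f32-accumulator/8-columns file is never generated at all, head sizes that do not fit an 8x32 WMMA fragment are dropped from the cpb8 file, and the f32/cpb32/hs256 case is skipped because it spills registers. Restating those rules as a quick cross-check against the five fattn-wmma files added earlier:

# Re-derive the per-file WMMA case counts from the skip rules in
# generate_cu_files.py, to cross-check the hunks above.
HEAD_SIZES = [64, 80, 96, 112, 128, 256]

for kq_acc_t in ("half", "float"):
    for cols_per_block in (8, 16, 32):
        if kq_acc_t == "float" and cols_per_block == 8:
            continue  # this file is never emitted
        cases = [
            hs for hs in HEAD_SIZES
            if not (cols_per_block == 8 and hs % 32 != 0)  # 8x32 fragment
            if not (kq_acc_t == "float" and cols_per_block == 32 and hs == 256)  # spills
        ]
        print(f"kq{kq_acc_t}-cpb{cols_per_block}.cu: {len(cases)} cases")
# kqhalf-cpb8 ends up with 4 cases (64/96/128/256), kqfloat-cpb32 with 5,
# and the other three files with all 6 -- matching the DECL counts above.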
+ +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_IQ4_XS); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu new file mode 100644 index 000000000..6415369dc --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q2_K); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu new file mode 100644 index 000000000..ffb6213af --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q3_K); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu new file mode 100644 index 000000000..0c0b0c8a8 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu new file mode 100644 index 000000000..ee67f6942 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu new file mode 100644 index 000000000..9eeb3cd7f --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_K); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu new file mode 100644 index 000000000..cc57fb975 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_0); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu new file mode 100644 index 000000000..721ac790c --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
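Unlike the flash-attention instances, the mmq (quantized matmul) instances carry no extra dimensions: one file and a single DECL_MMQ_CASE per quantized type, eighteen in all, matching TYPES_MMQ in the generator script. A trivial restatement for checking the file list:

# The mmq instance files: one per entry in TYPES_MMQ (18 total).
TYPES_MMQ = [
    "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
    "q2_k", "q3_k", "q4_k", "q5_k", "q6_k",
    "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s",
    "iq1_s", "iq4_nl", "iq4_xs",
]
for t in TYPES_MMQ:
    print(f"mmq-instance-{t}.cu -> DECL_MMQ_CASE(GGML_TYPE_{t.upper()});")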
+ +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu new file mode 100644 index 000000000..a2e90ffd5 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_K); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu new file mode 100644 index 000000000..470938fef --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q6_K); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu new file mode 100644 index 000000000..974477bbb --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q8_0); diff --git a/llama/ggml-cuda/tsembd.cu b/ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cu similarity index 59% rename from llama/ggml-cuda/tsembd.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cu index c60367838..153ddbcda 100644 --- a/llama/ggml-cuda/tsembd.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
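This tsembd.cu hunk and the ones that follow are renames: each file moves from llama/ into ml/backend/ggml/ggml/src/, and the hunk deletes the vendored 29-line "do not edit this file" MIT preamble that the old vendoring step had stamped onto every source. As a purely illustrative sketch (a hypothetical helper, not a script contained in this patch), the transformation amounts to stripping a leading /** ... */ block:

# Hypothetical helper, for illustration only: strip the leading
# /** ... */ license preamble that these rename hunks delete.
from pathlib import Path

def strip_preamble(path: Path) -> None:
    text = path.read_text()
    if text.startswith("/**"):
        end = text.find("*/")
        if end != -1:
            # Drop the comment block plus the blank line that follows it.
            path.write_text(text[end + 2:].lstrip("\n"))

# Example: strip_preamble(Path("ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cu"))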
- */ - #include "tsembd.cuh" static __global__ void timestep_embedding_f32(const float * timesteps, float * dst, const int nb1, const int dim, const int max_period) { diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cuh new file mode 100644 index 000000000..84340e3d7 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/tsembd.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 + +void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/unary.cu b/ml/backend/ggml/ggml/src/ggml-cuda/unary.cu similarity index 92% rename from llama/ggml-cuda/unary.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/unary.cu index e20cba020..81fc92202 100644 --- a/llama/ggml-cuda/unary.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/unary.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "unary.cuh" static __global__ void neg_f32(const float * x, float * dst, const int k) { diff --git a/llama/ggml-cuda/unary.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/unary.cuh similarity index 58% rename from llama/ggml-cuda/unary.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/unary.cuh index 3a9161bf9..c91936728 100644 --- a/llama/ggml-cuda/unary.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/unary.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #define CUDA_NEG_BLOCK_SIZE 256 diff --git a/llama/ggml-cuda/upscale.cu b/ml/backend/ggml/ggml/src/ggml-cuda/upscale.cu similarity index 63% rename from llama/ggml-cuda/upscale.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/upscale.cu index 19c8f2a18..cf513c3ad 100644 --- a/llama/ggml-cuda/upscale.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/upscale.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "upscale.cuh" static __global__ void upscale_f32(const float * x, float * dst, diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/upscale.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/upscale.cuh new file mode 100644 index 000000000..d4d765230 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/upscale.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_UPSCALE_BLOCK_SIZE 256 + +void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/llama/ggml-cuda/vecdotq.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh similarity index 96% rename from llama/ggml-cuda/vecdotq.cuh rename to ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh index 43719cbd7..40091a0ef 100644 --- a/llama/ggml-cuda/vecdotq.cuh +++ b/ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #include "common.cuh" #include diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/vendors/cuda.h b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/cuda.h new file mode 100644 index 000000000..db9f6a165 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/cuda.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include +#include +#include + +#if CUDART_VERSION < 11020 +#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED +#define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH +#define CUBLAS_COMPUTE_16F CUDA_R_16F +#define CUBLAS_COMPUTE_32F CUDA_R_32F +#define cublasComputeType_t cudaDataType_t +#endif // CUDART_VERSION < 11020 diff --git a/llama/ggml-cuda/vendors/hip.h b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/hip.h similarity index 85% rename from llama/ggml-cuda/vendors/hip.h rename to ml/backend/ggml/ggml/src/ggml-cuda/vendors/hip.h index 7b3102f39..c905b15d7 100644 --- a/llama/ggml-cuda/vendors/hip.h +++ b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/hip.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include diff --git a/llama/ggml-cuda/vendors/musa.h b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/musa.h similarity index 83% rename from llama/ggml-cuda/vendors/musa.h rename to ml/backend/ggml/ggml/src/ggml-cuda/vendors/musa.h index 7b1a4ac41..6cc1b69ee 100644 --- a/llama/ggml-cuda/vendors/musa.h +++ b/ml/backend/ggml/ggml/src/ggml-cuda/vendors/musa.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once #include diff --git a/llama/ggml-cuda/wkv6.cu b/ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cu similarity index 71% rename from llama/ggml-cuda/wkv6.cu rename to ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cu index fe4e5b9d4..42578341a 100644 --- a/llama/ggml-cuda/wkv6.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cu @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #include "common.cuh" #include "wkv6.cuh" diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cuh b/ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cuh new file mode 100644 index 000000000..a7124ee51 --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-cuda/wkv6.cuh @@ -0,0 +1,5 @@ +#include "common.cuh" + +#define CUDA_WKV_BLOCK_SIZE 64 + +void ggml_cuda_op_rwkv_wkv6(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ml/backend/ggml/ggml/src/ggml-hip/CMakeLists.txt b/ml/backend/ggml/ggml/src/ggml-hip/CMakeLists.txt new file mode 100644 index 000000000..b15fbd24d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-hip/CMakeLists.txt @@ -0,0 +1,104 @@ +if (NOT EXISTS $ENV{ROCM_PATH}) + if (NOT EXISTS /opt/rocm) + set(ROCM_PATH /usr) + else() + set(ROCM_PATH /opt/rocm) + endif() +else() + set(ROCM_PATH $ENV{ROCM_PATH}) +endif() + +list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) +list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake") + +# CMake on Windows doesn't support the HIP language yet +if (WIN32) + set(CXX_IS_HIPCC TRUE) +else() + string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") +endif() + +if (CXX_IS_HIPCC) + if (LINUX) + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") + endif() + + message(WARNING "Setting hipcc as the C++ compiler is legacy behavior." + " Prefer setting the HIP compiler directly. 
See README for details.") + endif() +else() + # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES. + if (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) + endif() + cmake_minimum_required(VERSION 3.21) + enable_language(HIP) +endif() + +find_package(hip REQUIRED) +find_package(hipblas REQUIRED) +find_package(rocblas REQUIRED) + +message(STATUS "HIP and hipBLAS found") + +file(GLOB GGML_HEADERS_ROCM "../ggml-cuda/*.cuh") +list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h") + +file(GLOB GGML_SOURCES_ROCM "../ggml-cuda/*.cu") +file(GLOB SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu") +list(APPEND GGML_SOURCES_ROCM ${SRCS}) +file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu") +list(APPEND GGML_SOURCES_ROCM ${SRCS}) + +if (GGML_CUDA_FA_ALL_QUANTS) + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu") + list(APPEND GGML_SOURCES_ROCM ${SRCS}) + add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) +else() + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") + list(APPEND GGML_SOURCES_ROCM ${SRCS}) + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu") + list(APPEND GGML_SOURCES_ROCM ${SRCS}) + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*f16-f16.cu") + list(APPEND GGML_SOURCES_ROCM ${SRCS}) +endif() + +ggml_add_backend_library(ggml-hip + ${GGML_HEADERS_ROCM} + ${GGML_SOURCES_ROCM} + ) + +# TODO: do not use CUDA definitions for HIP +target_compile_definitions(ggml PUBLIC GGML_USE_CUDA) + +add_compile_definitions(GGML_USE_HIP) + +if (GGML_HIP_UMA) + add_compile_definitions(GGML_HIP_UMA) +endif() + +if (GGML_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) +endif() + +if (GGML_CUDA_FORCE_CUBLAS) + add_compile_definitions(GGML_CUDA_FORCE_CUBLAS) +endif() + +if (GGML_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) +endif() + +if (CXX_IS_HIPCC) + set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) + target_link_libraries(ggml-hip PRIVATE hip::device) +else() + set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP) +endif() + +if (GGML_STATIC) + message(FATAL_ERROR "Static linking not supported for HIP/ROCm") +endif() + +target_link_libraries(ggml-hip PRIVATE ggml-base hip::host roc::rocblas roc::hipblas) diff --git a/llama/ggml-impl.h b/ml/backend/ggml/ggml/src/ggml-impl.h similarity index 93% rename from llama/ggml-impl.h rename to ml/backend/ggml/ggml/src/ggml-impl.h index 46760fb32..549772c57 100644 --- a/llama/ggml-impl.h +++ b/ml/backend/ggml/ggml/src/ggml-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
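One detail of the ggml-hip CMakeLists above is worth calling out: unless GGML_CUDA_FA_ALL_QUANTS is set, only three filename globs of fattn-vec instances are compiled, covering the q4_0/q4_0, q8_0/q8_0, and f16/f16 type pairs. A sketch of that selection (the glob patterns are copied from the CMake file; the file list is re-derived under the same head-size assumptions as the earlier sketch):

# Which fattn-vec instances the HIP build compiles when
# GGML_CUDA_FA_ALL_QUANTS is off, mirroring the three CMake globs.
from fnmatch import fnmatch

TYPES_KV = ["q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "f16"]

def head_sizes(k, v):
    if k == v == "f16":
        return [64, 128, 256]
    return [64, 128] if k == "f16" else [128]

files = [
    f"fattn-vec-f{acc}-instance-hs{hs}-{k}-{v}.cu"
    for acc in (16, 32) for k in TYPES_KV for v in TYPES_KV
    for hs in head_sizes(k, v)
]

PATTERNS = ["fattn-vec*q4_0-q4_0.cu", "fattn-vec*q8_0-q8_0.cu", "fattn-vec*f16-f16.cu"]
selected = [f for f in files if any(fnmatch(f, p) for p in PATTERNS)]
print(len(selected))  # 10 of the 86 instance files

That trade keeps default HIP builds fast while still covering the common KV-cache type combinations; the full grid is only paid for when explicitly requested.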
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #pragma once // GGML internal header diff --git a/ml/backend/ggml/ggml/src/ggml-metal/CMakeLists.txt b/ml/backend/ggml/ggml/src/ggml-metal/CMakeLists.txt new file mode 100644 index 000000000..89fcde2fa --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-metal/CMakeLists.txt @@ -0,0 +1,121 @@ +find_library(FOUNDATION_LIBRARY Foundation REQUIRED) +find_library(METAL_FRAMEWORK Metal REQUIRED) +find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) + +message(STATUS "Metal framework found") + +ggml_add_backend_library(ggml-metal + ggml-metal.m + ) + +target_link_libraries(ggml-metal PRIVATE + ${FOUNDATION_LIBRARY} + ${METAL_FRAMEWORK} + ${METALKIT_FRAMEWORK} + ) + +if (GGML_METAL_NDEBUG) + add_compile_definitions(GGML_METAL_NDEBUG) +endif() + +if (GGML_METAL_USE_BF16) + add_compile_definitions(GGML_METAL_USE_BF16) +endif() + +# copy metal files to bin directory +configure_file(../ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) +configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) +configure_file(ggml-metal-impl.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal-impl.h COPYONLY) + +if (GGML_METAL_EMBED_LIBRARY) + enable_language(ASM) + + add_compile_definitions(GGML_METAL_EMBED_LIBRARY) + + set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/../ggml-common.h") + set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + set(METALLIB_IMPL "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal-impl.h") + + file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") + + # merge ggml-common.h and ggml-metal.metal into a single file + set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") + set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") + set(METALLIB_SOURCE_EMBED_TMP "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal.tmp") + + add_custom_command( + OUTPUT ${METALLIB_EMBED_ASM} + COMMAND echo "Embedding Metal library" + COMMAND sed -e '/__embed_ggml-common.h__/r ${METALLIB_COMMON}' -e '/__embed_ggml-common.h__/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED_TMP} + COMMAND sed -e '/\#include \"ggml-metal-impl.h\"/r ${METALLIB_IMPL}' -e '/\#include \"ggml-metal-impl.h\"/d' < ${METALLIB_SOURCE_EMBED_TMP} > ${METALLIB_SOURCE_EMBED} + COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM} + DEPENDS ../ggml-common.h ggml-metal.metal ggml-metal-impl.h + COMMENT "Generate assembly for embedded Metal library" + ) + + target_sources(ggml-metal PRIVATE ${METALLIB_EMBED_ASM}) +else() + if (GGML_METAL_SHADER_DEBUG) + # custom command to do the following: + # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air + # xcrun -sdk 
macosx metallib ggml-metal.air -o default.metallib + # + # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works + # disabling fast math is needed in order to pass tests/test-backend-ops + # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 + # note: unfortunately, we have to call it default.metallib instead of ggml.metallib + # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 + set(XC_FLAGS -fno-fast-math -fno-inline -g) + else() + set(XC_FLAGS -O3) + endif() + + # Append macOS metal versioning flags + if (GGML_METAL_MACOSX_VERSION_MIN) + message(STATUS "Adding -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN} flag to metal compilation") + list (APPEND XC_FLAGS -mmacosx-version-min=${GGML_METAL_MACOSX_VERSION_MIN}) + endif() + + if (GGML_METAL_STD) + message(STATUS "Adding -std=${GGML_METAL_STD} flag to metal compilation") + list (APPEND XC_FLAGS -std=${GGML_METAL_STD}) + endif() + + add_custom_command( + OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal + DEPENDS ggml-metal.metal ggml-common.h + COMMENT "Compiling Metal kernels" + ) + + # FIXME: only add to the ggml-metal target? + add_custom_target( + ggml-metal-lib ALL + DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + ) +endif() # GGML_METAL_EMBED_LIBRARY + +if (NOT GGML_METAL_EMBED_LIBRARY) + install( + FILES src/ggml-metal/ggml-metal.metal + PERMISSIONS + OWNER_READ + OWNER_WRITE + GROUP_READ + WORLD_READ + DESTINATION ${CMAKE_INSTALL_BINDIR}) + + install( + FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + DESTINATION ${CMAKE_INSTALL_BINDIR} + ) +endif() diff --git a/llama/ggml-metal-embed.metal b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal similarity index 99% rename from llama/ggml-metal-embed.metal rename to ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal index 7f4666c93..2e51b87a0 100644 --- a/llama/ggml-metal-embed.metal +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal @@ -1,58 +1,7 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - +// Code generated Fri Jan 10 13:05:45 PST 2025. DO NOT EDIT. #define GGML_COMMON_DECL_METAL #define GGML_COMMON_IMPL_METAL #if defined(GGML_METAL_EMBED_LIBRARY) -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef GGML_COMMON_DECL #if defined(GGML_COMMON_DECL_C) @@ -1910,32 +1859,6 @@ GGML_TABLE_END() // TODO: this should not be a relative path, but can't figure out how to set Metal include paths in Package.swift #include "../ggml-common.h" #endif -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - #ifndef GGML_METAL_IMPL #define GGML_METAL_IMPL diff --git a/llama/ggml-metal-embed_darwin_arm64.s b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s similarity index 87% rename from llama/ggml-metal-embed_darwin_arm64.s rename to ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s index a108c8251..47c729a6a 100644 --- a/llama/ggml-metal-embed_darwin_arm64.s +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s @@ -3,4 +3,4 @@ _ggml_metallib_start: .incbin "ggml-metal-embed.metal" .globl _ggml_metallib_end -_ggml_metallib_end: \ No newline at end of file +_ggml_metallib_end: diff --git a/llama/ggml-metal-impl.h b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-impl.h similarity index 81% rename from llama/ggml-metal-impl.h rename to ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-impl.h index 19103fb59..e3dc25f16 100644 --- a/llama/ggml-metal-impl.h +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-impl.h @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #ifndef GGML_METAL_IMPL #define GGML_METAL_IMPL diff --git a/llama/ggml-metal_darwin_arm64.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m similarity index 99% rename from llama/ggml-metal_darwin_arm64.m rename to ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m index d72129c3a..318addecc 100644 --- a/llama/ggml-metal_darwin_arm64.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #import "ggml-metal.h" #import "ggml-impl.h" @@ -4246,6 +4220,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer) } free(ctx); + free(buffer); } static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) { diff --git a/llama/ggml-metal.metal b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal similarity index 99% rename from llama/ggml-metal.metal rename to ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal index 1bca09725..204c93e6f 100644 --- a/llama/ggml-metal.metal +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal @@ -1,29 +1,3 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - #define GGML_COMMON_DECL_METAL #define GGML_COMMON_IMPL_METAL #if defined(GGML_METAL_EMBED_LIBRARY) diff --git a/ml/backend/ggml/ggml/src/ggml-metal/metal.go b/ml/backend/ggml/ggml/src/ggml-metal/metal.go new file mode 100644 index 000000000..1025e205d --- /dev/null +++ b/ml/backend/ggml/ggml/src/ggml-metal/metal.go @@ -0,0 +1,9 @@ +//go:build darwin && arm64 + +package metal + +//go:generate sh -c "{ echo // Code generated $(date). DO NOT EDIT.; sed -e '/__embed_ggml-common.h__/r ../ggml-common.h' -e '/__embed_ggml-common.h__/d' -e '/#include \"ggml-metal-impl.h\"/r ggml-metal-impl.h' -e '/#include \"ggml-metal-impl.h\"/d' ggml-metal.metal; } >ggml-metal-embed.metal" + +// #cgo CPPFLAGS: -DGGML_METAL_EMBED_LIBRARY -I.. 
-I../../include
+// #cgo LDFLAGS: -framework Metal -framework MetalKit
+import "C"
diff --git a/ml/backend/ggml/ggml/src/ggml-opt.cpp b/ml/backend/ggml/ggml/src/ggml-opt.cpp
new file mode 100644
index 000000000..7c3e24103
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-opt.cpp
@@ -0,0 +1,854 @@
+#include "ggml-opt.h"
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+#include "ggml-impl.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <map>
+#include <random>
+#include <vector>
+
+struct ggml_opt_dataset {
+    struct ggml_context * ctx = nullptr;
+    ggml_backend_buffer_t buf = nullptr;
+    struct ggml_tensor * data = nullptr;
+    struct ggml_tensor * labels = nullptr;
+
+    int64_t ndata = -1;
+    int64_t ndata_shard = -1;
+    size_t nbs_data = -1;
+    size_t nbs_labels = -1;
+
+    std::vector<int64_t> permutation;
+};
+
+struct ggml_opt_context {
+    ggml_backend_sched_t backend_sched = nullptr;
+    ggml_cgraph * allocated_graph = nullptr;
+    ggml_cgraph * allocated_graph_copy = nullptr;
+    struct ggml_context * ctx_static = nullptr;
+    struct ggml_context * ctx_static_cpu = nullptr;
+    struct ggml_context * ctx_compute = nullptr;
+    struct ggml_context * ctx_copy = nullptr;
+    ggml_backend_buffer_t buf_static = nullptr;
+    ggml_backend_buffer_t buf_static_cpu = nullptr;
+    std::mt19937 rng;
+
+    struct ggml_tensor * inputs = nullptr;
+    struct ggml_tensor * outputs = nullptr;
+    struct ggml_tensor * labels = nullptr;
+
+    struct ggml_tensor * loss = nullptr;
+    struct ggml_tensor * pred = nullptr;
+    struct ggml_tensor * ncorrect = nullptr;
+
+    struct ggml_cgraph * gf = nullptr;
+    struct ggml_cgraph * gb_grad = nullptr;
+    struct ggml_cgraph * gb_opt = nullptr;
+
+    int64_t iter = 1;
+    int32_t opt_period = 1;
+    int32_t opt_i = 0;
+    bool loss_per_datapoint = false;
+
+    ggml_opt_get_optimizer_params get_opt_pars = nullptr;
+    void * get_opt_pars_ud = nullptr;
+    struct ggml_tensor * adamw_params = nullptr;
+};
+
+struct ggml_opt_result {
+    int64_t ndata = 0;
+    std::vector<float> loss;
+    std::vector<int32_t> pred;
+    int64_t ncorrect = 0;
+
+    int64_t opt_period = -1;
+    bool loss_per_datapoint = false;
+};
+
+// ====== Dataset ======
+
+ggml_opt_dataset_t ggml_opt_dataset_init(int64_t ne_datapoint, int64_t ne_label, int64_t ndata, int64_t ndata_shard) {
+    GGML_ASSERT(ne_datapoint > 0);
+    GGML_ASSERT(ne_label >= 0);
+    GGML_ASSERT(ndata > 0);
+    GGML_ASSERT(ndata_shard > 0);
+
+    ggml_opt_dataset_t result = new ggml_opt_dataset;
+    result->ndata = ndata;
+    result->ndata_shard = ndata_shard;
+
+    {
+        struct ggml_init_params params = {
+            /*.mem_size =*/ 2*ggml_tensor_overhead(),
+            /*.mem_buffer =*/ nullptr,
+            /*.no_alloc =*/ true,
+        };
+        result->ctx = ggml_init(params);
+    }
+
+    result->data = ggml_new_tensor_2d(result->ctx, GGML_TYPE_F32, ne_datapoint, ndata);
+    result->nbs_data = ggml_nbytes(result->data) * ndata_shard/ndata;
+
+    if (ne_label > 0) {
+        result->labels = ggml_new_tensor_2d(result->ctx, GGML_TYPE_F32, ne_label, ndata);
+        result->nbs_labels = ggml_nbytes(result->labels) * ndata_shard/ndata;
+    } else {
+        result->labels = nullptr;
+        result->nbs_labels = 0;
+    }
+
+    result->buf = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx, ggml_backend_cpu_buffer_type());
+
+    const int64_t nshards = ndata/ndata_shard;
+    result->permutation.resize(nshards);
+    for (int64_t i = 0; i < nshards; ++i) {
+        result->permutation[i] = i;
+    }
+    return result;
+}
+
+void ggml_opt_dataset_free(ggml_opt_dataset_t dataset) {
+    ggml_backend_buffer_free(dataset->buf);
+    ggml_free(dataset->ctx);
+    delete dataset;
+}
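+
+// A minimal usage sketch (illustrative, not part of the upstream file): the
+// dataset buffer is allocated from the CPU buffer type above, so callers can
+// fill it in place. Shapes here are MNIST-like placeholders:
+//
+//     ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+//         /*ne_datapoint =*/ 784, /*ne_label =*/ 10,
+//         /*ndata =*/ 60000, /*ndata_shard =*/ 1000);
+//     float * data   = ggml_get_data_f32(ggml_opt_dataset_data(dataset));
+//     float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
+//     // write datapoint i into data[i*784 ...] and its one-hot label into
+//     // labels[i*10 ...], train, then release:
+//     ggml_opt_dataset_free(dataset);
+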
+struct ggml_tensor * ggml_opt_dataset_data(ggml_opt_dataset_t dataset) {
+    return dataset->data;
+}
+
+struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset) {
+    return dataset->labels;
+}
+
+void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata) {
+    GGML_ASSERT(idata <= dataset->ndata);
+
+    if (idata < 0) {
+        std::shuffle(dataset->permutation.begin(), dataset->permutation.end(), opt_ctx->rng);
+        return;
+    }
+
+    GGML_ASSERT(idata % dataset->ndata_shard == 0);
+    const int64_t ishard_max = idata / dataset->ndata_shard;
+    std::shuffle(dataset->permutation.begin(), dataset->permutation.begin() + ishard_max, opt_ctx->rng);
+}
+
+void ggml_opt_dataset_get_batch(ggml_opt_dataset_t dataset, struct ggml_tensor * data_batch, struct ggml_tensor * labels_batch, int64_t ibatch) {
+    GGML_ASSERT( data_batch && ggml_is_contiguous(data_batch));
+    GGML_ASSERT(!labels_batch || ggml_is_contiguous(labels_batch));
+    GGML_ASSERT((labels_batch == nullptr) == (dataset->labels == nullptr));
+
+    const size_t nb_data_batch = ggml_nbytes(data_batch);
+    GGML_ASSERT(nb_data_batch % dataset->nbs_data == 0);
+    const int64_t shards_per_batch = nb_data_batch / dataset->nbs_data;
+
+    if (labels_batch) {
+        const size_t nb_labels_batch = ggml_nbytes(labels_batch);
+        GGML_ASSERT(nb_labels_batch == shards_per_batch*dataset->nbs_labels);
+    }
+
+    GGML_ASSERT((ibatch + 1)*shards_per_batch <= int64_t(dataset->permutation.size()));
+
+    for (int64_t ishard_batch = 0; ishard_batch < shards_per_batch; ++ishard_batch) {
+        const int64_t ishard = dataset->permutation[ibatch*shards_per_batch + ishard_batch];
+
+        const char * ptr_data = (const char *) dataset->data->data + ishard*dataset->nbs_data;
+        ggml_backend_tensor_set(data_batch, ptr_data, ishard_batch*dataset->nbs_data, dataset->nbs_data);
+
+        if (!labels_batch) {
+            continue;
+        }
+
+        const char * ptr_labels = (const char *) dataset->labels->data + ishard*dataset->nbs_labels;
+        ggml_backend_tensor_set(labels_batch, ptr_labels, ishard_batch*dataset->nbs_labels, dataset->nbs_labels);
+    }
+}
+
+// ====== Model / Context ======
+
+struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata) {
+    GGML_UNUSED(userdata);
+
+    ggml_opt_optimizer_params result;
+
+    result.adamw.alpha = 0.001f;
+    result.adamw.beta1 = 0.9f;
+    result.adamw.beta2 = 0.999f;
+    result.adamw.eps = 1e-8f;
+    result.adamw.wd = 0.0f;
+
+    return result;
+}
+
+struct ggml_opt_params ggml_opt_default_params(
+        ggml_backend_sched_t backend_sched,
+        struct ggml_context * ctx_compute,
+        struct ggml_tensor * inputs,
+        struct ggml_tensor * outputs,
+        enum ggml_opt_loss_type loss_type) {
+    return {
+        /*backend_sched =*/ backend_sched,
+        /*ctx_compute =*/ ctx_compute,
+        /*inputs =*/ inputs,
+        /*logits =*/ outputs,
+        /*loss_type =*/ loss_type,
+        /*build_type =*/ GGML_OPT_BUILD_TYPE_OPT,
+        /*opt_period =*/ 1,
+        /*get_opt_pars =*/ ggml_opt_get_default_optimizer_params,
+        /*get_opt_pars_ud =*/ nullptr,
+    };
+}
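+
+// A sketch of a caller-supplied parameter callback (illustrative, not part of
+// the upstream file): ggml_opt_fit() below registers &epoch as userdata, so a
+// schedule can decay the learning rate per epoch while keeping the remaining
+// AdamW defaults:
+//
+//     static ggml_opt_optimizer_params lr_schedule(void * userdata) {
+//         const int64_t epoch = *(const int64_t *) userdata;
+//         ggml_opt_optimizer_params p = ggml_opt_get_default_optimizer_params(nullptr);
+//         p.adamw.alpha = 0.001f / (1.0f + 0.1f*(epoch - 1)); // assumed decay rule
+//         return p;
+//     }
+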
+static ggml_tensor * map_tensor(std::map<ggml_tensor *, ggml_tensor *> & tensor_map, ggml_context * ctx, ggml_tensor * tensor) {
+    if (!tensor) {
+        return nullptr;
+    }
+
+    if (tensor_map.find(tensor) != tensor_map.end()) {
+        return tensor_map[tensor];
+    }
+
+    ggml_tensor * new_tensor = ggml_dup_tensor(ctx, tensor);
+    tensor_map[tensor] = new_tensor;
+
+    new_tensor->op = tensor->op;
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        new_tensor->nb[i] = tensor->nb[i];
+    }
+    new_tensor->flags = tensor->flags;
+    memcpy(new_tensor->op_params, tensor->op_params, sizeof(tensor->op_params));
+    strcpy(new_tensor->name, tensor->name);
+    new_tensor->data = tensor->data;
+    new_tensor->buffer = tensor->buffer;
+    new_tensor->extra = tensor->extra;
+    new_tensor->view_offs = tensor->view_offs;
+    new_tensor->view_src = map_tensor(tensor_map, ctx, tensor->view_src);
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        new_tensor->src[i] = map_tensor(tensor_map, ctx, tensor->src[i]);
+    }
+
+    return new_tensor;
+}
+
+static ggml_cgraph * dup_graph(ggml_context * ctx, ggml_cgraph * src) {
+    std::map<ggml_tensor *, ggml_tensor *> tensor_map;
+
+    ggml_cgraph * dst = ggml_new_graph_custom(ctx, src->size, /*grads =*/ true);
+
+    for (int i = 0; i < src->n_leafs; i++) {
+        ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->leafs[i]));
+    }
+    GGML_ASSERT(dst->n_leafs == src->n_leafs);
+    for (int i = 0; i < src->n_nodes; i++) {
+        ggml_build_forward_expand(dst, map_tensor(tensor_map, ctx, src->nodes[i]));
+    }
+    GGML_ASSERT(dst->n_nodes == src->n_nodes);
+    for (int i = 0; i < src->n_nodes; ++i) {
+        const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
+        const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
+
+        GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
+        GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
+        GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
+        GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
+
+        dst->grads[igrad_dst] = src->grads[igrad_src];
+        dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
+    }
+
+    return dst;
+}
+
+static void ggml_opt_alloc_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph) {
+    GGML_ASSERT(graph);
+    if (opt_ctx->allocated_graph == graph) {
+        return;
+    }
+
+    ggml_backend_sched_reset(opt_ctx->backend_sched); // clear allocation of previous graph
+
+    {
+        ggml_init_params params = {
+            /*.mem_size =*/ ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE,
+            /*.mem_buffer =*/ nullptr,
+            /*.no_alloc =*/ true,
+        };
+        ggml_free(opt_ctx->ctx_copy);
+        opt_ctx->ctx_copy = ggml_init(params);
+    }
+
+    opt_ctx->allocated_graph_copy = dup_graph(opt_ctx->ctx_copy, graph);
+
+    ggml_backend_sched_alloc_graph(opt_ctx->backend_sched, opt_ctx->allocated_graph_copy);
+    opt_ctx->allocated_graph = graph;
+}
+
+ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params) {
+    ggml_opt_context_t result = new struct ggml_opt_context;
+    result->backend_sched = params.backend_sched;
+    result->ctx_compute = params.ctx_compute;
+    result->inputs = params.inputs;
+    result->outputs = params.outputs;
+    result->opt_period = params.opt_period;
+    result->get_opt_pars = params.get_opt_pars;
+    result->get_opt_pars_ud = params.get_opt_pars_ud;
+
+    GGML_ASSERT(result->inputs->data && "the inputs must be allocated statically");
+    GGML_ASSERT(result->opt_period >= 1);
+
+    const bool accumulate = params.build_type == GGML_OPT_BUILD_TYPE_GRAD ||
+        (params.build_type == GGML_OPT_BUILD_TYPE_OPT && result->opt_period > 1);
+
+    ggml_set_input(result->inputs);
+    ggml_set_output(result->outputs);
+
+    result->gf = ggml_new_graph_custom(result->ctx_compute, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true); // Forward pass.
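+    // grads = true: the forward graph carries gradient bookkeeping so that
+    // ggml_build_backward_expand() below can derive gb_grad and gb_opt from it.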
+ ggml_build_forward_expand(result->gf, result->outputs); + + int n_param = 0; + for (int i = 0; i < result->gf->n_nodes; ++i) { + if (result->gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) { + n_param++; + } + } + + { + // The static context is used for: + // - gradients (1 tensor per param if using gradient accumulation) + // - optimizer momenta (2 tensors per param) + // - labels + // - loss + its gradient (up to 5 tensors) + // - pred + // - ncorrect (2 tensors). + const size_t tensors_per_param = (accumulate ? 1 : 0) + (params.build_type == GGML_OPT_BUILD_TYPE_OPT ? 2 : 0); + const size_t size_meta = (tensors_per_param*n_param + 9) * ggml_tensor_overhead(); + struct ggml_init_params params = { + /*.mem_size =*/ size_meta, + /*.mem_buffer =*/ nullptr, + /*.no_alloc =*/ true, + }; + result->ctx_static = ggml_init(params); + } + { + // The static cpu context is used for: + // - optimizer parameters (1 for the entire context) + const size_t size_meta = 1 * ggml_tensor_overhead(); + struct ggml_init_params params = { + /*.mem_size =*/ size_meta, + /*.mem_buffer =*/ nullptr, + /*.no_alloc =*/ true, + }; + result->ctx_static_cpu = ggml_init(params); + } + + + switch (params.loss_type) { + case GGML_OPT_LOSS_TYPE_MEAN: { + result->loss = ggml_sum(result->ctx_static, result->outputs); + ggml_set_name(result->loss, "loss_sum"); + const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs)); + result->loss = ggml_scale(result->ctx_static, result->loss, scale); + ggml_set_name(result->loss, "loss_mean"); + result->loss_per_datapoint = true; + break; + } + case GGML_OPT_LOSS_TYPE_SUM: { + result->loss = ggml_sum(result->ctx_static, result->outputs); + ggml_set_name(result->loss, "loss_sum"); + result->loss_per_datapoint = false; + break; + } + case GGML_OPT_LOSS_TYPE_CROSS_ENTROPY: { + result->labels = ggml_dup_tensor(result->ctx_static, result->outputs); + ggml_set_input(result->labels); + ggml_set_name(result->labels, "labels"); + result->loss = ggml_cross_entropy_loss(result->ctx_static, result->outputs, result->labels); + ggml_set_name(result->loss, "loss_cross_entropy"); + if (result->opt_period > 1) { + result->loss = ggml_scale(result->ctx_static, result->loss, 1.0f / result->opt_period); + ggml_set_name(result->loss, "loss_cross_entropy_scaled"); + } + result->loss_per_datapoint = true; + break; + } + case GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR: { + result->labels = ggml_dup_tensor(result->ctx_static, result->outputs); + ggml_set_input(result->labels); + ggml_set_name(result->labels, "labels"); + result->loss = ggml_sub(result->ctx_static, result->outputs, result->labels); + ggml_set_name(result->loss, "loss_error"); + result->loss = ggml_sqr(result->ctx_static, result->loss); + ggml_set_name(result->loss, "loss_squared_error"); + result->loss = ggml_sum(result->ctx_static, result->loss); + ggml_set_name(result->loss, "loss_sum_squared_error"); + const float scale = 1.0f / (result->opt_period * ggml_nelements(result->outputs)); + result->loss = ggml_scale(result->ctx_static, result->loss, scale); + ggml_set_name(result->loss, "loss_mean_squared_error"); + result->loss_per_datapoint = true; + break; + } + } + ggml_set_output(result->loss); + ggml_set_loss(result->loss); + ggml_build_forward_expand(result->gf, result->loss); + + result->pred = ggml_argmax(result->ctx_static, result->outputs); + ggml_set_name(result->pred, "pred"); + ggml_set_output(result->pred); + ggml_build_forward_expand(result->gf, result->pred); + + if (result->labels) { + result->ncorrect = 
ggml_count_equal(result->ctx_static, result->pred, ggml_argmax(result->ctx_static, result->labels)); + ggml_set_name(result->ncorrect, "ncorrect"); + ggml_set_output(result->ncorrect); + ggml_build_forward_expand(result->gf, result->ncorrect); + } else { + result->ncorrect = nullptr; + } + + if (params.build_type == GGML_OPT_BUILD_TYPE_FORWARD) { + result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0)); + return result; + } + + // gb_grad == graph backward gradients, forward pass, then backward pass to calculate gradients. + result->gb_grad = ggml_graph_dup(result->ctx_compute, result->gf); + ggml_build_backward_expand(result->ctx_static, result->ctx_compute, result->gb_grad, accumulate); + + if (params.build_type == GGML_OPT_BUILD_TYPE_GRAD) { + result->buf_static = ggml_backend_alloc_ctx_tensors(result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0)); + ggml_graph_reset(result->gb_grad); + return result; + } + + GGML_ASSERT(params.build_type == GGML_OPT_BUILD_TYPE_OPT); + + // gb_opt == graph backward optimize, forward pass, then backward pass to calculate gradients, then optimizer step. + result->gb_opt = ggml_graph_dup(result->ctx_compute, result->gb_grad); + + result->adamw_params = ggml_new_tensor_1d(result->ctx_static_cpu, GGML_TYPE_F32, 7); + ggml_set_input(result->adamw_params); + ggml_set_name(result->adamw_params, "adamw_params"); + + for (int i = result->gf->n_nodes-1; i >= 0; --i) { + struct ggml_tensor * node = result->gb_opt->nodes[i]; + struct ggml_tensor * grad = ggml_graph_get_grad(result->gb_opt, node); + + if (node->flags & GGML_TENSOR_FLAG_PARAM) { + struct ggml_tensor * m = ggml_dup_tensor(result->ctx_static, node); + struct ggml_tensor * v = ggml_dup_tensor(result->ctx_static, node); + struct ggml_tensor * opt_step = ggml_opt_step_adamw(result->ctx_compute, node, grad, m, v, result->adamw_params); + ggml_build_forward_expand(result->gb_opt, opt_step); + } + } + + result->buf_static = ggml_backend_alloc_ctx_tensors( + result->ctx_static, ggml_backend_sched_get_backend(result->backend_sched, 0)); + + result->buf_static_cpu = ggml_backend_alloc_ctx_tensors_from_buft(result->ctx_static_cpu, ggml_backend_cpu_buffer_type()); + + ggml_graph_reset(result->gb_opt); + + return result; +} + +void ggml_opt_free(ggml_opt_context_t opt_ctx) { + if (opt_ctx == nullptr) { + return; + } + ggml_backend_buffer_free(opt_ctx->buf_static); + ggml_backend_buffer_free(opt_ctx->buf_static_cpu); + ggml_free(opt_ctx->ctx_static); + ggml_free(opt_ctx->ctx_static_cpu); + delete opt_ctx; +} + +void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer) { + if (optimizer) { + ggml_graph_reset(opt_ctx->gb_opt); + opt_ctx->iter = 1; + } else { + ggml_graph_reset(opt_ctx->gb_grad); + } +} + +struct ggml_tensor * ggml_opt_inputs(ggml_opt_context_t opt_ctx) { + return opt_ctx->inputs; +} + +struct ggml_tensor * ggml_opt_outputs(ggml_opt_context_t opt_ctx) { + return opt_ctx->outputs; +} + +struct ggml_tensor * ggml_opt_labels(ggml_opt_context_t opt_ctx) { + return opt_ctx->labels; +} + +struct ggml_tensor * ggml_opt_loss(ggml_opt_context_t opt_ctx) { + return opt_ctx->loss; +} + +struct ggml_tensor * ggml_opt_pred(ggml_opt_context_t opt_ctx) { + return opt_ctx->pred; +} + +struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx) { + return opt_ctx->ncorrect; +} + +struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node) { + return 
ggml_graph_get_grad_acc(opt_ctx->gb_opt, node); +} + +// ====== Optimization Result ====== + +ggml_opt_result_t ggml_opt_result_init() { + return new ggml_opt_result; +} + +void ggml_opt_result_free(ggml_opt_result_t result) { + delete result; +} + +void ggml_opt_result_reset(ggml_opt_result_t result) { + result->ndata = 0; + result->loss.clear(); + result->pred.clear(); + result->ncorrect = 0; +} + +void ggml_opt_result_ndata(ggml_opt_result_t result, int64_t * ndata) { + *ndata = result->ndata; +} + +void ggml_opt_result_loss(ggml_opt_result_t result, double * loss, double * unc) { + const int64_t nbatches = result->loss.size(); // Number of physical batches. + + if (nbatches == 0) { + *loss = 0.0; + *unc = NAN; + return; + } + + double sum = 0.0; + double sum_squared = 0.0; + + for (const float & loss : result->loss) { + // If the loss is per datapoint it was scaled by 1.0f/opt_period for each physical batch. + const float loss_scaled = result->loss_per_datapoint ? loss*result->opt_period : loss; + sum += loss_scaled; + sum_squared += loss_scaled*loss_scaled; + } + + const double mean = sum/nbatches; + *loss = result->loss_per_datapoint ? mean : sum; + + if (!unc) { + return; + } + + if (nbatches < 2) { + *unc = NAN; + return; + } + + const double var_sum = sum_squared/nbatches - mean*mean; // variance without Bessel's correction, i.e. nbatches/(nbatches-1) + *unc = result->loss_per_datapoint ? sqrt(var_sum / (nbatches - 1)) : sqrt(var_sum * nbatches/(nbatches - 1)); +} + +void ggml_opt_result_pred(ggml_opt_result_t result, int32_t * pred) { + for (size_t i = 0; i < result->pred.size(); ++i) { + pred[i] = result->pred[i]; + } +} + +void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc) { + *accuracy = result->ncorrect >= 0 ? double(result->ncorrect) / double(result->ndata) : NAN; + + if (!unc) { + return; + } + + *unc = result->ncorrect >= 0 && result->ndata >= 2 ? 
+        sqrt((*accuracy) * (1.0 - (*accuracy)) / double(result->ndata - 1)) : NAN;
+}
+
+// ====== Computation ======
+
+static void ggml_opt_eval_graph(ggml_opt_context_t opt_ctx, ggml_cgraph * graph, ggml_opt_result * result) {
+    if (graph != opt_ctx->gf) {
+        struct ggml_opt_optimizer_params opt_pars = opt_ctx->get_opt_pars(opt_ctx->get_opt_pars_ud);
+
+        GGML_ASSERT(opt_pars.adamw.alpha > 0.0f);
+        GGML_ASSERT(opt_pars.adamw.beta1 >= 0.0f);
+        GGML_ASSERT(opt_pars.adamw.beta1 <= 1.0f);
+        GGML_ASSERT(opt_pars.adamw.beta2 >= 0.0f);
+        GGML_ASSERT(opt_pars.adamw.beta2 <= 1.0f);
+        GGML_ASSERT(opt_pars.adamw.eps >= 0.0f);
+        GGML_ASSERT(opt_pars.adamw.wd >= 0.0f);
+        GGML_ASSERT(opt_pars.adamw.wd <= 1.0f);
+
+        // beta1, beta2 after applying warmup
+        const float beta1h = 1.0f/(1.0f - powf(opt_pars.adamw.beta1, opt_ctx->iter));
+        const float beta2h = 1.0f/(1.0f - powf(opt_pars.adamw.beta2, opt_ctx->iter));
+
+        float * adamw_par_data = ggml_get_data_f32(opt_ctx->adamw_params);
+        adamw_par_data[0] = opt_pars.adamw.alpha;
+        adamw_par_data[1] = opt_pars.adamw.beta1;
+        adamw_par_data[2] = opt_pars.adamw.beta2;
+        adamw_par_data[3] = opt_pars.adamw.eps;
+        adamw_par_data[4] = opt_pars.adamw.wd;
+        adamw_par_data[5] = beta1h;
+        adamw_par_data[6] = beta2h;
+    }
+
+    ggml_opt_alloc_graph(opt_ctx, graph);
+    ggml_backend_sched_graph_compute(opt_ctx->backend_sched, opt_ctx->allocated_graph_copy);
+    opt_ctx->iter += opt_ctx->allocated_graph == opt_ctx->gb_opt;
+
+    if (!result) {
+        return;
+    }
+
+    if (result->ndata == 0) {
+        result->loss_per_datapoint = opt_ctx->loss_per_datapoint;
+        result->opt_period = opt_ctx->opt_period;
+    } else {
+        GGML_ASSERT(result->loss_per_datapoint == opt_ctx->loss_per_datapoint);
+        GGML_ASSERT(result->opt_period == opt_ctx->opt_period);
+    }
+
+    const int64_t ndata = opt_ctx->outputs->ne[1];
+    GGML_ASSERT(result->ndata == ndata*int64_t(result->loss.size()) && "varying batch size not supported");
+    result->ndata += ndata;
+
+    GGML_ASSERT(ggml_is_scalar(opt_ctx->loss));
+    GGML_ASSERT(opt_ctx->loss->type == GGML_TYPE_F32);
+    float loss;
+    ggml_backend_tensor_get(opt_ctx->loss, &loss, 0, ggml_nbytes(opt_ctx->loss));
+    result->loss.push_back(loss);
+
+    GGML_ASSERT(opt_ctx->pred->type == GGML_TYPE_I32);
+    std::vector<int32_t> pred(ndata);
+    ggml_backend_tensor_get(opt_ctx->pred, pred.data(), 0, ggml_nbytes(opt_ctx->pred));
+    result->pred.insert(result->pred.end(), pred.begin(), pred.end());
+
+    if (!opt_ctx->labels || result->ncorrect < 0) {
+        result->ncorrect = -1;
+        return;
+    }
+
+    GGML_ASSERT(ggml_is_scalar(opt_ctx->ncorrect));
+    GGML_ASSERT(opt_ctx->ncorrect->type == GGML_TYPE_I64);
+    int64_t ncorrect;
+    ggml_backend_tensor_get(opt_ctx->ncorrect, &ncorrect, 0, ggml_nbytes(opt_ctx->ncorrect));
+    result->ncorrect += ncorrect;
+}
+
+void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result * result) {
+    ggml_opt_eval_graph(opt_ctx, opt_ctx->gf, result);
+}
+
+void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result * result) {
+    if (opt_ctx->opt_period == 1) {
+        ggml_opt_eval_graph(opt_ctx, opt_ctx->gb_opt, result);
+        return;
+    }
+
+    const int32_t opt_i_next = (opt_ctx->opt_i + 1) % opt_ctx->opt_period;
+    if (opt_i_next == 0) {
+        ggml_opt_eval_graph(opt_ctx, opt_ctx->gb_opt, result);
+        ggml_opt_reset(opt_ctx, /*optimizer =*/ false);
+    } else {
+        ggml_opt_eval_graph(opt_ctx, opt_ctx->gb_grad, result);
+    }
+    opt_ctx->opt_i = opt_i_next;
+}
+
+// ====== High-Level Functions ======
+
+void ggml_opt_epoch(
+        ggml_opt_context_t opt_ctx,
+        ggml_opt_dataset_t dataset,
+        ggml_opt_result_t 
result_train, + ggml_opt_result_t result_eval, + int64_t idata_split, + ggml_opt_epoch_callback callback_train, + ggml_opt_epoch_callback callback_eval) { + struct ggml_tensor * inputs = ggml_opt_inputs(opt_ctx); + struct ggml_tensor * labels = ggml_opt_labels(opt_ctx); + struct ggml_tensor * data = ggml_opt_dataset_data(dataset); + GGML_ASSERT(data->ne[0] == inputs->ne[0]); + + const int64_t ndata = data->ne[1]; + const int64_t ndata_batch = inputs->ne[1]; + + GGML_ASSERT(data->ne[1] % inputs->ne[1] == 0); + const int64_t nbatches = ndata/ndata_batch; + + idata_split = idata_split < 0 ? ndata : idata_split; + GGML_ASSERT(idata_split % ndata_batch == 0); + const int64_t ibatch_split = idata_split / ndata_batch; + + int64_t ibatch = 0; + int64_t t_loop_start = ggml_time_us(); + for (; ibatch < ibatch_split; ++ibatch) { + ggml_opt_dataset_get_batch(dataset, inputs, labels, ibatch); + ggml_opt_forward_backward(opt_ctx, result_train); + if (callback_train) { + callback_train(true, opt_ctx, dataset, result_train, ibatch+1, ibatch_split, t_loop_start); + } + } + t_loop_start = ggml_time_us(); + for (; ibatch < nbatches; ++ibatch) { + ggml_opt_dataset_get_batch(dataset, inputs, labels, ibatch); + ggml_opt_forward(opt_ctx, result_eval); + if (callback_eval) { + callback_eval(false, opt_ctx, dataset, result_eval, ibatch+1-ibatch_split, nbatches-ibatch_split, t_loop_start); + } + } +} + +void ggml_opt_epoch_callback_progress_bar( + bool train, + ggml_opt_context_t opt_ctx, + ggml_opt_dataset_t dataset, + ggml_opt_result_t result, + int64_t ibatch, + int64_t ibatch_max, + int64_t t_start_us) { + fprintf(stderr, "%s[", train ? "train: " : "val: "); + + constexpr int64_t bar_length = 25; + for (int64_t j = 0; j < bar_length; ++j) { + const int64_t ibatch_j = ibatch_max * j/bar_length; + if (ibatch_j < ibatch) { + fprintf(stderr, "="); + } else if (ibatch_max * (j - 1)/bar_length < ibatch) { + fprintf(stderr, ">"); + } else { + fprintf(stderr, " "); + } + } + + const int64_t batch_size = ggml_opt_inputs(opt_ctx)->ne[1]; + const int64_t idata = ibatch*batch_size; + const int64_t idata_max = ibatch_max*batch_size; + + double loss; + double loss_unc; + ggml_opt_result_loss(result, &loss, &loss_unc); + + double accuracy; + double accuracy_unc; + ggml_opt_result_accuracy(result, &accuracy, &accuracy_unc); + + const int64_t t_ibatch_us = ggml_time_us() - t_start_us; + int64_t t_ibatch_s = t_ibatch_us / 1000000; + const int64_t t_ibatch_h = t_ibatch_s / 3600; + t_ibatch_s -= t_ibatch_h * 3600; + const int64_t t_ibatch_m = t_ibatch_s / 60; + t_ibatch_s -= t_ibatch_m * 60; + + const int64_t t_eta_us = t_ibatch_us * (ibatch_max - ibatch)/ibatch; + int64_t t_eta_s = t_eta_us / 1000000; + const int64_t t_eta_h = t_eta_s / 3600; + t_eta_s -= t_eta_h * 3600; + const int64_t t_eta_m = t_eta_s / 60; + t_eta_s -= t_eta_m * 60; + + fprintf(stderr, "| data=%06" PRId64 "/%06" PRId64 ", loss=%.6lf+-%.6lf, accuracy=%.2lf+-%.2lf%%, " + "t=%02" PRId64 ":%02" PRId64 ":%02" PRId64 ", ETA=%02" PRId64 ":%02" PRId64 ":%02" PRId64 "]\r", + idata, idata_max, loss, loss_unc, 100.0*accuracy, 100.0*accuracy_unc, + t_ibatch_h, t_ibatch_m, t_ibatch_s, t_eta_h, t_eta_m, t_eta_s); + if (ibatch == ibatch_max) { + fprintf(stderr, "\n"); + } + fflush(stderr); + + GGML_UNUSED(dataset); +} + +void ggml_opt_fit( + ggml_backend_sched_t backend_sched, + ggml_context * ctx_compute, + ggml_tensor * inputs, + ggml_tensor * outputs, + ggml_opt_dataset_t dataset, + enum ggml_opt_loss_type loss_type, + ggml_opt_get_optimizer_params get_opt_pars, + 
int64_t nepoch, + int64_t nbatch_logical, + float val_split, + bool silent) { + ggml_time_init(); + const int64_t t_start_us = ggml_time_us(); + + const int64_t ndata = ggml_opt_dataset_data(dataset)->ne[1]; + const int64_t nbatch_physical = inputs->ne[1]; + GGML_ASSERT(ndata % nbatch_logical == 0); + GGML_ASSERT(nbatch_logical % nbatch_physical == 0); + + const int64_t opt_period = nbatch_logical / nbatch_physical; + const int64_t nbatches_logical = ndata / nbatch_logical; + + GGML_ASSERT(val_split >= 0.0f); + GGML_ASSERT(val_split < 1.0f); + const int64_t ibatch_split = int64_t(((1.0f - val_split) * nbatches_logical)) * opt_period; // train <-> val split index (physical) + const int64_t idata_split = ibatch_split * nbatch_physical; + + int64_t epoch = 1; + + ggml_opt_params params = ggml_opt_default_params(backend_sched, ctx_compute, inputs, outputs, loss_type); + params.opt_period = opt_period; + params.get_opt_pars = get_opt_pars; + params.get_opt_pars_ud = &epoch; + ggml_opt_context_t opt_ctx = ggml_opt_init(params); + + // Shuffling the data is generally useful but there is only a point if not all data is used in a single batch. + if (nbatch_logical < ndata) { + ggml_opt_dataset_shuffle(opt_ctx, dataset, -1); // Shuffle all data (train + validation). + } + + ggml_opt_result_t result_train = ggml_opt_result_init(); + ggml_opt_result_t result_val = ggml_opt_result_init(); + + ggml_opt_epoch_callback epoch_callback = silent ? nullptr : ggml_opt_epoch_callback_progress_bar; + + for (; epoch <= nepoch; ++epoch) { + if (nbatch_logical < idata_split) { + ggml_opt_dataset_shuffle(opt_ctx, dataset, idata_split); + } + + ggml_opt_result_reset(result_train); + ggml_opt_result_reset(result_val); + + if (!silent) { + fprintf(stderr, "%s: epoch %04" PRId64 "/%04" PRId64 ":\n", __func__, epoch, nepoch); + } + ggml_opt_epoch(opt_ctx, dataset, result_train, result_val, idata_split, epoch_callback, epoch_callback); + if (!silent) { + fprintf(stderr, "\n"); + } + } + + if (!silent) { + int64_t t_total_s = (ggml_time_us() - t_start_us) / 1000000; + const int64_t t_total_h = t_total_s / 3600; + t_total_s -= t_total_h * 3600; + const int64_t t_total_m = t_total_s / 60; + t_total_s -= t_total_m * 60; + fprintf(stderr, "%s: training took %02" PRId64 ":%02" PRId64 ":%02" PRId64 "\n", __func__, t_total_h, t_total_m, t_total_s); + } + + ggml_opt_free(opt_ctx); + ggml_opt_result_free(result_train); + ggml_opt_result_free(result_val); +} diff --git a/llama/ggml-quants.c b/ml/backend/ggml/ggml/src/ggml-quants.c similarity index 99% rename from llama/ggml-quants.c rename to ml/backend/ggml/ggml/src/ggml-quants.c index 6f824d420..7918388ae 100644 --- a/llama/ggml-quants.c +++ b/ml/backend/ggml/ggml/src/ggml-quants.c @@ -1,35 +1,9 @@ -/** - * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file - * - * MIT License - * - * Copyright (c) 2023-2024 The ggml authors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #define GGML_COMMON_IMPL_C
 #include "ggml-common.h"
 
 #include "ggml-quants.h"
 #include "ggml-impl.h"
-#include "ggml-cpu-impl.h"
+#include "ggml-cpu/ggml-cpu-impl.h"
 #include "ggml-cpu.h"
 
 #include <math.h>
diff --git a/llama/ggml-quants.h b/ml/backend/ggml/ggml/src/ggml-quants.h
similarity index 87%
rename from llama/ggml-quants.h
rename to ml/backend/ggml/ggml/src/ggml-quants.h
index cf518ba08..d09173e11 100644
--- a/llama/ggml-quants.h
+++ b/ml/backend/ggml/ggml/src/ggml-quants.h
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #pragma once
 
 #define GGML_COMMON_DECL_C
diff --git a/ml/backend/ggml/ggml/src/ggml-threading.cpp b/ml/backend/ggml/ggml/src/ggml-threading.cpp
new file mode 100644
index 000000000..25a19eedb
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-threading.cpp
@@ -0,0 +1,12 @@
+#include "ggml-threading.h"
+#include <mutex>
+
+std::mutex ggml_critical_section_mutex;
+
+void ggml_critical_section_start() {
+    ggml_critical_section_mutex.lock();
+}
+
+void ggml_critical_section_end(void) {
+    ggml_critical_section_mutex.unlock();
+}
diff --git a/ml/backend/ggml/ggml/src/ggml-threading.h b/ml/backend/ggml/ggml/src/ggml-threading.h
new file mode 100644
index 000000000..dec2c8840
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml-threading.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+GGML_API void ggml_critical_section_start(void);
+GGML_API void ggml_critical_section_end(void);
+
+#ifdef __cplusplus
+}
+#endif
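The threading shim above simply hides a std::mutex behind a C-compatible API. A minimal caller-side sketch (illustrative code, not part of this patch) guarding one-time shared initialization:

    #include "ggml-threading.h"

    static bool tables_ready = false;

    static void init_tables_once(void) {
        ggml_critical_section_start();
        if (!tables_ready) {
            // populate shared lookup tables exactly once
            tables_ready = true;
        }
        ggml_critical_section_end();
    }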
diff --git a/llama/ggml.c b/ml/backend/ggml/ggml/src/ggml.c
similarity index 99%
rename from llama/ggml.c
rename to ml/backend/ggml/ggml/src/ggml.c
index 8d442e08a..7ffcd907d 100644
--- a/llama/ggml.c
+++ b/ml/backend/ggml/ggml/src/ggml.c
@@ -1,29 +1,3 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
 #define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
diff --git a/ml/backend/ggml/ggml/src/ggml.go b/ml/backend/ggml/ggml/src/ggml.go
new file mode 100644
index 000000000..b64922a6e
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml.go
@@ -0,0 +1,68 @@
+package ggml
+
+// #cgo CXXFLAGS: -std=c++17
+// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
+// #cgo CPPFLAGS: -I${SRCDIR}/../include -I${SRCDIR}/ggml-cpu
+// #cgo windows LDFLAGS: -lmsvcrt -static -static-libgcc -static-libstdc++
+// #include <stdlib.h>
+// #include "ggml-backend.h"
+// extern void sink(int level, char *text, void *user_data);
+import "C"
+
+import (
+	"log/slog"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"sync"
+	"unsafe"
+
+	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-cpu"
+)
+
+func init() {
+	C.ggml_log_set((C.ggml_log_callback)(C.sink), nil)
+}
+
+//export sink
+func sink(level C.int, text *C.char, _ unsafe.Pointer) {
+	msg := strings.TrimSpace(C.GoString(text))
+	switch level {
+	case C.GGML_LOG_LEVEL_DEBUG:
+		slog.Debug(msg)
+	case C.GGML_LOG_LEVEL_INFO:
+		slog.Info(msg)
+	case C.GGML_LOG_LEVEL_WARN:
+		slog.Warn(msg)
+	case C.GGML_LOG_LEVEL_ERROR:
+		slog.Error(msg)
+	}
+}
+
+var OnceLoad = sync.OnceFunc(func() {
+	var lib struct{ name, defaultValue string }
+	switch runtime.GOOS {
+	case "darwin", "linux":
+		lib.name = "LD_LIBRARY_PATH"
+		lib.defaultValue = "/usr/local/lib:/usr/lib"
+	case "windows":
+		lib.name = "PATH"
+		lib.defaultValue = "."
+	default:
+		return
+	}
+
+	paths, ok := os.LookupEnv(lib.name)
+	if !ok {
+		paths = lib.defaultValue
+	}
+
+	for _, path := range filepath.SplitList(paths) {
+		func() {
+			cpath := C.CString(path)
+			defer C.free(unsafe.Pointer(cpath))
+			C.ggml_backend_load_all_from_path(cpath)
+		}()
+	}
+})
diff --git a/ml/backend/ggml/ggml/src/ggml_darwin_arm64.go b/ml/backend/ggml/ggml/src/ggml_darwin_arm64.go
new file mode 100644
index 000000000..beffa64e2
--- /dev/null
+++ b/ml/backend/ggml/ggml/src/ggml_darwin_arm64.go
@@ -0,0 +1,10 @@
+package ggml
+
+// #cgo CPPFLAGS: -DGGML_USE_METAL -DGGML_USE_BLAS
+// #cgo LDFLAGS: -framework Foundation
+import "C"
+
+import (
+	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-blas"
+	_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-metal"
+)
diff --git a/ml/backend/ggml/ggml_debug.go b/ml/backend/ggml/ggml_debug.go
new file mode 100644
index 000000000..9ddb2718c
--- /dev/null
+++ b/ml/backend/ggml/ggml_debug.go
@@ -0,0 +1,6 @@
+//go:build debug
+
+package ggml
+
+// #cgo CPPFLAGS: -DOLLAMA_DEBUG
+import "C"
diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh
index cbf6f61d9..a4fb67c2d 100755
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -10,9 +10,7 @@ mkdir -p dist
 # If installed to an alternate location use the following to enable
 # export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
 # export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
-export CGO_CFLAGS=-mmacosx-version-min=11.3
-export CGO_CXXFLAGS=-mmacosx-version-min=11.3
-export CGO_LDFLAGS=-mmacosx-version-min=11.3
+export CGO_CPPFLAGS=-mmacosx-version-min=11.3
 
 rm -rf llama/build dist/darwin-*
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 894d9dd2e..a0c3d2f00 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -18,7 +18,7 @@ docker buildx build \
     --output type=local,dest=./dist/ \
     --platform=${PLATFORM} \
     ${OLLAMA_COMMON_BUILD_ARGS} \
-    --target 
dist \ + --target archive \ -f Dockerfile \ . @@ -26,4 +26,4 @@ docker buildx build \ if echo $PLATFORM | grep "," > /dev/null ; then mv -f ./dist/linux_*64/ollama* ./dist/ rmdir ./dist/linux_*64 -fi \ No newline at end of file +fi
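
Taken together, ggml-opt.cpp above exposes a small training API. A condensed end-to-end sketch (the backend scheduler, compute context, tensor shapes, and counts are placeholders; only the ggml_opt_* calls and their signatures come from this patch):

    // assumes backend_sched, ctx_compute and statically allocated
    // inputs/outputs tensors of a user-built forward graph already exist
    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
        /*ne_datapoint =*/ ne_in, /*ne_label =*/ ne_out,
        /*ndata =*/ ndata, /*ndata_shard =*/ nbatch);
    // fill ggml_opt_dataset_data()/ggml_opt_dataset_labels() in place, then:

    ggml_opt_fit(backend_sched, ctx_compute, inputs, outputs, dataset,
                 GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
                 ggml_opt_get_default_optimizer_params,
                 /*nepoch =*/ 4, /*nbatch_logical =*/ nbatch,
                 /*val_split =*/ 0.05f, /*silent =*/ false);

    ggml_opt_dataset_free(dataset);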