From 7f69031491dec04ab57b30eace26186b6004388d Mon Sep 17 00:00:00 2001 From: jmorganca Date: Tue, 17 Dec 2024 11:22:32 -0800 Subject: [PATCH] llama: update vendored code to 081b29bd --- api/types.go | 2 - cmd/interactive_test.go | 9 +- docs/api.md | 4 - llama/amx.cpp | 2 +- llama/amx.h | 2 +- llama/clip.cpp | 2 +- llama/clip.h | 2 +- llama/common.cpp | 21 +- llama/common.h | 17 +- llama/ggml-alloc.c | 3 +- llama/ggml-alloc.h | 2 +- llama/ggml-backend-impl.h | 2 +- llama/ggml-backend-reg.cpp | 2 +- llama/ggml-backend.cpp | 2 +- llama/ggml-backend.h | 2 +- llama/ggml-blas.cpp | 2 +- llama/ggml-blas.h | 2 +- llama/ggml-common.h | 2 +- llama/ggml-cpp.h | 2 +- llama/ggml-cpu-aarch64.cpp | 2 +- llama/ggml-cpu-aarch64.h | 2 +- llama/ggml-cpu-impl.h | 2 +- llama/ggml-cpu-quants.c | 2 +- llama/ggml-cpu-quants.h | 2 +- llama/ggml-cpu-traits.cpp | 2 +- llama/ggml-cpu-traits.h | 2 +- llama/ggml-cpu.c | 2 +- llama/ggml-cpu.cpp | 5 +- llama/ggml-cpu.h | 2 +- llama/ggml-cuda.h | 2 +- llama/ggml-cuda/acc.cu | 2 +- llama/ggml-cuda/acc.cuh | 2 +- llama/ggml-cuda/arange.cu | 2 +- llama/ggml-cuda/arange.cuh | 2 +- llama/ggml-cuda/argmax.cu | 2 +- llama/ggml-cuda/argmax.cuh | 2 +- llama/ggml-cuda/argsort.cu | 2 +- llama/ggml-cuda/argsort.cuh | 2 +- llama/ggml-cuda/binbcast.cu | 2 +- llama/ggml-cuda/binbcast.cuh | 2 +- llama/ggml-cuda/clamp.cu | 2 +- llama/ggml-cuda/clamp.cuh | 2 +- llama/ggml-cuda/common.cuh | 2 +- llama/ggml-cuda/concat.cu | 2 +- llama/ggml-cuda/concat.cuh | 2 +- llama/ggml-cuda/conv-transpose-1d.cu | 2 +- llama/ggml-cuda/conv-transpose-1d.cuh | 2 +- llama/ggml-cuda/convert.cu | 2 +- llama/ggml-cuda/convert.cuh | 2 +- llama/ggml-cuda/count-equal.cu | 2 +- llama/ggml-cuda/count-equal.cuh | 2 +- llama/ggml-cuda/cpy.cu | 2 +- llama/ggml-cuda/cpy.cuh | 2 +- llama/ggml-cuda/cross-entropy-loss.cu | 2 +- llama/ggml-cuda/cross-entropy-loss.cuh | 2 +- llama/ggml-cuda/dequantize.cuh | 2 +- llama/ggml-cuda/diagmask.cu | 2 +- llama/ggml-cuda/diagmask.cuh | 2 +- llama/ggml-cuda/fattn-common.cuh | 2 +- llama/ggml-cuda/fattn-tile-f16.cu | 2 +- llama/ggml-cuda/fattn-tile-f16.cuh | 2 +- llama/ggml-cuda/fattn-tile-f32.cu | 2 +- llama/ggml-cuda/fattn-tile-f32.cuh | 2 +- llama/ggml-cuda/fattn-vec-f16.cuh | 2 +- llama/ggml-cuda/fattn-vec-f32.cuh | 2 +- llama/ggml-cuda/fattn-wmma-f16.cuh | 2 +- llama/ggml-cuda/fattn.cu | 2 +- llama/ggml-cuda/fattn.cuh | 2 +- llama/ggml-cuda/getrows.cu | 2 +- llama/ggml-cuda/getrows.cuh | 2 +- llama/ggml-cuda/ggml-cuda.cu | 2 +- llama/ggml-cuda/im2col.cu | 2 +- llama/ggml-cuda/im2col.cuh | 2 +- llama/ggml-cuda/mma.cuh | 2 +- llama/ggml-cuda/mmq.cu | 2 +- llama/ggml-cuda/mmq.cuh | 2 +- llama/ggml-cuda/mmv.cu | 2 +- llama/ggml-cuda/mmv.cuh | 2 +- llama/ggml-cuda/mmvq.cu | 2 +- llama/ggml-cuda/mmvq.cuh | 2 +- llama/ggml-cuda/norm.cu | 2 +- llama/ggml-cuda/norm.cuh | 2 +- llama/ggml-cuda/opt-step-adamw.cu | 2 +- llama/ggml-cuda/opt-step-adamw.cuh | 2 +- llama/ggml-cuda/out-prod.cu | 2 +- llama/ggml-cuda/out-prod.cuh | 2 +- llama/ggml-cuda/pad.cu | 2 +- llama/ggml-cuda/pad.cuh | 2 +- llama/ggml-cuda/pool2d.cu | 2 +- llama/ggml-cuda/pool2d.cuh | 2 +- llama/ggml-cuda/quantize.cu | 2 +- llama/ggml-cuda/quantize.cuh | 2 +- llama/ggml-cuda/rope.cu | 2 +- llama/ggml-cuda/rope.cuh | 2 +- llama/ggml-cuda/scale.cu | 2 +- llama/ggml-cuda/scale.cuh | 2 +- llama/ggml-cuda/softmax.cu | 2 +- llama/ggml-cuda/softmax.cuh | 2 +- llama/ggml-cuda/sum.cu | 2 +- llama/ggml-cuda/sum.cuh | 2 +- llama/ggml-cuda/sumrows.cu | 2 +- llama/ggml-cuda/sumrows.cuh | 2 +- .../fattn-vec-f16-instance-hs128-f16-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-f16-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-f16-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-f16-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-f16-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-f16-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_0-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q4_1-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_0-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q5_1-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-f16.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs128-q8_0-q8_0.cu | 2 +- .../fattn-vec-f16-instance-hs256-f16-f16.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-f16.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-q4_0.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-q4_1.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-q5_0.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-q5_1.cu | 2 +- .../fattn-vec-f16-instance-hs64-f16-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-f16-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_0-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q4_1-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_0-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q5_1-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-f16.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs128-q8_0-q8_0.cu | 2 +- .../fattn-vec-f32-instance-hs256-f16-f16.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-f16.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-q4_0.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-q4_1.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-q5_0.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-q5_1.cu | 2 +- .../fattn-vec-f32-instance-hs64-f16-q8_0.cu | 2 +- .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 2 +- .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 2 +- .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 2 +- .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 2 +- .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 2 +- .../template-instances/mmq-instance-iq1_s.cu | 2 +- .../template-instances/mmq-instance-iq2_s.cu | 2 +- .../template-instances/mmq-instance-iq2_xs.cu | 2 +- .../mmq-instance-iq2_xxs.cu | 2 +- .../template-instances/mmq-instance-iq3_s.cu | 2 +- .../mmq-instance-iq3_xxs.cu | 2 +- .../template-instances/mmq-instance-iq4_nl.cu | 2 +- .../template-instances/mmq-instance-iq4_xs.cu | 2 +- .../template-instances/mmq-instance-q2_k.cu | 2 +- .../template-instances/mmq-instance-q3_k.cu | 2 +- .../template-instances/mmq-instance-q4_0.cu | 2 +- .../template-instances/mmq-instance-q4_1.cu | 2 +- .../template-instances/mmq-instance-q4_k.cu | 2 +- .../template-instances/mmq-instance-q5_0.cu | 2 +- .../template-instances/mmq-instance-q5_1.cu | 2 +- .../template-instances/mmq-instance-q5_k.cu | 2 +- .../template-instances/mmq-instance-q6_k.cu | 2 +- .../template-instances/mmq-instance-q8_0.cu | 2 +- llama/ggml-cuda/tsembd.cu | 2 +- llama/ggml-cuda/tsembd.cuh | 2 +- llama/ggml-cuda/unary.cu | 2 +- llama/ggml-cuda/unary.cuh | 2 +- llama/ggml-cuda/upscale.cu | 2 +- llama/ggml-cuda/upscale.cuh | 2 +- llama/ggml-cuda/vecdotq.cuh | 2 +- llama/ggml-cuda/vendors/cuda.h | 2 +- llama/ggml-cuda/vendors/hip.h | 2 +- llama/ggml-cuda/vendors/musa.h | 2 +- llama/ggml-cuda/wkv6.cu | 2 +- llama/ggml-cuda/wkv6.cuh | 2 +- llama/ggml-impl.h | 18 +- llama/ggml-metal-embed.metal | 6 +- llama/ggml-metal-impl.h | 2 +- llama/ggml-metal.h | 2 +- llama/ggml-metal.metal | 2 +- llama/ggml-metal_darwin_arm64.m | 2 +- llama/ggml-quants.c | 2 +- llama/ggml-quants.h | 2 +- llama/ggml-threading.cpp | 2 +- llama/ggml-threading.h | 2 +- llama/ggml.c | 71 ++-- llama/ggml.h | 2 +- llama/json-schema-to-grammar.cpp | 2 +- llama/json-schema-to-grammar.h | 2 +- llama/llama-grammar.cpp | 2 +- llama/llama-grammar.h | 2 +- llama/llama-impl.h | 2 +- llama/llama-sampling.cpp | 127 ++----- llama/llama-sampling.h | 2 +- llama/llama-vocab.cpp | 4 +- llama/llama-vocab.h | 2 +- llama/llama.cpp | 309 +++++++++++++++++- llama/llama.go | 2 - llama/llama.h | 16 +- llama/llava.cpp | 2 +- llama/llava.h | 2 +- llama/log.cpp | 2 +- llama/log.h | 2 +- llama/mmq.cpp | 2 +- llama/mmq.h | 2 +- llama/runner/runner.go | 2 - llama/sampling.cpp | 29 +- llama/sampling.h | 2 +- llama/sampling_ext.cpp | 1 - llama/sampling_ext.h | 1 - llama/unicode-data.cpp | 2 +- llama/unicode-data.h | 2 +- llama/unicode.cpp | 104 +++--- llama/unicode.h | 21 +- llama/vendoring | 2 +- llm/server.go | 1 - parser/parser_test.go | 1 - server/images.go | 7 + 266 files changed, 760 insertions(+), 505 deletions(-) diff --git a/api/types.go b/api/types.go index 0ea0b9bf0..e56eb1684 100644 --- a/api/types.go +++ b/api/types.go @@ -225,7 +225,6 @@ type Options struct { Mirostat int `json:"mirostat,omitempty"` MirostatTau float32 `json:"mirostat_tau,omitempty"` MirostatEta float32 `json:"mirostat_eta,omitempty"` - PenalizeNewline bool `json:"penalize_newline,omitempty"` Stop []string `json:"stop,omitempty"` } @@ -602,7 +601,6 @@ func DefaultOptions() Options { Mirostat: 0, MirostatTau: 5.0, MirostatEta: 0.1, - PenalizeNewline: true, Seed: -1, Runner: Runner{ diff --git a/cmd/interactive_test.go b/cmd/interactive_test.go index 118f42640..a0cfa5439 100644 --- a/cmd/interactive_test.go +++ b/cmd/interactive_test.go @@ -63,17 +63,15 @@ func TestModelfileBuilder(t *testing.T) { {Role: "assistant", Content: "Yes it is true, I am half horse, half shark."}, }, Options: map[string]any{ - "temperature": 0.9, - "seed": 42, - "penalize_newline": false, - "stop": []string{"hi", "there"}, + "temperature": 0.9, + "seed": 42, + "stop": []string{"hi", "there"}, }, } t.Run("model", func(t *testing.T) { expect := `FROM hork SYSTEM You are part horse and part shark, but all hork. Do horklike things -PARAMETER penalize_newline false PARAMETER seed 42 PARAMETER stop hi PARAMETER stop there @@ -92,7 +90,6 @@ MESSAGE assistant Yes it is true, I am half horse, half shark. opts.ParentModel = "horseshark" expect := `FROM horseshark SYSTEM You are part horse and part shark, but all hork. Do horklike things -PARAMETER penalize_newline false PARAMETER seed 42 PARAMETER stop hi PARAMETER stop there diff --git a/docs/api.md b/docs/api.md index 41605fc70..692ac4f5a 100644 --- a/docs/api.md +++ b/docs/api.md @@ -396,17 +396,13 @@ curl http://localhost:11434/api/generate -d '{ "mirostat": 1, "mirostat_tau": 0.8, "mirostat_eta": 0.6, - "penalize_newline": true, "stop": ["\n", "user:"], "numa": false, "num_ctx": 1024, "num_batch": 2, "num_gpu": 1, "main_gpu": 0, - "low_vram": false, - "vocab_only": false, "use_mmap": true, - "use_mlock": false, "num_thread": 8 } }' diff --git a/llama/amx.cpp b/llama/amx.cpp index 7e375ced1..b63fcda6f 100644 --- a/llama/amx.cpp +++ b/llama/amx.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/amx.h b/llama/amx.h index 384d7ecee..021ab73ca 100644 --- a/llama/amx.h +++ b/llama/amx.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/clip.cpp b/llama/clip.cpp index dafbc3236..1d7e126c5 100644 --- a/llama/clip.cpp +++ b/llama/clip.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/clip.h b/llama/clip.h index 4c64880e2..b44f7517c 100644 --- a/llama/clip.h +++ b/llama/clip.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/common.cpp b/llama/common.cpp index 0bf26ce0f..05b598864 100644 --- a/llama/common.cpp +++ b/llama/common.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -966,6 +966,25 @@ struct common_init_result common_init_from_params(common_params & params) { params.sampling.ignore_eos = false; } + if (params.sampling.ignore_eos) { + for (llama_token i = 0; i < llama_n_vocab(model); i++) { + if (llama_token_is_eog(model, i)) { + LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY); + params.sampling.logit_bias.push_back({i, -INFINITY}); + } + } + } + + if (params.sampling.penalty_last_n == -1) { + LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); + params.sampling.penalty_last_n = llama_n_ctx(lctx); + } + + if (params.sampling.dry_penalty_last_n == -1) { + LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); + params.sampling.dry_penalty_last_n = llama_n_ctx(lctx); + } + if (params.warmup) { LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); diff --git a/llama/common.h b/llama/common.h index b5b0168b1..4c605f757 100644 --- a/llama/common.h +++ b/llama/common.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -121,6 +121,7 @@ enum common_sampler_type { COMMON_SAMPLER_TYPE_TEMPERATURE = 7, COMMON_SAMPLER_TYPE_XTC = 8, COMMON_SAMPLER_TYPE_INFILL = 9, + COMMON_SAMPLER_TYPE_PENALTIES = 10, }; // dimensionality reduction methods, used by cvector-generator @@ -156,7 +157,6 @@ struct common_params_sampling { int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate - bool penalize_nl = false; // consider newlines as a repeatable token bool ignore_eos = false; bool no_perf = false; // disable performance metrics bool timing_per_token = false; @@ -165,6 +165,7 @@ struct common_params_sampling { std::vector samplers = { + COMMON_SAMPLER_TYPE_PENALTIES, COMMON_SAMPLER_TYPE_DRY, COMMON_SAMPLER_TYPE_TOP_K, COMMON_SAMPLER_TYPE_TYPICAL_P, @@ -219,11 +220,13 @@ struct common_params { float defrag_thold = 0.1f; // KV cache defragmentation threshold // offload params - std::vector devices; // devices to use for offloading - int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) - int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors - float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs - enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs + std::vector devices; // devices to use for offloading + + int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs + + enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs struct cpu_params cpuparams; struct cpu_params cpuparams_batch; diff --git a/llama/ggml-alloc.c b/llama/ggml-alloc.c index f5fd1fc21..ffdfb8977 100644 --- a/llama/ggml-alloc.c +++ b/llama/ggml-alloc.c @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -560,7 +560,6 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor size_t offset = ggml_dyn_tallocr_alloc(alloc, size, node); hn->buffer_id = buffer_id; hn->offset = offset; - return; } } diff --git a/llama/ggml-alloc.h b/llama/ggml-alloc.h index d17cd4f63..3dc1f5a1f 100644 --- a/llama/ggml-alloc.h +++ b/llama/ggml-alloc.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-backend-impl.h b/llama/ggml-backend-impl.h index f39d669bd..2e04f91be 100644 --- a/llama/ggml-backend-impl.h +++ b/llama/ggml-backend-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-backend-reg.cpp b/llama/ggml-backend-reg.cpp index 31b4df87c..0baa8422f 100644 --- a/llama/ggml-backend-reg.cpp +++ b/llama/ggml-backend-reg.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-backend.cpp b/llama/ggml-backend.cpp index fbb697e51..956611a2a 100644 --- a/llama/ggml-backend.cpp +++ b/llama/ggml-backend.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-backend.h b/llama/ggml-backend.h index 9ce526889..83413fa9e 100644 --- a/llama/ggml-backend.h +++ b/llama/ggml-backend.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-blas.cpp b/llama/ggml-blas.cpp index 382909fee..97e269807 100644 --- a/llama/ggml-blas.cpp +++ b/llama/ggml-blas.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-blas.h b/llama/ggml-blas.h index b1f1d8a66..62278ada5 100644 --- a/llama/ggml-blas.h +++ b/llama/ggml-blas.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-common.h b/llama/ggml-common.h index f4b6189ba..01ad72d7a 100644 --- a/llama/ggml-common.h +++ b/llama/ggml-common.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpp.h b/llama/ggml-cpp.h index c23921a04..1066459ae 100644 --- a/llama/ggml-cpp.h +++ b/llama/ggml-cpp.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-aarch64.cpp b/llama/ggml-cpu-aarch64.cpp index 3677698a7..39ac7a331 100644 --- a/llama/ggml-cpu-aarch64.cpp +++ b/llama/ggml-cpu-aarch64.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-aarch64.h b/llama/ggml-cpu-aarch64.h index 86ac1142c..dc6376bbc 100644 --- a/llama/ggml-cpu-aarch64.h +++ b/llama/ggml-cpu-aarch64.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-impl.h b/llama/ggml-cpu-impl.h index abdfb73a7..7bd64bb1e 100644 --- a/llama/ggml-cpu-impl.h +++ b/llama/ggml-cpu-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-quants.c b/llama/ggml-cpu-quants.c index b516f8fe2..08df46ff2 100644 --- a/llama/ggml-cpu-quants.c +++ b/llama/ggml-cpu-quants.c @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-quants.h b/llama/ggml-cpu-quants.h index ca4d246ea..d02d9cf13 100644 --- a/llama/ggml-cpu-quants.h +++ b/llama/ggml-cpu-quants.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-traits.cpp b/llama/ggml-cpu-traits.cpp index 00fce8813..c7d518ea9 100644 --- a/llama/ggml-cpu-traits.cpp +++ b/llama/ggml-cpu-traits.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu-traits.h b/llama/ggml-cpu-traits.h index 36aa251b5..51af5290b 100644 --- a/llama/ggml-cpu-traits.h +++ b/llama/ggml-cpu-traits.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu.c b/llama/ggml-cpu.c index b6797e3ab..b697fac09 100644 --- a/llama/ggml-cpu.c +++ b/llama/ggml-cpu.c @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cpu.cpp b/llama/ggml-cpu.cpp index eb21a55aa..104bb73f5 100644 --- a/llama/ggml-cpu.cpp +++ b/llama/ggml-cpu.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -419,8 +419,11 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st switch (op->op) { case GGML_OP_CPY: return + op->type != GGML_TYPE_IQ3_XXS && + op->type != GGML_TYPE_IQ3_S && op->type != GGML_TYPE_IQ2_XXS && op->type != GGML_TYPE_IQ2_XS && + op->type != GGML_TYPE_IQ2_S && op->type != GGML_TYPE_IQ1_S && op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float case GGML_OP_MUL_MAT: diff --git a/llama/ggml-cpu.h b/llama/ggml-cpu.h index fa135856a..f9d5c1b44 100644 --- a/llama/ggml-cpu.h +++ b/llama/ggml-cpu.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h index 5388c3c30..d8a9a74d7 100644 --- a/llama/ggml-cuda.h +++ b/llama/ggml-cuda.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/acc.cu b/llama/ggml-cuda/acc.cu index a49aafc81..6db611b7b 100644 --- a/llama/ggml-cuda/acc.cu +++ b/llama/ggml-cuda/acc.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/acc.cuh b/llama/ggml-cuda/acc.cuh index e9b4c54e5..e0048b904 100644 --- a/llama/ggml-cuda/acc.cuh +++ b/llama/ggml-cuda/acc.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/arange.cu b/llama/ggml-cuda/arange.cu index e9d41ec4c..b26728c22 100644 --- a/llama/ggml-cuda/arange.cu +++ b/llama/ggml-cuda/arange.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/arange.cuh b/llama/ggml-cuda/arange.cuh index 600f4c4d3..039525ad0 100644 --- a/llama/ggml-cuda/arange.cuh +++ b/llama/ggml-cuda/arange.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/argmax.cu b/llama/ggml-cuda/argmax.cu index b84f2d467..6f916c432 100644 --- a/llama/ggml-cuda/argmax.cu +++ b/llama/ggml-cuda/argmax.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/argmax.cuh b/llama/ggml-cuda/argmax.cuh index 8fca051f7..940724cc1 100644 --- a/llama/ggml-cuda/argmax.cuh +++ b/llama/ggml-cuda/argmax.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/argsort.cu b/llama/ggml-cuda/argsort.cu index 90a1ecf91..2099b5de4 100644 --- a/llama/ggml-cuda/argsort.cu +++ b/llama/ggml-cuda/argsort.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/argsort.cuh b/llama/ggml-cuda/argsort.cuh index 17ffc03f4..b0069c0f0 100644 --- a/llama/ggml-cuda/argsort.cuh +++ b/llama/ggml-cuda/argsort.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/binbcast.cu b/llama/ggml-cuda/binbcast.cu index 89176cb8e..a891de6bc 100644 --- a/llama/ggml-cuda/binbcast.cu +++ b/llama/ggml-cuda/binbcast.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/binbcast.cuh b/llama/ggml-cuda/binbcast.cuh index f71cd10c3..78af15b92 100644 --- a/llama/ggml-cuda/binbcast.cuh +++ b/llama/ggml-cuda/binbcast.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/clamp.cu b/llama/ggml-cuda/clamp.cu index ae828ac9b..f6cd97ab0 100644 --- a/llama/ggml-cuda/clamp.cu +++ b/llama/ggml-cuda/clamp.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/clamp.cuh b/llama/ggml-cuda/clamp.cuh index 9ea28b9db..4a10db70e 100644 --- a/llama/ggml-cuda/clamp.cuh +++ b/llama/ggml-cuda/clamp.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/common.cuh b/llama/ggml-cuda/common.cuh index f46137c9e..7603ee3f3 100644 --- a/llama/ggml-cuda/common.cuh +++ b/llama/ggml-cuda/common.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/concat.cu b/llama/ggml-cuda/concat.cu index a2d4dbb95..13e25851a 100644 --- a/llama/ggml-cuda/concat.cu +++ b/llama/ggml-cuda/concat.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/concat.cuh b/llama/ggml-cuda/concat.cuh index 5fb80402f..9ab0b933c 100644 --- a/llama/ggml-cuda/concat.cuh +++ b/llama/ggml-cuda/concat.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/conv-transpose-1d.cu b/llama/ggml-cuda/conv-transpose-1d.cu index 7f4d76f18..73910fba0 100644 --- a/llama/ggml-cuda/conv-transpose-1d.cu +++ b/llama/ggml-cuda/conv-transpose-1d.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/conv-transpose-1d.cuh b/llama/ggml-cuda/conv-transpose-1d.cuh index 96f719515..a545dddb1 100644 --- a/llama/ggml-cuda/conv-transpose-1d.cuh +++ b/llama/ggml-cuda/conv-transpose-1d.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/convert.cu b/llama/ggml-cuda/convert.cu index b101e5e6e..24e234188 100644 --- a/llama/ggml-cuda/convert.cu +++ b/llama/ggml-cuda/convert.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/convert.cuh b/llama/ggml-cuda/convert.cuh index 6ea121967..5a2ada20d 100644 --- a/llama/ggml-cuda/convert.cuh +++ b/llama/ggml-cuda/convert.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/count-equal.cu b/llama/ggml-cuda/count-equal.cu index 0ae127151..4d29fd77c 100644 --- a/llama/ggml-cuda/count-equal.cu +++ b/llama/ggml-cuda/count-equal.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/count-equal.cuh b/llama/ggml-cuda/count-equal.cuh index abf20d980..e5b768526 100644 --- a/llama/ggml-cuda/count-equal.cuh +++ b/llama/ggml-cuda/count-equal.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/cpy.cu b/llama/ggml-cuda/cpy.cu index 47103d518..2e077dc04 100644 --- a/llama/ggml-cuda/cpy.cu +++ b/llama/ggml-cuda/cpy.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/cpy.cuh b/llama/ggml-cuda/cpy.cuh index 6c1860c22..139e499cd 100644 --- a/llama/ggml-cuda/cpy.cuh +++ b/llama/ggml-cuda/cpy.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/cross-entropy-loss.cu b/llama/ggml-cuda/cross-entropy-loss.cu index 5ab09f10d..1082ede47 100644 --- a/llama/ggml-cuda/cross-entropy-loss.cu +++ b/llama/ggml-cuda/cross-entropy-loss.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/cross-entropy-loss.cuh b/llama/ggml-cuda/cross-entropy-loss.cuh index 1f1e4c828..7643904a4 100644 --- a/llama/ggml-cuda/cross-entropy-loss.cuh +++ b/llama/ggml-cuda/cross-entropy-loss.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/dequantize.cuh b/llama/ggml-cuda/dequantize.cuh index 31ec4a261..e7b2f9667 100644 --- a/llama/ggml-cuda/dequantize.cuh +++ b/llama/ggml-cuda/dequantize.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/diagmask.cu b/llama/ggml-cuda/diagmask.cu index 89dc3b119..51bf18b2c 100644 --- a/llama/ggml-cuda/diagmask.cu +++ b/llama/ggml-cuda/diagmask.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/diagmask.cuh b/llama/ggml-cuda/diagmask.cuh index 54bdb98c0..ef2a4fd76 100644 --- a/llama/ggml-cuda/diagmask.cuh +++ b/llama/ggml-cuda/diagmask.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-common.cuh b/llama/ggml-cuda/fattn-common.cuh index 46a58b58f..338795d31 100644 --- a/llama/ggml-cuda/fattn-common.cuh +++ b/llama/ggml-cuda/fattn-common.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-tile-f16.cu b/llama/ggml-cuda/fattn-tile-f16.cu index 92ada9ecb..9ede4f690 100644 --- a/llama/ggml-cuda/fattn-tile-f16.cu +++ b/llama/ggml-cuda/fattn-tile-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-tile-f16.cuh b/llama/ggml-cuda/fattn-tile-f16.cuh index 8d79eb863..d5dfca977 100644 --- a/llama/ggml-cuda/fattn-tile-f16.cuh +++ b/llama/ggml-cuda/fattn-tile-f16.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-tile-f32.cu b/llama/ggml-cuda/fattn-tile-f32.cu index 1e0c0b71b..e7e13eba7 100644 --- a/llama/ggml-cuda/fattn-tile-f32.cu +++ b/llama/ggml-cuda/fattn-tile-f32.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-tile-f32.cuh b/llama/ggml-cuda/fattn-tile-f32.cuh index 7c3944b29..7272b04da 100644 --- a/llama/ggml-cuda/fattn-tile-f32.cuh +++ b/llama/ggml-cuda/fattn-tile-f32.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-vec-f16.cuh b/llama/ggml-cuda/fattn-vec-f16.cuh index 51485b1f5..d9d2b1a8d 100644 --- a/llama/ggml-cuda/fattn-vec-f16.cuh +++ b/llama/ggml-cuda/fattn-vec-f16.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-vec-f32.cuh b/llama/ggml-cuda/fattn-vec-f32.cuh index b317368e7..b3ab75780 100644 --- a/llama/ggml-cuda/fattn-vec-f32.cuh +++ b/llama/ggml-cuda/fattn-vec-f32.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn-wmma-f16.cuh b/llama/ggml-cuda/fattn-wmma-f16.cuh index babedef8a..745457059 100644 --- a/llama/ggml-cuda/fattn-wmma-f16.cuh +++ b/llama/ggml-cuda/fattn-wmma-f16.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn.cu b/llama/ggml-cuda/fattn.cu index a9c07bbf6..dbeb5344f 100644 --- a/llama/ggml-cuda/fattn.cu +++ b/llama/ggml-cuda/fattn.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/fattn.cuh b/llama/ggml-cuda/fattn.cuh index efe7e1c18..b48852350 100644 --- a/llama/ggml-cuda/fattn.cuh +++ b/llama/ggml-cuda/fattn.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/getrows.cu b/llama/ggml-cuda/getrows.cu index 74172cbd0..55b7311b4 100644 --- a/llama/ggml-cuda/getrows.cu +++ b/llama/ggml-cuda/getrows.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/getrows.cuh b/llama/ggml-cuda/getrows.cuh index 503e5a6df..9370b4f77 100644 --- a/llama/ggml-cuda/getrows.cuh +++ b/llama/ggml-cuda/getrows.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/ggml-cuda.cu b/llama/ggml-cuda/ggml-cuda.cu index dc71ded53..2449f1b03 100644 --- a/llama/ggml-cuda/ggml-cuda.cu +++ b/llama/ggml-cuda/ggml-cuda.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/im2col.cu b/llama/ggml-cuda/im2col.cu index 7ee597304..93d1113f0 100644 --- a/llama/ggml-cuda/im2col.cu +++ b/llama/ggml-cuda/im2col.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/im2col.cuh b/llama/ggml-cuda/im2col.cuh index 728a78916..d694d8023 100644 --- a/llama/ggml-cuda/im2col.cuh +++ b/llama/ggml-cuda/im2col.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mma.cuh b/llama/ggml-cuda/mma.cuh index 0cb75d795..f1c4d8532 100644 --- a/llama/ggml-cuda/mma.cuh +++ b/llama/ggml-cuda/mma.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmq.cu b/llama/ggml-cuda/mmq.cu index 965f0499f..021b762c8 100644 --- a/llama/ggml-cuda/mmq.cu +++ b/llama/ggml-cuda/mmq.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmq.cuh b/llama/ggml-cuda/mmq.cuh index 2498a6d09..3a8835e7e 100644 --- a/llama/ggml-cuda/mmq.cuh +++ b/llama/ggml-cuda/mmq.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmv.cu b/llama/ggml-cuda/mmv.cu index 932709b68..6bfe30a7f 100644 --- a/llama/ggml-cuda/mmv.cu +++ b/llama/ggml-cuda/mmv.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmv.cuh b/llama/ggml-cuda/mmv.cuh index 86575ccfa..5cd8922f7 100644 --- a/llama/ggml-cuda/mmv.cuh +++ b/llama/ggml-cuda/mmv.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmvq.cu b/llama/ggml-cuda/mmvq.cu index cdf7c7778..276d80873 100644 --- a/llama/ggml-cuda/mmvq.cu +++ b/llama/ggml-cuda/mmvq.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/mmvq.cuh b/llama/ggml-cuda/mmvq.cuh index 07aa2c4ef..c9a200cdc 100644 --- a/llama/ggml-cuda/mmvq.cuh +++ b/llama/ggml-cuda/mmvq.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/norm.cu b/llama/ggml-cuda/norm.cu index b12468f40..5c207af5b 100644 --- a/llama/ggml-cuda/norm.cu +++ b/llama/ggml-cuda/norm.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/norm.cuh b/llama/ggml-cuda/norm.cuh index 36a6a03c4..0f4a2951b 100644 --- a/llama/ggml-cuda/norm.cuh +++ b/llama/ggml-cuda/norm.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/opt-step-adamw.cu b/llama/ggml-cuda/opt-step-adamw.cu index 17ddb60df..42f32be08 100644 --- a/llama/ggml-cuda/opt-step-adamw.cu +++ b/llama/ggml-cuda/opt-step-adamw.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/opt-step-adamw.cuh b/llama/ggml-cuda/opt-step-adamw.cuh index 99f3da8cf..f7c9b7681 100644 --- a/llama/ggml-cuda/opt-step-adamw.cuh +++ b/llama/ggml-cuda/opt-step-adamw.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/out-prod.cu b/llama/ggml-cuda/out-prod.cu index cfcec7636..77ac57dfd 100644 --- a/llama/ggml-cuda/out-prod.cu +++ b/llama/ggml-cuda/out-prod.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/out-prod.cuh b/llama/ggml-cuda/out-prod.cuh index 3c7e747f8..bf22d0cb6 100644 --- a/llama/ggml-cuda/out-prod.cuh +++ b/llama/ggml-cuda/out-prod.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/pad.cu b/llama/ggml-cuda/pad.cu index 429c7132d..3506f55e9 100644 --- a/llama/ggml-cuda/pad.cu +++ b/llama/ggml-cuda/pad.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/pad.cuh b/llama/ggml-cuda/pad.cuh index c70f78875..e4030f7e7 100644 --- a/llama/ggml-cuda/pad.cuh +++ b/llama/ggml-cuda/pad.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/pool2d.cu b/llama/ggml-cuda/pool2d.cu index 8bb8fbd39..845c302df 100644 --- a/llama/ggml-cuda/pool2d.cu +++ b/llama/ggml-cuda/pool2d.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/pool2d.cuh b/llama/ggml-cuda/pool2d.cuh index d079a5a11..60a1029b8 100644 --- a/llama/ggml-cuda/pool2d.cuh +++ b/llama/ggml-cuda/pool2d.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/quantize.cu b/llama/ggml-cuda/quantize.cu index dd4eb9324..f4f18fdf2 100644 --- a/llama/ggml-cuda/quantize.cu +++ b/llama/ggml-cuda/quantize.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/quantize.cuh b/llama/ggml-cuda/quantize.cuh index c3672dfae..c69ea9881 100644 --- a/llama/ggml-cuda/quantize.cuh +++ b/llama/ggml-cuda/quantize.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/rope.cu b/llama/ggml-cuda/rope.cu index 9c61e8faf..ff351db47 100644 --- a/llama/ggml-cuda/rope.cu +++ b/llama/ggml-cuda/rope.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/rope.cuh b/llama/ggml-cuda/rope.cuh index f22911d24..c6420e245 100644 --- a/llama/ggml-cuda/rope.cuh +++ b/llama/ggml-cuda/rope.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/scale.cu b/llama/ggml-cuda/scale.cu index 76eb6f35d..feff5a12b 100644 --- a/llama/ggml-cuda/scale.cu +++ b/llama/ggml-cuda/scale.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/scale.cuh b/llama/ggml-cuda/scale.cuh index 8acab2e2b..f807772b7 100644 --- a/llama/ggml-cuda/scale.cuh +++ b/llama/ggml-cuda/scale.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/softmax.cu b/llama/ggml-cuda/softmax.cu index dc74bf605..9a664dd7a 100644 --- a/llama/ggml-cuda/softmax.cu +++ b/llama/ggml-cuda/softmax.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/softmax.cuh b/llama/ggml-cuda/softmax.cuh index ae179064b..178d5835b 100644 --- a/llama/ggml-cuda/softmax.cuh +++ b/llama/ggml-cuda/softmax.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/sum.cu b/llama/ggml-cuda/sum.cu index a7f475df9..6abb22d29 100644 --- a/llama/ggml-cuda/sum.cu +++ b/llama/ggml-cuda/sum.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/sum.cuh b/llama/ggml-cuda/sum.cuh index ebbc5f3cc..134514634 100644 --- a/llama/ggml-cuda/sum.cuh +++ b/llama/ggml-cuda/sum.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/sumrows.cu b/llama/ggml-cuda/sumrows.cu index 7cebb3d79..4cfa98595 100644 --- a/llama/ggml-cuda/sumrows.cu +++ b/llama/ggml-cuda/sumrows.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/sumrows.cuh b/llama/ggml-cuda/sumrows.cuh index 8c45f8309..890d149f3 100644 --- a/llama/ggml-cuda/sumrows.cuh +++ b/llama/ggml-cuda/sumrows.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu index 6df43898a..fb8a6413f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu index 075ead721..d51ab412a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu index 604cf7fa8..c68c98169 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu index 89abad5f1..1eb161955 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu index 9dc9a883a..96f14e1cf 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu index ef40f0dba..1d289ce78 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu index 20dfc61a6..4274d14b0 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu index 514b7731c..4f6a49e48 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu index ae2a66989..e73dc503b 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu index ea2ec19b8..33d21bce6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu index 3298b1e41..277bb7d1f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu index 0243ee5c4..2bea2d234 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu index c2fd7666f..f9dcbe888 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu index 014f978c9..3c1c11479 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu index 23acfacf5..054f51c5a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu index 10d4f84d2..3989dd786 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu index bbaa83367..0f51e825e 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu index 02e0dd320..07364b030 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu index f69195221..00a5e4db6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu index 8131f6b14..d7110f1c6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu index e3f9bdd4c..6be93270b 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu index c1c13c7fa..fb21ad841 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu index 6860e9555..cb2853875 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu index 5d5ebb7a8..b9323e9c2 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu index e4203928d..83d10682a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu index 53daa03cb..18d3fe017 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu index 49489a958..f448877ed 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu index 936132ac0..0f7135742 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu index dccdd0343..5a9280230 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu index aa9606280..278e1778a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu index 93f56461b..432af8f2f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu index 3c9db7a78..4245e1ed6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu index f1e287875..c2582463a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu index ecf18ad2b..c9f9119e3 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu index 4c74eebeb..1a832dca2 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu index fea31c915..5889ab1f8 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu index d4d464522..3e5ab335f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu index 5c8d298f5..c7b29fa96 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu index 76a17f036..3aacd0b6c 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu index eb692c818..1713d9ba0 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu index 85f6bede1..797cdbf50 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu index cfa78304e..264ccdb33 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu index 52c9eebaa..e525b7f68 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu index cfa4c2a50..c5717ce68 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu index 02aef31d5..7bae7853f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu index c7dce6a6e..e20053de7 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu index 3d0198668..9923d4ee0 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu index 283d91716..049a4139f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu index e33e64e1e..b7c334839 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu index 0f63d587f..834dd185e 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu index 4a9a2e951..ab1fa04a8 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu index b27ee133f..5b22898d6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu index 7c55961df..5ca2a90c8 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu index 147bd03e1..d4ab31472 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu index b7a8e5246..34d5a4e6f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu index 52a97e00c..7e997ceb4 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu index 3ca391e32..220b966a8 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu index a3da427cf..8059a5967 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu index 2a975f5d6..8aaae726d 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu index 8f9f54d55..af82c8c5e 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu index 6bba7acc1..f4868aa85 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu index 92a7971a4..8a9218feb 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu index 23596d172..d4b2a48ce 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu index 42113656d..7810035ff 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu index 88e07e336..cf1d76be9 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu index 92e022510..8af0b435f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu index fdd0ffee0..a471c6f1a 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu index 0a44bece2..f84082876 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu index f6cd122f1..ae9781961 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu index 1d81b9500..8bdf58e7f 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu index 5b26a0813..a014fd4e0 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu index 003d7d68e..83b977d91 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu index 392b18fcf..0f0a91150 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu index 2e78b0ee3..7f004d2e2 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu index b425254b3..aa5442231 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu index 7d6344d87..0a838ec98 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu index 64daf5d35..a8b4b3fd3 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu index ac6db018e..1a704f5c6 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu index 001087c62..2a51a222c 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu index 5c68f760d..2372995d5 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu index d66d8c4fe..ad29aaf64 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu index 0d8b6b178..6ffa959a3 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu index aaafe33cf..1ee14e28d 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu index bbed377eb..7d097471d 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu index d047f0947..15d5e3c22 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu index cb61979c6..97efc77af 100644 --- a/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +++ b/llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu index 59731a34c..a6b5fcf19 100644 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu index 4c0d8b2c0..cce80fc55 100644 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu index 2eca1711c..4d86450cc 100644 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu index 3dd7ab0bb..1629af503 100644 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu index 77e464244..3fba3533f 100644 --- a/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ b/llama/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu index 75fc71443..1ec23c3fc 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq1_s.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu index f6618e14b..524b78f5b 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq2_s.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu index 1f7bf8feb..f1be2630f 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu index d801a2526..2f01d7848 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu index 1b7541edf..eabfd6e51 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq3_s.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu index 73372686f..f3d479283 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu index e0b7aa416..47376f651 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu b/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu index 56be2d974..e8feefee2 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu index 60fa51aa9..270a0994f 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q2_k.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu index f65cfd209..6e5686a42 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q3_k.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu index da02c911a..113df2d2d 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu index 34e8f679f..0102022c4 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu index e9033e75f..15adf4814 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q4_k.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu index 41b33713f..de9f47beb 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu index 815654d13..608180f13 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_1.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu index 93b2d0e06..665275aee 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q5_k.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu index 72042bf24..42e4c687f 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q6_k.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu index 2bc5b2ccf..e6050a49a 100644 --- a/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu +++ b/llama/ggml-cuda/template-instances/mmq-instance-q8_0.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/tsembd.cu b/llama/ggml-cuda/tsembd.cu index 467c4dfc0..8dcd03036 100644 --- a/llama/ggml-cuda/tsembd.cu +++ b/llama/ggml-cuda/tsembd.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/tsembd.cuh b/llama/ggml-cuda/tsembd.cuh index 75137a59d..23dfa3ed3 100644 --- a/llama/ggml-cuda/tsembd.cuh +++ b/llama/ggml-cuda/tsembd.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/unary.cu b/llama/ggml-cuda/unary.cu index b86253caf..9e3f5b259 100644 --- a/llama/ggml-cuda/unary.cu +++ b/llama/ggml-cuda/unary.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/unary.cuh b/llama/ggml-cuda/unary.cuh index 7845f2ae7..3543a85c3 100644 --- a/llama/ggml-cuda/unary.cuh +++ b/llama/ggml-cuda/unary.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/upscale.cu b/llama/ggml-cuda/upscale.cu index 1a45c5748..8d733d175 100644 --- a/llama/ggml-cuda/upscale.cu +++ b/llama/ggml-cuda/upscale.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/upscale.cuh b/llama/ggml-cuda/upscale.cuh index 93116d183..52636d526 100644 --- a/llama/ggml-cuda/upscale.cuh +++ b/llama/ggml-cuda/upscale.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/vecdotq.cuh b/llama/ggml-cuda/vecdotq.cuh index 1f9606492..cee27cab7 100644 --- a/llama/ggml-cuda/vecdotq.cuh +++ b/llama/ggml-cuda/vecdotq.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/vendors/cuda.h b/llama/ggml-cuda/vendors/cuda.h index 07a2e6446..9914a0a1f 100644 --- a/llama/ggml-cuda/vendors/cuda.h +++ b/llama/ggml-cuda/vendors/cuda.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/vendors/hip.h b/llama/ggml-cuda/vendors/hip.h index 9e88e723c..bd7534bb5 100644 --- a/llama/ggml-cuda/vendors/hip.h +++ b/llama/ggml-cuda/vendors/hip.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/vendors/musa.h b/llama/ggml-cuda/vendors/musa.h index 8902cd967..d6747bd26 100644 --- a/llama/ggml-cuda/vendors/musa.h +++ b/llama/ggml-cuda/vendors/musa.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/wkv6.cu b/llama/ggml-cuda/wkv6.cu index d458e1afb..fb76f1008 100644 --- a/llama/ggml-cuda/wkv6.cu +++ b/llama/ggml-cuda/wkv6.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-cuda/wkv6.cuh b/llama/ggml-cuda/wkv6.cuh index 4d3df9feb..6a2e44ac0 100644 --- a/llama/ggml-cuda/wkv6.cuh +++ b/llama/ggml-cuda/wkv6.cuh @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-impl.h b/llama/ggml-impl.h index f5f0c7649..e43a549be 100644 --- a/llama/ggml-impl.h +++ b/llama/ggml-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -577,6 +577,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x) #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x) +// expose GGUF internals for test code + +GGML_API size_t gguf_type_size(enum gguf_type type); + +GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); + +struct gguf_buf { + void * data; + size_t size; + size_t offset; +}; +GGML_API struct gguf_buf gguf_buf_init(size_t size); +GGML_API void gguf_buf_free(struct gguf_buf buf); + +GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta); + #ifdef __cplusplus } #endif diff --git a/llama/ggml-metal-embed.metal b/llama/ggml-metal-embed.metal index f45d869e9..f560e5c9c 100644 --- a/llama/ggml-metal-embed.metal +++ b/llama/ggml-metal-embed.metal @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -28,7 +28,7 @@ #define GGML_COMMON_IMPL_METAL #if defined(GGML_METAL_EMBED_LIBRARY) /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -1911,7 +1911,7 @@ GGML_TABLE_END() #include "../ggml-common.h" #endif /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-metal-impl.h b/llama/ggml-metal-impl.h index 982b6f9dc..d361e7728 100644 --- a/llama/ggml-metal-impl.h +++ b/llama/ggml-metal-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h index f8e84bf23..7d6a7e981 100644 --- a/llama/ggml-metal.h +++ b/llama/ggml-metal.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal index 8552f726b..615ec0e3a 100644 --- a/llama/ggml-metal.metal +++ b/llama/ggml-metal.metal @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-metal_darwin_arm64.m b/llama/ggml-metal_darwin_arm64.m index 56d8a7549..c0b83c7d7 100644 --- a/llama/ggml-metal_darwin_arm64.m +++ b/llama/ggml-metal_darwin_arm64.m @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-quants.c b/llama/ggml-quants.c index 7cf946749..6d5432865 100644 --- a/llama/ggml-quants.c +++ b/llama/ggml-quants.c @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-quants.h b/llama/ggml-quants.h index 2edd3d878..a698f73ac 100644 --- a/llama/ggml-quants.h +++ b/llama/ggml-quants.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-threading.cpp b/llama/ggml-threading.cpp index 4d2c10f0f..741737968 100644 --- a/llama/ggml-threading.cpp +++ b/llama/ggml-threading.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml-threading.h b/llama/ggml-threading.h index baa20979c..bf5085f5d 100644 --- a/llama/ggml-threading.h +++ b/llama/ggml-threading.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/ggml.c b/llama/ggml.c index f836cba14..c1b47f191 100644 --- a/llama/ggml.c +++ b/llama/ggml.c @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -6084,12 +6084,12 @@ struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, co struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); - return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL; + return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL; } struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); - return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL; + return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL; } void ggml_graph_print(const struct ggml_cgraph * cgraph) { @@ -6536,7 +6536,7 @@ struct gguf_context { void * data; }; -static size_t gguf_type_size(enum gguf_type type) { +size_t gguf_type_size(enum gguf_type type) { GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT); return GGUF_TYPE_SIZE[type]; } @@ -6664,13 +6664,7 @@ struct gguf_context * gguf_init_empty(void) { return ctx; } -struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { - FILE * file = ggml_fopen(fname, "rb"); - if (!file) { - fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno)); - return NULL; - } - +struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) { // offset from start of file size_t offset = 0; @@ -6683,7 +6677,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p for (uint32_t i = 0; i < sizeof(magic); i++) { if (magic[i] != GGUF_MAGIC[i]) { fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]); - fclose(file); return NULL; } } @@ -6694,7 +6687,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context)); if (!ctx) { fprintf(stderr, "%s: failed to allocate memory for context\n", __func__); - fclose(file); return NULL; } @@ -6712,7 +6704,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (ctx->header.version == 1) { fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6725,7 +6716,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read header\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6735,12 +6725,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p { const uint64_t n_kv = ctx->header.n_kv; - ctx->kv = calloc(n_kv, sizeof(struct gguf_kv)); - if (!ctx->kv) { - fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__); - fclose(file); - gguf_free(ctx); - return NULL; + if (n_kv > 0) { + ctx->kv = calloc(n_kv, sizeof(struct gguf_kv)); + if (!ctx->kv) { + fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__); + gguf_free(ctx); + return NULL; + } } for (uint64_t i = 0; i < n_kv; ++i) { @@ -6787,7 +6778,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // prevent from integer overflow in the malloc below if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) { fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n); - fclose(file); gguf_free(ctx); return NULL; } @@ -6795,7 +6785,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type)); if (!kv->value.arr.data) { fprintf(stderr, "%s: failed to allocate memory for array\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6807,7 +6796,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // prevent from integer overflow in the malloc below if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) { fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n); - fclose(file); gguf_free(ctx); return NULL; } @@ -6815,7 +6803,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str)); if (!kv->value.arr.data) { fprintf(stderr, "%s: failed to allocate memory for array\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6846,7 +6833,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read key-value pairs\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6857,7 +6843,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info)); if (!ctx->infos) { fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6893,7 +6878,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read tensor info\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6936,7 +6920,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // this tensor type support have been removed: fprintf(stderr, "%s: tensor '%s' of type %d: %s\n", __func__, info->name.data, (int) info->type, ggml_type_name(info->type)); - fclose(file); gguf_free(ctx); return NULL; } @@ -6944,7 +6927,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (ne % ggml_blck_size(info->type) != 0) { fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n", __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type)); - fclose(file); gguf_free(ctx); return NULL; } @@ -6976,7 +6958,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p *params.ctx = ggml_init(pdata); if (*params.ctx == NULL) { fprintf(stderr, "%s: failed to initialize context\n", __func__); - fclose(file); gguf_free(ctx); return NULL; } @@ -6995,7 +6976,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read tensor data\n", __func__); - fclose(file); ggml_free(ctx_data); gguf_free(ctx); return NULL; @@ -7034,7 +7014,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read the tensor data\n", __func__); - fclose(file); ggml_free(ctx_data); gguf_free(ctx); return NULL; @@ -7043,11 +7022,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ggml_set_no_alloc(ctx_data, params.no_alloc); } - fclose(file); - return ctx; } +struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { + FILE * file = ggml_fopen(fname, "rb"); + if (!file) { + fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno)); + return NULL; + } + + struct gguf_context * result = gguf_init_from_file_impl(file, params); + fclose(file); + return result; +} + void gguf_free(struct gguf_context * ctx) { if (ctx == NULL) { return; @@ -7507,13 +7496,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo // fwrite(val, sizeof(char), size, file); //} -struct gguf_buf { - void * data; - size_t size; - size_t offset; -}; - -static struct gguf_buf gguf_buf_init(size_t size) { +struct gguf_buf gguf_buf_init(size_t size) { struct gguf_buf buf = { /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size), /*buf.size =*/ size, @@ -7523,7 +7506,7 @@ static struct gguf_buf gguf_buf_init(size_t size) { return buf; } -static void gguf_buf_free(struct gguf_buf buf) { +void gguf_buf_free(struct gguf_buf buf) { if (buf.data) { GGML_FREE(buf.data); } @@ -7561,7 +7544,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si buf->offset += el_size; } -static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { +void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { // write header gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); diff --git a/llama/ggml.h b/llama/ggml.h index b3be4485d..758afeabc 100644 --- a/llama/ggml.h +++ b/llama/ggml.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/json-schema-to-grammar.cpp b/llama/json-schema-to-grammar.cpp index 8ae99aafc..6b7a6d229 100644 --- a/llama/json-schema-to-grammar.cpp +++ b/llama/json-schema-to-grammar.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/json-schema-to-grammar.h b/llama/json-schema-to-grammar.h index b8a31467e..dfe21235b 100644 --- a/llama/json-schema-to-grammar.h +++ b/llama/json-schema-to-grammar.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama-grammar.cpp b/llama/llama-grammar.cpp index a56f198a8..f44451e54 100644 --- a/llama/llama-grammar.cpp +++ b/llama/llama-grammar.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama-grammar.h b/llama/llama-grammar.h index e6b92d7de..67c45199f 100644 --- a/llama/llama-grammar.h +++ b/llama/llama-grammar.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama-impl.h b/llama/llama-impl.h index 99a71baea..7ecdf3bad 100644 --- a/llama/llama-impl.h +++ b/llama/llama-impl.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama-sampling.cpp b/llama/llama-sampling.cpp index d9bce9e9b..a94a2bbdf 100644 --- a/llama/llama-sampling.cpp +++ b/llama/llama-sampling.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -1422,19 +1422,15 @@ struct llama_sampler * llama_sampler_init_grammar_impl(const struct llama_vocab // penalties struct llama_sampler_penalties { - const int32_t n_vocab; - const llama_token special_eos_id; - const llama_token linefeed_id; - const int32_t penalty_last_n; const float penalty_repeat; const float penalty_freq; const float penalty_present; - const bool penalize_nl; - const bool ignore_eos; - ring_buffer prev; + + // a frequency map to count token occurrences + std::unordered_map token_count; }; static const char * llama_sampler_penalties_name(const struct llama_sampler * /*smpl*/) { @@ -1447,76 +1443,50 @@ static void llama_sampler_penalties_accept(struct llama_sampler * smpl, llama_to return; } + ctx->token_count[token]++; + + // if the ring buffer is full, remove the oldest token + if (ctx->prev.size() >= (size_t) ctx->penalty_last_n) { + const auto old = ctx->prev.front(); + + ctx->token_count[old]--; + if (ctx->token_count[old] == 0) { + ctx->token_count.erase(old); + } + } + ctx->prev.push_back(token); + +#if 0 + // sanity check + std::unordered_map tmp; + for (int i = 0; i < std::min(ctx->penalty_last_n, ctx->prev.size()); ++i) { + tmp[ctx->prev.rat(i)]++; + } + + assert(ctx->token_count == tmp); +#endif } static void llama_sampler_penalties_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { auto * ctx = (llama_sampler_penalties *) smpl->ctx; - if (ctx->ignore_eos) { - assert(ctx->special_eos_id >= 0); - - // optimistically check if the candidates are not yet sorted/shuffled/truncated - if (cur_p->size > (size_t) ctx->special_eos_id && cur_p->data[ctx->special_eos_id].id == ctx->special_eos_id) { - cur_p->data[ctx->special_eos_id].logit = -INFINITY; - } else { - // else, search for the special EOS token - for (size_t i = 0; i < cur_p->size; ++i) { - if (cur_p->data[i].id == ctx->special_eos_id) { - cur_p->data[i].logit = -INFINITY; - break; - } - } - } - } - if ((ctx->penalty_last_n == 0) || (ctx->penalty_repeat == 1.0f && ctx->penalty_freq == 0.0f && ctx->penalty_present == 0.0f)) { return; } - bool nl_found = false; - size_t nl_idx = 0; - float nl_logit = -INFINITY; - if (!ctx->penalize_nl) { - assert(ctx->linefeed_id >= 0); - - // optimistically check if the candidates are not yet sorted/shuffled/truncated - if (cur_p->size > (size_t) ctx->linefeed_id && cur_p->data[ctx->linefeed_id].id == ctx->linefeed_id) { - nl_found = true; - nl_idx = ctx->linefeed_id; - nl_logit = cur_p->data[ctx->linefeed_id].logit; - } else { - // else, search for the linefeed token - for (size_t i = 0; i < cur_p->size; ++i) { - if (cur_p->data[i].id == ctx->linefeed_id) { - nl_found = true; - nl_idx = i; - nl_logit = cur_p->data[i].logit; - break; - } - } - } - } - - // Create a frequency map to count occurrences of each token in last_tokens - // TODO: optimize this by maintaining the token count in the sampler context - using llama_token_cnt = std::unordered_map; - llama_token_cnt token_count; - - for (int i = 0; i < std::min(ctx->penalty_last_n, ctx->prev.size()); ++i) { - token_count[ctx->prev.rat(i)]++; - } - // Apply frequency and presence penalties to the cur_p for (size_t i = 0; i < cur_p->size; ++i) { - const auto token_iter = token_count.find(cur_p->data[i].id); - if (token_iter == token_count.end()) { + const auto token_iter = ctx->token_count.find(cur_p->data[i].id); + if (token_iter == ctx->token_count.end()) { continue; } const int count = token_iter->second; + assert(count > 0 && count <= ctx->penalty_last_n); + // The academic publication that described this technique actually just only divided, but that would cause tokens with negative logits to become more likely, which is obviously wrong. // This is common fix for this problem, which is to multiply by the penalty instead of dividing. if (cur_p->data[i].logit <= 0) { @@ -1529,30 +1499,21 @@ static void llama_sampler_penalties_apply(struct llama_sampler * smpl, llama_tok } cur_p->sorted = false; - - if (!ctx->penalize_nl && nl_found) { - // restore the logit of the newline token if it was penalized - cur_p->data[nl_idx].logit = nl_logit; - } } static void llama_sampler_penalties_reset(struct llama_sampler * smpl) { auto * ctx = (llama_sampler_penalties *) smpl->ctx; ctx->prev.clear(); + ctx->token_count.clear(); } static struct llama_sampler * llama_sampler_penalties_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_penalties *) smpl->ctx; auto * result = llama_sampler_init_penalties( - ctx->n_vocab, - ctx->special_eos_id, - ctx->linefeed_id, ctx->penalty_last_n, ctx->penalty_repeat, ctx->penalty_freq, - ctx->penalty_present, - ctx->penalize_nl, - ctx->ignore_eos); + ctx->penalty_present); // copy the state { @@ -1578,38 +1539,21 @@ static struct llama_sampler_i llama_sampler_penalties_i = { }; struct llama_sampler * llama_sampler_init_penalties( - int32_t n_vocab, - llama_token special_eos_id, - llama_token linefeed_id, int32_t penalty_last_n, float penalty_repeat, float penalty_freq, - float penalty_present, - bool penalize_nl, - bool ignore_eos) { - if (linefeed_id == LLAMA_TOKEN_NULL) { - penalize_nl = true; - } - - if (special_eos_id == LLAMA_TOKEN_NULL) { - ignore_eos = false; - } - + float penalty_present) { penalty_last_n = std::max(penalty_last_n, 0); return new llama_sampler { /* .iface = */ &llama_sampler_penalties_i, /* .ctx = */ new llama_sampler_penalties { - /* .n_vocab = */ n_vocab, - /* .special_eos_id = */ special_eos_id, - /* .linefeed_id = */ linefeed_id, /* .penalty_last_n = */ penalty_last_n, /* .penalty_repeat = */ penalty_repeat, /* .penalty_freq = */ penalty_freq, /* .penalty_present = */ penalty_present, - /* .penalize_nl = */ penalize_nl, - /* .ignore_eos = */ ignore_eos, /* .prev = */ ring_buffer(penalty_last_n), + /* .token_count = */ {}, }, }; } @@ -1637,7 +1581,8 @@ static void get_overlapping_token_sequences(const llama_vocab & vocab, const std if (word.find(str) != std::string::npos) { token_sequences.emplace(token_id, std::vector()); } else { - size_t word_len = word.size(), str_len = str.size(); + size_t word_len = word.size(); + size_t str_len = str.size(); size_t pos = -1; while ((pos = word.find(str[0], pos + 1)) != std::string::npos) { bool match = true; diff --git a/llama/llama-sampling.h b/llama/llama-sampling.h index e6b2d0800..ede8f14f3 100644 --- a/llama/llama-sampling.h +++ b/llama/llama-sampling.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama-vocab.cpp b/llama/llama-vocab.cpp index 6d16e2a9f..ab810489a 100644 --- a/llama/llama-vocab.cpp +++ b/llama/llama-vocab.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -764,7 +764,7 @@ struct llm_tokenizer_wpm_session { std::vector words(1, ""); for (const uint32_t cpt : cpts_nfd) { - const auto flags = unicode_cpt_flags(cpt); + const auto flags = unicode_cpt_flags_from_cpt(cpt); if (flags.is_whitespace) { if (words.back().size()) { // finish previous word if any diff --git a/llama/llama-vocab.h b/llama/llama-vocab.h index c9e940a5d..888068dff 100644 --- a/llama/llama-vocab.h +++ b/llama/llama-vocab.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llama.cpp b/llama/llama.cpp index 938368687..1a861eac2 100644 --- a/llama/llama.cpp +++ b/llama/llama.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -211,6 +211,7 @@ enum llm_arch { LLM_ARCH_OLMOE, LLM_ARCH_OPENELM, LLM_ARCH_ARCTIC, + LLM_ARCH_DEEPSEEK, LLM_ARCH_DEEPSEEK2, LLM_ARCH_CHATGLM, LLM_ARCH_BITNET, @@ -268,6 +269,7 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_OLMOE, "olmoe" }, { LLM_ARCH_OPENELM, "openelm" }, { LLM_ARCH_ARCTIC, "arctic" }, + { LLM_ARCH_DEEPSEEK, "deepseek" }, { LLM_ARCH_DEEPSEEK2, "deepseek2" }, { LLM_ARCH_CHATGLM, "chatglm" }, { LLM_ARCH_BITNET, "bitnet" }, @@ -1386,6 +1388,33 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, }, }, + { + LLM_ARCH_DEEPSEEK, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ROPE_FREQS, "rope_freqs" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" }, + { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" }, + { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" }, + { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, + }, + }, { LLM_ARCH_DEEPSEEK2, { @@ -1678,6 +1707,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN, LLM_CHAT_TEMPLATE_MISTRAL_V7, LLM_CHAT_TEMPLATE_PHI_3, + LLM_CHAT_TEMPLATE_FALCON_3, LLM_CHAT_TEMPLATE_ZEPHYR, LLM_CHAT_TEMPLATE_MONARCH, LLM_CHAT_TEMPLATE_GEMMA, @@ -1695,6 +1725,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_EXAONE_3, LLM_CHAT_TEMPLATE_RWKV_WORLD, LLM_CHAT_TEMPLATE_GRANITE, + LLM_CHAT_TEMPLATE_GIGACHAT, LLM_CHAT_TEMPLATE_UNKNOWN, }; @@ -1709,6 +1740,7 @@ static const std::map LLM_CHAT_TEMPLATES = { { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, + { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR }, { "monarch", LLM_CHAT_TEMPLATE_MONARCH }, { "gemma", LLM_CHAT_TEMPLATE_GEMMA }, @@ -1726,6 +1758,7 @@ static const std::map LLM_CHAT_TEMPLATES = { { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, { "granite", LLM_CHAT_TEMPLATE_GRANITE }, + { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, }; static llm_arch llm_arch_from_string(const std::string & name) { @@ -6270,6 +6303,19 @@ static void llm_load_hparams( model.type = e_model::MODEL_UNKNOWN; } } break; + case LLM_ARCH_DEEPSEEK: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead); + ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp); + ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared); + ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale); + + switch (hparams.n_layer) { + case 28: model.type = e_model::MODEL_20B; break; + default: model.type = e_model::MODEL_UNKNOWN; + } + } break; case LLM_ARCH_DEEPSEEK2: { bool is_lite = (hparams.n_layer == 27); @@ -6611,6 +6657,11 @@ static void llm_load_vocab( } else if ( tokenizer_pre == "falcon") { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON; + } else if ( + tokenizer_pre == "falcon3") { + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3; + vocab.tokenizer_ignore_merges = true; + vocab.tokenizer_add_bos = true; } else if ( tokenizer_pre == "mpt") { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT; @@ -6622,6 +6673,7 @@ static void llm_load_vocab( tokenizer_pre == "phi-2" || tokenizer_pre == "jina-es" || tokenizer_pre == "jina-de" || + tokenizer_pre == "gigachat" || tokenizer_pre == "jina-v1-en" || tokenizer_pre == "jina-v2-es" || tokenizer_pre == "jina-v2-de" || @@ -7274,6 +7326,13 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, vocab.max_token_len); + if (model.arch == LLM_ARCH_DEEPSEEK) { + LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead); + LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp); + LLAMA_LOG_INFO("%s: n_expert_shared = %d\n", __func__, hparams.n_expert_shared); + LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale); + } + if (model.arch == LLM_ARCH_DEEPSEEK2) { LLAMA_LOG_INFO("%s: n_layer_dense_lead = %d\n", __func__, hparams.n_layer_dense_lead); LLAMA_LOG_INFO("%s: n_lora_q = %d\n", __func__, hparams.n_lora_q); @@ -9104,6 +9163,55 @@ static bool llm_load_tensors( layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); } } break; + case LLM_ARCH_DEEPSEEK: + { + + const int64_t n_ff_exp = hparams.n_ff_exp; + const int64_t n_expert_shared = hparams.n_expert_shared; + + model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + model.output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + model.output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0); + + for (int i = 0; i < n_layer; ++i) { + auto & layer = model.layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + if (i < (int) hparams.n_layer_dense_lead) { + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } else { + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0); + + if (n_expert == 0) { + throw std::runtime_error("n_expert must be > 0"); + } + if (n_expert_used == 0) { + throw std::runtime_error("n_expert_used must be > 0"); + } + + // MoE branch + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert}, 0); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert}, 0); + + // Shared expert branch + layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0); + layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), { n_ff_exp * n_expert_shared, n_embd}, 0); + layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0); + } + } + } break; case LLM_ARCH_DEEPSEEK2: { const bool is_lite = (hparams.n_layer == 27); @@ -15737,6 +15845,161 @@ struct llm_build_context { return gf; } + struct ggml_cgraph * build_deepseek() { + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false); + + // mutable variable, needed during the last layer of the computation to skip unused tokens + int32_t n_tokens = this->n_tokens; + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + struct ggml_tensor * cur; + struct ggml_tensor * inpL; + + inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb); + + // inp_pos - contains the positions + struct ggml_tensor * inp_pos = build_inp_pos(); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); + const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + // norm + cur = llm_build_norm(ctx0, inpL, hparams, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // rope freq factors for llama3; may return nullptr for llama2 and other models + struct ggml_tensor * rope_factors = build_rope_factors(il); + + // compute Q and K and RoPE them + struct ggml_tensor * Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + struct ggml_tensor * Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + struct ggml_tensor * Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_rope_ext( + ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Qcur, "Qcur", il); + + Kcur = ggml_rope_ext( + ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Kcur, "Kcur", il); + + cur = llm_build_kv(ctx0, lctx, kv_self, gf, + model.layers[il].wo, model.layers[il].bo, + Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, kq_scale, cb, il); + } + + if (il == n_layer - 1) { + // skip computing output for unused tokens + struct ggml_tensor * inp_out_ids = build_inp_out_ids(); + n_tokens = n_outputs; + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + + struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + cur = llm_build_norm(ctx0, ffn_inp, hparams, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "ffn_norm", il); + + if ((uint32_t) il < hparams.n_layer_dense_lead) { + cur = llm_build_ffn(ctx0, lctx, cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, cb, il); + cb(cur, "ffn_out", il); + } else { + // MoE branch + ggml_tensor * moe_out = + llm_build_moe_ffn(ctx0, lctx, cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + n_expert, n_expert_used, + LLM_FFN_SILU, false, + false, hparams.expert_weights_scale, + cb, il); + cb(moe_out, "ffn_moe_out", il); + + // FFN shared expert + { + ggml_tensor * ffn_shexp = llm_build_ffn(ctx0, lctx, cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, cb, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, ffn_shexp); + cb(cur, "ffn_out", il); + } + } + + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = llm_build_norm(ctx0, cur, hparams, + model.output_norm, NULL, + LLM_NORM_RMS, cb, -1); + cb(cur, "result_norm", -1); + + // lm_head + cur = llm_build_lora_mm(lctx, ctx0, model.output, cur); + + cb(cur, "result_output", -1); + + ggml_build_forward_expand(gf, cur); + + return gf; + } + struct ggml_cgraph * build_deepseek2() { struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false); @@ -17580,6 +17843,10 @@ static struct ggml_cgraph * llama_build_graph( { result = llm.build_arctic(); } break; + case LLM_ARCH_DEEPSEEK: + { + result = llm.build_deepseek(); + } break; case LLM_ARCH_DEEPSEEK2: { result = llm.build_deepseek2(); @@ -20830,6 +21097,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_COMMAND_R: case LLM_ARCH_OLMO: case LLM_ARCH_ARCTIC: + case LLM_ARCH_DEEPSEEK: case LLM_ARCH_DEEPSEEK2: case LLM_ARCH_CHATGLM: case LLM_ARCH_GRANITE: @@ -22659,6 +22927,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) { } } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { return LLM_CHAT_TEMPLATE_PHI_3; + } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { + return LLM_CHAT_TEMPLATE_FALCON_3; } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { return LLM_CHAT_TEMPLATE_ZEPHYR; } else if (tmpl_contains("bos_token + message['role']")) { @@ -22703,6 +22973,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_RWKV_WORLD; } else if (tmpl_contains("<|start_of_role|>")) { return LLM_CHAT_TEMPLATE_GRANITE; + } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) { + return LLM_CHAT_TEMPLATE_GIGACHAT; } return LLM_CHAT_TEMPLATE_UNKNOWN; } @@ -22809,6 +23081,15 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "<|assistant|>\n"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { + // Falcon 3 + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>\n" << message->content << "\n"; + } + if (add_ass) { + ss << "<|assistant|>\n"; + } } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { // zephyr template for (auto message : chat) { @@ -23026,6 +23307,32 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "<|start_of_role|>assistant<|end_of_role|>\n"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) { + // GigaChat template + bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; + + // Handle system message if present + if (has_system) { + ss << "" << chat[0]->content << "<|message_sep|>"; + } else { + ss << ""; + } + + // Process remaining messages + for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) { + std::string role(chat[i]->role); + if (role == "user") { + ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>" + << "available functions<|role_sep|>[]<|message_sep|>"; + } else if (role == "assistant") { + ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>"; + } + } + + // Add generation prompt if needed + if (add_ass) { + ss << "assistant<|role_sep|>"; + } } else { // template not supported return -1; diff --git a/llama/llama.go b/llama/llama.go index c11d53411..46bed885f 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -671,7 +671,6 @@ type SamplingParams struct { Mirostat int MirostatTau float32 MirostatEta float32 - PenalizeNl bool Seed uint32 Grammar string } @@ -690,7 +689,6 @@ func NewSamplingContext(model *Model, params SamplingParams) (*SamplingContext, cparams.mirostat = C.int32_t(params.Mirostat) cparams.mirostat_tau = C.float(params.MirostatTau) cparams.mirostat_eta = C.float(params.MirostatEta) - cparams.penalize_nl = C.bool(params.PenalizeNl) cparams.seed = C.uint32_t(params.Seed) grammar := C.CString(params.Grammar) diff --git a/llama/llama.h b/llama/llama.h index a73aea997..e00dfa2ac 100644 --- a/llama/llama.h +++ b/llama/llama.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -1170,16 +1170,12 @@ extern "C" { const char * grammar_str, const char * grammar_root); + /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. LLAMA_API struct llama_sampler * llama_sampler_init_penalties( - int32_t n_vocab, // llama_n_vocab() - llama_token special_eos_id, // llama_token_eos() - llama_token linefeed_id, // llama_token_nl() - int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat, // 1.0 = disabled - float penalty_freq, // 0.0 = disabled - float penalty_present, // 0.0 = disabled - bool penalize_nl, // consider newlines as a repeatable token - bool ignore_eos); // ignore the end-of-sequence token + int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat, // 1.0 = disabled + float penalty_freq, // 0.0 = disabled + float penalty_present); // 0.0 = disabled /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982 LLAMA_API struct llama_sampler * llama_sampler_init_dry( diff --git a/llama/llava.cpp b/llama/llava.cpp index 8e35e7c61..b7d72c23b 100644 --- a/llama/llava.cpp +++ b/llama/llava.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/llava.h b/llama/llava.h index 8f26901f9..787a2b2b2 100644 --- a/llama/llava.h +++ b/llama/llava.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/log.cpp b/llama/log.cpp index 9815dd68d..7854b4840 100644 --- a/llama/log.cpp +++ b/llama/log.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/log.h b/llama/log.h index 4fc59d608..24e27fd2d 100644 --- a/llama/log.h +++ b/llama/log.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/mmq.cpp b/llama/mmq.cpp index 3e2ce6295..1506c6da5 100644 --- a/llama/mmq.cpp +++ b/llama/mmq.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/mmq.h b/llama/mmq.h index 63773678c..7df3f3267 100644 --- a/llama/mmq.h +++ b/llama/mmq.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/runner/runner.go b/llama/runner/runner.go index 9771420ef..4dd2ea2b4 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -568,7 +568,6 @@ type Options struct { Mirostat int `json:"mirostat"` MirostatTau float32 `json:"mirostat_tau"` MirostatEta float32 `json:"mirostat_eta"` - PenalizeNewline bool `json:"penalize_nl"` Stop []string `json:"stop"` } @@ -640,7 +639,6 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { samplingParams.Mirostat = req.Mirostat samplingParams.MirostatTau = req.MirostatTau samplingParams.MirostatEta = req.MirostatEta - samplingParams.PenalizeNl = req.PenalizeNewline samplingParams.Seed = uint32(req.Seed) samplingParams.Grammar = req.Grammar diff --git a/llama/sampling.cpp b/llama/sampling.cpp index 3d0345e02..361c07447 100644 --- a/llama/sampling.cpp +++ b/llama/sampling.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -187,32 +187,20 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co params.logit_bias.size(), params.logit_bias.data())); - llama_sampler_chain_add(result->chain, - llama_sampler_init_penalties( - llama_n_vocab (model), - llama_token_eos(model), - llama_token_nl (model), - params.penalty_last_n, - params.penalty_repeat, - params.penalty_freq, - params.penalty_present, - params.penalize_nl, - params.ignore_eos)); - if (params.mirostat == 0) { for (const auto & cnstr : params.samplers) { switch (cnstr) { - case COMMON_SAMPLER_TYPE_DRY: + case COMMON_SAMPLER_TYPE_DRY: { - std::vector c_breakers; + std::vector c_breakers; c_breakers.reserve(params.dry_sequence_breakers.size()); - for (const auto& str : params.dry_sequence_breakers) { + for (const auto & str : params.dry_sequence_breakers) { c_breakers.push_back(str.c_str()); } llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } - break; + break; case COMMON_SAMPLER_TYPE_TOP_K: llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k)); break; @@ -234,6 +222,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co case COMMON_SAMPLER_TYPE_INFILL: llama_sampler_chain_add(result->chain, llama_sampler_init_infill (model)); break; + case COMMON_SAMPLER_TYPE_PENALTIES: + llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); + break; default: GGML_ASSERT(false && "unknown sampler type"); } @@ -441,6 +432,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't'; case COMMON_SAMPLER_TYPE_XTC: return 'x'; case COMMON_SAMPLER_TYPE_INFILL: return 'i'; + case COMMON_SAMPLER_TYPE_PENALTIES: return 'e'; default : return '?'; } } @@ -455,6 +447,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature"; case COMMON_SAMPLER_TYPE_XTC: return "xtc"; case COMMON_SAMPLER_TYPE_INFILL: return "infill"; + case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties"; default : return ""; } } @@ -469,6 +462,7 @@ std::vector common_sampler_types_from_names(const std::vect { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE }, { "xtc", COMMON_SAMPLER_TYPE_XTC }, { "infill", COMMON_SAMPLER_TYPE_INFILL }, + { "penalties", COMMON_SAMPLER_TYPE_PENALTIES }, }; // since samplers names are written multiple ways @@ -515,6 +509,7 @@ std::vector common_sampler_types_from_chars(const std::stri { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC }, { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL }, + { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES), COMMON_SAMPLER_TYPE_PENALTIES }, }; std::vector samplers; diff --git a/llama/sampling.h b/llama/sampling.h index 01c955e88..a7693f2ce 100644 --- a/llama/sampling.h +++ b/llama/sampling.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/sampling_ext.cpp b/llama/sampling_ext.cpp index 030864a13..0f137dc8d 100644 --- a/llama/sampling_ext.cpp +++ b/llama/sampling_ext.cpp @@ -18,7 +18,6 @@ struct common_sampler *common_sampler_cinit(const struct llama_model *model, str sparams.mirostat = params->mirostat; sparams.mirostat_tau = params->mirostat_tau; sparams.mirostat_eta = params->mirostat_eta; - sparams.penalize_nl = params->penalize_nl; sparams.seed = params->seed; sparams.grammar = params->grammar; sparams.xtc_probability = 0.0; diff --git a/llama/sampling_ext.h b/llama/sampling_ext.h index 1bd355f8f..39f499f19 100644 --- a/llama/sampling_ext.h +++ b/llama/sampling_ext.h @@ -23,7 +23,6 @@ extern "C" int32_t mirostat; float mirostat_tau; float mirostat_eta; - bool penalize_nl; uint32_t seed; char *grammar; }; diff --git a/llama/unicode-data.cpp b/llama/unicode-data.cpp index b22fad9b1..e903ea926 100644 --- a/llama/unicode-data.cpp +++ b/llama/unicode-data.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/unicode-data.h b/llama/unicode-data.h index f61b4744d..eb5743940 100644 --- a/llama/unicode-data.h +++ b/llama/unicode-data.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * diff --git a/llama/unicode.cpp b/llama/unicode.cpp index 4bfa4cdcc..a4033d4f6 100644 --- a/llama/unicode.cpp +++ b/llama/unicode.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -102,15 +102,15 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { throw std::invalid_argument("failed to convert utf8 to codepoint"); } -//static std::vector unicode_cpt_to_utf16(uint32_t cp) { +//static std::vector unicode_cpt_to_utf16(uint32_t cpt) { // std::vector result; -// if (/* 0x0000 <= cp && */ cp <= 0xffff) { -// result.emplace_back(cp); +// if (/* 0x0000 <= cpt && */ cpt <= 0xffff) { +// result.emplace_back(cpt); // return result; // } -// if (0x10000 <= cp && cp <= 0x10ffff) { -// result.emplace_back(0xd800 | ((cp - 0x10000) >> 10)); -// result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff)); +// if (0x10000 <= cpt && cpt <= 0x10ffff) { +// result.emplace_back(0xd800 | ((cpt - 0x10000) >> 10)); +// result.emplace_back(0xdc00 | ((cpt - 0x10000) & 0x03ff)); // return result; // } // throw std::invalid_argument("failed to convert codepoint to utf16"); @@ -151,8 +151,8 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { // return result; //} -static std::vector unicode_cpt_flags_array() { - std::vector cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED); +static std::vector unicode_cpt_flags_array() { + std::vector cpt_flags(MAX_CODEPOINTS, unicode_cpt_flags::UNDEFINED); assert (unicode_ranges_flags.begin()[0].first == 0); assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS); @@ -301,8 +301,8 @@ static std::vector unicode_regex_split_custom_gpt2(const std::string & t return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE; }; - auto _get_flags = [&] (const size_t pos) -> codepoint_flags { - return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{}; + auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags { + return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{}; }; size_t _prev_end = offset_ini; @@ -419,8 +419,8 @@ static std::vector unicode_regex_split_custom_llama3(const std::string & return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE; }; - auto _get_flags = [&] (const size_t pos) -> codepoint_flags { - return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{}; + auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags { + return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{}; }; size_t _prev_end = offset_ini; @@ -620,29 +620,29 @@ static std::vector unicode_regex_split_custom(const std::string & text, // interface // -std::string unicode_cpt_to_utf8(uint32_t cp) { +std::string unicode_cpt_to_utf8(uint32_t cpt) { std::string result; - if (/* 0x00 <= cp && */ cp <= 0x7f) { - result.push_back(cp); + if (/* 0x00 <= cpt && */ cpt <= 0x7f) { + result.push_back(cpt); return result; } - if (0x80 <= cp && cp <= 0x7ff) { - result.push_back(0xc0 | ((cp >> 6) & 0x1f)); - result.push_back(0x80 | (cp & 0x3f)); + if (0x80 <= cpt && cpt <= 0x7ff) { + result.push_back(0xc0 | ((cpt >> 6) & 0x1f)); + result.push_back(0x80 | (cpt & 0x3f)); return result; } - if (0x800 <= cp && cp <= 0xffff) { - result.push_back(0xe0 | ((cp >> 12) & 0x0f)); - result.push_back(0x80 | ((cp >> 6) & 0x3f)); - result.push_back(0x80 | (cp & 0x3f)); + if (0x800 <= cpt && cpt <= 0xffff) { + result.push_back(0xe0 | ((cpt >> 12) & 0x0f)); + result.push_back(0x80 | ((cpt >> 6) & 0x3f)); + result.push_back(0x80 | (cpt & 0x3f)); return result; } - if (0x10000 <= cp && cp <= 0x10ffff) { - result.push_back(0xf0 | ((cp >> 18) & 0x07)); - result.push_back(0x80 | ((cp >> 12) & 0x3f)); - result.push_back(0x80 | ((cp >> 6) & 0x3f)); - result.push_back(0x80 | (cp & 0x3f)); + if (0x10000 <= cpt && cpt <= 0x10ffff) { + result.push_back(0xf0 | ((cpt >> 18) & 0x07)); + result.push_back(0x80 | ((cpt >> 12) & 0x3f)); + result.push_back(0x80 | ((cpt >> 6) & 0x3f)); + result.push_back(0x80 | (cpt & 0x3f)); return result; } @@ -672,19 +672,19 @@ std::vector unicode_cpts_from_utf8(const std::string & utf8) { return result; } -codepoint_flags unicode_cpt_flags(const uint32_t cp) { - static const codepoint_flags undef(codepoint_flags::UNDEFINED); +unicode_cpt_flags unicode_cpt_flags_from_cpt(const uint32_t cpt) { + static const unicode_cpt_flags undef(unicode_cpt_flags::UNDEFINED); static const auto cpt_flags = unicode_cpt_flags_array(); - return cp < cpt_flags.size() ? cpt_flags[cp] : undef; + return cpt < cpt_flags.size() ? cpt_flags[cpt] : undef; } -codepoint_flags unicode_cpt_flags(const std::string & utf8) { - static const codepoint_flags undef(codepoint_flags::UNDEFINED); +unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8) { + static const unicode_cpt_flags undef(unicode_cpt_flags::UNDEFINED); if (utf8.empty()) { return undef; // undefined } size_t offset = 0; - return unicode_cpt_flags(unicode_cpt_from_utf8(utf8, offset)); + return unicode_cpt_flags_from_cpt(unicode_cpt_from_utf8(utf8, offset)); } std::string unicode_byte_to_utf8(uint8_t byte) { @@ -697,41 +697,41 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) { return map.at(utf8); } -uint32_t unicode_tolower(uint32_t cp) { +uint32_t unicode_tolower(uint32_t cpt) { // binary search - auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp, + auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cpt, [](const std::pair & pair, uint32_t value) { return pair.first < value; }); - if (it != unicode_map_lowercase.end() && it->first == cp) { + if (it != unicode_map_lowercase.end() && it->first == cpt) { return it->second; } - return cp; // Return the original code point if no lowercase mapping is found + return cpt; // Return the original code point if no lowercase mapping is found } std::vector unicode_regex_split(const std::string & text, const std::vector & regex_exprs) { // unicode categories static const std::map k_ucat_enum = { - { "\\p{N}", codepoint_flags::NUMBER }, - { "\\p{L}", codepoint_flags::LETTER }, - { "\\p{P}", codepoint_flags::PUNCTUATION }, + { "\\p{N}", unicode_cpt_flags::NUMBER }, + { "\\p{L}", unicode_cpt_flags::LETTER }, + { "\\p{P}", unicode_cpt_flags::PUNCTUATION }, }; static const std::map k_ucat_cpt = { - { codepoint_flags::NUMBER, 0xD1 }, - { codepoint_flags::LETTER, 0xD2 }, - { codepoint_flags::PUNCTUATION, 0xD3 }, + { unicode_cpt_flags::NUMBER, 0xD1 }, + { unicode_cpt_flags::LETTER, 0xD2 }, + { unicode_cpt_flags::PUNCTUATION, 0xD3 }, }; static const std::map k_ucat_map = { - { codepoint_flags::NUMBER, "\x30-\x39" }, // 0-9 - { codepoint_flags::LETTER, "\x41-\x5A\x61-\x7A" }, // A-Za-z - { codepoint_flags::PUNCTUATION, "\x21-\x23\x25-\x2A\x2C-\x2F\x3A-\x3B\x3F-\x40\\\x5B-\\\x5D\x5F\\\x7B\\\x7D" }, // !-#%-*,-/:-;?-@\[-\]_\{\} + { unicode_cpt_flags::NUMBER, "\x30-\x39" }, // 0-9 + { unicode_cpt_flags::LETTER, "\x41-\x5A\x61-\x7A" }, // A-Za-z + { unicode_cpt_flags::PUNCTUATION, "\x21-\x23\x25-\x2A\x2C-\x2F\x3A-\x3B\x3F-\x40\\\x5B-\\\x5D\x5F\\\x7B\\\x7D" }, // !-#%-*,-/:-;?-@\[-\]_\{\} }; // compute collapsed codepoints only if needed by at least one regex bool need_collapse = false; - for (auto & regex_expr : regex_exprs) { + for (const auto & regex_expr : regex_exprs) { // search for unicode categories for (const auto & ucat : k_ucat_enum) { if (std::string::npos != regex_expr.find(ucat.first)) { @@ -757,7 +757,7 @@ std::vector unicode_regex_split(const std::string & text, const std continue; } - const auto flags = unicode_cpt_flags(cpts[i]); + const auto flags = unicode_cpt_flags_from_cpt(cpts[i]); if (flags.is_whitespace) { //NOTE: C++ std::regex \s does not mach 0x85, Rust and Python regex does. @@ -773,7 +773,7 @@ std::vector unicode_regex_split(const std::string & text, const std std::vector bpe_offsets = { cpts.size() }; - for (auto & regex_expr : regex_exprs) { + for (const auto & regex_expr : regex_exprs) { // first, see if we have an efficient custom regex implementation auto tmp = unicode_regex_split_custom(text, regex_expr, bpe_offsets); @@ -787,7 +787,7 @@ std::vector unicode_regex_split(const std::string & text, const std // if a unicode category is used in the regex, we use the collapsed text and replace the unicode category // with the corresponding collapsed representation bool use_collapsed = false; - for (auto & ucat : k_ucat_enum) { + for (const auto & ucat : k_ucat_enum) { if (std::string::npos != regex_expr.find(ucat.first)) { use_collapsed = true; break; @@ -853,7 +853,7 @@ std::vector unicode_regex_split(const std::string & text, const std // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback std::wstring wtext(cpts.begin(), cpts.end()); for (size_t i = 0; i < wtext.size(); ++i) { - if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) { + if (wtext[i] > 0x7F && unicode_cpt_flags_from_cpt(wtext[i]).is_whitespace) { wtext[i] = 0x0B; } } diff --git a/llama/unicode.h b/llama/unicode.h index eca7da920..c28ba9cba 100644 --- a/llama/unicode.h +++ b/llama/unicode.h @@ -1,5 +1,5 @@ /** - * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file + * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file * * MIT License * @@ -30,9 +30,7 @@ #include #include -// TODO: prefix all symbols with "llama_" - -struct codepoint_flags { +struct unicode_cpt_flags { enum { UNDEFINED = 0x0001, NUMBER = 0x0002, // regex: \p{N} @@ -61,7 +59,7 @@ struct codepoint_flags { uint16_t is_nfd : 1; // decode from uint16 - inline codepoint_flags(const uint16_t flags=0) { + inline unicode_cpt_flags(const uint16_t flags = 0) { *reinterpret_cast(this) = flags; } @@ -76,18 +74,19 @@ struct codepoint_flags { size_t unicode_len_utf8(char src); -std::string unicode_cpt_to_utf8(uint32_t cp); -uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset); +std::string unicode_cpt_to_utf8 (uint32_t cpt); +uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset); + std::vector unicode_cpts_from_utf8(const std::string & utf8); std::vector unicode_cpts_normalize_nfd(const std::vector & cpts); -codepoint_flags unicode_cpt_flags(const uint32_t cp); -codepoint_flags unicode_cpt_flags(const std::string & utf8); +unicode_cpt_flags unicode_cpt_flags_from_cpt (uint32_t cpt); +unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8); std::string unicode_byte_to_utf8(uint8_t byte); -uint8_t unicode_utf8_to_byte(const std::string & utf8); +uint8_t unicode_utf8_to_byte(const std::string & utf8); -uint32_t unicode_tolower(uint32_t cp); +uint32_t unicode_tolower(uint32_t cpt); std::vector unicode_regex_split(const std::string & text, const std::vector & regex_exprs); diff --git a/llama/vendoring b/llama/vendoring index f36fbcc6f..cb1ac85ea 100644 --- a/llama/vendoring +++ b/llama/vendoring @@ -1 +1 @@ -LLAMACPP_BASE_COMMIT=ba1cb19cdd0d92e012e0f6e009e0620f854b6afd +LLAMACPP_BASE_COMMIT=081b29bd2a3d91e7772e3910ce223dd63b8d7d26 diff --git a/llm/server.go b/llm/server.go index bb9062adc..89e5f54a6 100644 --- a/llm/server.go +++ b/llm/server.go @@ -692,7 +692,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu "mirostat": req.Options.Mirostat, "mirostat_tau": req.Options.MirostatTau, "mirostat_eta": req.Options.MirostatEta, - "penalize_nl": req.Options.PenalizeNewline, "seed": req.Options.Seed, "stop": req.Options.Stop, "image_data": req.Images, diff --git a/parser/parser_test.go b/parser/parser_test.go index b5614c2ed..698de4368 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -494,7 +494,6 @@ func TestParseFileParameters(t *testing.T) { "mirostat 1": {"mirostat", "1"}, "mirostat_tau 1.0": {"mirostat_tau", "1.0"}, "mirostat_eta 1.0": {"mirostat_eta", "1.0"}, - "penalize_newline true": {"penalize_newline", "true"}, "stop ### User:": {"stop", "### User:"}, "stop ### User: ": {"stop", "### User:"}, "stop \"### User:\"": {"stop", "### User:"}, diff --git a/server/images.go b/server/images.go index 4006584fa..5b3504852 100644 --- a/server/images.go +++ b/server/images.go @@ -355,6 +355,8 @@ func realpath(rel, from string) string { return abspath } +var deprecatedParameters = []string{"penalize_newline"} + func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantization string, modelfile *parser.File, fn func(resp api.ProgressResponse)) (err error) { config := ConfigV2{ OS: "linux", @@ -526,6 +528,11 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio messages = append(messages, &api.Message{Role: role, Content: content}) default: + if slices.Contains(deprecatedParameters, c.Name) { + fn(api.ProgressResponse{Status: fmt.Sprintf("warning: parameter %s is deprecated", c.Name)}) + break + } + ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}}) if err != nil { return err