From 60be9e28409d1111185dd663f814102bf9e7edfa Mon Sep 17 00:00:00 2001 From: Josh Yan Date: Wed, 10 Jul 2024 13:46:38 -0700 Subject: [PATCH] patch --- llm/patches/10-quantize-callback.diff | 57 ++++++++++++++------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/llm/patches/10-quantize-callback.diff b/llm/patches/10-quantize-callback.diff index d3be19785..4d8c7dcfe 100644 --- a/llm/patches/10-quantize-callback.diff +++ b/llm/patches/10-quantize-callback.diff @@ -1,18 +1,32 @@ -From fa509abf281177eacdc71a2a14432c4e6ed74a47 Mon Sep 17 00:00:00 2001 +From ed941590d59fc07b1ad21d6aa458588e47d1e446 Mon Sep 17 00:00:00 2001 From: Josh Yan -Date: Wed, 10 Jul 2024 12:58:31 -0700 -Subject: [PATCH] quantize callback +Date: Wed, 10 Jul 2024 13:39:39 -0700 +Subject: [PATCH] quantize progress --- - llama.cpp | 8 ++++++++ - llama.h | 3 +++ + include/llama.h | 3 +++ + src/llama.cpp | 8 ++++++++ 2 files changed, 11 insertions(+) -diff --git a/llama.cpp b/llama.cpp -index 61948751..d3126510 100644 ---- a/llama.cpp -+++ b/llama.cpp -@@ -15586,6 +15586,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s +diff --git a/include/llama.h b/include/llama.h +index bb4b05ba..613db68e 100644 +--- a/include/llama.h ++++ b/include/llama.h +@@ -349,6 +349,9 @@ extern "C" { + bool keep_split; // quantize to the same number of shards + void * imatrix; // pointer to importance matrix data + void * kv_overrides; // pointer to vector containing overrides ++ ++ llama_progress_callback quantize_callback; // callback to report quantization progress ++ void * quantize_callback_data; // user data for the callback + } llama_model_quantize_params; + + // grammar types +diff --git a/src/llama.cpp b/src/llama.cpp +index 2b9ace28..ac640c02 100644 +--- a/src/llama.cpp ++++ b/src/llama.cpp +@@ -18252,6 +18252,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s const auto tn = LLM_TN(model.arch); new_ofstream(0); for (int i 
= 0; i < ml.n_tensors; ++i) { @@ -25,28 +39,15 @@ index 61948751..d3126510 100644 auto weight = ml.get_weight(i); struct ggml_tensor * tensor = weight->tensor; if (weight->idx != cur_split && params->keep_split) { -@@ -16119,6 +16125,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() { +@@ -18789,6 +18795,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() { /*.keep_split =*/ false, /*.imatrix =*/ nullptr, /*.kv_overrides =*/ nullptr, + /*.quantize_callback =*/ nullptr, -+ /*.quantize_callback_data =*/ nullptr, ++ /*.quantize_callback_data =*/ nullptr, }; - + return result; -diff --git a/llama.h b/llama.h -index da310ffa..3cbe6023 100644 ---- a/llama.h -+++ b/llama.h -@@ -337,6 +337,9 @@ extern "C" { - bool keep_split; // quantize to the same number of shards - void * imatrix; // pointer to importance matrix data - void * kv_overrides; // pointer to vector containing overrides -+ -+ llama_progress_callback quantize_callback; // callback to report quantization progress -+ void * quantize_callback_data; // user data for the callback - } llama_model_quantize_params; - - // grammar types -- -2.39.3 (Apple Git-146) \ No newline at end of file +2.39.3 (Apple Git-146) +