From 4b903f088aa8d14404e5650d42db8c15530803d5 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Tue, 13 May 2025 13:11:11 -0700
Subject: [PATCH] llama: fix crash on snowflake embedding model (#10690)

---
 llama/llama.cpp/src/llama-vocab.cpp                |  2 --
 llama/patches/0014-fix-string-arr-kv-loading.patch | 14 ++++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llama/llama.cpp/src/llama-vocab.cpp b/llama/llama.cpp/src/llama-vocab.cpp
index b098bb25c..9f5fd57b8 100644
--- a/llama/llama.cpp/src/llama-vocab.cpp
+++ b/llama/llama.cpp/src/llama-vocab.cpp
@@ -1469,8 +1469,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
             if (precompiled_charsmap_keyidx != -1) {
                 const gguf_type pc_type = gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
-                GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8);
-
                 const size_t n_precompiled_charsmap = gguf_get_arr_data_n(ctx, precompiled_charsmap_keyidx);
                 const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
                 precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
diff --git a/llama/patches/0014-fix-string-arr-kv-loading.patch b/llama/patches/0014-fix-string-arr-kv-loading.patch
index 07cb397bf..f879c50ee 100644
--- a/llama/patches/0014-fix-string-arr-kv-loading.patch
+++ b/llama/patches/0014-fix-string-arr-kv-loading.patch
@@ -9,8 +9,8 @@ such as vocab fields
 ---
  ggml/include/gguf.h | 1 +
  ggml/src/gguf.cpp   | 7 +++++--
- src/llama-vocab.cpp | 2 +-
- 3 files changed, 7 insertions(+), 3 deletions(-)
+ src/llama-vocab.cpp | 4 +---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
 
 diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h
 index 79ee2020..3efb22f0 100644
@@ -53,13 +53,15 @@ index 381a9c7d..e45b453d 100644
  }
  
 diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 10f34d33..b098bb25 100644
+index 10f34d33..9f5fd57b 100644
 --- a/src/llama-vocab.cpp
 +++ b/src/llama-vocab.cpp
-@@ -1471,7 +1471,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
+@@ -1469,9 +1469,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
+             const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
+             if (precompiled_charsmap_keyidx != -1) {
                  const gguf_type pc_type = gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
-                 GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8);
- 
+-                GGML_ASSERT(pc_type == GGUF_TYPE_INT8 || pc_type == GGUF_TYPE_UINT8);
+-
 -                const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
 +                const size_t n_precompiled_charsmap = gguf_get_arr_data_n(ctx, precompiled_charsmap_keyidx);
                  const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);