relay load model errors to the client (#3065)
llm/patches/03-load_exception.diff (new file, 44 lines)
@@ -0,0 +1,44 @@
diff --git a/llama.cpp b/llama.cpp
index 4225f955..7b762f86 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
-        return -1;
+        throw;
     }
 
     return 0;
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
         };
     }
 
-    int status = llama_model_load(path_model, *model, params);
-    GGML_ASSERT(status <= 0);
-    if (status < 0) {
-        if (status == -1) {
-            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
-        } else if (status == -2) {
-            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+    try {
+        int status = llama_model_load(path_model, *model, params);
+        GGML_ASSERT(status <= 0);
+        if (status < 0) {
+            if (status == -1) {
+                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+            } else if (status == -2) {
+                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+            }
+            delete model;
+            return nullptr;
         }
+    } catch (...) {
+        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
         delete model;
-        return nullptr;
+        throw;
     }
 
     return model;