llama: use dynamic backend loading for mllama and clip (#8835)
This commit is contained in:
102
llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
Normal file
102
llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
Normal file
@@ -0,0 +1,102 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: jmorganca <jmorganca@gmail.com>
|
||||
Date: Sat, 4 Jan 2025 22:52:48 -0800
|
||||
Subject: [PATCH] use dynamic backend loading for clip
|
||||
|
||||
---
|
||||
examples/llava/clip.cpp | 74 +++++++++++++++--------------------------
|
||||
1 file changed, 27 insertions(+), 47 deletions(-)
|
||||
|
||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||
index b3c1829f..86b91d5c 100644
|
||||
--- a/examples/llava/clip.cpp
|
||||
+++ b/examples/llava/clip.cpp
|
||||
@@ -8,25 +8,25 @@
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
-//#ifdef GGML_USE_CUDA
|
||||
-//#include "ggml-cuda.h"
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_SYCL
|
||||
-//#include "ggml-sycl.h"
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_METAL
|
||||
-//#include "ggml-metal.h"
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_CANN
|
||||
-//#include "ggml-cann.h"
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_VULKAN
|
||||
-//#include "ggml-vulkan.h"
|
||||
-//#endif
|
||||
+#ifdef GGML_USE_CUDA
|
||||
+#include "ggml-cuda.h"
|
||||
+#endif
|
||||
+
|
||||
+#ifdef GGML_USE_SYCL
|
||||
+#include "ggml-sycl.h"
|
||||
+#endif
|
||||
+
|
||||
+#ifdef GGML_USE_METAL
|
||||
+#include "ggml-metal.h"
|
||||
+#endif
|
||||
+
|
||||
+#ifdef GGML_USE_CANN
|
||||
+#include "ggml-cann.h"
|
||||
+#endif
|
||||
+
|
||||
+#ifdef GGML_USE_VULKAN
|
||||
+#include "ggml-vulkan.h"
|
||||
+#endif
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
}
|
||||
}
|
||||
|
||||
-//#ifdef GGML_USE_CUDA
|
||||
-// new_clip->backend = ggml_backend_cuda_init(0);
|
||||
-// LOG_INF("%s: CLIP using CUDA backend\n", __func__);
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_METAL
|
||||
-// new_clip->backend = ggml_backend_metal_init();
|
||||
-// LOG_INF("%s: CLIP using Metal backend\n", __func__);
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_CANN
|
||||
-// new_clip->backend = ggml_backend_cann_init(0);
|
||||
-// LOG_INF("%s: CLIP using CANN backend\n", __func__);
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_VULKAN
|
||||
-// new_clip->backend = ggml_backend_vk_init(0);
|
||||
-// LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
|
||||
-//#endif
|
||||
-//
|
||||
-//#ifdef GGML_USE_SYCL
|
||||
-// new_clip->backend = ggml_backend_sycl_init(0);
|
||||
-// LOG_INF("%s: CLIP using SYCL backend\n", __func__);
|
||||
-//#endif
|
||||
-
|
||||
- if (!new_clip->backend) {
|
||||
- new_clip->backend = ggml_backend_cpu_init();
|
||||
- LOG_INF("%s: CLIP using CPU backend\n", __func__);
|
||||
+ ggml_backend_t backend = ggml_backend_init_best();
|
||||
+ if (backend == nullptr) {
|
||||
+ LOG_ERR("%s: failed to initialize backend\n", __func__);
|
||||
+ clip_free(new_clip);
|
||||
+ gguf_free(ctx);
|
||||
+ return nullptr;
|
||||
}
|
||||
+ LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
|
||||
+ new_clip->backend = backend;
|
||||
|
||||
// model size and capabilities
|
||||
{
|
||||
Reference in New Issue
Block a user