ml/backend/ggml: fix crash on dlopen for non-AVX systems (#8976)
parent 38117fba83
commit f4711da7bd

llama/patches/0016-remove-sgemm-global-variables.patch (new file, 55 lines)
@@ -0,0 +1,55 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Sun, 9 Feb 2025 17:22:15 -0800
Subject: [PATCH] remove sgemm global variables

removes the 'iq4nlt' global variable in sgemm.cpp that causes
a runtime crash when calling dlopen on ggml-cpu libraries as
its initialization depends on AVX instructions the host machine
may not have
---
 ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
index 8fce576c..3f260ce5 100644
--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION
 
@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
                    TC *C, int64_t ldc,
                    int ith, int nth)
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49, -35, -22, -10,
+            1, 13, 25, 38,
+            53, 69, 89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
     }
 
     void matmul(int64_t m, int64_t n) {
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
     const int64_t ldc;
     const int ith;
     const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__
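For context on the failure mode this patch fixes: when sgemm.cpp is compiled with AVX enabled, the dynamic initializer of a namespace-scope __m128i runs inside dlopen(), before any runtime CPU-feature check can reject the library, and the compiler is free to emit AVX (VEX-encoded) instructions for that initializer, so a non-AVX CPU hits an illegal instruction at load time. Below is a minimal sketch of the before/after pattern; the DeferredInit struct is a hypothetical stand-in for tinyBLAS_Q0_AVX, not code from the patch.

// sketch.cpp: illustrative only; assumes x86-64 built with a flag such as -mavx
#include <immintrin.h>
#include <cstdint>

static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10,
                                         1, 13, 25, 38, 53, 69, 89, 113};

// Problematic pattern (what the patch removes): this initializer executes at
// library load time, inside dlopen(), and may be compiled to AVX instructions.
// static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);

// Pattern the patch adopts: make the value a member and load it in the
// constructor, which runs only after the backend has been selected at runtime.
struct DeferredInit {
    __m128i iq4nlt;
    DeferredInit() : iq4nlt(_mm_loadu_si128((const __m128i *) kvalues_iq4nl)) {}
};

The same reasoning applies to any SIMD-typed global in a dynamically loaded backend: static initializers run unconditionally at load and are not covered by the feature check.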