From 9d6df9080502adcb6f25950e3d829ab05ec8cfc8 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 12 May 2025 15:23:31 -0700 Subject: [PATCH] Follow up to #10363 (#10647) The quantization PR didn't block all unsupported file types, which this PR fixes. It also updates the API docs to reflect the now reduced set of supported types. --- docs/api.md | 67 ++++++----- fs/ggml/type.go | 125 +++++++++----------- server/quantization.go | 55 +-------- server/quantization_test.go | 223 ------------------------------------ 4 files changed, 88 insertions(+), 382 deletions(-) diff --git a/docs/api.md b/docs/api.md index b2b11573d..abd276150 100644 --- a/docs/api.md +++ b/docs/api.md @@ -19,7 +19,7 @@ ### Model names -Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. +Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q8_0` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. ### Durations @@ -952,19 +952,8 @@ If you are creating a model from a safetensors directory or from a GGUF file, yo | Type | Recommended | | --- | :-: | -| q2_K | | -| q3_K_L | | -| q3_K_M | | -| q3_K_S | | -| q4_0 | | -| q4_1 | | | q4_K_M | * | | q4_K_S | | -| q5_0 | | -| q5_1 | | -| q5_K_M | | -| q5_K_S | | -| q6_K | | | q8_0 | * | ### Examples @@ -1009,8 +998,8 @@ Quantize a non-quantized model. ```shell curl http://localhost:11434/api/create -d '{ - "model": "llama3.1:quantized", - "from": "llama3.1:8b-instruct-fp16", + "model": "llama3.2:quantized", + "from": "llama3.2:3b-instruct-fp16", "quantize": "q4_K_M" }' ``` @@ -1020,12 +1009,14 @@ curl http://localhost:11434/api/create -d '{ A stream of JSON objects is returned: ```json -{"status":"quantizing F16 model to Q4_K_M"} -{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"} -{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"} -{"status":"using existing layer sha256:0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177"} +{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":12302} +{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":6433687552} +{"status":"verifying conversion"} +{"status":"creating new layer sha256:fb7f4f211b89c6c4928ff4ddb73db9f9c0cfca3e000c3e40d6cf27ddc6ca72eb"} +{"status":"using existing layer sha256:966de95ca8a62200913e3f8bfbf84c8494536f1b94b49166851e76644e966396"} +{"status":"using existing layer sha256:fcc5a6bec9daf9b561a68827b67ab6088e1dba9d1fa2a50d7bbcc8384e0a265d"} +{"status":"using existing layer sha256:a70ff7e570d97baaf4e62ac6e6ad9975e04caa6d900d3742d37698494479e0cd"} {"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"} -{"status":"creating new layer sha256:455f34728c9b5dd3376378bfb809ee166c145b0b4c1f1a6feca069055066ef9a"} {"status":"writing manifest"} {"status":"success"} ``` @@ -1163,29 +1154,37 @@ A single JSON object will be returned. { "models": [ { - "name": "codellama:13b", - "modified_at": "2023-11-04T14:56:49.277302595-07:00", - "size": 7365960935, - "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697", + "name": "deepseek-r1:latest", + "model": "deepseek-r1:latest", + "modified_at": "2025-05-10T08:06:48.639712648-07:00", + "size": 4683075271, + "digest": "0a8c266910232fd3291e71e5ba1e058cc5af9d411192cf88b6d30e92b6e73163", "details": { + "parent_model": "", "format": "gguf", - "family": "llama", - "families": null, - "parameter_size": "13B", - "quantization_level": "Q4_0" + "family": "qwen2", + "families": [ + "qwen2" + ], + "parameter_size": "7.6B", + "quantization_level": "Q4_K_M" } }, { - "name": "llama3:latest", - "modified_at": "2023-12-07T09:32:18.757212583-08:00", - "size": 3825819519, - "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e", + "name": "llama3.2:latest", + "model": "llama3.2:latest", + "modified_at": "2025-05-04T17:37:44.706015396-07:00", + "size": 2019393189, + "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72", "details": { + "parent_model": "", "format": "gguf", "family": "llama", - "families": null, - "parameter_size": "7B", - "quantization_level": "Q4_0" + "families": [ + "llama" + ], + "parameter_size": "3.2B", + "quantization_level": "Q4_K_M" } } ] diff --git a/fs/ggml/type.go b/fs/ggml/type.go index 8172c46d9..4d3d5bcad 100644 --- a/fs/ggml/type.go +++ b/fs/ggml/type.go @@ -12,42 +12,42 @@ type FileType uint32 const ( FileTypeF32 FileType = iota FileTypeF16 - FileTypeQ4_0 - FileTypeQ4_1 + fileTypeQ4_0 + fileTypeQ4_1 fileTypeQ4_1_F16 // unused by GGML fileTypeQ4_2 // unused by GGML fileTypeQ4_3 // unused by GGML FileTypeQ8_0 - FileTypeQ5_0 - FileTypeQ5_1 - FileTypeQ2_K - FileTypeQ3_K_S - FileTypeQ3_K_M - FileTypeQ3_K_L + fileTypeQ5_0 + fileTypeQ5_1 + fileTypeQ2_K + fileTypeQ3_K_S + fileTypeQ3_K_M + fileTypeQ3_K_L FileTypeQ4_K_S FileTypeQ4_K_M - FileTypeQ5_K_S - FileTypeQ5_K_M - FileTypeQ6_K - fileTypeIQ2_XXS // not supported by ollama - fileTypeIQ2_XS // not supported by ollama - FileTypeQ2_K_S - fileTypeIQ3_XS // not supported by ollama - fileTypeIQ3_XXS // not supported by ollama - fileTypeIQ1_S // not supported by ollama - fileTypeIQ4_NL // not supported by ollama - fileTypeIQ3_S // not supported by ollama - fileTypeIQ3_M // not supported by ollama - fileTypeIQ2_S // not supported by ollama - fileTypeIQ2_M // not supported by ollama - fileTypeIQ4_XS // not supported by ollama - fileTypeIQ1_M // not supported by ollama + fileTypeQ5_K_S + fileTypeQ5_K_M + fileTypeQ6_K + fileTypeIQ2_XXS + fileTypeIQ2_XS + fileTypeQ2_K_S + fileTypeIQ3_XS + fileTypeIQ3_XXS + fileTypeIQ1_S + fileTypeIQ4_NL + fileTypeIQ3_S + fileTypeIQ3_M + fileTypeIQ2_S + fileTypeIQ2_M + fileTypeIQ4_XS + fileTypeIQ1_M FileTypeBF16 fileTypeQ4_0_4_4 // unused by GGML fileTypeQ4_0_4_8 // unused by GGML fileTypeQ4_0_8_8 // unused by GGML - fileTypeTQ1_0 // not supported by ollama - fileTypeTQ2_0 // not supported by ollama + fileTypeTQ1_0 + fileTypeTQ2_0 FileTypeUnknown = 1024 ) @@ -60,36 +60,12 @@ func ParseFileType(s string) (FileType, error) { return FileTypeF32, nil case "F16": return FileTypeF16, nil - case "Q4_0": - return FileTypeQ4_0, nil - case "Q4_1": - return FileTypeQ4_1, nil case "Q8_0": return FileTypeQ8_0, nil - case "Q5_0": - return FileTypeQ5_0, nil - case "Q5_1": - return FileTypeQ5_1, nil - case "Q2_K": - return FileTypeQ2_K, nil - case "Q3_K_S": - return FileTypeQ3_K_S, nil - case "Q3_K_M": - return FileTypeQ3_K_M, nil - case "Q3_K_L": - return FileTypeQ3_K_L, nil case "Q4_K_S": return FileTypeQ4_K_S, nil case "Q4_K_M", "Q4_K": return FileTypeQ4_K_M, nil - case "Q5_K_S": - return FileTypeQ5_K_S, nil - case "Q5_K_M", "Q5_K": - return FileTypeQ5_K_M, nil - case "Q6_K": - return FileTypeQ6_K, nil - case "Q2_K_S": - return FileTypeQ2_K_S, nil case "BF16": return FileTypeBF16, nil default: @@ -111,40 +87,41 @@ func ParseFileType(s string) (FileType, error) { } func (t FileType) String() string { + // Note: this routine will return a broader set of file types for existing models switch t { case FileTypeF32: return "F32" case FileTypeF16: return "F16" - case FileTypeQ4_0: + case fileTypeQ4_0: return "Q4_0" - case FileTypeQ4_1: + case fileTypeQ4_1: return "Q4_1" case FileTypeQ8_0: return "Q8_0" - case FileTypeQ5_0: + case fileTypeQ5_0: return "Q5_0" - case FileTypeQ5_1: + case fileTypeQ5_1: return "Q5_1" - case FileTypeQ2_K: + case fileTypeQ2_K: return "Q2_K" - case FileTypeQ3_K_S: + case fileTypeQ3_K_S: return "Q3_K_S" - case FileTypeQ3_K_M: + case fileTypeQ3_K_M: return "Q3_K_M" - case FileTypeQ3_K_L: + case fileTypeQ3_K_L: return "Q3_K_L" case FileTypeQ4_K_S: return "Q4_K_S" case FileTypeQ4_K_M: return "Q4_K_M" - case FileTypeQ5_K_S: + case fileTypeQ5_K_S: return "Q5_K_S" - case FileTypeQ5_K_M: + case fileTypeQ5_K_M: return "Q5_K_M" - case FileTypeQ6_K: + case fileTypeQ6_K: return "Q6_K" - case FileTypeQ2_K_S: + case fileTypeQ2_K_S: return "Q2_K_S" case FileTypeBF16: return "BF16" @@ -163,35 +140,35 @@ func (ftype FileType) ToTensorType() TensorType { return TensorTypeF32 case FileTypeF16: return TensorTypeF16 - case FileTypeQ4_0: + case fileTypeQ4_0: return TensorTypeQ4_0 - case FileTypeQ4_1: + case fileTypeQ4_1: return TensorTypeQ4_1 case FileTypeQ8_0: return TensorTypeQ8_0 - case FileTypeQ5_0: + case fileTypeQ5_0: return TensorTypeQ5_0 - case FileTypeQ5_1: + case fileTypeQ5_1: return TensorTypeQ5_1 - case FileTypeQ2_K: + case fileTypeQ2_K: return TensorTypeQ2_K - case FileTypeQ3_K_S: + case fileTypeQ3_K_S: return TensorTypeQ3_K - case FileTypeQ3_K_M: + case fileTypeQ3_K_M: return TensorTypeQ3_K - case FileTypeQ3_K_L: + case fileTypeQ3_K_L: return TensorTypeQ3_K case FileTypeQ4_K_S: return TensorTypeQ4_K case FileTypeQ4_K_M: return TensorTypeQ4_K - case FileTypeQ5_K_S: + case fileTypeQ5_K_S: return TensorTypeQ5_K - case FileTypeQ5_K_M: + case fileTypeQ5_K_M: return TensorTypeQ5_K - case FileTypeQ6_K: + case fileTypeQ6_K: return TensorTypeQ6_K - case FileTypeQ2_K_S: + case fileTypeQ2_K_S: return TensorTypeQ2_K case FileTypeBF16: return TensorTypeBF16 diff --git a/server/quantization.go b/server/quantization.go index 80bc093db..adfc948ec 100644 --- a/server/quantization.go +++ b/server/quantization.go @@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType newType = fsggml.TensorTypeQ6_K } } else if strings.Contains(name, "attn_v.weight") { - if ftype == fsggml.FileTypeQ2_K { - if kv.GQA() >= 4 { - newType = fsggml.TensorTypeQ4_K - } else { - newType = fsggml.TensorTypeQ3_K - } - } else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 { - newType = fsggml.TensorTypeQ4_K - } else if ftype == fsggml.FileTypeQ3_K_M { - if qs.iAttnV < 2 { - newType = fsggml.TensorTypeQ5_K - } else { - newType = fsggml.TensorTypeQ4_K - } - } else if ftype == fsggml.FileTypeQ3_K_L { - newType = fsggml.TensorTypeQ5_K - } else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) && + if (ftype == fsggml.FileTypeQ4_K_M) && useMoreBits(qs.iAttnV, qs.nAttnV) { newType = fsggml.TensorTypeQ6_K } else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 { @@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType } else if strings.Contains(name, "ffn_down") { iLayer := qs.iFfnDown n_layer := qs.nFfnDown - if ftype == fsggml.FileTypeQ2_K { - newType = fsggml.TensorTypeQ3_K - } else if ftype == fsggml.FileTypeQ2_K_S { - if iLayer < n_layer/8 { - newType = fsggml.TensorTypeQ4_K - } - } else if ftype == fsggml.FileTypeQ3_K_M { - if iLayer < n_layer/16 { - newType = fsggml.TensorTypeQ5_K - } else if useMoreBits(iLayer, n_layer) { - newType = fsggml.TensorTypeQ4_K - } else { - newType = fsggml.TensorTypeQ3_K - } - } else if ftype == fsggml.FileTypeQ3_K_L { - newType = fsggml.TensorTypeQ5_K - } else if ftype == fsggml.FileTypeQ4_K_M { + if ftype == fsggml.FileTypeQ4_K_M { if useMoreBits(iLayer, n_layer) { newType = fsggml.TensorTypeQ6_K } - } else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) { - newType = fsggml.TensorTypeQ6_K } else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 { newType = fsggml.TensorTypeQ5_K } qs.iFfnDown++ } else if strings.Contains(name, "attn_output.weight") { if nExperts == 8 { - if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M || - ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M { - newType = fsggml.TensorTypeQ5_K - } - } else { - if ftype == fsggml.FileTypeQ2_K { - newType = fsggml.TensorTypeQ3_K - } else if ftype == fsggml.FileTypeQ3_K_M { - newType = fsggml.TensorTypeQ4_K - } else if ftype == fsggml.FileTypeQ3_K_L { + if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M { newType = fsggml.TensorTypeQ5_K } } } else if strings.Contains(name, "attn_qkv.weight") { - if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L { - newType = fsggml.TensorTypeQ4_K - } else if ftype == fsggml.FileTypeQ4_K_M { + if ftype == fsggml.FileTypeQ4_K_M { newType = fsggml.TensorTypeQ5_K - } else if ftype == fsggml.FileTypeQ5_K_M { - newType = fsggml.TensorTypeQ6_K } } diff --git a/server/quantization_test.go b/server/quantization_test.go index b7e133507..495297df3 100644 --- a/server/quantization_test.go +++ b/server/quantization_test.go @@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) { ftype: fsggml.FileTypeF32, expected: fsggml.TensorTypeQ6_K, }, - { - name: "attn_v.weight_q4_k", - kv: map[string]any{ - "general.architecture": "foo", - "foo.attention.head_count": uint32(4), - "foo.attention.head_count_kv": uint32(1), - }, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "attn_v.weight_q3_k", - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K, - expected: fsggml.TensorTypeQ3_K, - }, - { - name: "attn_v.weight_q2_k_s_q4_k", - kv: map[string]any{ - "general.architecture": "foo", - "foo.attention.head_count": uint32(4), - "foo.attention.head_count_kv": uint32(1), - }, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K_S, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "attn_v.weight_q3_k_m", - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ5_K, - }, - { - name: "attn_v.weight_q3_k_m_i", - qs: quantizeState{ - iAttnV: 2, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "attn_v.weight_q3_k_l", - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_v.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_L, - expected: fsggml.TensorTypeQ5_K, - }, { name: "attn_v.weight_q4_k_m", qs: quantizeState{ @@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) { ftype: fsggml.FileTypeF32, expected: fsggml.TensorTypeQ8_0, }, - { - name: "ffn_down_q2_k", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K, - expected: fsggml.TensorTypeQ3_K, - }, - { - name: "ffn_down_q2_k_s", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K_S, - expected: fsggml.TensorTypeQ4_0, - }, - { - name: "ffn_down_q2_k_s_layers", - qs: quantizeState{ - iFfnDown: 2, - nFfnDown: 3 * 8, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K_S, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "ffn_down_q3_k_m_base", - qs: quantizeState{ - iFfnDown: 1, - nFfnDown: 8, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ3_K, - }, - { - name: "ffn_down_q3_k_m_16", - qs: quantizeState{ - iFfnDown: 2, - nFfnDown: 3 * 16, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ5_K, - }, - { - name: "ffn_down_q3_k_m_8", - qs: quantizeState{ - iFfnDown: 2, - nFfnDown: 3 * 8, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "ffn_down_q3_k_l", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_L, - expected: fsggml.TensorTypeQ5_K, - }, { name: "ffn_down_q4_k_m", qs: quantizeState{ @@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) { ftype: fsggml.FileTypeQ4_K_M, expected: fsggml.TensorTypeQ6_K, }, - { - name: "ffn_down_q5_k_m", - qs: quantizeState{ - iFfnDown: 2, - nFfnDown: 3 * 8, - }, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "ffn_down", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ5_K_M, - expected: fsggml.TensorTypeQ6_K, - }, { name: "ffn_down_q4_k_s", qs: quantizeState{ @@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) { ftype: fsggml.FileTypeQ4_K_S, expected: fsggml.TensorTypeQ5_K, }, - { - name: "attn_output.weight_8_expert", - qs: quantizeState{}, - kv: map[string]any{ - "general.architecture": "foo", - "foo.expert_count": uint32(8), - }, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_output.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K, - expected: fsggml.TensorTypeQ5_K, - }, - { - name: "attn_output.weight_q2", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_output.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ2_K, - expected: fsggml.TensorTypeQ3_K, - }, - { - name: "attn_output.weight_q3_k_m", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_output.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ4_K, - }, - { - name: "attn_output.weight_q3_k_l", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_output.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_L, - expected: fsggml.TensorTypeQ5_K, - }, - { - name: "attn_qkv.weight_q3_k_m", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_qkv.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ3_K_M, - expected: fsggml.TensorTypeQ4_K, - }, { name: "attn_qkv.weight_q4_k_m", qs: quantizeState{}, @@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) { ftype: fsggml.FileTypeQ4_K_M, expected: fsggml.TensorTypeQ5_K, }, - { - name: "attn_qkv.weight_q5_k_m", - qs: quantizeState{}, - kv: map[string]any{}, - newType: fsggml.TensorTypeQ4_0, - tensor_name: "blk.0.attn_qkv.weight", - shape: []uint64{256}, - ftype: fsggml.FileTypeQ5_K_M, - expected: fsggml.TensorTypeQ6_K, - }, } for _, tt := range cases { t.Run(tt.name, func(t *testing.T) {