The quantization PR didn't block all unsupported file types; this PR
fixes that. It also updates the API docs to reflect the now-reduced
set of supported types.
Daniel Hiltgen 2025-05-12 15:23:31 -07:00 committed by GitHub
parent 0cefd46f23
commit 9d6df90805
4 changed files with 88 additions and 382 deletions
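The enforcement boils down to validating the requested type with `ParseFileType` before any quantization work starts. A minimal sketch of that gate, assuming ollama's `fs/ggml` import path and a hypothetical `handleQuantize` wrapper (neither the wrapper nor the error text is taken from this diff):

```go
package main

import (
	"fmt"
	"strings"

	fsggml "github.com/ollama/ollama/fs/ggml" // import path assumed
)

// handleQuantize is a hypothetical wrapper showing the gate this PR
// enforces: unsupported types now fail before any quantization work.
func handleQuantize(requested string) error {
	ft, err := fsggml.ParseFileType(strings.ToUpper(requested))
	if err != nil {
		// q4_0, q5_K_M, etc. are rejected here after this change.
		return fmt.Errorf("unsupported quantization type %q: %w", requested, err)
	}
	fmt.Println("quantizing to", ft) // FileType implements Stringer
	return nil
}

func main() {
	fmt.Println(handleQuantize("q4_K_M")) // <nil>
	fmt.Println(handleQuantize("q4_0"))   // error
}
```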


@@ -19,7 +19,7 @@
### Model names
Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q8_0` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
### Durations
@@ -952,19 +952,8 @@ If you are creating a model from a safetensors directory or from a GGUF file, yo
| Type | Recommended |
| --- | :-: |
| q2_K | |
| q3_K_L | |
| q3_K_M | |
| q3_K_S | |
| q4_0 | |
| q4_1 | |
| q4_K_M | * |
| q4_K_S | |
| q5_0 | |
| q5_1 | |
| q5_K_M | |
| q5_K_S | |
| q6_K | |
| q8_0 | * |
### Examples
@@ -1009,8 +998,8 @@ Quantize a non-quantized model.
```shell
curl http://localhost:11434/api/create -d '{
"model": "llama3.1:quantized",
"from": "llama3.1:8b-instruct-fp16",
"model": "llama3.2:quantized",
"from": "llama3.2:3b-instruct-fp16",
"quantize": "q4_K_M"
}'
```
@@ -1020,12 +1009,14 @@ curl http://localhost:11434/api/create -d '{
A stream of JSON objects is returned:
```json
{"status":"quantizing F16 model to Q4_K_M"}
{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"}
{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"}
{"status":"using existing layer sha256:0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177"}
{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":12302}
{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":6433687552}
{"status":"verifying conversion"}
{"status":"creating new layer sha256:fb7f4f211b89c6c4928ff4ddb73db9f9c0cfca3e000c3e40d6cf27ddc6ca72eb"}
{"status":"using existing layer sha256:966de95ca8a62200913e3f8bfbf84c8494536f1b94b49166851e76644e966396"}
{"status":"using existing layer sha256:fcc5a6bec9daf9b561a68827b67ab6088e1dba9d1fa2a50d7bbcc8384e0a265d"}
{"status":"using existing layer sha256:a70ff7e570d97baaf4e62ac6e6ad9975e04caa6d900d3742d37698494479e0cd"}
{"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"}
{"status":"creating new layer sha256:455f34728c9b5dd3376378bfb809ee166c145b0b4c1f1a6feca069055066ef9a"}
{"status":"writing manifest"}
{"status":"success"}
```
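The quantize step now reports progress: `total` and `completed` are byte counts, so a client can render a percentage while the F16 source is rewritten. A minimal client sketch, with field names taken from the stream above and everything else illustrative:

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

// progressMsg mirrors the status objects shown above; field names come
// from the example stream, the rest of this program is illustrative.
type progressMsg struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

func main() {
	body := strings.NewReader(`{"model":"llama3.2:quantized","from":"llama3.2:3b-instruct-fp16","quantize":"q4_K_M"}`)
	resp, err := http.Post("http://localhost:11434/api/create", "application/json", body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Each line of the response is one JSON object.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		var msg progressMsg
		if err := json.Unmarshal(sc.Bytes(), &msg); err != nil {
			continue
		}
		if msg.Total > 0 {
			fmt.Printf("%s: %.1f%%\n", msg.Status, 100*float64(msg.Completed)/float64(msg.Total))
		} else {
			fmt.Println(msg.Status)
		}
	}
}
```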
@@ -1163,29 +1154,37 @@ A single JSON object will be returned.
{
"models": [
{
"name": "codellama:13b",
"modified_at": "2023-11-04T14:56:49.277302595-07:00",
"size": 7365960935,
"digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
"name": "deepseek-r1:latest",
"model": "deepseek-r1:latest",
"modified_at": "2025-05-10T08:06:48.639712648-07:00",
"size": 4683075271,
"digest": "0a8c266910232fd3291e71e5ba1e058cc5af9d411192cf88b6d30e92b6e73163",
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": null,
"parameter_size": "13B",
"quantization_level": "Q4_0"
"family": "qwen2",
"families": [
"qwen2"
],
"parameter_size": "7.6B",
"quantization_level": "Q4_K_M"
}
},
{
"name": "llama3:latest",
"modified_at": "2023-12-07T09:32:18.757212583-08:00",
"size": 3825819519,
"digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
"name": "llama3.2:latest",
"model": "llama3.2:latest",
"modified_at": "2025-05-04T17:37:44.706015396-07:00",
"size": 2019393189,
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": null,
"parameter_size": "7B",
"quantization_level": "Q4_0"
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
}
]
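The response now carries a `model` field alongside `name`, and `families` is a populated array rather than `null`. For clients decoding this by hand, a matching set of Go types might look like the following (field names are inferred from the JSON above, not taken from ollama's `api` package):

```go
package tagsclient

import "time"

// Types mirroring the /api/tags response shown above. Names are
// inferred from the JSON; ollama's own api package may differ.
type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

type ListedModel struct {
	Name       string       `json:"name"`
	Model      string       `json:"model"`
	ModifiedAt time.Time    `json:"modified_at"`
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details"`
}

type ListResponse struct {
	Models []ListedModel `json:"models"`
}
```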


@@ -12,42 +12,42 @@ type FileType uint32
const (
FileTypeF32 FileType = iota
FileTypeF16
FileTypeQ4_0
FileTypeQ4_1
fileTypeQ4_0
fileTypeQ4_1
fileTypeQ4_1_F16 // unused by GGML
fileTypeQ4_2 // unused by GGML
fileTypeQ4_3 // unused by GGML
FileTypeQ8_0
FileTypeQ5_0
FileTypeQ5_1
FileTypeQ2_K
FileTypeQ3_K_S
FileTypeQ3_K_M
FileTypeQ3_K_L
fileTypeQ5_0
fileTypeQ5_1
fileTypeQ2_K
fileTypeQ3_K_S
fileTypeQ3_K_M
fileTypeQ3_K_L
FileTypeQ4_K_S
FileTypeQ4_K_M
FileTypeQ5_K_S
FileTypeQ5_K_M
FileTypeQ6_K
fileTypeIQ2_XXS // not supported by ollama
fileTypeIQ2_XS // not supported by ollama
FileTypeQ2_K_S
fileTypeIQ3_XS // not supported by ollama
fileTypeIQ3_XXS // not supported by ollama
fileTypeIQ1_S // not supported by ollama
fileTypeIQ4_NL // not supported by ollama
fileTypeIQ3_S // not supported by ollama
fileTypeIQ3_M // not supported by ollama
fileTypeIQ2_S // not supported by ollama
fileTypeIQ2_M // not supported by ollama
fileTypeIQ4_XS // not supported by ollama
fileTypeIQ1_M // not supported by ollama
fileTypeQ5_K_S
fileTypeQ5_K_M
fileTypeQ6_K
fileTypeIQ2_XXS
fileTypeIQ2_XS
fileTypeQ2_K_S
fileTypeIQ3_XS
fileTypeIQ3_XXS
fileTypeIQ1_S
fileTypeIQ4_NL
fileTypeIQ3_S
fileTypeIQ3_M
fileTypeIQ2_S
fileTypeIQ2_M
fileTypeIQ4_XS
fileTypeIQ1_M
FileTypeBF16
fileTypeQ4_0_4_4 // unused by GGML
fileTypeQ4_0_4_8 // unused by GGML
fileTypeQ4_0_8_8 // unused by GGML
fileTypeTQ1_0 // not supported by ollama
fileTypeTQ2_0 // not supported by ollama
fileTypeTQ1_0
fileTypeTQ2_0
FileTypeUnknown = 1024
)
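Lower-casing a constant is doing the real work here: Go only exports identifiers that start with an upper-case letter, so the unexported `fileType*` values stay available for labeling existing models while other packages can no longer select them as quantization targets. A sketch of the resulting behavior, assuming ollama's `fs/ggml` import path (the exact error text is not shown in this diff):

```go
package main

import (
	"fmt"

	fsggml "github.com/ollama/ollama/fs/ggml" // import path assumed
)

func main() {
	// Still supported: parses and round-trips through String().
	if ft, err := fsggml.ParseFileType("Q4_K_M"); err == nil {
		fmt.Println("ok:", ft) // ok: Q4_K_M
	}

	// Removed from ParseFileType: rejected as a quantization target,
	// though String() can still render Q4_0 when reading an old model.
	if _, err := fsggml.ParseFileType("Q4_0"); err != nil {
		fmt.Println("rejected:", err)
	}
}
```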
@@ -60,36 +60,12 @@ func ParseFileType(s string) (FileType, error) {
return FileTypeF32, nil
case "F16":
return FileTypeF16, nil
case "Q4_0":
return FileTypeQ4_0, nil
case "Q4_1":
return FileTypeQ4_1, nil
case "Q8_0":
return FileTypeQ8_0, nil
case "Q5_0":
return FileTypeQ5_0, nil
case "Q5_1":
return FileTypeQ5_1, nil
case "Q2_K":
return FileTypeQ2_K, nil
case "Q3_K_S":
return FileTypeQ3_K_S, nil
case "Q3_K_M":
return FileTypeQ3_K_M, nil
case "Q3_K_L":
return FileTypeQ3_K_L, nil
case "Q4_K_S":
return FileTypeQ4_K_S, nil
case "Q4_K_M", "Q4_K":
return FileTypeQ4_K_M, nil
case "Q5_K_S":
return FileTypeQ5_K_S, nil
case "Q5_K_M", "Q5_K":
return FileTypeQ5_K_M, nil
case "Q6_K":
return FileTypeQ6_K, nil
case "Q2_K_S":
return FileTypeQ2_K_S, nil
case "BF16":
return FileTypeBF16, nil
default:
@@ -111,40 +87,41 @@ func ParseFileType(s string) (FileType, error) {
}
func (t FileType) String() string {
// Note: this routine will return a broader set of file types for existing models
switch t {
case FileTypeF32:
return "F32"
case FileTypeF16:
return "F16"
case FileTypeQ4_0:
case fileTypeQ4_0:
return "Q4_0"
case FileTypeQ4_1:
case fileTypeQ4_1:
return "Q4_1"
case FileTypeQ8_0:
return "Q8_0"
case FileTypeQ5_0:
case fileTypeQ5_0:
return "Q5_0"
case FileTypeQ5_1:
case fileTypeQ5_1:
return "Q5_1"
case FileTypeQ2_K:
case fileTypeQ2_K:
return "Q2_K"
case FileTypeQ3_K_S:
case fileTypeQ3_K_S:
return "Q3_K_S"
case FileTypeQ3_K_M:
case fileTypeQ3_K_M:
return "Q3_K_M"
case FileTypeQ3_K_L:
case fileTypeQ3_K_L:
return "Q3_K_L"
case FileTypeQ4_K_S:
return "Q4_K_S"
case FileTypeQ4_K_M:
return "Q4_K_M"
case FileTypeQ5_K_S:
case fileTypeQ5_K_S:
return "Q5_K_S"
case FileTypeQ5_K_M:
case fileTypeQ5_K_M:
return "Q5_K_M"
case FileTypeQ6_K:
case fileTypeQ6_K:
return "Q6_K"
case FileTypeQ2_K_S:
case fileTypeQ2_K_S:
return "Q2_K_S"
case FileTypeBF16:
return "BF16"
@@ -163,35 +140,35 @@ func (ftype FileType) ToTensorType() TensorType {
return TensorTypeF32
case FileTypeF16:
return TensorTypeF16
case FileTypeQ4_0:
case fileTypeQ4_0:
return TensorTypeQ4_0
case FileTypeQ4_1:
case fileTypeQ4_1:
return TensorTypeQ4_1
case FileTypeQ8_0:
return TensorTypeQ8_0
case FileTypeQ5_0:
case fileTypeQ5_0:
return TensorTypeQ5_0
case FileTypeQ5_1:
case fileTypeQ5_1:
return TensorTypeQ5_1
case FileTypeQ2_K:
case fileTypeQ2_K:
return TensorTypeQ2_K
case FileTypeQ3_K_S:
case fileTypeQ3_K_S:
return TensorTypeQ3_K
case FileTypeQ3_K_M:
case fileTypeQ3_K_M:
return TensorTypeQ3_K
case FileTypeQ3_K_L:
case fileTypeQ3_K_L:
return TensorTypeQ3_K
case FileTypeQ4_K_S:
return TensorTypeQ4_K
case FileTypeQ4_K_M:
return TensorTypeQ4_K
case FileTypeQ5_K_S:
case fileTypeQ5_K_S:
return TensorTypeQ5_K
case FileTypeQ5_K_M:
case fileTypeQ5_K_M:
return TensorTypeQ5_K
case FileTypeQ6_K:
case fileTypeQ6_K:
return TensorTypeQ6_K
case FileTypeQ2_K_S:
case fileTypeQ2_K_S:
return TensorTypeQ2_K
case FileTypeBF16:
return TensorTypeBF16


@@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
newType = fsggml.TensorTypeQ6_K
}
} else if strings.Contains(name, "attn_v.weight") {
if ftype == fsggml.FileTypeQ2_K {
if kv.GQA() >= 4 {
newType = fsggml.TensorTypeQ4_K
} else {
newType = fsggml.TensorTypeQ3_K
}
} else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ3_K_M {
if qs.iAttnV < 2 {
newType = fsggml.TensorTypeQ5_K
} else {
newType = fsggml.TensorTypeQ4_K
}
} else if ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ5_K
} else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) &&
if (ftype == fsggml.FileTypeQ4_K_M) &&
useMoreBits(qs.iAttnV, qs.nAttnV) {
newType = fsggml.TensorTypeQ6_K
} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
@@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
} else if strings.Contains(name, "ffn_down") {
iLayer := qs.iFfnDown
n_layer := qs.nFfnDown
if ftype == fsggml.FileTypeQ2_K {
newType = fsggml.TensorTypeQ3_K
} else if ftype == fsggml.FileTypeQ2_K_S {
if iLayer < n_layer/8 {
newType = fsggml.TensorTypeQ4_K
}
} else if ftype == fsggml.FileTypeQ3_K_M {
if iLayer < n_layer/16 {
newType = fsggml.TensorTypeQ5_K
} else if useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ4_K
} else {
newType = fsggml.TensorTypeQ3_K
}
} else if ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ5_K
} else if ftype == fsggml.FileTypeQ4_K_M {
if ftype == fsggml.FileTypeQ4_K_M {
if useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ6_K
}
} else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ6_K
} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
newType = fsggml.TensorTypeQ5_K
}
qs.iFfnDown++
} else if strings.Contains(name, "attn_output.weight") {
if nExperts == 8 {
if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M ||
ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
}
} else {
if ftype == fsggml.FileTypeQ2_K {
newType = fsggml.TensorTypeQ3_K
} else if ftype == fsggml.FileTypeQ3_K_M {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ3_K_L {
if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
}
}
} else if strings.Contains(name, "attn_qkv.weight") {
if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ4_K_M {
if ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
} else if ftype == fsggml.FileTypeQ5_K_M {
newType = fsggml.TensorTypeQ6_K
}
}
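Several of the retained branches key off `useMoreBits`, which this diff doesn't show. For reference, the llama.cpp heuristic it mirrors gives extra precision to the first eighth of layers, the last eighth, and every third layer in between; treat this reconstruction as an assumption rather than ollama's exact implementation:

```go
// useMoreBits reports whether layer iLayer of nLayers should be
// quantized at higher precision. Reconstructed from llama.cpp's
// use_more_bits heuristic; assumed, not copied from this PR.
func useMoreBits(iLayer, nLayers int) bool {
	return iLayer < nLayers/8 ||
		iLayer >= 7*nLayers/8 ||
		(iLayer-nLayers/8)%3 == 2
}
```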


@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeF32,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "attn_v.weight_q4_k",
kv: map[string]any{
"general.architecture": "foo",
"foo.attention.head_count": uint32(4),
"foo.attention.head_count_kv": uint32(1),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "attn_v.weight_q2_k_s_q4_k",
kv: map[string]any{
"general.architecture": "foo",
"foo.attention.head_count": uint32(4),
"foo.attention.head_count_kv": uint32(1),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k_m",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_v.weight_q3_k_m_i",
qs: quantizeState{
iAttnV: 2,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k_l",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_v.weight_q4_k_m",
qs: quantizeState{
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeF32,
expected: fsggml.TensorTypeQ8_0,
},
{
name: "ffn_down_q2_k",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "ffn_down_q2_k_s",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_0,
},
{
name: "ffn_down_q2_k_s_layers",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "ffn_down_q3_k_m_base",
qs: quantizeState{
iFfnDown: 1,
nFfnDown: 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "ffn_down_q3_k_m_16",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 16,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "ffn_down_q3_k_m_8",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "ffn_down_q3_k_l",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "ffn_down_q4_k_m",
qs: quantizeState{
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_M,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "ffn_down_q5_k_m",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ5_K_M,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "ffn_down_q4_k_s",
qs: quantizeState{
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_S,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_output.weight_8_expert",
qs: quantizeState{},
kv: map[string]any{
"general.architecture": "foo",
"foo.expert_count": uint32(8),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_output.weight_q2",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "attn_output.weight_q3_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_output.weight_q3_k_l",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_qkv.weight_q3_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_qkv.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_qkv.weight_q4_k_m",
qs: quantizeState{},
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_qkv.weight_q5_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_qkv.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ5_K_M,
expected: fsggml.TensorTypeQ6_K,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {