The quantization PR didn't block all unsupported file types; this PR fixes that. It also updates the API docs to reflect the now-reduced set of supported types.
Daniel Hiltgen, 2025-05-12 15:23:31 -07:00 (committed via GitHub)
parent 0cefd46f23
commit 9d6df90805
GPG Key ID: B5690EEEBB952194
4 changed files with 88 additions and 382 deletions

File 1 of 4: API documentation

@@ -19,7 +19,7 @@
 ### Model names
-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q8_0` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
 ### Durations
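The `model:tag` convention above is simple enough to sketch in Go. This is an illustrative helper, not code from this commit; the `splitModelName` name is hypothetical:

```go
package main

import (
	"fmt"
	"strings"
)

// splitModelName illustrates the documented `model:tag` format: the tag
// is optional and defaults to "latest", and the model part may carry a
// namespace such as "example/model".
func splitModelName(name string) (model, tag string) {
	// Split on the last ':' so namespaced names like
	// "example/model:tag" are handled correctly.
	if i := strings.LastIndex(name, ":"); i >= 0 {
		return name[:i], name[i+1:]
	}
	return name, "latest"
}

func main() {
	fmt.Println(splitModelName("orca-mini:3b-q8_0")) // orca-mini 3b-q8_0
	fmt.Println(splitModelName("llama3:70b"))        // llama3 70b
	fmt.Println(splitModelName("example/model"))     // example/model latest
}
```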
@@ -952,19 +952,8 @@ If you are creating a model from a safetensors directory or from a GGUF file, yo
 | Type | Recommended |
 | --- | :-: |
-| q2_K | |
-| q3_K_L | |
-| q3_K_M | |
-| q3_K_S | |
-| q4_0 | |
-| q4_1 | |
 | q4_K_M | * |
 | q4_K_S | |
-| q5_0 | |
-| q5_1 | |
-| q5_K_M | |
-| q5_K_S | |
-| q6_K | |
 | q8_0 | * |
 ### Examples
@@ -1009,8 +998,8 @@ Quantize a non-quantized model.
 ```shell
 curl http://localhost:11434/api/create -d '{
-  "model": "llama3.1:quantized",
-  "from": "llama3.1:8b-instruct-fp16",
+  "model": "llama3.2:quantized",
+  "from": "llama3.2:3b-instruct-fp16",
   "quantize": "q4_K_M"
 }'
 ```
@@ -1020,12 +1009,14 @@ curl http://localhost:11434/api/create -d '{
 A stream of JSON objects is returned:
 ```json
-{"status":"quantizing F16 model to Q4_K_M"}
-{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"}
-{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"}
-{"status":"using existing layer sha256:0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177"}
-{"status":"using existing layer sha256:966de95ca8a62200913e3f8bfbf84c8494536f1b94b49166851e76644e966396"}
-{"status":"using existing layer sha256:fcc5a6bec9daf9b561a68827b67ab6088e1dba9d1fa2a50d7bbcc8384e0a265d"}
-{"status":"using existing layer sha256:a70ff7e570d97baaf4e62ac6e6ad9975e04caa6d900d3742d37698494479e0cd"}
+{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":12302}
+{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":6433687552}
+{"status":"verifying conversion"}
+{"status":"creating new layer sha256:fb7f4f211b89c6c4928ff4ddb73db9f9c0cfca3e000c3e40d6cf27ddc6ca72eb"}
 {"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"}
-{"status":"creating new layer sha256:455f34728c9b5dd3376378bfb809ee166c145b0b4c1f1a6feca069055066ef9a"}
 {"status":"writing manifest"}
 {"status":"success"}
 ```
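For reference, the request/response pair above is straightforward to drive from Go. A minimal client sketch, assuming only what the docs show (the `/api/create` endpoint and a stream of JSON objects carrying `status` plus optional `digest`/`total`/`completed` progress fields); type and variable names are illustrative:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// createProgress holds the fields shown in the streamed response above.
type createProgress struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

func main() {
	body, err := json.Marshal(map[string]string{
		"model":    "llama3.2:quantized",
		"from":     "llama3.2:3b-instruct-fp16",
		"quantize": "q4_K_M",
	})
	if err != nil {
		panic(err)
	}
	resp, err := http.Post("http://localhost:11434/api/create", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The body is a stream of newline-delimited JSON objects; a
	// json.Decoder consumes them one at a time until EOF.
	dec := json.NewDecoder(resp.Body)
	for dec.More() {
		var p createProgress
		if err := dec.Decode(&p); err != nil {
			panic(err)
		}
		if p.Total > 0 {
			fmt.Printf("%s (%d/%d)\n", p.Status, p.Completed, p.Total)
		} else {
			fmt.Println(p.Status)
		}
	}
}
```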
@@ -1163,29 +1154,37 @@ A single JSON object will be returned.
 {
   "models": [
     {
-      "name": "codellama:13b",
-      "modified_at": "2023-11-04T14:56:49.277302595-07:00",
-      "size": 7365960935,
-      "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+      "name": "deepseek-r1:latest",
+      "model": "deepseek-r1:latest",
+      "modified_at": "2025-05-10T08:06:48.639712648-07:00",
+      "size": 4683075271,
+      "digest": "0a8c266910232fd3291e71e5ba1e058cc5af9d411192cf88b6d30e92b6e73163",
       "details": {
+        "parent_model": "",
         "format": "gguf",
-        "family": "llama",
-        "families": null,
-        "parameter_size": "13B",
-        "quantization_level": "Q4_0"
+        "family": "qwen2",
+        "families": [
+          "qwen2"
+        ],
+        "parameter_size": "7.6B",
+        "quantization_level": "Q4_K_M"
       }
     },
     {
-      "name": "llama3:latest",
-      "modified_at": "2023-12-07T09:32:18.757212583-08:00",
-      "size": 3825819519,
-      "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
+      "name": "llama3.2:latest",
+      "model": "llama3.2:latest",
+      "modified_at": "2025-05-04T17:37:44.706015396-07:00",
+      "size": 2019393189,
+      "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
       "details": {
+        "parent_model": "",
         "format": "gguf",
         "family": "llama",
-        "families": null,
-        "parameter_size": "7B",
-        "quantization_level": "Q4_0"
+        "families": [
+          "llama"
+        ],
+        "parameter_size": "3.2B",
+        "quantization_level": "Q4_K_M"
       }
     }
   ]
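The reshaped list response decodes cleanly into Go types along these lines; a sketch derived only from the JSON above, with illustrative (not the server's) type names:

```go
package apitypes

import "time"

// listResponse mirrors the JSON shown above.
type listResponse struct {
	Models []listModel `json:"models"`
}

type listModel struct {
	Name       string       `json:"name"`
	Model      string       `json:"model"`
	ModifiedAt time.Time    `json:"modified_at"` // RFC 3339, as shown above
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    modelDetails `json:"details"`
}

type modelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}
```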

File 2 of 4: GGML FileType definitions (Go)

@@ -12,42 +12,42 @@ type FileType uint32
 const (
 	FileTypeF32 FileType = iota
 	FileTypeF16
-	FileTypeQ4_0
-	FileTypeQ4_1
+	fileTypeQ4_0
+	fileTypeQ4_1
 	fileTypeQ4_1_F16 // unused by GGML
 	fileTypeQ4_2     // unused by GGML
 	fileTypeQ4_3     // unused by GGML
 	FileTypeQ8_0
-	FileTypeQ5_0
-	FileTypeQ5_1
-	FileTypeQ2_K
-	FileTypeQ3_K_S
-	FileTypeQ3_K_M
-	FileTypeQ3_K_L
+	fileTypeQ5_0
+	fileTypeQ5_1
+	fileTypeQ2_K
+	fileTypeQ3_K_S
+	fileTypeQ3_K_M
+	fileTypeQ3_K_L
 	FileTypeQ4_K_S
 	FileTypeQ4_K_M
-	FileTypeQ5_K_S
-	FileTypeQ5_K_M
-	FileTypeQ6_K
-	fileTypeIQ2_XXS // not supported by ollama
-	fileTypeIQ2_XS  // not supported by ollama
-	FileTypeQ2_K_S
-	fileTypeIQ3_XS  // not supported by ollama
-	fileTypeIQ3_XXS // not supported by ollama
-	fileTypeIQ1_S   // not supported by ollama
-	fileTypeIQ4_NL  // not supported by ollama
-	fileTypeIQ3_S   // not supported by ollama
-	fileTypeIQ3_M   // not supported by ollama
-	fileTypeIQ2_S   // not supported by ollama
-	fileTypeIQ2_M   // not supported by ollama
-	fileTypeIQ4_XS  // not supported by ollama
-	fileTypeIQ1_M   // not supported by ollama
+	fileTypeQ5_K_S
+	fileTypeQ5_K_M
+	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeIQ3_XS
+	fileTypeIQ3_XXS
+	fileTypeIQ1_S
+	fileTypeIQ4_NL
+	fileTypeIQ3_S
+	fileTypeIQ3_M
+	fileTypeIQ2_S
+	fileTypeIQ2_M
+	fileTypeIQ4_XS
+	fileTypeIQ1_M
 	FileTypeBF16
 	fileTypeQ4_0_4_4 // unused by GGML
 	fileTypeQ4_0_4_8 // unused by GGML
 	fileTypeQ4_0_8_8 // unused by GGML
-	fileTypeTQ1_0 // not supported by ollama
-	fileTypeTQ2_0 // not supported by ollama
+	fileTypeTQ1_0
+	fileTypeTQ2_0
 	FileTypeUnknown = 1024
 )
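The point of the lower-casing above is that Go only exports identifiers starting with an upper-case letter, so the renamed constants drop out of the package's public API while staying usable internally (for instance, so `String()` below can still label existing models). A sketch of the effect from an importing package; the import path here is hypothetical:

```go
package main

import (
	"fmt"

	// Hypothetical import path, for illustration only.
	fsggml "example.com/fs/ggml"
)

func main() {
	ft := fsggml.FileTypeQ4_K_M // OK: exported, still a valid quantize target
	// ft = fsggml.fileTypeQ6_K // compile error: unexported identifier
	fmt.Println(ft) // prints "Q4_K_M" via the String method
}
```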
@@ -60,36 +60,12 @@ func ParseFileType(s string) (FileType, error) {
 		return FileTypeF32, nil
 	case "F16":
 		return FileTypeF16, nil
-	case "Q4_0":
-		return FileTypeQ4_0, nil
-	case "Q4_1":
-		return FileTypeQ4_1, nil
 	case "Q8_0":
 		return FileTypeQ8_0, nil
-	case "Q5_0":
-		return FileTypeQ5_0, nil
-	case "Q5_1":
-		return FileTypeQ5_1, nil
-	case "Q2_K":
-		return FileTypeQ2_K, nil
-	case "Q3_K_S":
-		return FileTypeQ3_K_S, nil
-	case "Q3_K_M":
-		return FileTypeQ3_K_M, nil
-	case "Q3_K_L":
-		return FileTypeQ3_K_L, nil
 	case "Q4_K_S":
 		return FileTypeQ4_K_S, nil
 	case "Q4_K_M", "Q4_K":
 		return FileTypeQ4_K_M, nil
-	case "Q5_K_S":
-		return FileTypeQ5_K_S, nil
-	case "Q5_K_M", "Q5_K":
-		return FileTypeQ5_K_M, nil
-	case "Q6_K":
-		return FileTypeQ6_K, nil
-	case "Q2_K_S":
-		return FileTypeQ2_K_S, nil
 	case "BF16":
 		return FileTypeBF16, nil
 	default:
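With those cases gone, `ParseFileType` is the gate that actually blocks unsupported targets: anything other than F32, F16, BF16, Q8_0, Q4_K_S, or Q4_K_M (plus the `Q4_K` alias) falls through to the `default` branch and returns an error. A usage sketch, with the wrapper name hypothetical:

```go
// validateQuantizeTarget shows the gate in use.
func validateQuantizeTarget(s string) error {
	_, err := fsggml.ParseFileType(s)
	return err
}

// validateQuantizeTarget("Q4_K_M") == nil   (still supported)
// validateQuantizeTarget("Q6_K")   != nil   (now rejected)
```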
@@ -111,40 +87,41 @@ func ParseFileType(s string) (FileType, error) {
 }
 func (t FileType) String() string {
+	// Note: this routine will return a broader set of file types for existing models
 	switch t {
 	case FileTypeF32:
 		return "F32"
 	case FileTypeF16:
 		return "F16"
-	case FileTypeQ4_0:
+	case fileTypeQ4_0:
 		return "Q4_0"
-	case FileTypeQ4_1:
+	case fileTypeQ4_1:
 		return "Q4_1"
 	case FileTypeQ8_0:
 		return "Q8_0"
-	case FileTypeQ5_0:
+	case fileTypeQ5_0:
 		return "Q5_0"
-	case FileTypeQ5_1:
+	case fileTypeQ5_1:
 		return "Q5_1"
-	case FileTypeQ2_K:
+	case fileTypeQ2_K:
 		return "Q2_K"
-	case FileTypeQ3_K_S:
+	case fileTypeQ3_K_S:
 		return "Q3_K_S"
-	case FileTypeQ3_K_M:
+	case fileTypeQ3_K_M:
 		return "Q3_K_M"
-	case FileTypeQ3_K_L:
+	case fileTypeQ3_K_L:
 		return "Q3_K_L"
 	case FileTypeQ4_K_S:
 		return "Q4_K_S"
 	case FileTypeQ4_K_M:
 		return "Q4_K_M"
-	case FileTypeQ5_K_S:
+	case fileTypeQ5_K_S:
 		return "Q5_K_S"
-	case FileTypeQ5_K_M:
+	case fileTypeQ5_K_M:
 		return "Q5_K_M"
-	case FileTypeQ6_K:
+	case fileTypeQ6_K:
 		return "Q6_K"
-	case FileTypeQ2_K_S:
+	case fileTypeQ2_K_S:
 		return "Q2_K_S"
 	case FileTypeBF16:
 		return "BF16"
@@ -163,35 +140,35 @@ func (ftype FileType) ToTensorType() TensorType {
 		return TensorTypeF32
 	case FileTypeF16:
 		return TensorTypeF16
-	case FileTypeQ4_0:
+	case fileTypeQ4_0:
 		return TensorTypeQ4_0
-	case FileTypeQ4_1:
+	case fileTypeQ4_1:
 		return TensorTypeQ4_1
 	case FileTypeQ8_0:
 		return TensorTypeQ8_0
-	case FileTypeQ5_0:
+	case fileTypeQ5_0:
 		return TensorTypeQ5_0
-	case FileTypeQ5_1:
+	case fileTypeQ5_1:
 		return TensorTypeQ5_1
-	case FileTypeQ2_K:
+	case fileTypeQ2_K:
 		return TensorTypeQ2_K
-	case FileTypeQ3_K_S:
+	case fileTypeQ3_K_S:
 		return TensorTypeQ3_K
-	case FileTypeQ3_K_M:
+	case fileTypeQ3_K_M:
 		return TensorTypeQ3_K
-	case FileTypeQ3_K_L:
+	case fileTypeQ3_K_L:
 		return TensorTypeQ3_K
 	case FileTypeQ4_K_S:
 		return TensorTypeQ4_K
 	case FileTypeQ4_K_M:
 		return TensorTypeQ4_K
-	case FileTypeQ5_K_S:
+	case fileTypeQ5_K_S:
 		return TensorTypeQ5_K
-	case FileTypeQ5_K_M:
+	case fileTypeQ5_K_M:
 		return TensorTypeQ5_K
-	case FileTypeQ6_K:
+	case fileTypeQ6_K:
 		return TensorTypeQ6_K
-	case FileTypeQ2_K_S:
+	case fileTypeQ2_K_S:
 		return TensorTypeQ2_K
 	case FileTypeBF16:
 		return TensorTypeBF16

File 3 of 4: quantization type-selection logic (Go)

@@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 			newType = fsggml.TensorTypeQ6_K
 		}
 	} else if strings.Contains(name, "attn_v.weight") {
-		if ftype == fsggml.FileTypeQ2_K {
-			if kv.GQA() >= 4 {
-				newType = fsggml.TensorTypeQ4_K
-			} else {
-				newType = fsggml.TensorTypeQ3_K
-			}
-		} else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 {
-			newType = fsggml.TensorTypeQ4_K
-		} else if ftype == fsggml.FileTypeQ3_K_M {
-			if qs.iAttnV < 2 {
-				newType = fsggml.TensorTypeQ5_K
-			} else {
-				newType = fsggml.TensorTypeQ4_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ5_K
-		} else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) &&
+		if (ftype == fsggml.FileTypeQ4_K_M) &&
 			useMoreBits(qs.iAttnV, qs.nAttnV) {
 			newType = fsggml.TensorTypeQ6_K
 		} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
@@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 	} else if strings.Contains(name, "ffn_down") {
 		iLayer := qs.iFfnDown
 		n_layer := qs.nFfnDown
-		if ftype == fsggml.FileTypeQ2_K {
-			newType = fsggml.TensorTypeQ3_K
-		} else if ftype == fsggml.FileTypeQ2_K_S {
-			if iLayer < n_layer/8 {
-				newType = fsggml.TensorTypeQ4_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_M {
-			if iLayer < n_layer/16 {
-				newType = fsggml.TensorTypeQ5_K
-			} else if useMoreBits(iLayer, n_layer) {
-				newType = fsggml.TensorTypeQ4_K
-			} else {
-				newType = fsggml.TensorTypeQ3_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ5_K
-		} else if ftype == fsggml.FileTypeQ4_K_M {
+		if ftype == fsggml.FileTypeQ4_K_M {
 			if useMoreBits(iLayer, n_layer) {
 				newType = fsggml.TensorTypeQ6_K
 			}
-		} else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) {
-			newType = fsggml.TensorTypeQ6_K
 		} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
 			newType = fsggml.TensorTypeQ5_K
 		}
 		qs.iFfnDown++
 	} else if strings.Contains(name, "attn_output.weight") {
 		if nExperts == 8 {
-			if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M ||
-				ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
-				newType = fsggml.TensorTypeQ5_K
-			}
-		} else {
-			if ftype == fsggml.FileTypeQ2_K {
-				newType = fsggml.TensorTypeQ3_K
-			} else if ftype == fsggml.FileTypeQ3_K_M {
-				newType = fsggml.TensorTypeQ4_K
-			} else if ftype == fsggml.FileTypeQ3_K_L {
+			if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
 				newType = fsggml.TensorTypeQ5_K
 			}
 		}
 	} else if strings.Contains(name, "attn_qkv.weight") {
-		if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ4_K
-		} else if ftype == fsggml.FileTypeQ4_K_M {
+		if ftype == fsggml.FileTypeQ4_K_M {
 			newType = fsggml.TensorTypeQ5_K
-		} else if ftype == fsggml.FileTypeQ5_K_M {
-			newType = fsggml.TensorTypeQ6_K
 		}
 	}
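Both surviving branches lean on `useMoreBits`, whose body isn't part of this diff. The sketch below mirrors the llama.cpp heuristic this code appears to be ported from (spend extra bits on the first and last eighth of the layers, plus every third layer in between); treat it as an assumption, not the exact source:

```go
// useMoreBits reports whether layer iLayer (of nLayers) should get a
// higher-precision quantization. Assumed to mirror llama.cpp's
// llama_tensor_quantize_use_more_bits; not taken from this commit.
func useMoreBits(iLayer, nLayers int) bool {
	return iLayer < nLayers/8 || iLayer >= 7*nLayers/8 ||
		(iLayer-nLayers/8)%3 == 2
}
```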

File 4 of 4: quantization type-selection tests (Go)

@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "attn_v.weight_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "attn_v.weight_q2_k_s_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_m",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "attn_v.weight_q3_k_m_i",
-			qs: quantizeState{
-				iAttnV: 2,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_l",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "attn_v.weight_q4_k_m",
 			qs: quantizeState{
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ8_0,
 		},
-		{
-			name:        "ffn_down_q2_k",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "ffn_down_q2_k_s",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_0,
-		},
-		{
-			name: "ffn_down_q2_k_s_layers",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_base",
-			qs: quantizeState{
-				iFfnDown: 1,
-				nFfnDown: 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_16",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 16,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_8",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "ffn_down_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "ffn_down_q4_k_m",
 			qs: quantizeState{
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "ffn_down_q5_k_m",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 		{
 			name: "ffn_down_q4_k_s",
 			qs: quantizeState{
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_S,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name: "attn_output.weight_8_expert",
-			qs:   quantizeState{},
-			kv: map[string]any{
-				"general.architecture": "foo",
-				"foo.expert_count":     uint32(8),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_output.weight_q2",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_qkv.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
 		{
 			name: "attn_qkv.weight_q4_k_m",
 			qs:   quantizeState{},
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name:        "attn_qkv.weight_q5_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 	}
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
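The deleted cases all exercised source file types that can no longer be requested. A complementary test, sketched here as a suggestion rather than something in this commit (it assumes the test file's existing `fsggml` import), would pin down the new `ParseFileType` behavior directly:

```go
func TestParseFileTypeRejectsUnsupported(t *testing.T) {
	// These names parsed successfully before this change and should
	// now fall through to ParseFileType's error path.
	unsupported := []string{
		"Q4_0", "Q4_1", "Q5_0", "Q5_1", "Q2_K", "Q2_K_S",
		"Q3_K_S", "Q3_K_M", "Q3_K_L", "Q5_K_S", "Q5_K_M", "Q6_K",
	}
	for _, s := range unsupported {
		if _, err := fsggml.ParseFileType(s); err == nil {
			t.Errorf("ParseFileType(%q) succeeded, want error", s)
		}
	}
}
```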