The quantization PR didn't block all unsupported file types; this PR fixes that and updates the API docs to reflect the now-reduced set of supported types.
parent 0cefd46f23
commit 9d6df90805

docs/api.md (67 lines changed)
@@ -19,7 +19,7 @@
 
 ### Model names
 
-Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q8_0` and `llama3:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
 
 ### Durations
 
@@ -952,19 +952,8 @@ If you are creating a model from a safetensors directory or from a GGUF file, yo
 
 | Type | Recommended |
 | --- | :-: |
-| q2_K | |
-| q3_K_L | |
-| q3_K_M | |
-| q3_K_S | |
-| q4_0 | |
-| q4_1 | |
 | q4_K_M | * |
 | q4_K_S | |
-| q5_0 | |
-| q5_1 | |
-| q5_K_M | |
-| q5_K_S | |
-| q6_K | |
 | q8_0 | * |
 
 ### Examples
@@ -1009,8 +998,8 @@ Quantize a non-quantized model.
 
 ```shell
 curl http://localhost:11434/api/create -d '{
-  "model": "llama3.1:quantized",
-  "from": "llama3.1:8b-instruct-fp16",
+  "model": "llama3.2:quantized",
+  "from": "llama3.2:3b-instruct-fp16",
   "quantize": "q4_K_M"
 }'
 ```
@@ -1020,12 +1009,14 @@ curl http://localhost:11434/api/create -d '{
 
 A stream of JSON objects is returned:
 
 ```json
-{"status":"quantizing F16 model to Q4_K_M"}
-{"status":"creating new layer sha256:667b0c1932bc6ffc593ed1d03f895bf2dc8dc6df21db3042284a6f4416b06a29"}
-{"status":"using existing layer sha256:11ce4ee3e170f6adebac9a991c22e22ab3f8530e154ee669954c4bc73061c258"}
-{"status":"using existing layer sha256:0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177"}
+{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":12302}
+{"status":"quantizing F16 model to Q4_K_M","digest":"0","total":6433687776,"completed":6433687552}
+{"status":"verifying conversion"}
+{"status":"creating new layer sha256:fb7f4f211b89c6c4928ff4ddb73db9f9c0cfca3e000c3e40d6cf27ddc6ca72eb"}
+{"status":"using existing layer sha256:966de95ca8a62200913e3f8bfbf84c8494536f1b94b49166851e76644e966396"}
+{"status":"using existing layer sha256:fcc5a6bec9daf9b561a68827b67ab6088e1dba9d1fa2a50d7bbcc8384e0a265d"}
+{"status":"using existing layer sha256:a70ff7e570d97baaf4e62ac6e6ad9975e04caa6d900d3742d37698494479e0cd"}
+{"status":"using existing layer sha256:56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb"}
+{"status":"creating new layer sha256:455f34728c9b5dd3376378bfb809ee166c145b0b4c1f1a6feca069055066ef9a"}
 {"status":"writing manifest"}
 {"status":"success"}
 ```
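Each line of this response body is a self-contained JSON object, so a client can render progress as it streams. Below is a minimal Go sketch of a consumer, using only the `status`, `total`, and `completed` fields shown above; the endpoint and payload match the curl example, and error handling is abbreviated.

```go
package main

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
)

// progress mirrors the fields shown in the stream above; any other
// fields in the JSON objects are ignored by encoding/json.
type progress struct {
	Status    string `json:"status"`
	Total     int64  `json:"total"`
	Completed int64  `json:"completed"`
}

func main() {
	payload := []byte(`{"model":"llama3.2:quantized","from":"llama3.2:3b-instruct-fp16","quantize":"q4_K_M"}`)
	resp, err := http.Post("http://localhost:11434/api/create", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	dec := json.NewDecoder(resp.Body) // decodes one JSON object per iteration
	for {
		var p progress
		if err := dec.Decode(&p); errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			panic(err)
		}
		if p.Total > 0 {
			fmt.Printf("%s (%d/%d)\n", p.Status, p.Completed, p.Total)
		} else {
			fmt.Println(p.Status)
		}
	}
}
```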
@@ -1163,29 +1154,37 @@ A single JSON object will be returned.
 {
   "models": [
     {
-      "name": "codellama:13b",
-      "modified_at": "2023-11-04T14:56:49.277302595-07:00",
-      "size": 7365960935,
-      "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+      "name": "deepseek-r1:latest",
+      "model": "deepseek-r1:latest",
+      "modified_at": "2025-05-10T08:06:48.639712648-07:00",
+      "size": 4683075271,
+      "digest": "0a8c266910232fd3291e71e5ba1e058cc5af9d411192cf88b6d30e92b6e73163",
       "details": {
         "parent_model": "",
         "format": "gguf",
-        "family": "llama",
-        "families": null,
-        "parameter_size": "13B",
-        "quantization_level": "Q4_0"
+        "family": "qwen2",
+        "families": [
+          "qwen2"
+        ],
+        "parameter_size": "7.6B",
+        "quantization_level": "Q4_K_M"
       }
     },
     {
-      "name": "llama3:latest",
-      "modified_at": "2023-12-07T09:32:18.757212583-08:00",
-      "size": 3825819519,
-      "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
+      "name": "llama3.2:latest",
+      "model": "llama3.2:latest",
+      "modified_at": "2025-05-04T17:37:44.706015396-07:00",
+      "size": 2019393189,
+      "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
       "details": {
         "parent_model": "",
         "format": "gguf",
         "family": "llama",
-        "families": null,
-        "parameter_size": "7B",
-        "quantization_level": "Q4_0"
+        "families": [
+          "llama"
+        ],
+        "parameter_size": "3.2B",
+        "quantization_level": "Q4_K_M"
       }
     }
   ]
fs/ggml/type.go (125 lines changed)
@@ -12,42 +12,42 @@ type FileType uint32
 const (
 	FileTypeF32 FileType = iota
 	FileTypeF16
-	FileTypeQ4_0
-	FileTypeQ4_1
+	fileTypeQ4_0
+	fileTypeQ4_1
 	fileTypeQ4_1_F16 // unused by GGML
 	fileTypeQ4_2     // unused by GGML
 	fileTypeQ4_3     // unused by GGML
 	FileTypeQ8_0
-	FileTypeQ5_0
-	FileTypeQ5_1
-	FileTypeQ2_K
-	FileTypeQ3_K_S
-	FileTypeQ3_K_M
-	FileTypeQ3_K_L
+	fileTypeQ5_0
+	fileTypeQ5_1
+	fileTypeQ2_K
+	fileTypeQ3_K_S
+	fileTypeQ3_K_M
+	fileTypeQ3_K_L
 	FileTypeQ4_K_S
 	FileTypeQ4_K_M
-	FileTypeQ5_K_S
-	FileTypeQ5_K_M
-	FileTypeQ6_K
-	fileTypeIQ2_XXS // not supported by ollama
-	fileTypeIQ2_XS  // not supported by ollama
-	FileTypeQ2_K_S
-	fileTypeIQ3_XS  // not supported by ollama
-	fileTypeIQ3_XXS // not supported by ollama
-	fileTypeIQ1_S   // not supported by ollama
-	fileTypeIQ4_NL  // not supported by ollama
-	fileTypeIQ3_S   // not supported by ollama
-	fileTypeIQ3_M   // not supported by ollama
-	fileTypeIQ2_S   // not supported by ollama
-	fileTypeIQ2_M   // not supported by ollama
-	fileTypeIQ4_XS  // not supported by ollama
-	fileTypeIQ1_M   // not supported by ollama
+	fileTypeQ5_K_S
+	fileTypeQ5_K_M
+	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeIQ3_XS
+	fileTypeIQ3_XXS
+	fileTypeIQ1_S
+	fileTypeIQ4_NL
+	fileTypeIQ3_S
+	fileTypeIQ3_M
+	fileTypeIQ2_S
+	fileTypeIQ2_M
+	fileTypeIQ4_XS
+	fileTypeIQ1_M
 	FileTypeBF16
 	fileTypeQ4_0_4_4 // unused by GGML
 	fileTypeQ4_0_4_8 // unused by GGML
 	fileTypeQ4_0_8_8 // unused by GGML
-	fileTypeTQ1_0    // not supported by ollama
-	fileTypeTQ2_0    // not supported by ollama
+	fileTypeTQ1_0
+	fileTypeTQ2_0
 
 	FileTypeUnknown = 1024
 )
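The Go-side mechanism behind the reduced set is visibility: renaming a constant from `FileTypeX` to `fileTypeX` keeps its iota value (so existing GGUF files still parse) but makes it unreachable from outside `fs/ggml`. A toy illustration of the convention, not the real package, with illustrative values:

```go
// Toy package illustrating the visibility trick; not the real fs/ggml.
package quant

type FileType uint32

const (
	FileTypeQ4_K_M FileType = iota // exported: other packages can name it
	fileTypeQ4_1                   // unexported: quant.fileTypeQ4_1 does not compile elsewhere
)
```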
@@ -60,36 +60,12 @@ func ParseFileType(s string) (FileType, error) {
 		return FileTypeF32, nil
 	case "F16":
 		return FileTypeF16, nil
-	case "Q4_0":
-		return FileTypeQ4_0, nil
-	case "Q4_1":
-		return FileTypeQ4_1, nil
 	case "Q8_0":
 		return FileTypeQ8_0, nil
-	case "Q5_0":
-		return FileTypeQ5_0, nil
-	case "Q5_1":
-		return FileTypeQ5_1, nil
-	case "Q2_K":
-		return FileTypeQ2_K, nil
-	case "Q3_K_S":
-		return FileTypeQ3_K_S, nil
-	case "Q3_K_M":
-		return FileTypeQ3_K_M, nil
-	case "Q3_K_L":
-		return FileTypeQ3_K_L, nil
 	case "Q4_K_S":
 		return FileTypeQ4_K_S, nil
 	case "Q4_K_M", "Q4_K":
 		return FileTypeQ4_K_M, nil
-	case "Q5_K_S":
-		return FileTypeQ5_K_S, nil
-	case "Q5_K_M", "Q5_K":
-		return FileTypeQ5_K_M, nil
-	case "Q6_K":
-		return FileTypeQ6_K, nil
-	case "Q2_K_S":
-		return FileTypeQ2_K_S, nil
 	case "BF16":
 		return FileTypeBF16, nil
 	default:
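After this change, `ParseFileType` is the single gate for user-requested quantization targets: anything not in the switch falls through to the default error. A hedged sketch of a caller-side check follows; the package, import path, and `ToUpper` normalization are assumptions for illustration, not part of this diff.

```go
package server

import (
	"fmt"
	"strings"

	"github.com/ollama/ollama/fs/ggml"
)

// validateQuantize maps a user-supplied quantization string to a supported
// FileType, or fails. Since only exported, supported types can come out of
// ParseFileType now, this check is the whole gate.
func validateQuantize(s string) (ggml.FileType, error) {
	ft, err := ggml.ParseFileType(strings.ToUpper(s))
	if err != nil {
		return ggml.FileTypeUnknown, fmt.Errorf("cannot quantize to %q: %w", s, err)
	}
	return ft, nil
}
```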
@@ -111,40 +87,41 @@ func ParseFileType(s string) (FileType, error) {
 }
 
 func (t FileType) String() string {
+	// Note: this routine will return a broader set of file types for existing models
 	switch t {
 	case FileTypeF32:
 		return "F32"
 	case FileTypeF16:
 		return "F16"
-	case FileTypeQ4_0:
+	case fileTypeQ4_0:
 		return "Q4_0"
-	case FileTypeQ4_1:
+	case fileTypeQ4_1:
 		return "Q4_1"
 	case FileTypeQ8_0:
 		return "Q8_0"
-	case FileTypeQ5_0:
+	case fileTypeQ5_0:
 		return "Q5_0"
-	case FileTypeQ5_1:
+	case fileTypeQ5_1:
 		return "Q5_1"
-	case FileTypeQ2_K:
+	case fileTypeQ2_K:
 		return "Q2_K"
-	case FileTypeQ3_K_S:
+	case fileTypeQ3_K_S:
 		return "Q3_K_S"
-	case FileTypeQ3_K_M:
+	case fileTypeQ3_K_M:
 		return "Q3_K_M"
-	case FileTypeQ3_K_L:
+	case fileTypeQ3_K_L:
 		return "Q3_K_L"
 	case FileTypeQ4_K_S:
 		return "Q4_K_S"
 	case FileTypeQ4_K_M:
 		return "Q4_K_M"
-	case FileTypeQ5_K_S:
+	case fileTypeQ5_K_S:
 		return "Q5_K_S"
-	case FileTypeQ5_K_M:
+	case fileTypeQ5_K_M:
 		return "Q5_K_M"
-	case FileTypeQ6_K:
+	case fileTypeQ6_K:
 		return "Q6_K"
-	case FileTypeQ2_K_S:
+	case fileTypeQ2_K_S:
 		return "Q2_K_S"
 	case FileTypeBF16:
 		return "BF16"
@@ -163,35 +140,35 @@ func (ftype FileType) ToTensorType() TensorType {
 		return TensorTypeF32
 	case FileTypeF16:
 		return TensorTypeF16
-	case FileTypeQ4_0:
+	case fileTypeQ4_0:
 		return TensorTypeQ4_0
-	case FileTypeQ4_1:
+	case fileTypeQ4_1:
 		return TensorTypeQ4_1
 	case FileTypeQ8_0:
 		return TensorTypeQ8_0
-	case FileTypeQ5_0:
+	case fileTypeQ5_0:
 		return TensorTypeQ5_0
-	case FileTypeQ5_1:
+	case fileTypeQ5_1:
 		return TensorTypeQ5_1
-	case FileTypeQ2_K:
+	case fileTypeQ2_K:
 		return TensorTypeQ2_K
-	case FileTypeQ3_K_S:
+	case fileTypeQ3_K_S:
 		return TensorTypeQ3_K
-	case FileTypeQ3_K_M:
+	case fileTypeQ3_K_M:
 		return TensorTypeQ3_K
-	case FileTypeQ3_K_L:
+	case fileTypeQ3_K_L:
 		return TensorTypeQ3_K
 	case FileTypeQ4_K_S:
 		return TensorTypeQ4_K
 	case FileTypeQ4_K_M:
 		return TensorTypeQ4_K
-	case FileTypeQ5_K_S:
+	case fileTypeQ5_K_S:
 		return TensorTypeQ5_K
-	case FileTypeQ5_K_M:
+	case fileTypeQ5_K_M:
 		return TensorTypeQ5_K
-	case FileTypeQ6_K:
+	case fileTypeQ6_K:
 		return TensorTypeQ6_K
-	case FileTypeQ2_K_S:
+	case fileTypeQ2_K_S:
 		return TensorTypeQ2_K
 	case FileTypeBF16:
 		return TensorTypeBF16
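Note what this mapping flattens: a `FileType` names an overall quantization mix, and `ToTensorType` returns only its base tensor type, so `FileTypeQ4_K_S` and `FileTypeQ4_K_M` both yield `TensorTypeQ4_K`. The per-tensor upgrades that distinguish the mixes happen in `getTensorNewType` below. A small illustration, with the import path assumed:

```go
package main

import (
	"fmt"

	fsggml "github.com/ollama/ollama/fs/ggml"
)

func main() {
	// Both supported K-quant mixes share one base tensor type; the _M mix
	// differs only in which tensors getTensorNewType upgrades.
	fmt.Println(fsggml.FileTypeQ4_K_S.ToTensorType()) // likely prints Q4_K
	fmt.Println(fsggml.FileTypeQ4_K_M.ToTensorType()) // likely prints Q4_K
}
```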
@@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 			newType = fsggml.TensorTypeQ6_K
 		}
 	} else if strings.Contains(name, "attn_v.weight") {
-		if ftype == fsggml.FileTypeQ2_K {
-			if kv.GQA() >= 4 {
-				newType = fsggml.TensorTypeQ4_K
-			} else {
-				newType = fsggml.TensorTypeQ3_K
-			}
-		} else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 {
-			newType = fsggml.TensorTypeQ4_K
-		} else if ftype == fsggml.FileTypeQ3_K_M {
-			if qs.iAttnV < 2 {
-				newType = fsggml.TensorTypeQ5_K
-			} else {
-				newType = fsggml.TensorTypeQ4_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ5_K
-		} else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) &&
+		if (ftype == fsggml.FileTypeQ4_K_M) &&
 			useMoreBits(qs.iAttnV, qs.nAttnV) {
 			newType = fsggml.TensorTypeQ6_K
 		} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
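The surviving Q4_K_M branch keys off `useMoreBits`, which is not shown in this diff. In llama.cpp's quantization logic the equivalent helper gives extra precision to roughly the first eighth, the last eighth, and every third layer in between; a sketch of what it likely looks like here, with the signature inferred from the call sites:

```go
// useMoreBits reports whether a layer should get a higher-precision
// quantization. Sketch modeled on llama.cpp's use_more_bits; the real
// implementation in this repository may differ.
func useMoreBits(iLayer, nLayers int) bool {
	return iLayer < nLayers/8 || iLayer >= 7*nLayers/8 || (iLayer-nLayers/8)%3 == 2
}
```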
@@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 	} else if strings.Contains(name, "ffn_down") {
 		iLayer := qs.iFfnDown
 		n_layer := qs.nFfnDown
-		if ftype == fsggml.FileTypeQ2_K {
-			newType = fsggml.TensorTypeQ3_K
-		} else if ftype == fsggml.FileTypeQ2_K_S {
-			if iLayer < n_layer/8 {
-				newType = fsggml.TensorTypeQ4_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_M {
-			if iLayer < n_layer/16 {
-				newType = fsggml.TensorTypeQ5_K
-			} else if useMoreBits(iLayer, n_layer) {
-				newType = fsggml.TensorTypeQ4_K
-			} else {
-				newType = fsggml.TensorTypeQ3_K
-			}
-		} else if ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ5_K
-		} else if ftype == fsggml.FileTypeQ4_K_M {
+		if ftype == fsggml.FileTypeQ4_K_M {
 			if useMoreBits(iLayer, n_layer) {
 				newType = fsggml.TensorTypeQ6_K
 			}
-		} else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) {
-			newType = fsggml.TensorTypeQ6_K
 		} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
 			newType = fsggml.TensorTypeQ5_K
 		}
 		qs.iFfnDown++
 	} else if strings.Contains(name, "attn_output.weight") {
 		if nExperts == 8 {
-			if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M ||
-				ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
+			if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
 				newType = fsggml.TensorTypeQ5_K
 			}
 		} else {
-			if ftype == fsggml.FileTypeQ2_K {
-				newType = fsggml.TensorTypeQ3_K
-			} else if ftype == fsggml.FileTypeQ3_K_M {
-				newType = fsggml.TensorTypeQ4_K
-			} else if ftype == fsggml.FileTypeQ3_K_L {
+			if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
 				newType = fsggml.TensorTypeQ5_K
 			}
 		}
 	} else if strings.Contains(name, "attn_qkv.weight") {
-		if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L {
-			newType = fsggml.TensorTypeQ4_K
-		} else if ftype == fsggml.FileTypeQ4_K_M {
+		if ftype == fsggml.FileTypeQ4_K_M {
 			newType = fsggml.TensorTypeQ5_K
-		} else if ftype == fsggml.FileTypeQ5_K_M {
-			newType = fsggml.TensorTypeQ6_K
 		}
 	}
@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "attn_v.weight_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "attn_v.weight_q2_k_s_q4_k",
-			kv: map[string]any{
-				"general.architecture":        "foo",
-				"foo.attention.head_count":    uint32(4),
-				"foo.attention.head_count_kv": uint32(1),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_m",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "attn_v.weight_q3_k_m_i",
-			qs: quantizeState{
-				iAttnV: 2,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_v.weight_q3_k_l",
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_v.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "attn_v.weight_q4_k_m",
 			qs: quantizeState{
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeF32,
 			expected:    fsggml.TensorTypeQ8_0,
 		},
-		{
-			name:        "ffn_down_q2_k",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "ffn_down_q2_k_s",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_0,
-		},
-		{
-			name: "ffn_down_q2_k_s_layers",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K_S,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_base",
-			qs: quantizeState{
-				iFfnDown: 1,
-				nFfnDown: 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_16",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 16,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name: "ffn_down_q3_k_m_8",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "ffn_down_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
 		{
 			name: "ffn_down_q4_k_m",
 			qs: quantizeState{
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ6_K,
 		},
-		{
-			name: "ffn_down_q5_k_m",
-			qs: quantizeState{
-				iFfnDown: 2,
-				nFfnDown: 3 * 8,
-			},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "ffn_down",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 		{
 			name: "ffn_down_q4_k_s",
 			qs: quantizeState{
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_S,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name: "attn_output.weight_8_expert",
-			qs:   quantizeState{},
-			kv: map[string]any{
-				"general.architecture": "foo",
-				"foo.expert_count":     uint32(8),
-			},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_output.weight_q2",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ2_K,
-			expected:    fsggml.TensorTypeQ3_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
-		{
-			name:        "attn_output.weight_q3_k_l",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_output.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_L,
-			expected:    fsggml.TensorTypeQ5_K,
-		},
-		{
-			name:        "attn_qkv.weight_q3_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ3_K_M,
-			expected:    fsggml.TensorTypeQ4_K,
-		},
 		{
 			name: "attn_qkv.weight_q4_k_m",
 			qs:   quantizeState{},
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
 			ftype:       fsggml.FileTypeQ4_K_M,
 			expected:    fsggml.TensorTypeQ5_K,
 		},
-		{
-			name:        "attn_qkv.weight_q5_k_m",
-			qs:          quantizeState{},
-			kv:          map[string]any{},
-			newType:     fsggml.TensorTypeQ4_0,
-			tensor_name: "blk.0.attn_qkv.weight",
-			shape:       []uint64{256},
-			ftype:       fsggml.FileTypeQ5_K_M,
-			expected:    fsggml.TensorTypeQ6_K,
-		},
 	}
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
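The body of `t.Run` lies outside this hunk. A plausible reconstruction of the runner follows, with the argument order assumed from the `getTensorNewType` signature in the earlier hunk; the real test body may differ.

```go
// Sketch of the table-driven runner implied by the cases above.
for _, tt := range cases {
	t.Run(tt.name, func(t *testing.T) {
		qs := tt.qs
		got := getTensorNewType(fsggml.KV(tt.kv), &qs, tt.newType, tt.tensor_name, tt.shape, tt.ftype)
		if got != tt.expected {
			t.Errorf("got %v, want %v", got, tt.expected)
		}
	})
}
```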