ggml: check if vocab key is present to use in size estimate

Vocab is expected to be present when estimating graph size,
but we should not panic if it's not found.
This commit is contained in:
Bruce MacDonald 2025-02-21 14:55:57 -08:00
parent f53f4198c3
commit 4a8c539c99
2 changed files with 33 additions and 1 deletions

View File

@ -379,7 +379,13 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
embedding := f.KV().EmbeddingLength()
heads := f.KV().HeadCount()
headsKV := f.KV().HeadCountKV()
vocab := uint64(f.KV()["tokenizer.ggml.tokens"].(*array).size)
// Read the vocabulary size from the tokenizer token list. The checked type
// assertion avoids a panic when the key is missing or holds another type.
var vocab uint64
if tokensArray, ok := f.KV()["tokenizer.ggml.tokens"].(*array); ok && tokensArray != nil {
	vocab = uint64(tokensArray.size)
} else {
	// Fall back to a placeholder of 1 when the token list is unavailable;
	// the warning reports the value that is actually used.
	vocab = 1
	slog.Warn("tokenizer.ggml.tokens not found or nil, using 1 for vocab size in size calculation")
}
embeddingHeads := f.KV().EmbeddingHeadCount()
embeddingHeadsK := f.KV().EmbeddingHeadCountK()

View File

@ -9,6 +9,32 @@ import (
"github.com/google/go-cmp/cmp"
)
// ggufModel is a minimal in-memory model stub that lets tests hand
// hand-built metadata and tensors to a GGML value.
type ggufModel struct {
	kv      KV      // metadata returned by KV
	tensors Tensors // tensor set returned by Tensors
}

// KV returns the metadata the stub was constructed with.
func (m *ggufModel) KV() KV {
	return m.kv
}

// Tensors returns the tensor set the stub was constructed with.
func (m *ggufModel) Tensors() Tensors {
	return m.tensors
}
func TestGraphNoVocab(t *testing.T) {
g := &GGML{
container: &containerGGUF{},
model: &ggufModel{
kv: KV{
"general.architecture": "llama",
"block_count": uint32(1),
},
tensors: Tensors{
items: []*Tensor{},
},
},
}
// This should not panic
_, _, _ = g.GraphSize(1, 1, "f16")
}
func TestTensorLayers(t *testing.T) {
tensors := make(map[string]*Tensor)
for _, name := range []string{