diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 90d1d4406..007906472 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -379,7 +379,15 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
 	embedding := f.KV().EmbeddingLength()
 	heads := f.KV().HeadCount()
 	headsKV := f.KV().HeadCountKV()
-	vocab := uint64(f.KV()["tokenizer.ggml.tokens"].(*array).size)
+	// The token list may be absent or hold an unexpected type; the checked
+	// type assertion falls back to a default instead of panicking.
+	var vocab uint64
+	if tokensArray, ok := f.KV()["tokenizer.ggml.tokens"].(*array); ok && tokensArray != nil {
+		vocab = uint64(tokensArray.size)
+	} else {
+		vocab = 1 // placeholder vocab size when the token list is unavailable
+		slog.Warn("tokenizer.ggml.tokens not found or nil, using 1 for vocab size in size calculation")
+	}
 
 	embeddingHeads := f.KV().EmbeddingHeadCount()
 	embeddingHeadsK := f.KV().EmbeddingHeadCountK()
diff --git a/fs/ggml/ggml_test.go b/fs/ggml/ggml_test.go
index 4fcdf0854..a9730e904 100644
--- a/fs/ggml/ggml_test.go
+++ b/fs/ggml/ggml_test.go
@@ -9,6 +9,35 @@ import (
 	"github.com/google/go-cmp/cmp"
 )
 
+// ggufModel is a test stub model backed by in-memory KV metadata and tensors.
+type ggufModel struct {
+	kv      KV
+	tensors Tensors
+}
+
+func (m *ggufModel) KV() KV           { return m.kv }
+func (m *ggufModel) Tensors() Tensors { return m.tensors }
+
+// TestGraphNoVocab checks that GraphSize does not panic when the KV metadata
+// has no "tokenizer.ggml.tokens" entry.
+func TestGraphNoVocab(t *testing.T) {
+	g := &GGML{
+		container: &containerGGUF{},
+		model: &ggufModel{
+			kv: KV{
+				"general.architecture": "llama",
+				"block_count":          uint32(1),
+			},
+			tensors: Tensors{
+				items: []*Tensor{},
+			},
+		},
+	}
+
+	// This should not panic
+	_, _, _ = g.GraphSize(1, 1, "f16")
+}
+
 func TestTensorLayers(t *testing.T) {
 	tensors := make(map[string]*Tensor)
 	for _, name := range []string{