diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 90d1d4406..007906472 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -379,7 +379,13 @@ func (f GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partialO
 	embedding := f.KV().EmbeddingLength()
 	heads := f.KV().HeadCount()
 	headsKV := f.KV().HeadCountKV()
-	vocab := uint64(f.KV()["tokenizer.ggml.tokens"].(*array).size)
+	// Fall back to a vocab size of 1 when the token array is absent so the estimate can still run.
+	vocab := uint64(1)
+	if tokensArray, ok := f.KV()["tokenizer.ggml.tokens"].(*array); ok && tokensArray != nil {
+		vocab = uint64(tokensArray.size)
+	} else {
+		slog.Warn("tokenizer.ggml.tokens not found or nil, using 1 for vocab size in size calculation")
+	}
 
 	embeddingHeads := f.KV().EmbeddingHeadCount()
 	embeddingHeadsK := f.KV().EmbeddingHeadCountK()
diff --git a/fs/ggml/ggml_test.go b/fs/ggml/ggml_test.go
index 4fcdf0854..a9730e904 100644
--- a/fs/ggml/ggml_test.go
+++ b/fs/ggml/ggml_test.go
@@ -9,6 +9,32 @@ import (
 	"github.com/google/go-cmp/cmp"
 )
 
+// ggufModel is a test double whose accessors return the KV metadata and tensors it was built with.
+type ggufModel struct {
+	kv      KV
+	tensors Tensors
+}
+func (g *ggufModel) KV() KV           { return g.kv }
+func (g *ggufModel) Tensors() Tensors { return g.tensors }
+
+// TestGraphNoVocab verifies that GraphSize does not panic when the model
+// metadata has no "tokenizer.ggml.tokens" entry.
+func TestGraphNoVocab(t *testing.T) {
+	metadata := KV{
+		"general.architecture": "llama",
+		"block_count":          uint32(1),
+	}
+	stub := &ggufModel{
+		kv:      metadata,
+		tensors: Tensors{items: []*Tensor{}},
+	}
+	g := &GGML{container: &containerGGUF{}, model: stub}
+
+	// Only the absence of a panic matters, so the returned sizes are discarded.
+	// A panic inside GraphSize fails the test automatically.
+	_, _, _ = g.GraphSize(1, 1, "f16")
+}
+
 func TestTensorLayers(t *testing.T) {
 	tensors := make(map[string]*Tensor)
 	for _, name := range []string{