diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go index d8a012c08..340c3d581 100644 --- a/convert/tokenizer_spm.go +++ b/convert/tokenizer_spm.go @@ -47,6 +47,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { v.Types = append(v.Types, int32(t)) default: tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL) + + // temporary fix to handle gemma3 broken configs + if slices.Contains([]string{"", ""}, piece.GetPiece()) { + tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL) + } + for _, t := range ast { if t.Content == piece.GetPiece() { tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)