diff --git a/model/process_text_spm.go b/model/process_text_spm.go index 9d61746a2..68e3ed015 100644 --- a/model/process_text_spm.go +++ b/model/process_text_spm.go @@ -169,6 +169,10 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error) continue } + if id := spm.vocab.Encode(string(left.runes) + string(right.runes)); id < 0 { + continue + } + merges[pair.a].runes = append(left.runes, right.runes...) merges[pair.b].runes = nil merges[pair.a].n = right.n