From 526b2ed10296cc3d1ae89121eedcbbbe257741a3 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 12 May 2025 17:29:46 -0700 Subject: [PATCH] fix vocabulary (#10679) --- model/models/gemma2/model.go | 2 ++ model/models/gemma3/model_text.go | 13 ------------- model/models/llama/model.go | 3 +++ model/models/llama4/model.go | 3 +++ model/models/mistral3/model.go | 17 +++++++++++++++++ model/models/mistral3/model_text.go | 13 ------------- model/models/mllama/model.go | 3 +++ 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/model/models/gemma2/model.go b/model/models/gemma2/model.go index d418f6827..3156b0068 100644 --- a/model/models/gemma2/model.go +++ b/model/models/gemma2/model.go @@ -45,6 +45,8 @@ func New(c fs.Config) (model.Model, error) { Types: c.Ints("tokenizer.ggml.token_type"), BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")), EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")), + // TODO: set EOT to EOS otherwise 0 will stop generation + EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")), }, ), Layers: make([]Layer, c.Uint("block_count")), diff --git a/model/models/gemma3/model_text.go b/model/models/gemma3/model_text.go index c1e843d8f..741818a29 100644 --- a/model/models/gemma3/model_text.go +++ b/model/models/gemma3/model_text.go @@ -7,7 +7,6 @@ import ( "github.com/ollama/ollama/kvcache" "github.com/ollama/ollama/ml" "github.com/ollama/ollama/ml/nn" - "github.com/ollama/ollama/model" "github.com/ollama/ollama/model/input" ) @@ -20,9 +19,6 @@ type TextConfig struct { } type TextModel struct { - model.Base - model.SentencePieceModel - TokenEmbedding *nn.Embedding `gguf:"token_embd"` Layers []TextLayer `gguf:"blk"` OutputNorm *nn.RMSNorm `gguf:"output_norm"` @@ -45,15 +41,6 @@ func newTextModel(c fs.Config) *TextModel { numBlocks := int(c.Uint("block_count")) m := TextModel{ - SentencePieceModel: model.NewSentencePieceModel( - &model.Vocabulary{ - Values: c.Strings("tokenizer.ggml.tokens"), - Scores: c.Floats("tokenizer.ggml.scores"), - 
Types: c.Ints("tokenizer.ggml.token_type"), - BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")), - EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")), - }, - ), Layers: make([]TextLayer, numBlocks), TextConfig: &TextConfig{ hiddenSize: int(c.Uint("embedding_length")), diff --git a/model/models/llama/model.go b/model/models/llama/model.go index 3e5a54278..c75d7eb2f 100644 --- a/model/models/llama/model.go +++ b/model/models/llama/model.go @@ -47,6 +47,9 @@ func New(c fs.Config) (model.Model, error) { AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true), EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")), AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false), + // TODO: set EOT to EOS otherwise 0 will stop generation + EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")), + AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false), }, ), Layers: make([]Layer, c.Uint("block_count")), diff --git a/model/models/llama4/model.go b/model/models/llama4/model.go index 632d313ec..798f0d162 100644 --- a/model/models/llama4/model.go +++ b/model/models/llama4/model.go @@ -45,6 +45,9 @@ func New(c fs.Config) (model.Model, error) { AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true), EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")), AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false), + // TODO: set EOT to EOS otherwise 0 will stop generation + EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")), + AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false), }, ), ImageProcessor: newImageProcessor(c), diff --git a/model/models/mistral3/model.go b/model/models/mistral3/model.go index f749fdcd2..c96852441 100644 --- a/model/models/mistral3/model.go +++ b/model/models/mistral3/model.go @@ -16,6 +16,8 @@ import ( type Model struct { model.Base + model.BytePairEncoding + *TextModel *VisionModel `gguf:"v,vision"` *MultiModalProjector `gguf:"mm"` @@ -40,6 +42,21 @@ func New(c fs.Config) (model.Model, error) { VisionModel: newVisionModel(c), ImageProcessor: newImageProcessor(c), MultiModalProjector: 
newMultiModalProjector(c), + BytePairEncoding: model.NewBytePairEncoding( + c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`), + &model.Vocabulary{ + Values: c.Strings("tokenizer.ggml.tokens"), + Types: c.Ints("tokenizer.ggml.token_type"), + Merges: c.Strings("tokenizer.ggml.merges"), + BOS: int32(c.Uint("tokenizer.ggml.bos_token_id", 1)), + AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true), + EOS: int32(c.Uint("tokenizer.ggml.eos_token_id", 2)), + AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false), + // TODO: set EOT to EOS (same key and same default as EOS) otherwise 0 will stop generation + EOT: int32(c.Uint("tokenizer.ggml.eos_token_id", 2)), + AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false), + }, + ), } m.Cache = kvcache.NewCausalCache(m.TextModel.Shift) diff --git a/model/models/mistral3/model_text.go b/model/models/mistral3/model_text.go index 1bf72acd8..565b001a7 100644 --- a/model/models/mistral3/model_text.go +++ b/model/models/mistral3/model_text.go @@ -21,7 +21,6 @@ type TextOptions struct { type TextModel struct { model.Base - model.BytePairEncoding TokenEmbedding *nn.Embedding `gguf:"token_embd"` Layers []Layer `gguf:"blk"` @@ -148,18 +147,6 @@ func NewTextModel(c fs.Config) (*TextModel, error) { } textModel := &TextModel{ - BytePairEncoding: model.NewBytePairEncoding( - c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`), - &model.Vocabulary{ - Values: c.Strings("tokenizer.ggml.tokens"), - Types: c.Ints("tokenizer.ggml.token_type"), - Merges: c.Strings("tokenizer.ggml.merges"), - BOS: int32(c.Uint("tokenizer.ggml.bos_token_id", 1)), - AddBOS:
c.Bool("tokenizer.ggml.add_bos_token", true), - EOS: int32(c.Uint("tokenizer.ggml.eos_token_id", 2)), - AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false), - }, - ), Layers: make([]Layer, c.Uint("block_count")), TextOptions: &TextOptions{ hiddenSize: int(c.Uint("embedding_length")), diff --git a/model/models/mllama/model.go b/model/models/mllama/model.go index 149876c9c..3fa26dedb 100644 --- a/model/models/mllama/model.go +++ b/model/models/mllama/model.go @@ -49,6 +49,9 @@ func New(c fs.Config) (model.Model, error) { AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true), EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")), AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false), + // TODO: set EOT to EOS otherwise 0 will stop generation + EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")), + AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false), }, ), ImageProcessor: newImageProcessor(c),