fix vocabulary (#10679)

2025-05-12 17:29:46 -07:00 · 2025-05-12 17:29:46 -07:00 · 526b2ed102
commit 526b2ed102
parent a7240c6d63
7 changed files with 28 additions and 26 deletions
--- a/model/models/gemma2/model.go
+++ b/model/models/gemma2/model.go
@ -45,6 +45,8 @@ func New(c fs.Config) (model.Model, error) {
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id")),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				// TODO: EOT is set to EOS as a workaround; left unset it would be 0, and token 0 would stop generation
 				EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")),
 			},
 		),
 		Layers: make([]Layer, c.Uint("block_count")),
--- a/model/models/gemma3/model_text.go
+++ b/model/models/gemma3/model_text.go
@ -7,7 +7,6 @@ import (
 	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
 )
@ -20,9 +19,6 @@ type TextConfig struct {
 }
 type TextModel struct {
 	model.Base
 	model.SentencePieceModel
 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []TextLayer   `gguf:"blk"`
 	OutputNorm     *nn.RMSNorm   `gguf:"output_norm"`
@ -45,15 +41,6 @@ func newTextModel(c fs.Config) *TextModel {
 	numBlocks := int(c.Uint("block_count"))
 	m := TextModel{
 		SentencePieceModel: model.NewSentencePieceModel(
 			&model.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Scores: c.Floats("tokenizer.ggml.scores"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id")),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 			},
 		),
 		Layers: make([]TextLayer, numBlocks),
 		TextConfig: &TextConfig{
 			hiddenSize:     int(c.Uint("embedding_length")),
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@ -47,6 +47,9 @@ func New(c fs.Config) (model.Model, error) {
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 				// TODO: EOT is set to EOS as a workaround; left unset it would be 0, and token 0 would stop generation
 				EOT:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
 			},
 		),
 		Layers: make([]Layer, c.Uint("block_count")),
--- a/model/models/llama4/model.go
+++ b/model/models/llama4/model.go
@ -45,6 +45,9 @@ func New(c fs.Config) (model.Model, error) {
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 				// TODO: EOT is set to EOS as a workaround; left unset it would be 0, and token 0 would stop generation
 				EOT:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
 			},
 		),
 		ImageProcessor: newImageProcessor(c),
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@ -16,6 +16,8 @@ import (
 type Model struct {
 	model.Base
 	model.BytePairEncoding
 	*TextModel
 	*VisionModel         `gguf:"v,vision"`
 	*MultiModalProjector `gguf:"mm"`
@ -40,6 +42,21 @@ func New(c fs.Config) (model.Model, error) {
 		VisionModel:         newVisionModel(c),
 		ImageProcessor:      newImageProcessor(c),
 		MultiModalProjector: newMultiModalProjector(c),
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
 			&model.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
 				BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id", 1)),
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id", 2)),
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
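 				// TODO: EOT is set to EOS as a workaround; left unset it would be 0, and token 0 would stop generation
 				// NOTE(review): EOS above falls back to 2, but the EOT lookup below passes no default — confirm they cannot diverge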
 				EOT:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
 			},
 		),
 	}
 	m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
--- a/model/models/mistral3/model_text.go
+++ b/model/models/mistral3/model_text.go
@ -21,7 +21,6 @@ type TextOptions struct {
 type TextModel struct {
 	model.Base
 	model.BytePairEncoding
 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@ -148,18 +147,6 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
 	}
 	textModel := &TextModel{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
 			&model.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
 				BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id", 1)),
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id", 2)),
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 			},
 		),
 		Layers: make([]Layer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
 			hiddenSize: int(c.Uint("embedding_length")),
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@ -49,6 +49,9 @@ func New(c fs.Config) (model.Model, error) {
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
 				EOS:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 				// TODO: EOT is set to EOS as a workaround; left unset it would be 0, and token 0 would stop generation
 				EOT:    int32(c.Uint("tokenizer.ggml.eos_token_id")),
 				AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
 			},
 		),
 		ImageProcessor: newImageProcessor(c),