From ab39e08eb974693808f5784d5d3d7d336e5ae526 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 11 Mar 2025 04:25:16 -0700
Subject: [PATCH] llm: auto detect models that require Ollama Engine (#1)

---
 fs/ggml/ggml.go | 4 ++++
 llm/server.go   | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index fe98a71b3..d32296d9c 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -133,6 +133,10 @@ func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
 	return s
 }
 
+func (kv KV) OllamaEngineRequired() bool {
+	return kv.Architecture() == "gemma3"
+}
+
 func keyValue[T string | uint32 | uint64 | float32 | *array | bool](kv KV, key string, defaultValue ...T) T {
 	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
 		key = kv.Architecture() + "." + key
diff --git a/llm/server.go b/llm/server.go
index a53306fb0..c6f117125 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -271,7 +271,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 
 	var llamaModel *llama.Model
 	var textProcessor model.TextProcessor
-	if envconfig.NewEngine() {
+	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
 		textProcessor, err = model.NewTextProcessor(modelPath)
 		if err != nil {
 			// To prepare for opt-out mode, instead of treating this as an error, we fallback to the old runner