diff --git a/model/models/llama4/model_text.go b/model/models/llama4/model_text.go
index 3f9f578f1..46174cafa 100644
--- a/model/models/llama4/model_text.go
+++ b/model/models/llama4/model_text.go
@@ -31,8 +31,28 @@ func (sa *TextAttention) Forward(ctx ml.Context, hiddenStates, positions, attent
 	value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
 
 	if useRope {
-		query = query.RoPE(ctx, positions, sa.RopeFactors, uint32(opts.ropeDim), uint32(0), opts.ropeBase, opts.ropeScale)
-		key = key.RoPE(ctx, positions, sa.RopeFactors, uint32(opts.ropeDim), uint32(0), opts.ropeBase, opts.ropeScale)
+		query = query.RoPE(
+			ctx,
+			positions,
+			sa.RopeFactors,
+			ml.RoPEConfig{
+				Dim:   uint32(opts.ropeDim),
+				Type:  ml.RopeTypeNormal,
+				Base:  opts.ropeBase,
+				Scale: opts.ropeScale,
+			},
+		)
+		key = key.RoPE(
+			ctx,
+			positions,
+			sa.RopeFactors,
+			ml.RoPEConfig{
+				Dim:   uint32(opts.ropeDim),
+				Type:  ml.RopeTypeNormal,
+				Base:  opts.ropeBase,
+				Scale: opts.ropeScale,
+			},
+		)
 	}
 
 	if opts.useQKNorm {
@@ -255,5 +275,15 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 }
 
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return key.RoPE(ctx, shift, m.Layers[layer].Attention.RopeFactors, uint32(0), uint32(m.ropeDim), m.ropeBase, m.ropeScale), nil
+	return key.RoPE(
+		ctx,
+		shift,
+		m.Layers[layer].Attention.RopeFactors,
+		ml.RoPEConfig{
+			Dim:   uint32(m.TextOptions.ropeDim),
+			Type:  ml.RopeTypeNormal,
+			Base:  m.TextOptions.ropeBase,
+			Scale: m.TextOptions.ropeScale,
+		},
+	), nil
 }
diff --git a/model/models/mllama/model_text.go b/model/models/mllama/model_text.go
index dccb084d5..0ad300eba 100644
--- a/model/models/mllama/model_text.go
+++ b/model/models/mllama/model_text.go
@@ -240,7 +240,7 @@ func newTextModel(c fs.Config) *TextModel {
 			numHeads:             int(c.Uint("attention.head_count")),
 			numKVHeads:           int(c.Uint("attention.head_count_kv")),
 			eps:                  c.Float("attention.layer_norm_rms_epsilon"),
-			crossAttentionLayers: c.Uints("attention.cross_attention_layers"),
+			crossAttentionLayers: c.Ints("attention.cross_attention_layers"),
 			ropeConfig: ml.RoPEConfig{
 				Base:  c.Float("rope.freq_base"),
 				Scale: c.Float("rope.freq_scale", 1),
diff --git a/model/models/qwen25vl/model_text.go b/model/models/qwen25vl/model_text.go
index 3a10e3424..549cc139b 100644
--- a/model/models/qwen25vl/model_text.go
+++ b/model/models/qwen25vl/model_text.go
@@ -35,7 +35,7 @@ func NewTextModel(c fs.Config) *TextModel {
 			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
 			&model.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
-				Types:  c.Uints("tokenizer.ggml.token_type"),
+				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
 				BOS:    int32(c.Uint("tokenizer.ggml.bos_token_id")),
 				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
diff --git a/server/create.go b/server/create.go
index 810322a79..41c8731cc 100644
--- a/server/create.go
+++ b/server/create.go
@@ -514,17 +514,34 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 		} else if err != nil {
 			return nil, err
 		}
-	}
-
-	// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
-	if layer.Digest == "" {
-		layer, err = NewLayer(io.NewSectionReader(blob, 0, n), mediatype)
-		if err != nil {
-			return nil, err
+
+		mediatype := "application/vnd.ollama.image.model"
+		if f.KV().Kind() == "adapter" {
+			mediatype = "application/vnd.ollama.image.adapter"
+		} else if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok || f.KV().Kind() == "projector" {
+			mediatype = "application/vnd.ollama.image.projector"
 		}
-	}
-
-	layers = append(layers, &layerGGML{layer, f})
+
+		var layer Layer
+		if digest != "" && n == stat.Size() && offset == 0 {
+			layer, err = NewLayerFromLayer(digest, mediatype, blob.Name())
+			if err != nil {
+				slog.Debug("could not create new layer from layer", "error", err)
+				return nil, err
+			}
+		}
+
+		// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
+		if layer.Digest == "" {
+			layer, err = NewLayer(io.NewSectionReader(blob, offset, n), mediatype)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		layers = append(layers, &layerGGML{layer, f})
+		offset = n
+	}
 
 	return detectChatTemplate(layers)
 }