From ed14ce2db87ba5e34b293a312eb5237efa9b6761 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Tue, 18 Mar 2025 14:10:16 -0700 Subject: [PATCH] convert mistral-3.1-2503 --- convert/convert.go | 4 +- convert/convert_mistral.go | 188 ++++++++++++++++++++-------------- convert/reader.go | 5 +- model/models/llama/model.go | 25 +---- model/models/mistral/model.go | 40 +++++--- parser/parser.go | 8 +- 6 files changed, 152 insertions(+), 118 deletions(-) diff --git a/convert/convert.go b/convert/convert.go index 695f5598a..b0a44d5aa 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -182,9 +182,9 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { var conv ModelConverter switch p.Architectures[0] { - case "LlamaForCausalLM": + case "LlamaForCausalLM", "MistralForCausalLM": conv = &llamaModel{} - case "MistralForCausalLM": + case "Mistral3ForConditionalGeneration": conv = &mistralModel{} case "MixtralForCausalLM": conv = &mixtralModel{} diff --git a/convert/convert_mistral.go b/convert/convert_mistral.go index 8e881974d..dc3c80fce 100644 --- a/convert/convert_mistral.go +++ b/convert/convert_mistral.go @@ -14,20 +14,39 @@ import ( type mistralModel struct { ModelParameters - NLayers uint32 `json:"n_layers"` - NumHiddenLayers uint32 `json:"num_hidden_layers"` - NLayer uint32 `json:"n_layer"` - MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` - NCtx uint32 `json:"n_ctx"` - HiddenSize uint32 `json:"hidden_size"` - NEmbd uint32 `json:"n_embd"` - IntermediateSize uint32 `json:"intermediate_size"` - NInner uint32 `json:"n_inner"` - NumAttentionHeads uint32 `json:"num_attention_heads"` - NHead uint32 `json:"n_head"` - NumKeyValueHeads uint32 `json:"num_key_value_heads"` - RopeTheta float32 `json:"rope_theta"` - RopeScaling struct { + // Text model parameters + TextConfig struct { + NumHiddenLayers uint32 `json:"num_hidden_layers"` + MaxPositionEmbeddings uint32 `json:"max_position_embeddings"` + HiddenSize uint32 `json:"hidden_size"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + NumKeyValueHeads uint32 `json:"num_key_value_heads"` + RopeTheta float32 `json:"rope_theta"` + RMSNormEPS float32 `json:"rms_norm_eps"` + HeadDim uint32 `json:"head_dim"` + } `json:"text_config"` + + // Vision model parameters + VisionConfig struct { + NumHiddenLayers uint32 `json:"num_hidden_layers"` + HiddenSize uint32 `json:"hidden_size"` + IntermediateSize uint32 `json:"intermediate_size"` + NumAttentionHeads uint32 `json:"num_attention_heads"` + ImageSize uint32 `json:"image_size"` + PatchSize uint32 `json:"patch_size"` + RopeTheta float32 `json:"rope_theta"` + } `json:"vision_config"` + + // Multimodal specific parameters + ImageTokenIndex uint32 `json:"image_token_index"` + MultimodalProjectorBias bool `json:"multimodal_projector_bias"` + ProjectorHiddenAct string `json:"projector_hidden_act"` + SpatialMergeSize uint32 `json:"spatial_merge_size"` + VisionFeatureLayer int32 `json:"vision_feature_layer"` + + // For RoPE scaling if needed + RopeScaling struct { Type string `json:"type"` RopeType string `json:"rope_type"` Factor float32 `json:"factor"` @@ -37,44 +56,46 @@ type mistralModel struct { factors ropeFactor } `json:"rope_scaling"` - RMSNormEPS float32 `json:"rms_norm_eps"` - LayerNormEPS float32 `json:"layer_norm_eps"` - LayerNormEpsilon float32 `json:"layer_norm_epsilon"` - NormEpsilon float32 `json:"norm_epsilon"` - HeadDim uint32 `json:"head_dim"` } func (p *mistralModel) KV(t *Tokenizer) ggml.KV { kv := 
p.ModelParameters.KV(t) kv["general.architecture"] = "mistral" kv["mistral.vocab_size"] = p.VocabSize + kv["mistral.image_token_index"] = p.ImageTokenIndex + kv["mistral.multimodal_projector_bias"] = p.MultimodalProjectorBias + kv["mistral.projector_hidden_act"] = p.ProjectorHiddenAct + kv["mistral.spatial_merge_size"] = p.SpatialMergeSize + // kv["mistral.vision_feature_layer"] = p.VisionFeatureLayer - kv["mistral.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) + // Text model config + kv["mistral.block_count"] = p.TextConfig.NumHiddenLayers + kv["mistral.context_length"] = p.TextConfig.MaxPositionEmbeddings + kv["mistral.embedding_length"] = p.TextConfig.HiddenSize + kv["mistral.feed_forward_length"] = p.TextConfig.IntermediateSize + kv["mistral.attention.head_count"] = p.TextConfig.NumAttentionHeads + kv["mistral.attention.head_count_kv"] = p.TextConfig.NumKeyValueHeads + kv["mistral.rope.dimension_count"] = p.TextConfig.HiddenSize / p.TextConfig.NumAttentionHeads + kv["mistral.rope.freq_base"] = p.TextConfig.RopeTheta + kv["mistral.attention.layer_norm_rms_epsilon"] = p.TextConfig.RMSNormEPS + kv["mistral.attention.key_length"] = p.TextConfig.HeadDim + kv["mistral.attention.value_length"] = p.TextConfig.HeadDim - if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 { - kv["mistral.context_length"] = contextLength - } - - if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 { - kv["mistral.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd) - } - - if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 { - kv["mistral.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner) - } - - kv["mistral.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead) - kv["mistral.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) - - if p.RopeTheta > 0 { - kv["mistral.rope.freq_base"] = p.RopeTheta - } + // Vision model config + kv["mistral.vision.block_count"] = p.VisionConfig.NumHiddenLayers + kv["mistral.vision.embedding_length"] = p.VisionConfig.HiddenSize + kv["mistral.vision.feed_forward_length"] = p.VisionConfig.IntermediateSize + kv["mistral.vision.attention.head_count"] = p.VisionConfig.NumAttentionHeads + kv["mistral.vision.image_size"] = p.VisionConfig.ImageSize + kv["mistral.vision.patch_size"] = p.VisionConfig.PatchSize + kv["mistral.vision.rope.freq_base"] = p.VisionConfig.RopeTheta + // If RoPE scaling is present if p.RopeScaling.Type == "linear" { kv["mistral.rope.scaling.type"] = p.RopeScaling.Type kv["mistral.rope.scaling.factor"] = p.RopeScaling.Factor } else if p.RopeScaling.RopeType == "llama3" { - dim := p.HiddenSize / p.NumAttentionHeads + dim := p.TextConfig.HiddenSize / p.TextConfig.NumAttentionHeads for i := uint32(0); i < dim; i += 2 { factor := cmp.Or(p.RopeScaling.Factor, 8.0) factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0) @@ -84,7 +105,7 @@ func (p *mistralModel) KV(t *Tokenizer) ggml.KV { lambdaLow := float32(original) / factorLow lambdaHigh := float32(original) / factorHigh - lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim)) + lambda := 2 * math.Pi * math.Pow(float64(p.TextConfig.RopeTheta), float64(i)/float64(dim)) if lambda < float64(lambdaHigh) { p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0) } else if lambda > float64(lambdaLow) { @@ -96,23 +117,6 @@ func (p *mistralModel) KV(t *Tokenizer) ggml.KV { } } - if p.NumKeyValueHeads > 0 { - kv["mistral.attention.head_count_kv"] = 
p.NumKeyValueHeads - } - - if p.RMSNormEPS > 0 { - kv["mistral.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS - } - - if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 { - kv["mistral.attention.layer_norm_epsilon"] = layerNormEpsilon - } - - if p.HeadDim > 0 { - kv["mistral.attention.key_length"] = p.HeadDim - kv["mistral.attention.value_length"] = p.HeadDim - } - return kv } @@ -129,18 +133,13 @@ func (p *mistralModel) Tensors(ts []Tensor) []ggml.Tensor { } for _, t := range ts { + // Process tensors that require repacking if strings.HasSuffix(t.Name(), "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") { t.SetRepacker(p.repack) } - if strings.HasPrefix(t.Name(), "patch_merger.") || - strings.HasPrefix(t.Name(), "pre_mm_projector_output_norm.") || - strings.HasPrefix(t.Name(), "vision_encoder.") || - strings.HasPrefix(t.Name(), "vision_language_adapter.") { - continue - } - + // Add all tensors to output out = append(out, ggml.Tensor{ Name: t.Name(), Kind: t.Kind(), @@ -154,19 +153,42 @@ func (p *mistralModel) Tensors(ts []Tensor) []ggml.Tensor { func (p *mistralModel) Replacements() []string { return []string{ - "tok_embeddings", "token_embd", - "norm", "output_norm", - "layers", "blk", - "attention_norm", "attn_norm", - "attention.wq", "attn_q", - "attention.wk", "attn_k", - "attention.wv", "attn_v", - "attention.wo", "attn_output", - "feed_forward.w1", "ffn_gate", - "feed_forward.w2", "ffn_down", - "feed_forward.w3", "ffn_up", - "ffn_norm", "ffn_norm", - "output", "output", + // Language model replacements + "language_model.model.embed_tokens", "token_embd", + "language_model.model.norm", "output_norm", + "language_model.model.layers", "blk", + "language_model.model.layers.*.input_layernorm", "input_layernorm", + "language_model.model.layers.*.self_attn.q_proj", "self_attn.q_proj", + "language_model.model.layers.*.self_attn.k_proj", "self_attn.k_proj", + "language_model.model.layers.*.self_attn.v_proj", "self_attn.v_proj", + "language_model.model.layers.*.self_attn.o_proj", "self_attn.o_proj", + "language_model.model.layers.*.mlp.gate_proj", "mlp.gate_proj", + "language_model.model.layers.*.mlp.down_proj", "mlp.down_proj", + "language_model.model.layers.*.mlp.up_proj", "mlp.up_proj", + "language_model.model.layers.*.post_attention_layernorm", "post_attention_layernorm", + "language_model.lm_head", "output", + + // Vision model replacements - map to shorter prefixes + "vision_tower", "v", + "multi_modal_projector", "mm", + + // Vision transformer blocks - these should be updated accordingly + "vision_tower.transformer.layers", "v.blk", + "vision_tower.transformer.layers.*.attention_norm", "v.attn_norm", + "vision_tower.transformer.layers.*.attention.q_proj", "v.attn_q", + "vision_tower.transformer.layers.*.attention.k_proj", "v.attn_k", + "vision_tower.transformer.layers.*.attention.v_proj", "v.attn_v", + "vision_tower.transformer.layers.*.attention.o_proj", "v.attn_output", + "vision_tower.transformer.layers.*.feed_forward.gate_proj", "v.ffn_gate", + "vision_tower.transformer.layers.*.feed_forward.down_proj", "v.ffn_down", + "vision_tower.transformer.layers.*.feed_forward.up_proj", "v.ffn_up", + "vision_tower.transformer.layers.*.ffn_norm", "v.ffn_norm", + "vision_tower.ln_pre", "v.encoder_norm", + "vision_tower.patch_conv", "v.patch_conv", + + // Multimodal projector components + "multi_modal_projector.patch_merger", "mm.patch_merger", + "multi_modal_projector.norm", "mm.norm", } } @@ -178,9 +200,17 @@ func (p 
*mistralModel) repack(name string, data []float32, shape []uint64) ([]fl var heads uint32 if strings.HasSuffix(name, "attn_q.weight") { - heads = p.NumAttentionHeads + if strings.Contains(name, "vision") { + heads = p.VisionConfig.NumAttentionHeads + } else { + heads = p.TextConfig.NumAttentionHeads + } } else if strings.HasSuffix(name, "attn_k.weight") { - heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) + if strings.Contains(name, "vision") { + heads = p.VisionConfig.NumAttentionHeads + } else { + heads = cmp.Or(p.TextConfig.NumKeyValueHeads, p.TextConfig.NumAttentionHeads) + } } else { return nil, fmt.Errorf("unknown tensor for repack: %s", name) } diff --git a/convert/reader.go b/convert/reader.go index 904b13a42..c1218e66d 100644 --- a/convert/reader.go +++ b/convert/reader.go @@ -62,7 +62,10 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) { Pattern string Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error) }{ - {"*.safetensors", parseSafetensors}, + {"model-*-of-*.safetensors", parseSafetensors}, + {"model.safetensors", parseSafetensors}, + {"adapters.safetensors", parseSafetensors}, + {"adapter_model.safetensors", parseSafetensors}, {"pytorch_model-*-of-*.bin", parseTorch}, {"pytorch_model.bin", parseTorch}, {"consolidated.*.pth", parseTorch}, diff --git a/model/models/llama/model.go b/model/models/llama/model.go index 47a88043e..19a2ab8c4 100644 --- a/model/models/llama/model.go +++ b/model/models/llama/model.go @@ -13,9 +13,9 @@ import ( ) type Options struct { - hiddenSize, numHeads, numKVHeads, headDim int - eps, ropeBase, ropeScale float32 - ropeDim uint32 + hiddenSize, numHeads, numKVHeads int + eps, ropeBase, ropeScale float32 + ropeDim uint32 } type Model struct { @@ -37,8 +37,6 @@ func New(c ml.Config) (model.Model, error) { m := Model{ BytePairEncoding: model.NewBytePairEncoding( - // TODO: need to set this in the conversion for mistral: - // tokenizer.ggml.pretokenizer = [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+ c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`), &model.Vocabulary{ Values: c.Strings("tokenizer.ggml.tokens"), @@ -55,7 +53,6 @@ func New(c ml.Config) (model.Model, error) { hiddenSize: int(c.Uint("embedding_length")), numHeads: int(c.Uint("attention.head_count")), numKVHeads: int(c.Uint("attention.head_count_kv")), - headDim: int(c.Uint("attention.key_length")), eps: c.Float("attention.layer_norm_rms_epsilon"), ropeBase: c.Float("rope.freq_base"), ropeScale: c.Float("rope.freq_scale", 1), @@ -78,36 +75,24 @@ type SelfAttention struct { func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor { batchSize := hiddenState.Dim(1) + headDim := opts.hiddenSize / opts.numHeads ropeType := uint32(0) - // Get head dimension - use explicit value if available, otherwise calculate - headDim := opts.headDim - if headDim == 0 { - headDim = opts.hiddenSize / opts.numHeads - } - // Query projection and reshape q := sa.Query.Forward(ctx, hiddenState) q = q.Reshape(ctx, headDim, opts.numHeads, batchSize) q = q.RoPE(ctx, positionIDs, sa.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale) - // Key projection and reshape k := sa.Key.Forward(ctx, hiddenState) k = 
k.Reshape(ctx, headDim, opts.numKVHeads, batchSize) k = k.RoPE(ctx, positionIDs, sa.RopeFactors, opts.ropeDim, ropeType, opts.ropeBase, opts.ropeScale) - // Value projection and reshape v := sa.Value.Forward(ctx, hiddenState) v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize) - // Attention computation scaleFactor := 1.0 / math.Sqrt(float64(headDim)) kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache) + kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize) - // Reshape attention output for final projection - outputDim := headDim * opts.numHeads - kqv = kqv.Reshape(ctx, outputDim, batchSize) - - // Apply output projection return sa.Output.Forward(ctx, kqv) } diff --git a/model/models/mistral/model.go b/model/models/mistral/model.go index 9ebcff3c0..8bfd1a798 100644 --- a/model/models/mistral/model.go +++ b/model/models/mistral/model.go @@ -37,10 +37,7 @@ func New(c ml.Config) (model.Model, error) { m := Model{ BytePairEncoding: model.NewBytePairEncoding( - // TODO: need to set this in the conversion for mistral: - // tokenizer.ggml.pretokenizer = [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+ - c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`), - // c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`), + c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`), &model.Vocabulary{ Values: c.Strings("tokenizer.ggml.tokens"), Types: c.Uints("tokenizer.ggml.token_type"), @@ -64,16 +61,29 @@ func New(c ml.Config) (model.Model, error) { }, } + fmt.Println("Model Parameters:") + fmt.Printf(" model_type: %q\n", "gpt2") + fmt.Printf(" vocab_size: %d\n", len(c.Strings("tokenizer.ggml.tokens"))) + fmt.Printf(" hidden_size: %d\n", m.Options.hiddenSize) + fmt.Printf(" num_hidden_layers: %d\n", c.Uint("block_count")) + fmt.Printf(" num_attention_heads: %d\n", m.Options.numHeads) + fmt.Printf(" num_key_value_heads: %d\n", m.Options.numKVHeads) + fmt.Printf(" rms_norm_eps: %g\n", m.Options.eps) + fmt.Printf(" rope_theta: %g\n", m.Options.ropeBase) + fmt.Printf(" bos_token_id: %d\n", c.Uint("tokenizer.ggml.bos_token_id")) + fmt.Printf(" eos_token_id: %d\n", c.Uint("tokenizer.ggml.eos_token_id")) + fmt.Printf(" pad_token_id: %d\n", c.Uint("tokenizer.ggml.pad_token_id", 0)) + m.Cache = kvcache.NewCausalCache(m.Shift) return &m, nil } type SelfAttention struct { - Query *nn.Linear `gguf:"attn_q"` - Key *nn.Linear `gguf:"attn_k"` - Value *nn.Linear `gguf:"attn_v"` - Output *nn.Linear `gguf:"attn_output"` + Query *nn.Linear `gguf:"self_attn.q_proj"` + Key *nn.Linear `gguf:"self_attn.k_proj"` + Value *nn.Linear `gguf:"self_attn.v_proj"` + Output *nn.Linear `gguf:"self_attn.o_proj"` RopeFactors ml.Tensor `gguf:"rope_freqs.weight"` } @@ -117,9 +127,9 @@ func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tenso } type MLP struct { - Up *nn.Linear `gguf:"ffn_up"` - Down *nn.Linear `gguf:"ffn_down"` - Gate *nn.Linear 
`gguf:"ffn_gate"` + Up *nn.Linear `gguf:"mlp.up_proj"` + Down *nn.Linear `gguf:"mlp.down_proj"` + Gate *nn.Linear `gguf:"mlp.gate_proj"` } func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor { @@ -128,9 +138,9 @@ func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml } type Layer struct { - AttentionNorm *nn.RMSNorm `gguf:"attn_norm"` + AttentionNorm *nn.RMSNorm `gguf:"input_layernorm"` SelfAttention *SelfAttention - MLPNorm *nn.RMSNorm `gguf:"ffn_norm"` + MLPNorm *nn.RMSNorm `gguf:"post_attention_layernorm"` MLP *MLP } @@ -171,6 +181,7 @@ func (m *Model) Forward(ctx ml.Context, opts input.Options) (ml.Tensor, error) { return nil, err } + // Get token embeddings hiddenState := m.TokenEmbedding.Forward(ctx, inputs) for i, layer := range m.Layers { @@ -184,7 +195,10 @@ func (m *Model) Forward(ctx ml.Context, opts input.Options) (ml.Tensor, error) { hiddenState = layer.Forward(ctx, hiddenState, positions, lastLayerOutputs, m.Cache, m.Options) } + // Apply output normalization hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps) + + // Apply output projection return m.Output.Forward(ctx, hiddenState), nil } diff --git a/parser/parser.go b/parser/parser.go index eb916cbfb..6832351fb 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -211,9 +211,7 @@ func filesForModel(path string) ([]string, error) { } var files []string - if st, _ := glob(filepath.Join(path, "consolidated.safetensors"), "application/octet-stream"); len(st) > 0 { - files = append(files, st...) - } else if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 { + if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 { // safetensors files might be unresolved git lfs references; skip if they are // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors files = append(files, st...) @@ -224,6 +222,10 @@ func filesForModel(path string) ([]string, error) { // covers adapter_model.safetensors files = append(files, st...) } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 { + // pytorch files might also be unresolved git lfs references; skip if they are + // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin + files = append(files, pt...) + } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 { // pytorch files might also be unresolved git lfs references; skip if they are // covers consolidated.x.pth, consolidated.pth files = append(files, pt...)