From 684f0d92917a40355a21680f793b0386c3f683e4 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Mon, 12 May 2025 12:10:15 -0700 Subject: [PATCH] set default values for vision model in config --- convert/convert_qwen2.go | 2 +- convert/convert_qwen25vl.go | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/convert/convert_qwen2.go b/convert/convert_qwen2.go index 48b92bbdd..3647c4e54 100644 --- a/convert/convert_qwen2.go +++ b/convert/convert_qwen2.go @@ -40,7 +40,7 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV { case "yarn": kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor - case "mrope": + case "mrope", "default": kv["qwen2.rope.mrope_section"] = q.RopeScaling.MropeSection default: panic("unknown rope scaling type") diff --git a/convert/convert_qwen25vl.go b/convert/convert_qwen25vl.go index ff9becd8a..c0a20729b 100644 --- a/convert/convert_qwen25vl.go +++ b/convert/convert_qwen25vl.go @@ -39,19 +39,24 @@ func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV { } } - kv["qwen25vl.vision.block_count"] = q.VisionModel.Depth + if q.VisionModel.FullAttentionBlocks == nil { + q.VisionModel.FullAttentionBlocks = []int32{7, 15, 23, 31} + } + + // Use cmp.Or directly in the KV assignments to provide defaults for missing values + kv["qwen25vl.vision.block_count"] = cmp.Or(q.VisionModel.Depth, 32) kv["qwen25vl.vision.embedding_length"] = q.VisionModel.HiddenSize - kv["qwen25vl.vision.attention.head_count"] = q.VisionModel.NumHeads + kv["qwen25vl.vision.attention.head_count"] = cmp.Or(q.VisionModel.NumHeads, 16) kv["qwen25vl.vision.num_channels"] = q.VisionModel.InChannels - kv["qwen25vl.vision.patch_size"] = q.VisionModel.PatchSize - kv["qwen25vl.vision.spatial_merge_size"] = q.VisionModel.SpatialMergeSize + kv["qwen25vl.vision.patch_size"] = cmp.Or(q.VisionModel.PatchSize, 14) + kv["qwen25vl.vision.spatial_merge_size"] = cmp.Or(q.VisionModel.SpatialMergeSize, 2) 
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize - kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize + kv["qwen25vl.vision.window_size"] = cmp.Or(q.VisionModel.WindowSize, 112) kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6) // RoPE theta increased from 1e4 to 1e5 to compensate for numerical differences between tensor operations; empirically produces better results. kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4) kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks - kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize + kv["qwen25vl.vision.temporal_patch_size"] = cmp.Or(q.VisionModel.TemporalPatchSize, 2) return kv }