From 16b13e0cfcb359b0f73ecaf4e68a4a0529c7ce89 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Fri, 2 May 2025 15:42:35 -0700 Subject: [PATCH] Revert "ropeTheta should be 1e5" This reverts commit cc1638b26763eae7daddd44e3975a885671ef9d3. This reverts commit b32385591307e2d33a8f43ce1626b529d2dac83e. --- convert/convert_qwen25vl.go | 2 +- model/models/qwen25vl/model_vision.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/convert/convert_qwen25vl.go b/convert/convert_qwen25vl.go index 12d061e55..bc21f629b 100644 --- a/convert/convert_qwen25vl.go +++ b/convert/convert_qwen25vl.go @@ -48,7 +48,7 @@ func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV { kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6) - kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e5) // note: other implementations use 1e4, but we have seen better results with 1e5 + kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4) kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize diff --git a/model/models/qwen25vl/model_vision.go b/model/models/qwen25vl/model_vision.go index 8b321f15e..2c839b3bd 100644 --- a/model/models/qwen25vl/model_vision.go +++ b/model/models/qwen25vl/model_vision.go @@ -360,7 +360,7 @@ func newVisionModel(c fs.Config) *VisionModel { numHeads := int(c.Uint("vision.attention.head_count", 16)) numChannels := int(c.Uint("vision.num_channels", 3)) eps := c.Float("vision.attention.layer_norm_epsilon", 1e-6) - ropeTheta := c.Float("vision.rope.freq_base", 100000.0) + ropeTheta := c.Float("vision.rope.freq_base", 10000.0) spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2)) windowSize := int(c.Uint("vision.window_size", 112)) fullAttnBlocks := c.Ints("qwen25vl.vision.fullatt_block_indexes", []int32{7, 15, 23, 31}) @@ -383,7 +383,7 @@ func newVisionModel(c fs.Config) *VisionModel { } for i := range fullAttnBlocks { - // full attention block indexes have to be converted to int for use with the slices package + // full attention block indexes have to be converted to int for use with the slices package model.fullAttnBlocks = append(model.fullAttnBlocks, int(fullAttnBlocks[i])) }