Revert "ropeTheta should be 1e5"
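This reverts commits cc1638b26763eae7daddd44e3975a885671ef9d3 and b32385591307e2d33a8f43ce1626b529d2dac83e, restoring the default qwen25vl vision RoPE frequency base from 1e5 to 1e4 in both the converter's KV output and newVisionModel.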
This commit is contained in:
parent
75441c56f3
commit
16b13e0cfc
@ -48,7 +48,7 @@ func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV {
|
|||||||
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
||||||
kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize
|
kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize
|
||||||
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
||||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e5) // note: other implementations use 1e4, but we have seen better results with 1e5
|
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4)
|
||||||
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
||||||
kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
|
kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
|
||||||
|
|
||||||
|
@ -360,7 +360,7 @@ func newVisionModel(c fs.Config) *VisionModel {
|
|||||||
numHeads := int(c.Uint("vision.attention.head_count", 16))
|
numHeads := int(c.Uint("vision.attention.head_count", 16))
|
||||||
numChannels := int(c.Uint("vision.num_channels", 3))
|
numChannels := int(c.Uint("vision.num_channels", 3))
|
||||||
eps := c.Float("vision.attention.layer_norm_epsilon", 1e-6)
|
eps := c.Float("vision.attention.layer_norm_epsilon", 1e-6)
|
||||||
ropeTheta := c.Float("vision.rope.freq_base", 100000.0)
|
ropeTheta := c.Float("vision.rope.freq_base", 10000.0)
|
||||||
spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
|
spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
|
||||||
windowSize := int(c.Uint("vision.window_size", 112))
|
windowSize := int(c.Uint("vision.window_size", 112))
|
||||||
fullAttnBlocks := c.Ints("qwen25vl.vision.fullatt_block_indexes", []int32{7, 15, 23, 31})
|
fullAttnBlocks := c.Ints("qwen25vl.vision.fullatt_block_indexes", []int32{7, 15, 23, 31})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user