add comment explaining rope theta
This commit is contained in:
parent
45f96e898d
commit
75441c56f3
@@ -48,7 +48,7 @@ func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV {
|
||||
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
||||
kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize
|
||||
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e5)
|
||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e5) // note: other implementations use 1e4, but we have seen better results with 1e5
|
||||
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
||||
kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user