set default values for vision model in config
This commit is contained in:
parent
3308bff137
commit
684f0d9291
@ -40,7 +40,7 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
|
||||
case "yarn":
|
||||
kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
|
||||
kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
|
||||
case "mrope":
|
||||
case "mrope", "default":
|
||||
kv["qwen2.rope.mrope_section"] = q.RopeScaling.MropeSection
|
||||
default:
|
||||
panic("unknown rope scaling type")
|
||||
|
@ -39,19 +39,24 @@ func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV {
|
||||
}
|
||||
}
|
||||
|
||||
kv["qwen25vl.vision.block_count"] = q.VisionModel.Depth
|
||||
if q.VisionModel.FullAttentionBlocks == nil {
|
||||
kv["qwen25vl.vision.fullatt_block_indexes"] = []int32{7, 15, 23, 31}
|
||||
}
|
||||
|
||||
// Use cmp.Or directly in the KV assignments to provide defaults for missing values
|
||||
kv["qwen25vl.vision.block_count"] = cmp.Or(q.VisionModel.Depth, 32)
|
||||
kv["qwen25vl.vision.embedding_length"] = q.VisionModel.HiddenSize
|
||||
kv["qwen25vl.vision.attention.head_count"] = q.VisionModel.NumHeads
|
||||
kv["qwen25vl.vision.attention.head_count"] = cmp.Or(q.VisionModel.NumHeads, 16)
|
||||
kv["qwen25vl.vision.num_channels"] = q.VisionModel.InChannels
|
||||
kv["qwen25vl.vision.patch_size"] = q.VisionModel.PatchSize
|
||||
kv["qwen25vl.vision.spatial_merge_size"] = q.VisionModel.SpatialMergeSize
|
||||
kv["qwen25vl.vision.patch_size"] = cmp.Or(q.VisionModel.PatchSize, 14)
|
||||
kv["qwen25vl.vision.spatial_merge_size"] = cmp.Or(q.VisionModel.SpatialMergeSize, 2)
|
||||
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
||||
kv["qwen25vl.vision.window_size"] = q.VisionModel.WindowSize
|
||||
kv["qwen25vl.vision.window_size"] = cmp.Or(q.VisionModel.WindowSize, 112)
|
||||
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
||||
// RoPE theta increased from 1e4 to 1e5 to compensate for numerical differences between tensor operations; empirically produces better results.
|
||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4)
|
||||
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
||||
kv["qwen25vl.vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
|
||||
kv["qwen25vl.vision.temporal_patch_size"] = cmp.Or(q.VisionModel.TemporalPatchSize, 2)
|
||||
|
||||
return kv
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user