diff --git a/convert/convert_qwen25vl.go b/convert/convert_qwen25vl.go
index 4fc64cf0c..c71a096aa 100644
--- a/convert/convert_qwen25vl.go
+++ b/convert/convert_qwen25vl.go
@@ -12,18 +12,18 @@ type qwen25VLModel struct {
 	qwen2Model
 
 	VisionModel struct {
-		Depth               uint32   `json:"depth"`
-		HiddenSize          uint32   `json:"hidden_size"`
-		NumHeads            uint32   `json:"num_heads"`
-		InChannels          uint32   `json:"in_chans"`
-		PatchSize           uint32   `json:"patch_size"`
-		SpatialMergeSize    uint32   `json:"spatial_merge_size"`
-		SpatialPatchSize    uint32   `json:"spatial_patch_size"`
-		WindowSize          uint32   `json:"window_size"`
-		RMSNormEps          float32  `json:"layer_norm_epsilon"`
-		RopeTheta           float32  `json:"rope_theta"`
-		FullAttentionBlocks []uint32 `json:"fullatt_block_indexes"`
-		TemporalPatchSize   uint32   `json:"temporal_patch_size"`
+		Depth               uint32  `json:"depth"`
+		HiddenSize          uint32  `json:"hidden_size"`
+		NumHeads            uint32  `json:"num_heads"`
+		InChannels          uint32  `json:"in_chans"`
+		PatchSize           uint32  `json:"patch_size"`
+		SpatialMergeSize    uint32  `json:"spatial_merge_size"`
+		SpatialPatchSize    uint32  `json:"spatial_patch_size"`
+		WindowSize          uint32  `json:"window_size"`
+		RMSNormEps          float32 `json:"layer_norm_epsilon"`
+		RopeTheta           float32 `json:"rope_theta"`
+		FullAttentionBlocks []int32 `json:"fullatt_block_indexes"`
+		TemporalPatchSize   uint32  `json:"temporal_patch_size"`
 	} `json:"vision_config"`
 }
 
diff --git a/model/models/qwen25vl/model.go b/model/models/qwen25vl/model.go
index 2d938b707..552c38cc0 100644
--- a/model/models/qwen25vl/model.go
+++ b/model/models/qwen25vl/model.go
@@ -69,7 +69,6 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er
 func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
 	var result []input.Input
 
-	// Get image token IDs from config
 	var (
 		imageToken       int32 = 151655
 		visionStartToken int32 = 151652