diff --git a/model/models/qwen25vl/model_vision.go b/model/models/qwen25vl/model_vision.go index 969d255f1..2c839b3bd 100644 --- a/model/models/qwen25vl/model_vision.go +++ b/model/models/qwen25vl/model_vision.go @@ -245,7 +245,7 @@ func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid) } } - hiddenStates = m.PatchMerger.Forward(ctx, hiddenStates, m.VisionModelOptions) + hiddenStates = m.PatchMerger.Forward(ctx, hiddenStates, m.VisionModelOptions) reverseWindowIndex := windowIndex.Argsort(ctx) return hiddenStates.Rows(ctx, reverseWindowIndex) } diff --git a/model/models/qwen25vl/process_image.go b/model/models/qwen25vl/process_image.go index 66294e155..a725fe85d 100644 --- a/model/models/qwen25vl/process_image.go +++ b/model/models/qwen25vl/process_image.go @@ -131,22 +131,22 @@ func (p *ImageProcessor) createPatches(pixels []float32, height, width int, grid // in the format expected by the forward pass patchIndex := 0 - for t := 0; t < grid.Temporal; t++ { + for range grid.Temporal { // For each patch in the grid for h := 0; h < grid.Height; h += mergeSize { for w := 0; w < grid.Width; w += mergeSize { // Handle the 2x2 merged patches - for mh := 0; mh < mergeSize; mh++ { - for mw := 0; mw < mergeSize; mw++ { + for mh := range mergeSize { + for mw := range mergeSize { // For each pixel in the patch - for py := 0; py < patchSize; py++ { - for px := 0; px < patchSize; px++ { + for py := range patchSize { + for px := range patchSize { // Calculate source coordinates y := (h+mh)*patchSize + py x := (w+mw)*patchSize + px // For each channel - for c := 0; c < channels; c++ { + for c := range channels { // Channel-first format (CHW) srcIdx := c*height*width + y*width + x @@ -167,9 +167,9 @@ func (p *ImageProcessor) createPatches(pixels []float32, height, width int, grid // Handle temporal dimension padding (if needed) for tp := 1; tp < temporalPatchSize; tp++ { - for py := 0; py < patchSize; py++ { - for px := 0; px < patchSize; px++ { - for c := 0; c < channels; c++ { + for py := range patchSize { + for px := range patchSize { + for c := range channels { srcIdx := patchIndex*patchDim + (c * temporalPatchSize * patchSize * patchSize) + (0 * patchSize * patchSize) + // first temporal frame diff --git a/model/models/qwen2vl/imageproc.go b/model/models/qwen2vl/imageproc.go index 82abf7321..964b39072 100644 --- a/model/models/qwen2vl/imageproc.go +++ b/model/models/qwen2vl/imageproc.go @@ -14,7 +14,7 @@ import ( const ( DefaultFactor = 28 DefaultMinPixels = 56 * 56 - DefaultMaxPixels = 14 * 14 * 4 * 1280 // TODO: might need to change + DefaultMaxPixels = 14 * 14 * 4 * 1280 ) // smartResize calculates the size of the image to resize to based on the