From f63e62e546f7378ba42235b89d98d17fc2b8323c Mon Sep 17 00:00:00 2001 From: jmorganca Date: Tue, 11 Mar 2025 12:01:57 +0100 Subject: [PATCH] reduce kernel size, add TODO for loading from config --- model/models/gemma3/model.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/model/models/gemma3/model.go b/model/models/gemma3/model.go index 7418bb12f..9aaa974ab 100644 --- a/model/models/gemma3/model.go +++ b/model/models/gemma3/model.go @@ -90,7 +90,11 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, er visionOutputs := m.VisionModel.Forward(ctx, pixelValues) visionOutputs = visionOutputs.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx) patchesPerImage := m.ImageProcessor.imageSize / m.ImageProcessor.patchSize - kernelSize := patchesPerImage * patchesPerImage / 256 + + // TODO (jmorganca): read this from the model config + // it should instead be math.Sqrt(tokens per image) + tokensPerSide := 8 + kernelSize := patchesPerImage / tokensPerSide visionOutputs = visionOutputs.AvgPool1D(ctx, kernelSize, kernelSize, 0) visionOutputs = visionOutputs.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)