From a7240c6d636836f0bca01790038d7194f519604b Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Mon, 12 May 2025 16:08:42 -0700 Subject: [PATCH] models: remove unused qwen2vl processing (#10677) --- model/models/qwen2vl/imageproc.go | 74 ------------------------ model/models/qwen2vl/imageproc_test.go | 78 -------------------------- 2 files changed, 152 deletions(-) delete mode 100644 model/models/qwen2vl/imageproc.go delete mode 100644 model/models/qwen2vl/imageproc_test.go diff --git a/model/models/qwen2vl/imageproc.go b/model/models/qwen2vl/imageproc.go deleted file mode 100644 index 964b39072..000000000 --- a/model/models/qwen2vl/imageproc.go +++ /dev/null @@ -1,74 +0,0 @@ -package qwen2vl - -import ( - "fmt" - "image" - _ "image/jpeg" - _ "image/png" - "io" - "math" - - "github.com/ollama/ollama/model/imageproc" -) - -const ( - DefaultFactor = 28 - DefaultMinPixels = 56 * 56 - DefaultMaxPixels = 14 * 14 * 4 * 1280 -) - -// smartResize calculates the size of the image to resize to based on the -// factor, minPixels, and maxPixels. -func smartResize(size image.Point, factor, minPixels, maxPixels int) image.Point { - // 1. Both dimensions of size are divisible by factor - // 2. The area of the image is between minPixels and maxPixels - // 3. The aspect ratio of the image is as close to 1:1 as possible - - if size.Y < factor || size.X < factor { - panic("image is too small to resize") - } else if max(size.X, size.Y)/min(size.X, size.Y) > 200 { - panic("aspect ratio must be less than 200:1") - } - - f := float64(factor) - width := float64(size.X) - height := float64(size.Y) - - xBar := math.Round(width/f) * f - yBar := math.Round(height/f) * f - - if xBar*yBar > float64(maxPixels) { - beta := math.Sqrt(height * width / float64(maxPixels)) - xBar = math.Floor(width/beta/f) * f - yBar = math.Floor(height/beta/f) * f - } else if xBar*yBar < float64(minPixels) { - beta := math.Sqrt(float64(minPixels) / (height * width)) - xBar = math.Ceil(width*beta/f) * f - yBar = math.Ceil(height*beta/f) * f - } - - return image.Point{int(xBar), int(yBar)} -} - -func resizeImage(img image.Image, format string, size image.Point) image.Image { - if format == "png" { - img = imageproc.Composite(img) - } - - return imageproc.Resize(img, size, imageproc.ResizeBilinear) -} - -func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) { - img, format, err := image.Decode(imageData) - if err != nil { - return nil, nil, fmt.Errorf("failed to decode image: %w", err) - } - - size := smartResize(img.Bounds().Max, DefaultFactor, DefaultMinPixels, DefaultMaxPixels) - img = resizeImage(img, format, size) - - data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true) - - opts := map[string]any{} - return data, opts, nil -} diff --git a/model/models/qwen2vl/imageproc_test.go b/model/models/qwen2vl/imageproc_test.go deleted file mode 100644 index 817b61a5c..000000000 --- a/model/models/qwen2vl/imageproc_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package qwen2vl - -import ( - "bytes" - "image" - "image/png" - "testing" -) - -func TestSmartResize(t *testing.T) { - type smartResizeCase struct { - TestImage image.Image - Expected image.Point - } - - cases := []smartResizeCase{ - { - TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 1024)), - Expected: image.Point{980, 980}, - }, - { - TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)), - Expected: image.Point{1036, 756}, - }, - { - TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)), - Expected: image.Point{980, 980}, - }, - } - - for _, c := range cases { - b := c.TestImage.Bounds().Max - actual := smartResize(b, DefaultFactor, DefaultMinPixels, DefaultMaxPixels) - if actual != c.Expected { - t.Errorf("expected: %v, actual: %v", c.Expected, actual) - } - } -} - -func TestPreprocess(t *testing.T) { - type preprocessCase struct { - TestImage image.Image - ExpectedLen int - } - - cases := []preprocessCase{ - { - TestImage: image.NewRGBA(image.Rect(0, 0, 256, 256)), - ExpectedLen: 252 * 252 * 3 * 1, - }, - { - TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)), - ExpectedLen: 980 * 980 * 3 * 1, - }, - } - - for _, c := range cases { - var buf bytes.Buffer - err := png.Encode(&buf, c.TestImage) - if err != nil { - t.Fatal(err) - } - - imgData, _, err := Preprocess(&buf) - if err != nil { - t.Fatalf("error processing: %q", err) - } - - switch len(imgData) { - case 0: - t.Errorf("no image data returned") - case c.ExpectedLen: - // ok - default: - t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen) - } - } -}