Compare commits

..

11 Commits

Author SHA1 Message Date
ParthSareen
8ed95a4e96 add tests, organize, comments 2025-05-13 17:44:47 -07:00
ParthSareen
bc83789be9 tools package and utils 2025-05-13 17:44:45 -07:00
ParthSareen
4059b8db01 renaming and splitting stuff up 2025-05-13 17:43:15 -07:00
ParthSareen
b8b9c0c7cf checkpoint 2025-05-13 17:43:15 -07:00
ParthSareen
779547fcde checkpoint - cleanup still left, functionality setup 2025-05-13 17:43:15 -07:00
ParthSareen
6cb7494061 checkpoint for new parser
TODO:
- cleanup routes interface
- internal/external states
2025-05-13 17:43:15 -07:00
ParthSareen
a44734b030 add new parser, tests, and templates 2025-05-13 17:43:15 -07:00
ParthSareen
b5a982ecb0 wip 2025-05-13 17:43:15 -07:00
ParthSareen
516a540df7 jsonv2 decoder 2025-05-13 17:43:15 -07:00
ParthSareen
7f2f996cd6 server/routes: catch when JSON tool was used 2025-05-13 17:43:15 -07:00
ParthSareen
610054a234 model: support tools streaming and improve parsing 2025-05-13 17:43:15 -07:00
45 changed files with 1913 additions and 1948 deletions

View File

@ -191,8 +191,6 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &phi3Model{}
case "Qwen2ForCausalLM":
conv = &qwen2Model{}
case "Qwen2_5_VLForConditionalGeneration":
conv = &qwen25VLModel{}
case "BertModel":
conv = &bertModel{}
case "CohereForCausalLM":

View File

@ -15,7 +15,6 @@ type qwen2Model struct {
Type string `json:"type"`
Factor ropeFactor `json:"factor"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
MropeSection []int32 `json:"mrope_section"`
} `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"`
}
@ -40,8 +39,6 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
case "yarn":
kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
case "mrope", "default":
kv["qwen2.rope.mrope_section"] = q.RopeScaling.MropeSection
default:
panic("unknown rope scaling type")
}

View File

@ -1,102 +0,0 @@
package convert
import (
"cmp"
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
)
type qwen25VLModel struct {
qwen2Model
VisionModel struct {
Depth uint32 `json:"depth"`
HiddenSize uint32 `json:"hidden_size"`
NumHeads uint32 `json:"num_heads"`
InChannels uint32 `json:"in_chans"`
PatchSize uint32 `json:"patch_size"`
SpatialMergeSize uint32 `json:"spatial_merge_size"`
SpatialPatchSize uint32 `json:"spatial_patch_size"`
WindowSize uint32 `json:"window_size"`
RMSNormEps float32 `json:"layer_norm_epsilon"`
RopeTheta float32 `json:"rope_theta"`
FullAttentionBlocks []int32 `json:"fullatt_block_indexes"`
TemporalPatchSize uint32 `json:"temporal_patch_size"`
} `json:"vision_config"`
}
var _ ModelConverter = (*qwen25VLModel)(nil)
func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV {
kv := q.ModelParameters.KV(t)
kv["general.architecture"] = "qwen25vl"
for k, v := range q.qwen2Model.KV(t) {
if strings.HasPrefix(k, "qwen2.") {
kv[strings.Replace(k, "qwen2.", "qwen25vl.", 1)] = v
}
}
if q.VisionModel.FullAttentionBlocks == nil {
kv["qwen25vl.vision.fullatt_block_indexes"] = []int32{7, 15, 23, 31}
}
kv["qwen25vl.vision.block_count"] = cmp.Or(q.VisionModel.Depth, 32)
kv["qwen25vl.vision.embedding_length"] = q.VisionModel.HiddenSize
kv["qwen25vl.vision.attention.head_count"] = cmp.Or(q.VisionModel.NumHeads, 16)
kv["qwen25vl.vision.num_channels"] = q.VisionModel.InChannels
kv["qwen25vl.vision.patch_size"] = cmp.Or(q.VisionModel.PatchSize, 14)
kv["qwen25vl.vision.spatial_merge_size"] = cmp.Or(q.VisionModel.SpatialMergeSize, 2)
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
kv["qwen25vl.vision.window_size"] = cmp.Or(q.VisionModel.WindowSize, 112)
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4)
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
kv["qwen25vl.vision.temporal_patch_size"] = cmp.Or(q.VisionModel.TemporalPatchSize, 2)
return kv
}
func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
var out []*ggml.Tensor
for _, t := range ts {
if strings.Contains(t.Name(), "patch_embed.proj") {
for t := range splitDim(t, 2,
strings.NewReplacer("patch_embed.proj", "patch_embd_0"),
strings.NewReplacer("patch_embed.proj", "patch_embd_1"),
) {
t.Shape = slices.DeleteFunc(t.Shape, func(i uint64) bool { return i == 1 })
out = append(out, t)
}
} else if strings.Contains(t.Name(), "attn.qkv") {
out = append(out, slices.Collect(splitDim(t, 0,
strings.NewReplacer("attn.qkv", "attn_q"),
strings.NewReplacer("attn.qkv", "attn_k"),
strings.NewReplacer("attn.qkv", "attn_v"),
))...)
} else {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
}
return out
}
func (p *qwen25VLModel) Replacements() []string {
return append(
p.qwen2Model.Replacements(),
"visual", "v",
"blocks", "blk",
"attn.proj", "attn_out",
"norm1", "ln1",
"norm2", "ln2",
)
}

View File

@ -1,56 +0,0 @@
package convert
import (
"iter"
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
)
// splitDim splits a tensor along a specified dimension into multiple tensors. The dimension
// is split evenly based on the number of replacers provided.
func splitDim(t Tensor, dim int, replacers ...*strings.Replacer) iter.Seq[*ggml.Tensor] {
return func(yield func(*ggml.Tensor) bool) {
for i, replacer := range replacers {
shape := slices.Clone(t.Shape())
shape[dim] = shape[dim] / uint64(len(replacers))
slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
slice[dim] = tensor.S(i*int(shape[dim]), (i+1)*int(shape[dim]))
tt := t.Clone()
tt.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
dims := make([]int, len(shape))
for i := range shape {
dims[i] = int(shape[i])
}
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
t, err := t.Slice(slice...)
if err != nil {
return nil, err
}
t = tensor.Materialize(t)
// flatten tensor so it can be written as a vector
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
return nil, err
}
return native.VectorF32(t.(*tensor.Dense))
})
if !yield(&ggml.Tensor{
Name: replacer.Replace(t.Name()),
Kind: t.Kind(),
Shape: shape,
WriterTo: tt,
}) {
break
}
}
}
}

View File

@ -6,7 +6,6 @@ import (
"fmt"
"io"
"log/slog"
"math"
"slices"
"strings"
@ -127,7 +126,6 @@ func (kv KV) OllamaEngineRequired() bool {
"mistral3",
"llama4",
"mllama",
"qwen25vl",
}, kv.Architecture())
}
@ -651,29 +649,6 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
graphSize = 4 * (imageSize*imageSize*numChannels +
embeddingLength*patchSize +
numPatches*numPatches*headCount)
case "qwen25vl":
maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
temporalPatchSize := uint64(2)
// Calculate max possible patches based on max_pixels
maxHeight := uint64(math.Sqrt(float64(maxPixels)))
maxWidth := maxPixels / maxHeight
maxGridHeight := maxHeight / patchSize
maxGridWidth := maxWidth / patchSize
// Account for merged patches (2x2 grid)
numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
// Calculate graph size based on typical operations in ProcessImage and createPatches
graphSize = 4 * (maxPixels*numChannels + // Original image storage
// Normalized pixels
maxPixels*numChannels +
// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
// Self-attention calculations (similar to other architectures)
numPatches*numPatches*headCount +
// Additional buffer for processing
embeddingLength*numPatches)
case "llama4":
// vision graph is computed independently in the same schedule
// and is negligible compared to the worst case text graph

1
go.mod
View File

@ -19,6 +19,7 @@ require (
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
github.com/dlclark/regexp2 v1.11.4
github.com/emirpasic/gods/v2 v2.0.0-alpha
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1
github.com/google/go-cmp v0.6.0
github.com/mattn/go-runewidth v0.0.14
github.com/nlpodyssey/gopickle v0.3.0

2
go.sum
View File

@ -69,6 +69,8 @@ github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 h1:+VexzzkMLb1tnvpuQdGT/DicIRW7MN8ozsXqBMgp0Hk=
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=

View File

@ -1,277 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <git@mxy.ng>
Date: Thu, 1 May 2025 13:45:12 -0700
Subject: [PATCH] add argsort and cuda copy for i32
---
ggml/src/ggml-cpu/ops.cpp | 43 ++++++++++++++
ggml/src/ggml-cuda/argsort.cu | 102 +++++++++++++++++++++++++++++++++-
ggml/src/ggml-cuda/cpy.cu | 49 ++++++++++++++++
3 files changed, 192 insertions(+), 2 deletions(-)
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index becdae07..7a44b6cf 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -6890,6 +6890,45 @@ static void ggml_compute_forward_argsort_f32(
}
}
+static void ggml_compute_forward_argsort_i32(
+ const ggml_compute_params * params,
+ ggml_tensor * dst) {
+
+ const ggml_tensor * src0 = dst->src[0];
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ GGML_ASSERT(nb0 == sizeof(int32_t));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);
+
+ ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
+
+ for (int64_t i = ith; i < nr; i += nth) {
+ int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
+ const int32_t * src_data = (int32_t *)((char *) src0->data + i*nb01);
+
+ for (int64_t j = 0; j < ne0; j++) {
+ dst_data[j] = j;
+ }
+
+ // C doesn't have a functional sort, so we do a bubble sort instead
+ for (int64_t j = 0; j < ne0; j++) {
+ for (int64_t k = j + 1; k < ne0; k++) {
+ if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+ (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
+ int32_t tmp = dst_data[j];
+ dst_data[j] = dst_data[k];
+ dst_data[k] = tmp;
+ }
+ }
+ }
+ }
+}
+
void ggml_compute_forward_argsort(
const ggml_compute_params * params,
ggml_tensor * dst) {
@@ -6901,6 +6940,10 @@ void ggml_compute_forward_argsort(
{
ggml_compute_forward_argsort_f32(params, dst);
} break;
+ case GGML_TYPE_I32:
+ {
+ ggml_compute_forward_argsort_i32(params, dst);
+ } break;
default:
{
GGML_ABORT("fatal error");
diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu
index 607ded85..53b02634 100644
--- a/ggml/src/ggml-cuda/argsort.cu
+++ b/ggml/src/ggml-cuda/argsort.cu
@@ -85,13 +85,107 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
}
}
+
+template<ggml_sort_order order>
+static __global__ void k_argsort_i32_i32(const int32_t * x, int * dst, const int ncols, const int ncols_pad) {
+ extern __shared__ int shared_mem[];
+ int * indices = shared_mem;
+
+ const int tid = threadIdx.x;
+ const int row = blockIdx.y;
+
+ // Initialize all indices, handling the case where threads < ncols_pad
+ for (int i = tid; i < ncols_pad; i += blockDim.x) {
+ indices[i] = i < ncols ? i : 0; // Use 0 for padding indices
+ }
+ __syncthreads();
+
+ // Bitonic sort
+ for (int k = 2; k <= ncols_pad; k *= 2) {
+ for (int j = k/2; j > 0; j /= 2) {
+ for (int i = tid; i < ncols_pad; i += blockDim.x) {
+ const int ij = i ^ j;
+ if (ij > i) {
+ // Only compare values within the actual data range
+ if (i < ncols && ij < ncols) {
+ if ((i & k) == 0) {
+ if (order == GGML_SORT_ORDER_ASC) {
+ if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
+ int tmp = indices[i];
+ indices[i] = indices[ij];
+ indices[ij] = tmp;
+ }
+ } else {
+ if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
+ int tmp = indices[i];
+ indices[i] = indices[ij];
+ indices[ij] = tmp;
+ }
+ }
+ } else {
+ if (order == GGML_SORT_ORDER_ASC) {
+ if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
+ int tmp = indices[i];
+ indices[i] = indices[ij];
+ indices[ij] = tmp;
+ }
+ } else {
+ if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
+ int tmp = indices[i];
+ indices[i] = indices[ij];
+ indices[ij] = tmp;
+ }
+ }
+ }
+ }
+ }
+ }
+ __syncthreads();
+ }
+ }
+
+ // Write sorted indices to output, only threads handling valid data
+ for (int i = tid; i < ncols; i += blockDim.x) {
+ dst[row * ncols + i] = indices[i];
+ }
+}
+
+static void argsort_i32_i32_cuda(const int32_t * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) {
+ // Bitonic sort requires ncols to be power of 2
+ const int ncols_pad = next_power_of_2(ncols);
+
+ // Ensure thread count doesn't exceed maximum (typically 1024)
+ const int max_threads = 1024; // This is the typical max for most GPUs
+ const int threads_per_block = ncols_pad > max_threads ? max_threads : ncols_pad;
+
+ const dim3 block_dims(threads_per_block, 1, 1);
+ const dim3 block_nums(1, nrows, 1);
+ const size_t shared_mem = ncols_pad * sizeof(int);
+
+ // Check if shared memory size is within limits
+ const size_t max_shared_mem = ggml_cuda_info().devices[ggml_cuda_get_device()].smpb;
+
+ // Instead of logging an error, use GGML_ASSERT with a descriptive message
+ GGML_ASSERT(shared_mem <= max_shared_mem && "argsort: required shared memory exceeds device limit");
+
+ // Launch kernels with the updated thread configuration
+ if (order == GGML_SORT_ORDER_ASC) {
+ k_argsort_i32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
+ } else if (order == GGML_SORT_ORDER_DESC) {
+ k_argsort_i32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
+ } else {
+ GGML_ABORT("fatal error");
+ }
+}
+
+
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
+ GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_I32);
GGML_ASSERT( dst->type == GGML_TYPE_I32);
GGML_ASSERT(ggml_is_contiguous(src0));
@@ -100,5 +194,9 @@ void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
- argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
+ if (src0->type == GGML_TYPE_I32) {
+ argsort_i32_i32_cuda((const int32_t *)src0_d, (int *)dst_d, ncols, nrows, order, stream);
+ } else {
+ argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
+ }
}
diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu
index 2d46176e..47383486 100644
--- a/ggml/src/ggml-cuda/cpy.cu
+++ b/ggml/src/ggml-cuda/cpy.cu
@@ -38,6 +38,13 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
*dsti = *xi;
}
+static __device__ void cpy_1_i32_i32(const char * cxi, char * cdsti) {
+ const int32_t * xi = (const int32_t *) cxi;
+ int32_t * dsti = (int32_t *) cdsti;
+
+ *dsti = *xi;
+}
+
template <cpy_kernel_t cpy_1>
static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
@@ -68,6 +75,44 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
cpy_1(cx + x_offset, cdst + dst_offset);
}
+// First, add this template function after the other template functions
+template <cpy_kernel_t cpy_1>
+static __global__ void cpy_i32_i32(const char * cx, char * cdst, const int ne,
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
+ const int nb12, const int nb13) {
+ const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;
+
+ if (i >= ne) {
+ return;
+ }
+
+ const int64_t i03 = i/(ne00 * ne01 * ne02);
+ const int64_t i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
+ const int64_t i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
+ const int64_t i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
+ const int64_t x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
+
+ const int64_t i13 = i/(ne10 * ne11 * ne12);
+ const int64_t i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
+ const int64_t i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
+ const int64_t i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
+ const int64_t dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13;
+
+ cpy_1(cx + x_offset, cdst + dst_offset);
+}
+
+// Then modify the ggml_cpy_i32_i32_cuda function to use the new template
+static void ggml_cpy_i32_i32_cuda(
+ const char * cx, char * cdst, const int ne,
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream, char ** cdst_indirect, int graph_cpynode_index) {
+
+ const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
+ cpy_i32_i32<cpy_1_i32_i32><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
+ (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
+}
+
static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
const float * xi = (const float *) cxi;
block_q8_0 * dsti = (block_q8_0 *) cdsti;
@@ -631,6 +676,8 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
+ } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
+ ggml_cpy_i32_i32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
} else {
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));
@@ -686,6 +733,8 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
return (void*) cpy_f32_f16<cpy_1_f32_f16>;
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
return (void*) cpy_f32_f16<cpy_1_f16_f32>;
+ } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
+ return (void*) cpy_i32_i32<cpy_1_i32_i32>;
} else {
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));

View File

@ -119,21 +119,6 @@ type Context interface {
Layer(int) Context
}
// RopeOptions contains optional parameters for RoPE function
type RopeOptions struct {
OriginalContextLen uint32
}
// RopeOption defines a function that modifies RopeOpts
type RopeOption func(*RopeOptions)
// WithContextLen sets a custom context length
func WithContextLen(len uint32) RopeOption {
return func(opts *RopeOptions) {
opts.OriginalContextLen = len
}
}
type Tensor interface {
Dim(n int) int
Stride(n int) int
@ -159,7 +144,7 @@ type Tensor interface {
AvgPool2D(ctx Context, k, s int, p float32) Tensor
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32, options ...RopeOption) Tensor
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32) Tensor
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
Sin(ctx Context) Tensor
@ -187,7 +172,6 @@ type Tensor interface {
Duplicate(ctx Context) Tensor
TopK(ctx Context, k int) Tensor
Argsort(ctx Context) Tensor
}
// ScaledDotProductAttention implements a fused attention

View File

@ -1060,17 +1060,7 @@ const (
ropeTypeVision C.int = 24
)
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim, ropeType uint32, ropeBase, ropeScale float32, options ...ml.RopeOption) ml.Tensor {
// Default options
opts := &ml.RopeOptions{
OriginalContextLen: 131072,
}
// Apply any provided options
for _, option := range options {
option(opts)
}
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim, ropeType uint32, ropeBase, ropeScale float32) ml.Tensor {
if ropeFactors == nil {
ropeFactors = &Tensor{b: t.b}
}
@ -1083,19 +1073,16 @@ func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDi
return &Tensor{
b: t.b,
t: C.ggml_rope_ext(
ctx.(*Context).ctx,
dequant,
positionIDs.(*Tensor).t,
ropeFactors.(*Tensor).t,
ctx.(*Context).ctx, dequant, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
C.int(ropeDim),
C.int(ropeType),
C.int(opts.OriginalContextLen),
131072, // YaRN n_ctx_train
C.float(ropeBase),
C.float(ropeScale),
C.float(0.0),
C.float(1.0),
C.float(32.0),
C.float(1.0),
0., // YaRN ext_factor
1., // YaRN attn_factor
32., // YaRN beta_fast
1., // YaRN beta_slow
),
}
}
@ -1189,10 +1176,3 @@ func (t *Tensor) TopK(ctx ml.Context, k int) ml.Tensor {
t: C.ggml_top_k(ctx.(*Context).ctx, t.t, C.int(k)),
}
}
func (t *Tensor) Argsort(ctx ml.Context) ml.Tensor {
return &Tensor{
b: t.b,
t: C.ggml_argsort(ctx.(*Context).ctx, t.t, C.GGML_SORT_ORDER_ASC),
}
}

View File

@ -6822,45 +6822,6 @@ static void ggml_compute_forward_argsort_f32(
}
}
static void ggml_compute_forward_argsort_i32(
const ggml_compute_params * params,
ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(nb0 == sizeof(int32_t));
const int ith = params->ith;
const int nth = params->nth;
const int64_t nr = ggml_nrows(src0);
ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
for (int64_t i = ith; i < nr; i += nth) {
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
const int32_t * src_data = (int32_t *)((char *) src0->data + i*nb01);
for (int64_t j = 0; j < ne0; j++) {
dst_data[j] = j;
}
// C doesn't have a functional sort, so we do a bubble sort instead
for (int64_t j = 0; j < ne0; j++) {
for (int64_t k = j + 1; k < ne0; k++) {
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
int32_t tmp = dst_data[j];
dst_data[j] = dst_data[k];
dst_data[k] = tmp;
}
}
}
}
}
void ggml_compute_forward_argsort(
const ggml_compute_params * params,
ggml_tensor * dst) {
@ -6872,10 +6833,6 @@ void ggml_compute_forward_argsort(
{
ggml_compute_forward_argsort_f32(params, dst);
} break;
case GGML_TYPE_I32:
{
ggml_compute_forward_argsort_i32(params, dst);
} break;
default:
{
GGML_ABORT("fatal error");

View File

@ -85,107 +85,13 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
}
}
template<ggml_sort_order order>
static __global__ void k_argsort_i32_i32(const int32_t * x, int * dst, const int ncols, const int ncols_pad) {
extern __shared__ int shared_mem[];
int * indices = shared_mem;
const int tid = threadIdx.x;
const int row = blockIdx.y;
// Initialize all indices, handling the case where threads < ncols_pad
for (int i = tid; i < ncols_pad; i += blockDim.x) {
indices[i] = i < ncols ? i : 0; // Use 0 for padding indices
}
__syncthreads();
// Bitonic sort
for (int k = 2; k <= ncols_pad; k *= 2) {
for (int j = k/2; j > 0; j /= 2) {
for (int i = tid; i < ncols_pad; i += blockDim.x) {
const int ij = i ^ j;
if (ij > i) {
// Only compare values within the actual data range
if (i < ncols && ij < ncols) {
if ((i & k) == 0) {
if (order == GGML_SORT_ORDER_ASC) {
if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
int tmp = indices[i];
indices[i] = indices[ij];
indices[ij] = tmp;
}
} else {
if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
int tmp = indices[i];
indices[i] = indices[ij];
indices[ij] = tmp;
}
}
} else {
if (order == GGML_SORT_ORDER_ASC) {
if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
int tmp = indices[i];
indices[i] = indices[ij];
indices[ij] = tmp;
}
} else {
if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
int tmp = indices[i];
indices[i] = indices[ij];
indices[ij] = tmp;
}
}
}
}
}
}
__syncthreads();
}
}
// Write sorted indices to output, only threads handling valid data
for (int i = tid; i < ncols; i += blockDim.x) {
dst[row * ncols + i] = indices[i];
}
}
static void argsort_i32_i32_cuda(const int32_t * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) {
// Bitonic sort requires ncols to be power of 2
const int ncols_pad = next_power_of_2(ncols);
// Ensure thread count doesn't exceed maximum (typically 1024)
const int max_threads = 1024; // This is the typical max for most GPUs
const int threads_per_block = ncols_pad > max_threads ? max_threads : ncols_pad;
const dim3 block_dims(threads_per_block, 1, 1);
const dim3 block_nums(1, nrows, 1);
const size_t shared_mem = ncols_pad * sizeof(int);
// Check if shared memory size is within limits
const size_t max_shared_mem = ggml_cuda_info().devices[ggml_cuda_get_device()].smpb;
// Instead of logging an error, use GGML_ASSERT with a descriptive message
GGML_ASSERT(shared_mem <= max_shared_mem && "argsort: required shared memory exceeds device limit");
// Launch kernels with the updated thread configuration
if (order == GGML_SORT_ORDER_ASC) {
k_argsort_i32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
} else if (order == GGML_SORT_ORDER_DESC) {
k_argsort_i32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
} else {
GGML_ABORT("fatal error");
}
}
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_I32);
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_I32);
GGML_ASSERT(ggml_is_contiguous(src0));
@ -194,9 +100,5 @@ void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
if (src0->type == GGML_TYPE_I32) {
argsort_i32_i32_cuda((const int32_t *)src0_d, (int *)dst_d, ncols, nrows, order, stream);
} else {
argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
}
argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
}

View File

@ -38,13 +38,6 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
*dsti = *xi;
}
static __device__ void cpy_1_i32_i32(const char * cxi, char * cdsti) {
const int32_t * xi = (const int32_t *) cxi;
int32_t * dsti = (int32_t *) cdsti;
*dsti = *xi;
}
template <cpy_kernel_t cpy_1>
static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
@ -75,44 +68,6 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
cpy_1(cx + x_offset, cdst + dst_offset);
}
// First, add this template function after the other template functions
template <cpy_kernel_t cpy_1>
static __global__ void cpy_i32_i32(const char * cx, char * cdst, const int ne,
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
const int nb12, const int nb13) {
const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= ne) {
return;
}
const int64_t i03 = i/(ne00 * ne01 * ne02);
const int64_t i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
const int64_t i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
const int64_t i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
const int64_t x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
const int64_t i13 = i/(ne10 * ne11 * ne12);
const int64_t i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
const int64_t i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
const int64_t i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
const int64_t dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13;
cpy_1(cx + x_offset, cdst + dst_offset);
}
// Then modify the ggml_cpy_i32_i32_cuda function to use the new template
static void ggml_cpy_i32_i32_cuda(
const char * cx, char * cdst, const int ne,
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream, char ** cdst_indirect, int graph_cpynode_index) {
const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
cpy_i32_i32<cpy_1_i32_i32><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
}
static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
const float * xi = (const float *) cxi;
block_q8_0 * dsti = (block_q8_0 *) cdsti;
@ -678,8 +633,6 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
ggml_cpy_i32_i32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
} else {
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));
@ -735,8 +688,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
return (void*) cpy_f32_f16<cpy_1_f32_f16>;
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
return (void*) cpy_f32_f16<cpy_1_f16_f32>;
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
return (void*) cpy_i32_i32<cpy_1_i32_i32>;
} else {
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));

View File

@ -7,5 +7,4 @@ import (
_ "github.com/ollama/ollama/model/models/llama4"
_ "github.com/ollama/ollama/model/models/mistral3"
_ "github.com/ollama/ollama/model/models/mllama"
_ "github.com/ollama/ollama/model/models/qwen25vl"
)

View File

@ -1,187 +0,0 @@
package qwen25vl
import (
"bytes"
"fmt"
"image"
"slices"
"sync"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Model struct {
model.Base
model.BytePairEncoding
*TextModel
*VisionModel `gguf:"v,vision"`
ImageProcessor
}
// Implement MultimodalProcessor interface
var _ model.MultimodalProcessor = (*Model)(nil)
func New(c fs.Config) (model.Model, error) {
m := &Model{
BytePairEncoding: model.NewBytePairEncoding(
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
&model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")),
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")),
AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
},
),
TextModel: NewTextModel(c),
VisionModel: newVisionModel(c),
ImageProcessor: newImageProcessor(c),
}
m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
return m, nil
}
func (m *Model) PixelValues(ctx ml.Context, multimodalData []byte) (ml.Tensor, *Grid, error) {
image, _, err := image.Decode(bytes.NewReader(multimodalData))
if err != nil {
return nil, nil, err
}
f32s, grid, err := m.ImageProcessor.ProcessImage(image)
if err != nil {
return nil, nil, err
}
// Calculate tensor dimensions
patchDim := m.ImageProcessor.numChannels * m.ImageProcessor.temporalPatchSize *
m.ImageProcessor.patchSize * m.ImageProcessor.patchSize
numPatches := grid.Temporal * grid.Height * grid.Width
pixelValues, err := ctx.Input().FromFloatSlice(f32s, patchDim, numPatches)
if err != nil {
return nil, nil, fmt.Errorf("failed to create tensor from image: %w", err)
}
return pixelValues, grid, nil
}
func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
if len(m.VisionModel.Layers) == 0 {
return nil, model.ErrNoVisionModel
}
pixels, grid, err := m.PixelValues(ctx, multimodalData)
if err != nil {
return nil, err
}
visionOutputs := m.VisionModel.Forward(ctx, pixels, grid)
return &chunks{Model: m, Tensor: visionOutputs}, nil
}
type chunks struct {
*Model
ml.Tensor
dataOnce sync.Once
data []float32
}
type chunk struct {
*chunks
s, n int
}
func (r *chunk) floats() []float32 {
r.dataOnce.Do(func() {
temp := r.Backend().NewContext()
defer temp.Close()
temp.Forward(r.Tensor).Compute(r.Tensor)
r.data = r.Floats()
})
return r.data[r.s*r.Dim(0) : (r.s+r.n)*r.Dim(0)]
}
// PostTokenize arranges Qwen-2.5-VL's inputs for the forward pass
func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
var result []input.Input
var (
imageToken int32 = 151655
visionStartToken int32 = 151652
visionEndToken int32 = 151653
)
nImg := 0
for _, inp := range inputs {
if inp.Multimodal == nil {
// If not a multimodal input, add it to the result unchanged
result = append(result, inp)
} else {
// Adding the 'Picture' prefix is a hack, at the time of writing there is no way to prefix
// the image tokens with a prompt, so we add a prefix here
nImg++
pre, err := m.Encode(fmt.Sprintf(" Picture %d: ", nImg), true)
if err != nil {
return nil, fmt.Errorf("failed to encode image prompt: %w", err)
}
for i := range pre {
result = append(result, input.Input{Token: pre[i]})
}
// This is an image token with multimodal data
chunksData := inp.Multimodal.(*chunks)
patchesPerChunk := chunksData.Dim(1)
// First add the vision start token
result = append(result, input.Input{Token: visionStartToken, SameBatch: patchesPerChunk + 2})
// Add the image token with the multimodal tensor data at the first position
// Create a chunk with proper s and n values
result = append(result, input.Input{
Token: imageToken,
Multimodal: &chunk{chunks: chunksData, s: 0, n: patchesPerChunk},
MultimodalHash: inp.MultimodalHash,
SameBatch: patchesPerChunk,
})
// Add the placeholder tokens for the remaining positions (tokensPerGrid-1)
result = append(result, slices.Repeat([]input.Input{{Token: imageToken}}, patchesPerChunk-1)...)
result = append(result, input.Input{Token: visionEndToken})
}
}
return result, nil
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions, err := ctx.Input().FromIntSlice(batch.Positions, len(batch.Positions))
if err != nil {
return nil, err
}
outputs, err := ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
if err != nil {
return nil, err
}
return m.TextModel.Forward(ctx, batch.Inputs, positions, outputs, batch, m.Cache)
}
func init() {
model.Register("qwen25vl", New)
}

View File

@ -1,155 +0,0 @@
package qwen25vl
import (
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/model/input"
)
type TextOptions struct {
ctxLen, hiddenSize, numHeads, numKVHeads int
eps, ropeBase, ropeScale float32
ropeDim, defaultContextLen uint32
}
type TextModel struct {
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
*TextOptions
}
func NewTextModel(c fs.Config) *TextModel {
m := TextModel{
Layers: make([]Layer, c.Uint("block_count")),
TextOptions: &TextOptions{
ctxLen: int(c.Uint("context_length")),
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: int(c.Uint("attention.head_count_kv")),
eps: c.Float("attention.layer_norm_rms_epsilon"),
ropeBase: c.Float("rope.freq_base"),
ropeScale: c.Float("rope.freq_scale", 1),
ropeDim: c.Uint("rope.dimension_count", 128),
defaultContextLen: c.Uint("context_length", 128000),
},
}
return &m
}
// SelfAttention implements the multi-head self-attention mechanism
// with separate projections for query, key, value and output transformations
type SelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
}
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
batchSize := hiddenState.Dim(1)
headDim := opts.hiddenSize / opts.numHeads
q := sa.Query.Forward(ctx, hiddenState)
q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
q = q.RoPE(ctx, positionIDs, nil, opts.ropeDim, 2, opts.ropeBase, opts.ropeScale, ml.WithContextLen(opts.defaultContextLen))
k := sa.Key.Forward(ctx, hiddenState)
k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
k = k.RoPE(ctx, positionIDs, nil, opts.ropeDim, 2, opts.ropeBase, opts.ropeScale, ml.WithContextLen(opts.defaultContextLen))
v := sa.Value.Forward(ctx, hiddenState)
v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
scaleFactor := 1.0 / math.Sqrt(float64(headDim))
kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize)
return sa.Output.Forward(ctx, kqv)
}
// Shift applies rotary position embeddings to the key tensor for causal attention caching
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
return key.RoPE(ctx, shift, nil, m.ropeDim, 2, m.ropeBase, m.ropeScale, ml.WithContextLen(m.defaultContextLen)), nil
}
// MLP implements the feed-forward network component with SwiGLU activation
type MLP struct {
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
Gate *nn.Linear `gguf:"ffn_gate"`
}
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextOptions) ml.Tensor {
// Apply SwiGLU activation gating
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenState))
// Project back to hidden dimension
return mlp.Down.Forward(ctx, hiddenState)
}
// Layer represents a single transformer layer combining self-attention and feed-forward components
type Layer struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
SelfAttention *SelfAttention
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
MLP *MLP
}
func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
// Self-attention branch with residual connection
residual := hiddenState
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)
// In the final layer (outputs != nil), optimize by pruning to just the token positions
// we need logits for.
if outputs != nil {
hiddenState = hiddenState.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
hiddenState = hiddenState.Add(ctx, residual)
// Feed-forward branch with residual connection
residual = hiddenState
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
return hiddenState.Add(ctx, residual)
}
func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) (ml.Tensor, error) {
// Initial token embedding
hiddenStates := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)
for _, mi := range batch.Multimodal {
f32s := mi.Multimodal.(*chunk).floats()
img, err := ctx.Input().FromFloatSlice(f32s, len(f32s)/m.hiddenSize, m.hiddenSize)
if err != nil {
panic(err)
}
ctx.Forward(img.Copy(ctx, hiddenStates.View(ctx, mi.Index*hiddenStates.Stride(1), img.Dim(0)*img.Dim(1))))
}
// Process through transformer layers
for i, layer := range m.Layers {
cache.SetLayer(i)
var lastLayerOutputs ml.Tensor
if i == len(m.Layers)-1 {
lastLayerOutputs = outputs
}
hiddenStates = layer.Forward(ctx, hiddenStates, positions, lastLayerOutputs, cache, m.TextOptions)
}
hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
return m.Output.Forward(ctx, hiddenStates), nil
}

View File

@ -1,391 +0,0 @@
package qwen25vl
import (
"fmt"
"math"
"slices"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
)
// We only support batch size of 1
var batchSize int = 1
func rotateHalf(ctx ml.Context, t ml.Tensor) ml.Tensor {
x1 := t.View(ctx, 0, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3))
x2 := t.View(ctx, t.Stride(0)*t.Dim(0)/2, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3)).Contiguous(ctx)
return x2.Neg(ctx).Concat(ctx, x1, 0)
}
func applyRotaryPositionalEmbedding(ctx ml.Context, t, cos, sin ml.Tensor) ml.Tensor {
return t.Mul(ctx, cos).Add(ctx, rotateHalf(ctx, t).Mul(ctx, sin))
}
func blockDiagonalMask(ctx ml.Context, seqLength int, bounds []int, numHeads int) ml.Tensor {
// Create a flat slice for the mask (all -inf initially to block all attention)
flat := make([]float32, seqLength*seqLength)
for i := range flat {
flat[i] = float32(math.Inf(-1)) // Negative infinity to block attention
}
// Fill in the mask with zeros for tokens that CAN attend to each other
for i := 1; i < len(bounds); i++ {
start := bounds[i-1]
end := bounds[i]
// Enable attention within this sequence block by setting values to 0
for row := start; row < end; row++ {
for col := start; col < end; col++ {
idx := row*seqLength + col
flat[idx] = 0.0 // 0 allows attention, -inf blocks it
}
}
}
mask, err := ctx.Input().FromFloatSlice(flat, seqLength, seqLength)
if err != nil {
panic(err)
}
// Reshape to match [seqLength, seqLength, 1] for broadcasting
mask = mask.Reshape(ctx, seqLength, seqLength, 1)
return mask
}
type VisionSelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_out"`
}
func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
query := sa.Query.Forward(ctx, hiddenStates)
key := sa.Key.Forward(ctx, hiddenStates)
value := sa.Value.Forward(ctx, hiddenStates)
query = query.Reshape(ctx, opts.headDim, opts.numHeads, query.Dim(1), batchSize)
key = key.Reshape(ctx, opts.headDim, opts.numHeads, key.Dim(1), batchSize)
value = value.Reshape(ctx, opts.headDim, opts.numHeads, value.Dim(1), batchSize)
query = applyRotaryPositionalEmbedding(ctx, query, cos, sin)
key = applyRotaryPositionalEmbedding(ctx, key, cos, sin)
// Scale factor for scaled dot-product attention
scale := 1.0 / math.Sqrt(float64(opts.headDim))
// Scaled dot-product attention
query = query.Permute(ctx, 0, 2, 1, 3)
key = key.Permute(ctx, 0, 2, 1, 3)
value = value.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
kq := key.MulmatFullPrec(ctx, query)
kq = kq.Scale(ctx, scale)
if mask != nil {
kq = kq.Add(ctx, mask)
}
kq = kq.Softmax(ctx)
kqv := value.Mulmat(ctx, kq)
attention := kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
attention = attention.Reshape(ctx, opts.hiddenSize, attention.Dim(2), batchSize)
return sa.Output.Forward(ctx, attention)
}
// VisionMLP implements the multi-layer perceptron
type VisionMLP struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *VisionModelOptions) ml.Tensor {
// Using activation as specified in config (likely GELU or SiLU/Swish)
gateOutput := mlp.Gate.Forward(ctx, hiddenStates)
upOutput := mlp.Up.Forward(ctx, hiddenStates)
hiddenStates = gateOutput.SILU(ctx).Mul(ctx, upOutput)
return mlp.Down.Forward(ctx, hiddenStates)
}
type VisionEncoderLayer struct {
Norm1 *nn.RMSNorm `gguf:"ln1"`
SelfAttention *VisionSelfAttention
Norm2 *nn.RMSNorm `gguf:"ln2"`
MLP *VisionMLP
}
func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
residual := hiddenStates
hiddenStates = e.Norm1.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = e.SelfAttention.Forward(ctx, hiddenStates, cos, sin, mask, opts)
hiddenStates = hiddenStates.Add(ctx, residual)
residual = hiddenStates
hiddenStates = e.Norm2.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = e.MLP.Forward(ctx, hiddenStates, opts)
return hiddenStates.Add(ctx, residual)
}
// VisionModelOptions contains configuration options
type VisionModelOptions struct {
hiddenSize int
numHeads int
headDim int
patchSize int
numChannels int
eps float32
ropeTheta float32
spatialMergeSize int
windowSize int
fullAttnBlocks []int32
temporalPatchSize int
}
type PatchEmbedding struct {
PatchConv0 *nn.Conv2D `gguf:"patch_embd_0"`
PatchConv1 *nn.Conv2D `gguf:"patch_embd_1"`
}
func (pe *PatchEmbedding) Forward(ctx ml.Context, pixelValues ml.Tensor, opts *VisionModelOptions) ml.Tensor {
numPatches := pixelValues.Shape()[1]
// Reshape the input tensor to match the expected dimensions
pixelValues = pixelValues.Reshape(ctx, opts.patchSize*opts.patchSize, opts.temporalPatchSize, opts.numChannels, numPatches)
// Permute the tensor to bring the temporal dimension to the front
pixelValues = pixelValues.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
// Split the tensor into parts for the temporal convolutions
in0 := pixelValues.View(ctx, 0, 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
in0 = in0.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
in1 := pixelValues.View(ctx, pixelValues.Stride(0), 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
in1 = in1.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
s0, s1 := opts.patchSize, opts.patchSize // Use full stride
p0, p1 := 0, 0 // padding
d0, d1 := 1, 1 // dilation
out0 := pe.PatchConv0.Forward(ctx, in0, s0, s1, p0, p1, d0, d1)
out1 := pe.PatchConv1.Forward(ctx, in1, s0, s1, p0, p1, d0, d1)
// Add the outputs from the two temporal convolutions
out := out0.Add(ctx, out1)
// Reshape the output tensor to match the expected dimensions
return out.Reshape(ctx, opts.hiddenSize, numPatches)
}
// VisionPatchMerger implements patch merging for the Qwen vision model
type VisionPatchMerger struct {
LNQ *nn.RMSNorm `gguf:"ln_q"`
MLP0 *nn.Linear `gguf:"mlp.0"`
MLP2 *nn.Linear `gguf:"mlp.2"`
}
// Forward computes patch merging for the vision model
func (pm *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, opts *VisionModelOptions) ml.Tensor {
normalized := pm.LNQ.Forward(ctx, visionOutputs, opts.eps)
hiddenSize := visionOutputs.Dim(0) * (opts.spatialMergeSize * opts.spatialMergeSize)
// Reshape the normalized output to view the hidden size dimension
reshaped := normalized.Reshape(ctx, hiddenSize, normalized.Dim(1)/(opts.spatialMergeSize*opts.spatialMergeSize), batchSize)
hidden := pm.MLP0.Forward(ctx, reshaped)
activated := hidden.GELU(ctx)
output := pm.MLP2.Forward(ctx, activated)
return output
}
// VisionModel implements the Qwen vision model
type VisionModel struct {
PatchEmbedding *PatchEmbedding
Layers []VisionEncoderLayer `gguf:"blk"`
PatchMerger *VisionPatchMerger `gguf:"merger"`
*VisionModelOptions
}
// Forward computes the vision model for an input tensor
func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid) ml.Tensor {
// Extract patch embeddings
hiddenStates := m.PatchEmbedding.Forward(ctx, pixelValues, m.VisionModelOptions)
positionEmbedding := m.PositionalEmbedding(ctx, grid)
windowIndex, bounds := m.WindowIndex(ctx, grid)
spatialMergeUnit := m.spatialMergeSize * m.spatialMergeSize
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0)*spatialMergeUnit, hiddenStates.Dim(1)/spatialMergeUnit)
hiddenStates = hiddenStates.Rows(ctx, windowIndex)
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0)/spatialMergeUnit, hiddenStates.Dim(1)*spatialMergeUnit)
positionEmbedding = positionEmbedding.Reshape(ctx, positionEmbedding.Dim(0)*spatialMergeUnit, positionEmbedding.Dim(1)/spatialMergeUnit)
positionEmbedding = positionEmbedding.Rows(ctx, windowIndex)
positionEmbedding = positionEmbedding.Reshape(ctx, positionEmbedding.Dim(0)/spatialMergeUnit, positionEmbedding.Dim(1)*spatialMergeUnit)
positionEmbedding = positionEmbedding.Concat(ctx, positionEmbedding, 0)
cos, sin := positionEmbedding.Cos(ctx), positionEmbedding.Sin(ctx)
cos = cos.Reshape(ctx, cos.Dim(0), 1, cos.Dim(1))
sin = sin.Reshape(ctx, sin.Dim(0), 1, sin.Dim(1))
mask := blockDiagonalMask(ctx, hiddenStates.Dim(1), bounds, m.VisionModelOptions.numHeads)
// Apply encoder layers
for i, layer := range m.Layers {
if slices.Contains(m.fullAttnBlocks, int32(i)) {
hiddenStates = layer.Forward(ctx, hiddenStates, cos, sin, nil, m.VisionModelOptions)
} else {
hiddenStates = layer.Forward(
ctx,
hiddenStates,
cos,
sin,
mask,
m.VisionModelOptions,
)
}
}
hiddenStates = m.PatchMerger.Forward(ctx, hiddenStates, m.VisionModelOptions)
reverseWindowIndex := windowIndex.Argsort(ctx)
return hiddenStates.Rows(ctx, reverseWindowIndex)
}
// WindowIndex divides the grid into windows and returns:
// 1. A tensor containing flattened indices of all grid points organized by windows
// 2. A slice of boundaries that mark where each window's data begins and ends
// in the flattened representation, scaled by spatialMergeSize squared
//
// The boundaries slice always starts with 0 and contains cumulative ending
// positions for each window, allowing downstream processing to identify
// window boundaries in the tensor data.
func (m *VisionModel) WindowIndex(ctx ml.Context, grid *Grid) (ml.Tensor, []int) {
vitMergerWindowSize := m.windowSize / m.spatialMergeSize / m.patchSize
llmGridH := grid.Height / m.spatialMergeSize
llmGridW := grid.Width / m.spatialMergeSize
// Calculate window parameters
numWindowsH := int(math.Ceil(float64(llmGridH) / float64(vitMergerWindowSize)))
numWindowsW := int(math.Ceil(float64(llmGridW) / float64(vitMergerWindowSize)))
// Initialize index_new slice
var index []int32
// Initialize bounds with the first element as 0
bounds := []int{0}
totalSeqLen := 0
// Process each window without padding
for wh := range numWindowsH {
for ww := range numWindowsW {
// Calculate window boundaries
hStart := wh * vitMergerWindowSize
wStart := ww * vitMergerWindowSize
hEnd := min(hStart+vitMergerWindowSize, llmGridH)
wEnd := min(wStart+vitMergerWindowSize, llmGridW)
// Calculate sequence length for this window
seqLen := (hEnd - hStart) * (wEnd - wStart)
// Collect indices for this window
for h := hStart; h < hEnd; h++ {
for w := wStart; w < wEnd; w++ {
index = append(index, int32(h*llmGridW+w))
}
}
totalSeqLen += seqLen
bounds = append(bounds, totalSeqLen*(m.spatialMergeSize*m.spatialMergeSize)+bounds[0])
}
}
t, err := ctx.Input().FromIntSlice(index, len(index))
if err != nil {
panic(err)
}
return t, bounds
}
// PositionalEmbedding generates rotary position embeddings for attention mechanisms
func (m *VisionModel) PositionalEmbedding(ctx ml.Context, grid *Grid) ml.Tensor {
dim := m.headDim / 2
freq := dim / 2
theta := float64(m.ropeTheta)
merge := m.spatialMergeSize
// Create frequency patterns for position encoding
maxGridSize := max(grid.Height, grid.Width)
freqVals := make([]float32, freq*maxGridSize)
for i := range maxGridSize {
for j := range freq {
freqVals[i*freq+j] = float32(i) / float32(math.Pow(theta, float64(j*2)/float64(dim)))
}
}
freqs, err := ctx.Input().FromFloatSlice(freqVals, freq, maxGridSize)
if err != nil {
panic(fmt.Errorf("failed to create tensor from frequencies: %w", err))
}
// Create position coordinates (y,x pairs) for the grid
// In PyTorch: Equivalent to generating position ids with torch.arange()
coords := make([]int32, 0, grid.Height*grid.Width*2)
for y := range grid.Height {
for x := range grid.Width {
coords = append(coords, int32(y), int32(x))
}
}
pos, err := ctx.Input().FromIntSlice(coords, 2, grid.Width, grid.Height)
if err != nil {
panic(fmt.Errorf("failed to create tensor from positions: %w", err))
}
// Reshape and permute positions to match spatial merging pattern
pos = pos.Reshape(ctx, 2, grid.Width, merge, grid.Height/merge)
pos = pos.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
pos = pos.Reshape(ctx, 2, merge, merge, grid.Width/merge*grid.Height/merge)
pos = pos.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
pos = pos.Reshape(ctx, 2*merge*merge*grid.Width/merge*grid.Height/merge)
// Use position indices to look up corresponding frequency values
positionalEmbedding := freqs.Rows(ctx, pos)
positionalEmbedding = positionalEmbedding.Reshape(ctx, positionalEmbedding.Dim(0)*2, positionalEmbedding.Dim(1)/2)
return positionalEmbedding
}
// newVisionModel creates a new instance of the Qwen vision model
func newVisionModel(c fs.Config) *VisionModel {
patchSize := int(c.Uint("vision.patch_size", 14))
hiddenSize := int(c.Uint("vision.embedding_length", 1280))
numHeads := int(c.Uint("vision.attention.head_count", 16))
numChannels := int(c.Uint("vision.num_channels", 3))
eps := c.Float("vision.attention.layer_norm_epsilon", 1e-6)
ropeTheta := c.Float("vision.rope.freq_base", 10000.0)
spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
windowSize := int(c.Uint("vision.window_size", 112))
fullAttnBlocks := c.Ints("qwen25vl.vision.fullatt_block_indexes", []int32{7, 15, 23, 31})
temporalPatchSize := int(c.Uint("vision.temporal_patch_size", 2))
model := &VisionModel{
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
VisionModelOptions: &VisionModelOptions{
hiddenSize: hiddenSize,
numHeads: numHeads,
headDim: hiddenSize / numHeads,
patchSize: patchSize,
numChannels: numChannels,
eps: eps,
ropeTheta: ropeTheta,
spatialMergeSize: spatialMergeSize,
windowSize: windowSize,
temporalPatchSize: temporalPatchSize,
fullAttnBlocks: fullAttnBlocks,
},
}
return model
}

View File

@ -1,184 +0,0 @@
package qwen25vl
import (
"fmt"
"image"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/model/imageproc"
)
// ImageProcessor contains configuration for the Qwen 2.5 VL image processing
type ImageProcessor struct {
numChannels int
patchSize int
temporalPatchSize int
mergeSize int
minPixels int
maxPixels int
factor int
rescaleFactor float32
imageMean []float32
imageStd []float32
}
// newImageProcessor creates a new image processor with default values
func newImageProcessor(c fs.Config) ImageProcessor {
patchSize := int(c.Uint("vision.patch_size", 14))
mergeSize := int(c.Uint("vision.spatial_merge_size", 2))
return ImageProcessor{
numChannels: int(c.Uint("vision.num_channels", 3)), // not set
patchSize: patchSize,
temporalPatchSize: 2,
mergeSize: mergeSize,
minPixels: 56 * 56,
maxPixels: int(c.Uint("vision.max_pixels", 28*28*1280)), // 1MP limit
factor: patchSize * mergeSize,
rescaleFactor: 1.0 / 255.0,
imageMean: imageproc.ClipDefaultMean[:],
imageStd: imageproc.ClipDefaultSTD[:],
}
}
// SmartResize implements the smart resize algorithm
func (p *ImageProcessor) SmartResize(height, width int) (int, int) {
factor := p.factor
if height < factor || width < factor {
panic(fmt.Sprintf("height:%d or width:%d must be larger than factor:%d", height, width, factor))
} else if aspectRatio := max(height, width) / min(height, width); aspectRatio > 200 {
panic(fmt.Sprintf("absolute aspect ratio must be smaller than 200, got %v", aspectRatio))
}
round := func(x float64) int { return int(math.RoundToEven(x)) }
hBar := round(float64(height)/float64(factor)) * factor
wBar := round(float64(width)/float64(factor)) * factor
if hBar*wBar > p.maxPixels {
beta := math.Sqrt(float64(height*width) / float64(p.maxPixels))
hBar = int(math.Floor(float64(height)/beta/float64(factor))) * factor
wBar = int(math.Floor(float64(width)/beta/float64(factor))) * factor
} else if hBar*wBar < p.minPixels {
beta := math.Sqrt(float64(p.minPixels) / float64(height*width))
hBar = int(math.Ceil(float64(height)*beta/float64(factor))) * factor
wBar = int(math.Ceil(float64(width)*beta/float64(factor))) * factor
}
return hBar, wBar
}
type Grid struct {
Height int
Width int
Temporal int
}
func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, *Grid, error) {
origWidth := img.Bounds().Dx()
origHeight := img.Bounds().Dy()
// Calculate smart resize dimensions
resizedHeight, resizedWidth := p.SmartResize(origHeight, origWidth)
// Resize image using existing functions
resizedImg := imageproc.Resize(img, image.Point{X: resizedWidth, Y: resizedHeight}, imageproc.ResizeBilinear)
normalizedPixels := imageproc.Normalize(
resizedImg,
[3]float32{p.imageMean[0], p.imageMean[1], p.imageMean[2]},
[3]float32{p.imageStd[0], p.imageStd[1], p.imageStd[2]},
true, // rescale
true, // channelFirst
)
// Calculate grid dimensions
grid := &Grid{
Height: resizedHeight / p.patchSize,
Width: resizedWidth / p.patchSize,
Temporal: 1, // For single images, temporal dimension is 1
}
patches, err := p.createPatches(normalizedPixels, resizedHeight, resizedWidth, grid)
if err != nil {
return nil, nil, fmt.Errorf("failed to create patches: %v", err)
}
// Return patches and grid dimensions
return patches, grid, nil
}
func (p *ImageProcessor) createPatches(pixels []float32, height, width int, grid *Grid) ([]float32, error) {
channels := p.numChannels
patchSize := p.patchSize
mergeSize := p.mergeSize
temporalPatchSize := p.temporalPatchSize
// Calculate output dimensions
numPatches := grid.Temporal * grid.Height * grid.Width
patchDim := channels * temporalPatchSize * patchSize * patchSize
result := make([]float32, numPatches*patchDim)
patchIndex := 0
// Single temporal frame handling (copies to all frames)
for range grid.Temporal {
for h := 0; h < grid.Height; h += mergeSize {
for w := 0; w < grid.Width; w += mergeSize {
// Handle the 2x2 merged patches
for mh := range mergeSize {
for mw := range mergeSize {
baseOffset := patchIndex * patchDim
// Extract patch data for first temporal frame
for c := range channels {
channelOffset := baseOffset + (c * temporalPatchSize * patchSize * patchSize)
for py := range patchSize {
for px := range patchSize {
// Calculate source pixel coordinates
y := (h+mh)*patchSize + py
x := (w+mw)*patchSize + px
// Source index in input tensor (CHW format)
srcIdx := c*height*width + y*width + x
// Destination index in first temporal frame
dstIdx := channelOffset + (py * patchSize) + px
if srcIdx < len(pixels) && dstIdx < len(result) {
result[dstIdx] = pixels[srcIdx]
}
}
}
}
// Copy first temporal frame to all other frames
if temporalPatchSize > 1 {
for c := range channels {
channelOffset := baseOffset + (c * temporalPatchSize * patchSize * patchSize)
firstFrameOffset := channelOffset
frameSize := patchSize * patchSize
// Copy first frame to all other frames
for tp := 1; tp < temporalPatchSize; tp++ {
currentFrameOffset := channelOffset + (tp * frameSize)
copy(result[currentFrameOffset:currentFrameOffset+frameSize],
result[firstFrameOffset:firstFrameOffset+frameSize])
}
}
}
patchIndex++
}
}
}
}
}
return result, nil
}

View File

@ -10,9 +10,6 @@ import (
"log/slog"
"net/http"
"os"
"slices"
"strings"
"text/template/parse"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/fs/ggml"
@ -129,123 +126,19 @@ func detectContentType(r io.Reader) (string, error) {
return "unknown", nil
}
func parseObjects(s string) []map[string]any {
var objs []map[string]any
for offset := 0; offset < len(s); {
var obj map[string]any
decoder := json.NewDecoder(strings.NewReader(s[offset:]))
if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
break
} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
// skip over any syntax errors
offset += int(syntax.Offset)
} else if unmarshalType := &(json.UnmarshalTypeError{}); errors.As(err, &unmarshalType) {
// skip over any unmarshalable types
offset += int(unmarshalType.Offset)
} else if err != nil {
return nil
} else {
offset += int(decoder.InputOffset())
objs = append(objs, obj)
}
}
// func ToolTemplate(m *Model) (*gotmpl.Template, bool) {
// // create a subtree from the node that ranges over .ToolCalls
// tmpl := m.Template.Subtree(func(n parse.Node) bool {
// if t, ok := n.(*parse.RangeNode); ok {
// return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
// }
return objs
}
// return false
// })
// parseToolCalls attempts to parse a JSON string into a slice of ToolCalls.
// mxyng: this only really works if the input contains tool calls in some JSON format
func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
// create a subtree from the node that ranges over .ToolCalls
tmpl := m.Template.Subtree(func(n parse.Node) bool {
if t, ok := n.(*parse.RangeNode); ok {
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
}
// if tmpl == nil {
// return nil, false
// }
return false
})
if tmpl == nil {
return nil, false
}
var b bytes.Buffer
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
"ToolCalls": {
{
Function: api.ToolCallFunction{
Name: "@@name@@",
Arguments: api.ToolCallFunctionArguments{
"@@argument@@": 1,
},
},
},
},
}); err != nil {
return nil, false
}
templateObjects := parseObjects(b.String())
if len(templateObjects) == 0 {
return nil, false
}
// find the keys that correspond to the name and arguments fields
var name, arguments string
for k, v := range templateObjects[0] {
switch v.(type) {
case string:
name = k
case map[string]any:
arguments = k
}
}
if name == "" || arguments == "" {
return nil, false
}
responseObjects := parseObjects(s)
if len(responseObjects) == 0 {
return nil, false
}
// collect all nested objects
var collect func(any) []map[string]any
collect = func(obj any) (all []map[string]any) {
switch o := obj.(type) {
case map[string]any:
all = append(all, o)
for _, v := range o {
all = append(all, collect(v)...)
}
case []any:
for _, v := range o {
all = append(all, collect(v)...)
}
}
return all
}
var objs []map[string]any
for _, p := range responseObjects {
objs = append(objs, collect(p)...)
}
var toolCalls []api.ToolCall
for _, kv := range objs {
n, nok := kv[name].(string)
a, aok := kv[arguments].(map[string]any)
if nok && aok {
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: n,
Arguments: a,
},
})
}
}
return toolCalls, len(toolCalls) > 0
}
// return tmpl, true
// }

View File

@ -1,179 +1,185 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
// import (
// "testing"
// gotmpl "text/template"
// )
"github.com/google/go-cmp/cmp"
// func TestToolToken(t *testing.T) {
// cases := []struct {
// name string
// template string
// want string
// ok bool
// }{
// {
// name: "basic tool call with action prefix",
// template: "{{if .ToolCalls}}Action: ```json{{end}}",
// want: "Action:",
// ok: true,
// },
// {
// name: "incomplete functools bracket",
// template: "{{if .ToolCalls}}functools[{{end}}",
// want: "functools",
// ok: true,
// },
// {
// name: "tool call with angle brackets",
// template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
// want: "<tool_call>",
// ok: true,
// },
// {
// name: "multiple tool call formats",
// template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
// want: "[tool_call]",
// ok: true,
// },
// {
// name: "single angle bracket tool call",
// template: "{{if .ToolCalls}}<tool_call>{{end}}",
// want: "<tool_call>",
// ok: true,
// },
// {
// name: "incomplete angle bracket after tool call",
// template: "{{if .ToolCalls}}[tool_call] <{{end}}",
// want: "[tool_call]",
// ok: true,
// },
// {
// name: "angle bracket prefix with tool call",
// template: "{{if .ToolCalls}}> <tool_call>{{end}}",
// want: "<tool_call>",
// ok: true,
// },
// {
// name: "uppercase tool call with incomplete bracket",
// template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
// want: "[TOOL_CALL]",
// ok: true,
// },
// {
// name: "uppercase tool call with adjacent bracket",
// template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
// want: "[TOOL_CALL]",
// ok: true,
// },
// {
// name: "tool call with pipe delimiters",
// template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
// want: "<|tool_call|>",
// ok: true,
// },
// }
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
// for _, tt := range cases {
// t.Run(tt.name, func(t *testing.T) {
// tmpl, err := gotmpl.New("test").Parse(tt.template)
// if err != nil {
// t.Fatalf("failed to parse template: %v", err)
// }
// got, ok := ToolPrefix(tmpl)
// if got != tt.want {
// t.Errorf("ToolToken(%q) = %q; want %q", tt.template, got, tt.want)
// }
// if ok != tt.ok {
// t.Errorf("ToolToken(%q) = %v; want %v", tt.template, ok, tt.ok)
// }
// })
// }
// }
func readFile(t *testing.T, base, name string) *bytes.Buffer {
t.Helper()
// func TestTextAfterToolCalls(t *testing.T) {
// cases := []struct {
// name string
// template string
// want string
// ok bool
// }{
// {
// name: "basic tool call with text after",
// template: `{{if .ToolCalls}}tool response{{end}}`,
// want: "tool response",
// ok: true,
// },
// {
// name: "tool call with mixed content after",
// template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
// want: "<tool_call>",
// ok: true,
// },
// {
// name: "tool call with no text after",
// template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
// want: "",
// ok: true,
// },
// {
// name: "nested tool call",
// template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
// want: "[TOOL_CALL]",
// ok: true,
// },
// {
// name: "no tool calls",
// template: `{{if .Something}}no tools here{{end}}`,
// want: "",
// ok: false,
// },
// {
// name: "empty template",
// template: ``,
// want: "",
// ok: false,
// },
// {
// name: "multiple tool calls sections",
// template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
// want: "first",
// ok: true,
// },
// {
// name: "range over tool calls",
// template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
// want: "",
// ok: true,
// },
// {
// name: "tool calls with pipe delimiters",
// template: `{{if .ToolCalls}}<|tool|>{{end}}`,
// want: "<|tool|>",
// ok: true,
// },
// {
// name: "tool calls with nested template",
// template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
// want: "",
// ok: true,
// },
// {
// name: "tool calls with whitespace variations",
// template: `{{if .ToolCalls}} tool {{end}}`,
// want: " tool ",
// ok: true,
// },
// }
bts, err := os.ReadFile(filepath.Join(base, name))
if err != nil {
t.Fatal(err)
}
// for _, tt := range cases {
// t.Run(tt.name, func(t *testing.T) {
// tmpl, err := gotmpl.New("test").Parse(tt.template)
// if err != nil {
// t.Fatalf("failed to parse template: %v", err)
// }
return bytes.NewBuffer(bts)
}
func TestExecuteWithTools(t *testing.T) {
p := filepath.Join("testdata", "tools")
cases := []struct {
model string
output string
ok bool
}{
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`, true},
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"To }]`, false},
{"mistral", `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
{"mistral", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
{"command-r-plus", "Action: ```json" + `
[
{
"tool_name": "get_current_weather",
"parameters": {
"format": "fahrenheit",
"location": "San Francisco, CA"
}
},
{
"tool_name": "get_current_weather",
"parameters": {
"format": "celsius",
"location": "Toronto, Canada"
}
}
]
` + "```", true},
{"command-r-plus", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
{"firefunction", ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
{"firefunction", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
{"llama3-groq-tool-use", `<tool_call>
{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}
</tool_call>`, true},
{"xlam", `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`, true},
{"nemotron", `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]} </toolcall>`, true},
}
var tools []api.Tool
if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
t.Fatal(err)
}
var messages []api.Message
if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
t.Fatal(err)
}
calls := []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"format": "fahrenheit",
"location": "San Francisco, CA",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"format": "celsius",
"location": "Toronto, Canada",
},
},
},
}
for _, tt := range cases {
t.Run(tt.model, func(t *testing.T) {
tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
if err != nil {
t.Fatal(err)
}
t.Run("template", func(t *testing.T) {
var actual bytes.Buffer
if err := tmpl.Execute(&actual, template.Values{Tools: tools, Messages: messages}); err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("parse", func(t *testing.T) {
m := &Model{Template: tmpl}
actual, ok := m.parseToolCalls(tt.output)
if ok != tt.ok {
t.Fatalf("expected %t, got %t", tt.ok, ok)
}
if tt.ok {
if diff := cmp.Diff(actual, calls); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
}
})
})
}
}
func TestParseObjects(t *testing.T) {
tests := []struct {
input string
want []map[string]any
}{
{
input: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
want: []map[string]any{
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, Canada"}},
},
},
{
input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall>`,
want: []map[string]any{
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
},
},
{
input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall> <toolcall>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, ON"}} </toolcall>`,
want: []map[string]any{
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, ON"}},
},
},
{
input: `{"name": "get_current_weather", "arguments": `,
want: nil,
},
}
for _, tc := range tests {
t.Run(tc.input, func(t *testing.T) {
got := parseObjects(tc.input)
if diff := cmp.Diff(got, tc.want); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}
// got, ok := extractToolCallsTemplate(tmpl)
// if got != tt.want {
// t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
// }
// if ok != tt.ok {
// t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
// }
// })
// }
// }

View File

@ -38,6 +38,7 @@ import (
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/server/internal/registry"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/tools"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
@ -1482,11 +1483,22 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
slog.Debug("chat request", "images", len(images), "prompt", prompt)
var toolParser *tools.Parser
if len(req.Tools) > 0 {
toolParser, err = tools.NewParser(m.Template.Template)
if err != nil {
slog.Error("failed to create tool parser", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
}
ch := make(chan any)
go func() {
defer close(ch)
var sb strings.Builder
var toolCallIndex int = 0
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Prompt: prompt,
Images: images,
@ -1512,37 +1524,21 @@ func (s *Server) ChatHandler(c *gin.Context) {
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
}
// TODO: tool call checking and filtering should be moved outside of this callback once streaming
// however this was a simple change for now without reworking streaming logic of this (and other)
// handlers
if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
ch <- res
return
}
// Streaming tool calls:
// If tools are recognized, use a flag to track the sending of a tool downstream
// This ensures that content is cleared from the message on the last chunk sent
sb.WriteString(r.Content)
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
res.Message.ToolCalls = toolCalls
for i := range toolCalls {
toolCalls[i].Function.Index = toolCallIndex
toolCallIndex++
if len(req.Tools) > 0 && !toolParser.Done {
toolCalls, content, err := toolParser.Add(r.Content)
if err == nil {
if len(content) > 0 {
res.Message.Content = content
slog.Debug("tools: setting content to", "content", content)
} else if len(toolCalls) > 0 {
res.Message.ToolCalls = toolCalls
res.Message.Content = ""
} else {
return
}
}
res.Message.Content = ""
sb.Reset()
ch <- res
return
}
if r.Done {
// Send any remaining content if no tool calls were detected
if toolCallIndex == 0 {
res.Message.Content = sb.String()
}
ch <- res
}
ch <- res
}); err != nil {
ch <- gin.H{"error": err.Error()}
}
@ -1551,11 +1547,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
if req.Stream != nil && !*req.Stream {
var resp api.ChatResponse
var sb strings.Builder
var toolCalls []api.ToolCall
for rr := range ch {
switch t := rr.(type) {
case api.ChatResponse:
sb.WriteString(t.Message.Content)
resp = t
if len(req.Tools) > 0 {
toolCalls = append(toolCalls, t.Message.ToolCalls...)
}
case gin.H:
msg, ok := t["error"].(string)
if !ok {
@ -1571,12 +1571,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
}
resp.Message.Content = sb.String()
if len(req.Tools) > 0 {
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
resp.Message.ToolCalls = toolCalls
resp.Message.Content = ""
}
if len(toolCalls) > 0 {
resp.Message.ToolCalls = toolCalls
}
c.JSON(http.StatusOK, resp)

44
tools/testdata/llama3.2.gotmpl vendored Normal file
View File

@ -0,0 +1,44 @@
<|start_header_id|>system<|end_header_id|>
Cutting Knowledge Date: December 2023
{{ if .System }}{{ .System }}
{{- end }}
{{- if .Tools }}When you receive a tool call response, use the output to format an answer to the orginal user question.
You are a helpful assistant with tool calling capabilities.
{{- end }}<|eot_id|>
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
{{ range $.Tools }}
{{- . }}
{{ end }}
{{ .Content }}<|eot_id|>
{{- else }}
{{ .Content }}<|eot_id|>
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}
{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}
{{ .Content }}
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- end }}
{{- end }}

24
tools/testdata/llama3.2.out vendored Normal file
View File

@ -0,0 +1,24 @@
<|start_header_id|>system<|end_header_id|>
Cutting Knowledge Date: December 2023
You are a knowledgeable assistant. You can answer questions and perform tasks.When you receive a tool call response, use the output to format an answer to the orginal user question.
You are a helpful assistant with tool calling capabilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
{"name": "get_current_weather", "parameters": {"format":"celsius","location":"Paris, France"}}<|eot_id|><|start_header_id|>ipython<|end_header_id|>
22<|eot_id|><|start_header_id|>assistant<|end_header_id|>
The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

51
tools/testdata/qwen2.5-coder.gotmpl vendored Normal file
View File

@ -0,0 +1,51 @@
{{- if .Suffix }}<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
{{- else if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}

31
tools/testdata/qwen2.5-coder.out vendored Normal file
View File

@ -0,0 +1,31 @@
<|im_start|>system
You are a knowledgeable assistant. You can answer questions and perform tasks.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like today in Paris?<|im_end|>
<|im_start|>assistant
<tool_call>
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
22
</tool_response><|im_end|>
<|im_start|>assistant
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
<|im_start|>user
What's the weather like today in San Francisco and Toronto?<|im_end|>
<|im_start|>assistant

50
tools/testdata/qwen3.gotmpl vendored Normal file
View File

@ -0,0 +1,50 @@
{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}

31
tools/testdata/qwen3.out vendored Normal file
View File

@ -0,0 +1,31 @@
<|im_start|>system
You are a knowledgeable assistant. You can answer questions and perform tasks.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like today in Paris?<|im_end|>
<|im_start|>assistant
<tool_call>
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
22
</tool_response><|im_end|>
<|im_start|>assistant
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
<|im_start|>user
What's the weather like today in San Francisco and Toronto?<|im_end|>
<|im_start|>assistant

242
tools/tools.go Normal file
View File

@ -0,0 +1,242 @@
package tools
import (
"errors"
"io"
"log/slog"
"strings"
gotmpl "text/template"
jsonv2 "github.com/go-json-experiment/json"
jsontext "github.com/go-json-experiment/json/jsontext"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
type Parser struct {
greedyParse bool
prefixFound bool
prefixPartial bool
tmpl *gotmpl.Template
sb *strings.Builder
prefix string
index int
name string
arguments string
Done bool
}
// parseJSONToolCalls attempts to parse a JSON string into a slice of ToolCalls.
// It first tries to incrementally decode the JSON to handle partial inputs.
// Returns:
// - []api.ToolCall: The parsed tool calls if successful
// - bool: True if JSON is incomplete and needs more input
func (p *Parser) parseJSONToolCalls(s string) ([]api.ToolCall, bool) {
// First try incremental decoding to handle partial JSON
dec := jsontext.NewDecoder(strings.NewReader(s))
if got, err := dec.ReadValue(); err == nil {
s = got.String()
}
// Attempt full unmarshal of the JSON
var resp any
err := jsonv2.Unmarshal([]byte(s), &resp)
if err != nil {
// Handle incomplete JSON cases
if errors.Is(err, io.ErrUnexpectedEOF) || err.Error() == "unexpected end of JSON input" {
slog.Debug("incomplete JSON detected", "input", s)
return nil, true
}
slog.Debug("failed to unmarshal response", "error", err)
return nil, false
}
// Collect all nested objects that could contain tool calls
var objs []map[string]any
objs = append(objs, collect(resp)...)
if len(objs) == 0 {
return nil, false
}
var toolCalls []api.ToolCall
for _, kv := range objs {
n, nok := kv[p.name].(string)
a, aok := kv[p.arguments].(map[string]any)
if nok && aok {
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: n,
Arguments: a,
},
})
}
}
// Valid JSON, no tool calls found
if len(toolCalls) == 0 {
return nil, false
}
return toolCalls, false
}
// checkPrefix processes a string to find and handle a prefix pattern.
//
// Returns:
// - The processed string with prefix removed if found
// - Whether the prefix was found at the start of the string
// - Whether to continue parsing
func (p *Parser) checkPrefix(s string) (string, bool, bool) {
// Keep original for overlap checks
original := s
s = strings.TrimSpace(s)
if s == "" {
return "", false, true
}
// If no prefix defined, just return trimmed string
if p.prefix == "" {
return s, false, true
}
// Check for prefix at start of string
if processedStr, hasPrefix := strings.CutPrefix(s, p.prefix); hasPrefix {
// Found prefix at start - accumulate for potential tool
return processedStr, true, true
}
// Check if prefix overlaps end of string
if overlap := suffixOverlap(original, p.prefix); overlap > 0 {
p.prefixPartial = true
// Return everything except overlapping portion
p.sb.Reset()
p.sb.WriteString(original[len(original)-overlap:])
return original[0 : len(original)-overlap], false, false
}
// Check if prefix appears in middle of string
if idx := strings.Index(original, p.prefix); idx != -1 {
p.prefixPartial = true
// Save remainder starting at prefix for next pass
p.sb.Reset()
p.sb.WriteString(strings.TrimSpace(original[idx:]))
// Return everything before prefix
return original[:idx], false, false
}
// No prefix found
p.prefixPartial = false
return s, false, true
}
// Add processes a string input to parse tool calls and content.
// It handles prefix detection and JSON parsing to extract tool calls.
//
// Returns:
// - tools: Any parsed tool calls
// - content: Non-tool call content
// - err: Error if parsing failed
func (p *Parser) Add(s string) (tools []api.ToolCall, content string, err error) {
p.sb.WriteString(s)
s = p.sb.String()
if len(s) == 0 {
return nil, "", nil
}
// Check for prefix pattern in input
s, prefixFound, shouldContinue := p.checkPrefix(s)
if !shouldContinue {
if s != "" {
// Return content before prefix
return nil, s, nil
}
// Need more input to complete prefix
return nil, "", nil
}
// Update prefix found state
if prefixFound {
p.prefixFound = true
}
// Exit if prefix exists in template, greedy parsing is off, and prefix not found
if !p.greedyParse && !p.prefixFound {
p.sb.Reset()
return nil, "", errors.New("prefix not found")
}
toolCalls, isPartial := p.parseJSONToolCalls(s)
if isPartial {
// Need more input to complete JSON
return nil, "", nil
}
// Do not try greedy parsing if partial JSON not found
p.greedyParse = false
// Handle invalid tool call format
if len(toolCalls) == 0 {
p.sb.Reset()
if p.prefix == "" {
p.Done = true
}
if p.prefixFound {
// Drop tokens since prefix was found
return nil, "", nil
}
return nil, s, nil
}
for _, tc := range toolCalls {
tc.Function.Index = p.index
p.index++
}
// Mark as done if no prefix needed
if p.prefix == "" {
p.Done = true
}
p.sb.Reset()
return toolCalls, "", nil
}
// NewParser creates a new tool call parser from a template. It extracts the tool call format,
// prefix, and field names from the template to use for parsing tool calls from model output.
//
// Returns an error if the template does not contain valid tool call formatting.
func NewParser(templateToProcess *gotmpl.Template) (*Parser, error) {
parsed, err := template.Parse(templateToProcess.Root.String())
if err != nil {
return nil, err
}
if parsed == nil {
return nil, errors.New("failed to parse template")
}
tt, tc := toolTemplate(parsed)
if !tc {
return nil, errors.New("failed to find tool calls in template")
}
if tt == nil {
return nil, errors.New("failed to find tool template")
}
tp := toolPrefix(templateToProcess)
tp = strings.TrimSpace(tp)
name, arguments, err := extractToolArgs(tt)
if err != nil {
return nil, err
}
return &Parser{
tmpl: tt,
sb: &strings.Builder{},
prefix: tp,
greedyParse: true,
name: name,
arguments: arguments,
}, nil
}

482
tools/tools_test.go Normal file
View File

@ -0,0 +1,482 @@
package tools
import (
"bytes"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
func readFile(t *testing.T, base, name string) *bytes.Buffer {
t.Helper()
bts, err := os.ReadFile(filepath.Join(base, name))
if err != nil {
t.Fatal(err)
}
return bytes.NewBuffer(bts)
}
func TestParseToolCalls(t *testing.T) {
p := filepath.Join("testdata")
t1 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"format": "fahrenheit",
"location": "San Francisco, CA",
},
},
}
t2 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"format": "celsius",
"location": "Toronto, Canada",
},
},
}
cases := []struct {
name string
model string
output string
expectedToolCall []api.ToolCall
expectedTokens string
}{
{
name: "mistral malformed json with tool calls prefix",
model: "mistral",
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_curren}]`,
expectedToolCall: []api.ToolCall{},
expectedTokens: "",
},
{
name: "mistral multiple tool calls without prefix",
model: "mistral",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "mistral tool calls with text between no prefix",
model: "mistral",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: `model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
},
{
name: "mistral valid json with tool calls prefix",
model: "mistral",
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "mistral multiple tool calls with text between and prefix",
model: "mistral",
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2, t1, t2},
expectedTokens: "",
},
{
name: "mistral incomplete json with tool calls prefix",
model: "mistral",
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, `,
expectedToolCall: []api.ToolCall{},
expectedTokens: "",
},
{
name: "mistral invalid tool call with explanatory text no prefix",
model: "mistral",
output: `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function: [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
},
{
name: "mistral tool calls without prefix",
model: "mistral",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "command r plus tool calls with json block format",
model: "command-r-plus",
output: "Action: ```json" + `
[
{
"tool_name": "get_current_weather",
"parameters": {
"format": "fahrenheit",
"location": "San Francisco, CA"
}
},
{
"tool_name": "get_current_weather",
"parameters": {
"format": "celsius",
"location": "Toronto, Canada"
}
}
]
` + "```",
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "firefunction tool calls with functools prefix",
model: "firefunction",
output: ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "llama3 groq single tool call with xml tags",
model: "llama3-groq-tool-use",
output: `<tool_call>
{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
</tool_call>`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: "",
},
{
name: "xlam tool calls with wrapper object",
model: "xlam",
output: `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "qwen2.5-coder single tool call with prefix",
model: "qwen2.5-coder",
output: `<tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: "",
},
{
name: "qwen2.5-coder multiple tool calls with and without prefix",
model: "qwen2.5-coder",
output: `{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} <tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call> <tool_call>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}</tool_call>`,
expectedToolCall: []api.ToolCall{t1, t1, t2},
expectedTokens: "",
},
{
name: "qwen2.5-coder multiple tool calls without prefix",
model: "qwen2.5-coder",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "qwen2.5-coder plain text response no tool calls",
model: "qwen2.5-coder",
output: "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
expectedToolCall: []api.ToolCall{},
expectedTokens: "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
},
{
name: "qwen2.5-coder tool calls with trailing text",
model: "qwen2.5-coder",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] some tokens after call`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "some tokens after call",
},
{
name: "qwen2.5 tool calls with prefix and trailing text",
model: "qwen2.5-coder",
output: `<tool_call> [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] </tool_call> some tokens after call`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "qwen2.5 tool calls without prefix and valid tool call",
model: "qwen2.5-coder",
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
expectedToolCall: []api.ToolCall{t1, t2},
expectedTokens: "",
},
{
name: "qwen2.5 tool calls without prefix and invalid tool call",
model: "qwen2.5-coder",
output: `[{"options": "foo"}]`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `[{"options": "foo"}]`,
},
{
name: "qwen2.5 tool calls with prefix and invalid tool call",
model: "qwen2.5-coder",
output: `<tool_call> [{"options": "foo"}] </tool_call> `,
expectedToolCall: []api.ToolCall{},
expectedTokens: ``,
},
{
name: "qwen3 tool call with think prefix and tool prefix (sent as a single token)",
model: "qwen3",
output: `<think>Okay, let me think what tool we should use...</think><tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: "<think>Okay, let me think what tool we should use...</think>",
},
{
name: "qwen3 tool call with think prefix, tool prefix, and whitespace (sent as separate tokens)",
model: "qwen3",
output: `<think>Okay, let me think what tool we should use...</think> <tool_call> {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: "<think>Okay, let me think what tool we should use...</think>",
},
{
name: "qwen3 empty think prefix without tool prefix and invalid tool call",
model: "qwen3",
output: `<think></think>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `<think></think>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
},
{
name: "qwen3 empty think prefix with tool prefix and valid tool call",
model: "qwen3",
output: `<think></think><tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: `<think></think>`,
},
{
name: "qwen3 invalid tool call with fake tool prefix (single rune suffix match)",
model: "qwen3",
output: `<think></think>< fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `<think></think>< fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
},
{
name: "qwen3 invalid tool call with partial tool prefix (multiple rune suffix match)",
model: "qwen3",
output: `<think></think><tool_c fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `<think></think><tool_c fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
},
{
name: "qwen3 invalid tool call with malformed tool prefix",
model: "qwen3",
output: `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
},
{
name: "llama3.2 valid tool call without prefix",
model: "llama3.2",
output: `{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
expectedToolCall: []api.ToolCall{t1},
expectedTokens: "",
},
{
name: "llama3.2 incomplete tool call without prefix",
model: "llama3.2",
output: `{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, `,
expectedToolCall: []api.ToolCall{},
expectedTokens: "",
},
{
name: "llama3.2 tool call with leading text",
model: "llama3.2",
output: `some non json text{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `some non json text{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
},
{
name: "llama3.2 tool call with invalid tool prefix (no prefix in template)",
model: "llama3.2",
output: `<tool_call>{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
expectedToolCall: []api.ToolCall{},
expectedTokens: `<tool_call>{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
},
}
var tools []api.Tool
if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
t.Fatal(err)
}
var messages []api.Message
if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
t.Fatal(err)
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
if err != nil {
t.Fatal(err)
}
t.Run("template", func(t *testing.T) {
actual := &bytes.Buffer{} // Create new buffer for each test
if err := tmpl.Execute(actual, template.Values{Tools: tools, Messages: messages}); err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("parse", func(t *testing.T) {
// fmt.Printf("tmpl: %s\n", tmpl.Root.String())
tp, err := NewParser(tmpl.Template)
if err != nil {
t.Fatal(err)
}
got := []api.ToolCall{}
var gotTokens strings.Builder
var add bool
tokens := strings.Fields(tt.output)
for _, tok := range tokens {
s := " " + tok
add = true
if !tp.Done {
toolCalls, content, err := tp.Add(s)
if err == nil {
if content != "" {
fmt.Printf("content: %q\n", content)
gotTokens.WriteString(content)
add = false
} else if len(toolCalls) > 0 {
got = append(got, toolCalls...)
add = false
} else {
add = false
}
}
}
if add {
gotTokens.WriteString(s)
}
}
// Compare tool calls if we expect any
if diff := cmp.Diff(got, tt.expectedToolCall); diff != "" {
t.Errorf("tool calls mismatch (-got +want):\n%s", diff)
}
// Compare tokens if we expect any
stripped := strings.TrimSpace(gotTokens.String())
if diff := cmp.Diff(stripped, tt.expectedTokens); diff != "" {
t.Log("actualTokens", stripped, "expectedTokens", tt.expectedTokens)
t.Errorf("tokens mismatch (-got +want):\n%s", diff)
}
})
})
}
}
func TestParseJSONToolCalls(t *testing.T) {
tests := []struct {
name string
input string
parser *Parser
wantToolCalls []api.ToolCall
wantPartial bool
wantValid bool
}{
{
name: "valid single tool call",
input: `{"name": "test_tool", "arguments": {"arg1": "value1"}}`,
parser: &Parser{name: "name", arguments: "arguments"},
wantToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test_tool",
Arguments: map[string]any{
"arg1": "value1",
},
},
},
},
wantPartial: false,
wantValid: true,
},
{
name: "incomplete JSON",
input: `{"name": "test_tool", "arguments": {"arg1": `,
parser: &Parser{name: "name", arguments: "arguments"},
wantToolCalls: nil,
wantPartial: true,
wantValid: false,
},
{
name: "invalid JSON",
input: `not json at all`,
parser: &Parser{name: "name", arguments: "arguments"},
wantToolCalls: nil,
wantPartial: false,
wantValid: false,
},
{
name: "missing required fields",
input: `{"other": "field"}`,
parser: &Parser{name: "name", arguments: "arguments"},
wantToolCalls: nil,
wantPartial: false,
wantValid: false,
},
{
name: "multiple tool calls in array",
input: `[
{"name": "tool1", "arguments": {"arg1": 1}},
{"name": "tool2", "arguments": {"arg2": "value"}}
]`,
parser: &Parser{name: "name", arguments: "arguments"},
wantToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "tool1",
Arguments: map[string]any{
"arg1": float64(1),
},
},
},
{
Function: api.ToolCallFunction{
Name: "tool2",
Arguments: map[string]any{
"arg2": "value",
},
},
},
},
wantPartial: false,
wantValid: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotCalls, gotPartial := tt.parser.parseJSONToolCalls(tt.input)
if gotPartial != tt.wantPartial {
t.Errorf("parseJSONToolCalls() partial = %v, want %v", gotPartial, tt.wantPartial)
}
if len(gotCalls) != 0 != tt.wantValid {
t.Errorf("parseJSONToolCalls() valid = %v, want %v", len(gotCalls) == 0, tt.wantValid)
}
if diff := cmp.Diff(gotCalls, tt.wantToolCalls); diff != "" {
t.Errorf("parseJSONToolCalls() tool calls mismatch (-got +want):\n%s", diff)
}
})
}
}

257
tools/utils.go Normal file
View File

@ -0,0 +1,257 @@
package tools
import (
"bytes"
"errors"
"log/slog"
"slices"
"strings"
gotmpl "text/template"
"text/template/parse"
jsonv2 "github.com/go-json-experiment/json"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/template"
)
// extractToolCallsFormat traverses a template AST to find text that follows a ".ToolCalls" condition.
// It walks the template nodes looking for if-statements containing ".ToolCalls" and extracts any
// immediate text nodes that follow. This is used to identify tool call prefixes and formatting.
//
// Returns:
// - string: The extracted text following the first ".ToolCalls" condition found
// - bool: Whether a ".ToolCalls" condition was found in the template
func extractToolCallsFormat(tmpl *gotmpl.Template) (string, bool) {
if tmpl == nil || tmpl.Tree == nil {
slog.Debug("TextAfterToolCalls: template or tree is nil")
return "", false
}
var result string
var found bool
var walk func(nodes []parse.Node)
walk = func(nodes []parse.Node) {
for _, node := range nodes {
if found {
return
}
switch n := node.(type) {
case *parse.IfNode:
if isToolCallsNode(n) {
// Collect immediate TextNode(s) at start of IfNode's list
var sb strings.Builder
for _, innerNode := range n.List.Nodes {
if tn, ok := innerNode.(*parse.TextNode); ok {
sb.Write(tn.Text)
} else {
// Stop at first non-text node
break
}
}
result = sb.String()
found = true
return
}
// Recurse into child nodes
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
case *parse.ListNode:
walk(n.Nodes)
case *parse.RangeNode:
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
case *parse.WithNode:
walk(n.List.Nodes)
if n.ElseList != nil {
walk(n.ElseList.Nodes)
}
default:
// Continue to next node
continue
}
if found {
return
}
}
}
walk(tmpl.Tree.Root.Nodes)
return result, found
}
// isToolCallsNode detects if a node's condition includes ".ToolCalls"
func isToolCallsNode(n *parse.IfNode) bool {
for _, cmd := range n.Pipe.Cmds {
for _, arg := range cmd.Args {
if field, ok := arg.(*parse.FieldNode); ok {
if slices.Contains(field.Ident, "ToolCalls") {
return true
}
}
}
}
return false
}
// TODO(parthsareen): get full prefix from the template instead of just the first token
// toolPrefix returns the prefix for the tool call if it exists from a template
func toolPrefix(tmpl *gotmpl.Template) string {
tokenText, ok := extractToolCallsFormat(tmpl)
if !ok {
return ""
}
tokenText = strings.TrimSpace(tokenText)
if tokenText == "" {
return ""
}
first := strings.Fields(tokenText)[0]
start := -1
end := -1
for i, r := range tokenText {
if r == '<' || r == '[' {
start = i
}
if (r == '>' || r == ']') && start != -1 {
end = i
break
}
}
if start != -1 && end != -1 {
// return the token including the [ or < and the ] or >
return tokenText[start : end+1]
} else if start != -1 {
// get until the [ or < - in the case tag was not closed
return tokenText[:start]
} else if end != -1 {
// get after the ] or > - in the case tag was not opened
return tokenText[end+1:]
}
return first
}
// toolTemplate creates a subtree from the node that ranges over .ToolCalls
//
// Returns:
// - *gotmpl.Template: The subtree containing the .ToolCalls range
// - bool: Whether a .ToolCalls range was found in the template
func toolTemplate(t *template.Template) (*gotmpl.Template, bool) {
tmpl := t.Subtree(func(n parse.Node) bool {
if t, ok := n.(*parse.RangeNode); ok {
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
}
return false
})
if tmpl == nil {
return nil, false
}
return tmpl, true
}
// suffixOverlap returns the length of the longest suffix overlap between two strings
//
// Returns:
// - int: The length of the longest suffix overlap
func suffixOverlap(s, delim string) int {
max := min(len(delim), len(s))
for i := max; i > 0; i-- {
if strings.HasSuffix(s, delim[:i]) {
return i
}
}
return 0
}
// extractToolArgs executes a template with a known tool call format to extract the name and arguments
//
// Returns:
// - string: The name of the tool call
// - string: The arguments of the tool call
// - error: Error if parsing failed
func extractToolArgs(tmpl *gotmpl.Template) (name, arguments string, err error) {
var b bytes.Buffer
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
"ToolCalls": {
{
Function: api.ToolCallFunction{
Name: "@@name@@",
Arguments: api.ToolCallFunctionArguments{
"@@argument@@": 1,
},
},
},
},
}); err != nil {
return "", "", err
}
var obj any
err = jsonv2.Unmarshal(b.Bytes(), &obj)
if err != nil {
return "", "", err
}
var objs []map[string]any
switch v := obj.(type) {
case map[string]any:
objs = []map[string]any{v}
case []map[string]any:
objs = v
case []any:
objs = collect(v)
}
if len(objs) == 0 {
return "", "", errors.New("no template objects found")
}
// find the keys that correspond to the name and arguments fields
for k, v := range objs[0] {
switch v.(type) {
case string:
name = k
case map[string]any:
arguments = k
}
}
if name == "" || arguments == "" {
slog.Debug("missing required fields in tool call template", "name", name, "arguments", arguments)
return "", "", errors.New("missing required fields in tool call template")
}
return name, arguments, nil
}
// collect recursively traverses an object to collect all nested maps
//
// Returns:
// - []map[string]any: A slice of all nested maps found in the object
func collect(obj any) []map[string]any {
var all []map[string]any
switch o := obj.(type) {
case map[string]any:
all = append(all, o)
for _, v := range o {
all = append(all, collect(v)...)
}
case []any:
for _, v := range o {
all = append(all, collect(v)...)
}
default:
return nil
}
return all
}

464
tools/utils_test.go Normal file
View File

@ -0,0 +1,464 @@
package tools
import (
"testing"
gotmpl "text/template"
"github.com/ollama/ollama/template"
)
func TestExtractToolCallsFormat(t *testing.T) {
cases := []struct {
name string
template string
want string
found bool
}{
{
name: "nil template",
template: "",
want: "",
found: false,
},
{
name: "basic tool call with text",
template: "{{if .ToolCalls}}Hello world{{end}}",
want: "Hello world",
found: true,
},
{
name: "tool call with json format",
template: "{{if .ToolCalls}}```json\n{{end}}",
want: "```json\n",
found: true,
},
{
name: "tool call in range",
template: "{{range .ToolCalls}}tool: {{.}}{{end}}",
want: "",
found: false,
},
{
name: "tool call with multiple text nodes",
template: "{{if .ToolCalls}}First text{{if .Something}}inner{{end}}Second text{{end}}",
want: "First text",
found: true,
},
{
name: "nested if without tool calls",
template: "{{if .Something}}{{if .OtherThing}}text{{end}}{{end}}",
want: "",
found: false,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tc.template)
if err != nil && tc.template != "" {
t.Fatalf("failed to parse template: %v", err)
}
got, found := extractToolCallsFormat(tmpl)
if got != tc.want {
t.Errorf("got text %q, want %q", got, tc.want)
}
if found != tc.found {
t.Errorf("got found %v, want %v", found, tc.found)
}
})
}
}
func TestToolPrefix(t *testing.T) {
cases := []struct {
name string
template string
want string
}{
{
name: "basic tool call with action prefix",
template: "{{if .ToolCalls}}Action: ```json{{end}}",
want: "Action:",
},
{
name: "incomplete functools bracket",
template: "{{if .ToolCalls}}functools[{{end}}",
want: "functools",
},
{
name: "tool call with angle brackets",
template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
want: "<tool_call>",
},
{
name: "multiple tool call formats",
template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
want: "[tool_call]",
},
{
name: "single angle bracket tool call",
template: "{{if .ToolCalls}}<tool_call>{{end}}",
want: "<tool_call>",
},
{
name: "incomplete angle bracket after tool call",
template: "{{if .ToolCalls}}[tool_call] <{{end}}",
want: "[tool_call]",
},
{
name: "angle bracket prefix with tool call",
template: "{{if .ToolCalls}}> <tool_call>{{end}}",
want: "<tool_call>",
},
{
name: "uppercase tool call with incomplete bracket",
template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
want: "[TOOL_CALL]",
},
{
name: "uppercase tool call with adjacent bracket",
template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
want: "[TOOL_CALL]",
},
{
name: "tool call with pipe delimiters",
template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
want: "<|tool_call|>",
},
{
name: "tool with no prefix",
template: "{{if .ToolCalls}}{{end}}",
want: "",
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
got := toolPrefix(tmpl)
if got != tt.want {
t.Errorf("ToolToken(%q) = %q; want %q", tt.template, got, tt.want)
}
})
}
}
func TestToolTemplate(t *testing.T) {
cases := []struct {
name string
template string
want bool
}{
{
name: "basic tool call range",
template: "{{range .ToolCalls}}test{{end}}",
want: true,
},
{
name: "no tool calls",
template: "{{range .Other}}test{{end}}",
want: false,
},
{
name: "nested tool calls",
template: "{{range .Outer}}{{range .ToolCalls}}test{{end}}{{end}}",
want: true,
},
{
name: "empty template",
template: "",
want: false,
},
{
name: "tool calls in if statement",
template: "{{if .ToolCalls}}test{{end}}",
want: false,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
parsed, err := template.Parse(tmpl.Root.String())
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
_, got := toolTemplate(parsed)
if got != tt.want {
t.Errorf("toolTemplate() = %v; want %v", got, tt.want)
}
})
}
}
func TestSuffixOverlap(t *testing.T) {
cases := []struct {
name string
s string
d string
want int
}{
{
name: "no overlap",
s: "hello world",
d: "",
want: 0,
},
{
name: "full overlap",
s: "<tool_call>",
d: "<tool_call>",
want: 11,
},
{
name: "partial overlap",
s: "text <tool_call>",
d: "<tool_call>",
want: 11,
},
{
name: "delimiter longer than string",
s: "<tool>",
d: "<tool_call>",
want: 0,
},
{
name: "empty string",
s: "",
d: "<tool_call>",
want: 0,
},
{
name: "empty delimiter",
s: "<tool_call>",
d: "",
want: 0,
},
{
name: "single char overlap",
s: "test<",
d: "<tool_call>",
want: 1,
},
{
name: "partial tool call",
s: "hello <tool_",
d: "<tool_call>",
want: 6,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got := suffixOverlap(tt.s, tt.d)
if got != tt.want {
t.Errorf("suffixOverlap(%q, %q) = %d; want %d", tt.s, tt.d, got, tt.want)
}
})
}
}
func TestExtractToolArgs(t *testing.T) {
cases := []struct {
name string
template string
want string
ok bool
}{
{
name: "basic tool call with text after",
template: `{{if .ToolCalls}}tool response{{end}}`,
want: "tool response",
ok: true,
},
{
name: "tool call with mixed content after",
template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
want: "<tool_call>",
ok: true,
},
{
name: "tool call with no text after",
template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
want: "",
ok: true,
},
{
name: "nested tool call",
template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
want: "[TOOL_CALL]",
ok: true,
},
{
name: "no tool calls",
template: `{{if .Something}}no tools here{{end}}`,
want: "",
ok: false,
},
{
name: "empty template",
template: ``,
want: "",
ok: false,
},
{
name: "multiple tool calls sections",
template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
want: "first",
ok: true,
},
{
name: "range over tool calls",
template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
want: "",
ok: true,
},
{
name: "tool calls with pipe delimiters",
template: `{{if .ToolCalls}}<|tool|>{{end}}`,
want: "<|tool|>",
ok: true,
},
{
name: "tool calls with nested template",
template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
want: "",
ok: true,
},
{
name: "tool calls with whitespace variations",
template: `{{if .ToolCalls}} tool {{end}}`,
want: " tool ",
ok: true,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
tmpl, err := gotmpl.New("test").Parse(tt.template)
if err != nil {
t.Fatalf("failed to parse template: %v", err)
}
got, ok := extractToolCallsFormat(tmpl)
if got != tt.want {
t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
}
if ok != tt.ok {
t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
}
})
}
}
func TestCollect(t *testing.T) {
cases := []struct {
name string
obj any
want []map[string]any
}{
{
name: "simple map",
obj: map[string]any{
"key": "value",
},
want: []map[string]any{
{"key": "value"},
},
},
{
name: "nested map",
obj: map[string]any{
"outer": map[string]any{
"inner": "value",
},
},
want: []map[string]any{
{"outer": map[string]any{"inner": "value"}},
{"inner": "value"},
},
},
{
name: "array of maps",
obj: []any{
map[string]any{"key1": "val1"},
map[string]any{"key2": "val2"},
},
want: []map[string]any{
{"key1": "val1"},
{"key2": "val2"},
},
},
{
name: "deeply nested",
obj: map[string]any{
"l1": map[string]any{
"l2": map[string]any{
"l3": "value",
},
},
},
want: []map[string]any{
{"l1": map[string]any{"l2": map[string]any{"l3": "value"}}},
{"l2": map[string]any{"l3": "value"}},
{"l3": "value"},
},
},
{
name: "non-map value",
obj: "string",
want: nil,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got := collect(tt.obj)
if len(got) != len(tt.want) {
t.Errorf("collect() got %d maps, want %d", len(got), len(tt.want))
return
}
// Compare each map in the result
for i := range tt.want {
if !mapsEqual(got[i], tt.want[i]) {
t.Errorf("collect() map[%d] = %v, want %v", i, got[i], tt.want[i])
}
}
})
}
}
// mapsEqual compares two maps for deep equality
func mapsEqual(m1, m2 map[string]any) bool {
if len(m1) != len(m2) {
return false
}
for k, v1 := range m1 {
v2, ok := m2[k]
if !ok {
return false
}
switch val1 := v1.(type) {
case map[string]any:
val2, ok := v2.(map[string]any)
if !ok || !mapsEqual(val1, val2) {
return false
}
default:
if v1 != v2 {
return false
}
}
}
return true
}