Compare commits
11 Commits
main
...
tool-parsi
Author | SHA1 | Date | |
---|---|---|---|
![]() |
8ed95a4e96 | ||
![]() |
bc83789be9 | ||
![]() |
4059b8db01 | ||
![]() |
b8b9c0c7cf | ||
![]() |
779547fcde | ||
![]() |
6cb7494061 | ||
![]() |
a44734b030 | ||
![]() |
b5a982ecb0 | ||
![]() |
516a540df7 | ||
![]() |
7f2f996cd6 | ||
![]() |
610054a234 |
@ -191,8 +191,6 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
|
|||||||
conv = &phi3Model{}
|
conv = &phi3Model{}
|
||||||
case "Qwen2ForCausalLM":
|
case "Qwen2ForCausalLM":
|
||||||
conv = &qwen2Model{}
|
conv = &qwen2Model{}
|
||||||
case "Qwen2_5_VLForConditionalGeneration":
|
|
||||||
conv = &qwen25VLModel{}
|
|
||||||
case "BertModel":
|
case "BertModel":
|
||||||
conv = &bertModel{}
|
conv = &bertModel{}
|
||||||
case "CohereForCausalLM":
|
case "CohereForCausalLM":
|
||||||
|
@ -15,7 +15,6 @@ type qwen2Model struct {
|
|||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Factor ropeFactor `json:"factor"`
|
Factor ropeFactor `json:"factor"`
|
||||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||||
MropeSection []int32 `json:"mrope_section"`
|
|
||||||
} `json:"rope_scaling"`
|
} `json:"rope_scaling"`
|
||||||
RMSNormEPS float32 `json:"rms_norm_eps"`
|
RMSNormEPS float32 `json:"rms_norm_eps"`
|
||||||
}
|
}
|
||||||
@ -40,8 +39,6 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
|
|||||||
case "yarn":
|
case "yarn":
|
||||||
kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
|
kv["qwen2.rope.scaling.type"] = q.RopeScaling.Type
|
||||||
kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
|
kv["qwen2.rope.scaling.factor"] = q.RopeScaling.Factor
|
||||||
case "mrope", "default":
|
|
||||||
kv["qwen2.rope.mrope_section"] = q.RopeScaling.MropeSection
|
|
||||||
default:
|
default:
|
||||||
panic("unknown rope scaling type")
|
panic("unknown rope scaling type")
|
||||||
}
|
}
|
||||||
|
@ -1,102 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"cmp"
|
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
|
||||||
)
|
|
||||||
|
|
||||||
type qwen25VLModel struct {
|
|
||||||
qwen2Model
|
|
||||||
|
|
||||||
VisionModel struct {
|
|
||||||
Depth uint32 `json:"depth"`
|
|
||||||
HiddenSize uint32 `json:"hidden_size"`
|
|
||||||
NumHeads uint32 `json:"num_heads"`
|
|
||||||
InChannels uint32 `json:"in_chans"`
|
|
||||||
PatchSize uint32 `json:"patch_size"`
|
|
||||||
SpatialMergeSize uint32 `json:"spatial_merge_size"`
|
|
||||||
SpatialPatchSize uint32 `json:"spatial_patch_size"`
|
|
||||||
WindowSize uint32 `json:"window_size"`
|
|
||||||
RMSNormEps float32 `json:"layer_norm_epsilon"`
|
|
||||||
RopeTheta float32 `json:"rope_theta"`
|
|
||||||
FullAttentionBlocks []int32 `json:"fullatt_block_indexes"`
|
|
||||||
TemporalPatchSize uint32 `json:"temporal_patch_size"`
|
|
||||||
} `json:"vision_config"`
|
|
||||||
}
|
|
||||||
|
|
||||||
var _ ModelConverter = (*qwen25VLModel)(nil)
|
|
||||||
|
|
||||||
func (q *qwen25VLModel) KV(t *Tokenizer) ggml.KV {
|
|
||||||
kv := q.ModelParameters.KV(t)
|
|
||||||
kv["general.architecture"] = "qwen25vl"
|
|
||||||
|
|
||||||
for k, v := range q.qwen2Model.KV(t) {
|
|
||||||
if strings.HasPrefix(k, "qwen2.") {
|
|
||||||
kv[strings.Replace(k, "qwen2.", "qwen25vl.", 1)] = v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if q.VisionModel.FullAttentionBlocks == nil {
|
|
||||||
kv["qwen25vl.vision.fullatt_block_indexes"] = []int32{7, 15, 23, 31}
|
|
||||||
}
|
|
||||||
|
|
||||||
kv["qwen25vl.vision.block_count"] = cmp.Or(q.VisionModel.Depth, 32)
|
|
||||||
kv["qwen25vl.vision.embedding_length"] = q.VisionModel.HiddenSize
|
|
||||||
kv["qwen25vl.vision.attention.head_count"] = cmp.Or(q.VisionModel.NumHeads, 16)
|
|
||||||
kv["qwen25vl.vision.num_channels"] = q.VisionModel.InChannels
|
|
||||||
kv["qwen25vl.vision.patch_size"] = cmp.Or(q.VisionModel.PatchSize, 14)
|
|
||||||
kv["qwen25vl.vision.spatial_merge_size"] = cmp.Or(q.VisionModel.SpatialMergeSize, 2)
|
|
||||||
kv["qwen25vl.vision.spatial_patch_size"] = q.VisionModel.SpatialPatchSize
|
|
||||||
kv["qwen25vl.vision.window_size"] = cmp.Or(q.VisionModel.WindowSize, 112)
|
|
||||||
kv["qwen25vl.vision.attention.layer_norm_epsilon"] = cmp.Or(q.VisionModel.RMSNormEps, 1e-6)
|
|
||||||
kv["qwen25vl.vision.rope.freq_base"] = cmp.Or(q.VisionModel.RopeTheta, 1e4)
|
|
||||||
kv["qwen25vl.vision.fullatt_block_indexes"] = q.VisionModel.FullAttentionBlocks
|
|
||||||
kv["qwen25vl.vision.temporal_patch_size"] = cmp.Or(q.VisionModel.TemporalPatchSize, 2)
|
|
||||||
|
|
||||||
return kv
|
|
||||||
}
|
|
||||||
|
|
||||||
func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
|
|
||||||
var out []*ggml.Tensor
|
|
||||||
|
|
||||||
for _, t := range ts {
|
|
||||||
if strings.Contains(t.Name(), "patch_embed.proj") {
|
|
||||||
for t := range splitDim(t, 2,
|
|
||||||
strings.NewReplacer("patch_embed.proj", "patch_embd_0"),
|
|
||||||
strings.NewReplacer("patch_embed.proj", "patch_embd_1"),
|
|
||||||
) {
|
|
||||||
t.Shape = slices.DeleteFunc(t.Shape, func(i uint64) bool { return i == 1 })
|
|
||||||
out = append(out, t)
|
|
||||||
}
|
|
||||||
} else if strings.Contains(t.Name(), "attn.qkv") {
|
|
||||||
out = append(out, slices.Collect(splitDim(t, 0,
|
|
||||||
strings.NewReplacer("attn.qkv", "attn_q"),
|
|
||||||
strings.NewReplacer("attn.qkv", "attn_k"),
|
|
||||||
strings.NewReplacer("attn.qkv", "attn_v"),
|
|
||||||
))...)
|
|
||||||
} else {
|
|
||||||
out = append(out, &ggml.Tensor{
|
|
||||||
Name: t.Name(),
|
|
||||||
Kind: t.Kind(),
|
|
||||||
Shape: t.Shape(),
|
|
||||||
WriterTo: t,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *qwen25VLModel) Replacements() []string {
|
|
||||||
return append(
|
|
||||||
p.qwen2Model.Replacements(),
|
|
||||||
"visual", "v",
|
|
||||||
"blocks", "blk",
|
|
||||||
"attn.proj", "attn_out",
|
|
||||||
"norm1", "ln1",
|
|
||||||
"norm2", "ln2",
|
|
||||||
)
|
|
||||||
}
|
|
@ -1,56 +0,0 @@
|
|||||||
package convert
|
|
||||||
|
|
||||||
import (
|
|
||||||
"iter"
|
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
|
||||||
"github.com/pdevine/tensor"
|
|
||||||
"github.com/pdevine/tensor/native"
|
|
||||||
)
|
|
||||||
|
|
||||||
// splitDim splits a tensor along a specified dimension into multiple tensors. The dimension
|
|
||||||
// is split evenly based on the number of replacers provided.
|
|
||||||
func splitDim(t Tensor, dim int, replacers ...*strings.Replacer) iter.Seq[*ggml.Tensor] {
|
|
||||||
return func(yield func(*ggml.Tensor) bool) {
|
|
||||||
for i, replacer := range replacers {
|
|
||||||
shape := slices.Clone(t.Shape())
|
|
||||||
shape[dim] = shape[dim] / uint64(len(replacers))
|
|
||||||
|
|
||||||
slice := slices.Repeat([]tensor.Slice{nil}, len(shape))
|
|
||||||
slice[dim] = tensor.S(i*int(shape[dim]), (i+1)*int(shape[dim]))
|
|
||||||
|
|
||||||
tt := t.Clone()
|
|
||||||
tt.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
|
|
||||||
dims := make([]int, len(shape))
|
|
||||||
for i := range shape {
|
|
||||||
dims[i] = int(shape[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
var t tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
|
|
||||||
t, err := t.Slice(slice...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
t = tensor.Materialize(t)
|
|
||||||
// flatten tensor so it can be written as a vector
|
|
||||||
if err := t.Reshape(t.Shape().TotalSize()); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return native.VectorF32(t.(*tensor.Dense))
|
|
||||||
})
|
|
||||||
|
|
||||||
if !yield(&ggml.Tensor{
|
|
||||||
Name: replacer.Replace(t.Name()),
|
|
||||||
Kind: t.Kind(),
|
|
||||||
Shape: shape,
|
|
||||||
WriterTo: tt,
|
|
||||||
}) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -6,7 +6,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -127,7 +126,6 @@ func (kv KV) OllamaEngineRequired() bool {
|
|||||||
"mistral3",
|
"mistral3",
|
||||||
"llama4",
|
"llama4",
|
||||||
"mllama",
|
"mllama",
|
||||||
"qwen25vl",
|
|
||||||
}, kv.Architecture())
|
}, kv.Architecture())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -651,29 +649,6 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
|
|||||||
graphSize = 4 * (imageSize*imageSize*numChannels +
|
graphSize = 4 * (imageSize*imageSize*numChannels +
|
||||||
embeddingLength*patchSize +
|
embeddingLength*patchSize +
|
||||||
numPatches*numPatches*headCount)
|
numPatches*numPatches*headCount)
|
||||||
case "qwen25vl":
|
|
||||||
maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
|
|
||||||
mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
|
|
||||||
temporalPatchSize := uint64(2)
|
|
||||||
|
|
||||||
// Calculate max possible patches based on max_pixels
|
|
||||||
maxHeight := uint64(math.Sqrt(float64(maxPixels)))
|
|
||||||
maxWidth := maxPixels / maxHeight
|
|
||||||
maxGridHeight := maxHeight / patchSize
|
|
||||||
maxGridWidth := maxWidth / patchSize
|
|
||||||
// Account for merged patches (2x2 grid)
|
|
||||||
numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
|
|
||||||
|
|
||||||
// Calculate graph size based on typical operations in ProcessImage and createPatches
|
|
||||||
graphSize = 4 * (maxPixels*numChannels + // Original image storage
|
|
||||||
// Normalized pixels
|
|
||||||
maxPixels*numChannels +
|
|
||||||
// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
|
|
||||||
numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
|
|
||||||
// Self-attention calculations (similar to other architectures)
|
|
||||||
numPatches*numPatches*headCount +
|
|
||||||
// Additional buffer for processing
|
|
||||||
embeddingLength*numPatches)
|
|
||||||
case "llama4":
|
case "llama4":
|
||||||
// vision graph is computed independently in the same schedule
|
// vision graph is computed independently in the same schedule
|
||||||
// and is negligible compared to the worst case text graph
|
// and is negligible compared to the worst case text graph
|
||||||
|
1
go.mod
1
go.mod
@ -19,6 +19,7 @@ require (
|
|||||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
||||||
github.com/dlclark/regexp2 v1.11.4
|
github.com/dlclark/regexp2 v1.11.4
|
||||||
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
github.com/emirpasic/gods/v2 v2.0.0-alpha
|
||||||
|
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1
|
||||||
github.com/google/go-cmp v0.6.0
|
github.com/google/go-cmp v0.6.0
|
||||||
github.com/mattn/go-runewidth v0.0.14
|
github.com/mattn/go-runewidth v0.0.14
|
||||||
github.com/nlpodyssey/gopickle v0.3.0
|
github.com/nlpodyssey/gopickle v0.3.0
|
||||||
|
2
go.sum
2
go.sum
@ -69,6 +69,8 @@ github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3
|
|||||||
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
|
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
|
||||||
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
|
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
|
||||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||||
|
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1 h1:+VexzzkMLb1tnvpuQdGT/DicIRW7MN8ozsXqBMgp0Hk=
|
||||||
|
github.com/go-json-experiment/json v0.0.0-20250417205406-170dfdcf87d1/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M=
|
||||||
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
|
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
|
||||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||||
|
@ -1,277 +0,0 @@
|
|||||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Michael Yang <git@mxy.ng>
|
|
||||||
Date: Thu, 1 May 2025 13:45:12 -0700
|
|
||||||
Subject: [PATCH] add argsort and cuda copy for i32
|
|
||||||
|
|
||||||
---
|
|
||||||
ggml/src/ggml-cpu/ops.cpp | 43 ++++++++++++++
|
|
||||||
ggml/src/ggml-cuda/argsort.cu | 102 +++++++++++++++++++++++++++++++++-
|
|
||||||
ggml/src/ggml-cuda/cpy.cu | 49 ++++++++++++++++
|
|
||||||
3 files changed, 192 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
|
|
||||||
index becdae07..7a44b6cf 100644
|
|
||||||
--- a/ggml/src/ggml-cpu/ops.cpp
|
|
||||||
+++ b/ggml/src/ggml-cpu/ops.cpp
|
|
||||||
@@ -6890,6 +6890,45 @@ static void ggml_compute_forward_argsort_f32(
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void ggml_compute_forward_argsort_i32(
|
|
||||||
+ const ggml_compute_params * params,
|
|
||||||
+ ggml_tensor * dst) {
|
|
||||||
+
|
|
||||||
+ const ggml_tensor * src0 = dst->src[0];
|
|
||||||
+
|
|
||||||
+ GGML_TENSOR_UNARY_OP_LOCALS
|
|
||||||
+
|
|
||||||
+ GGML_ASSERT(nb0 == sizeof(int32_t));
|
|
||||||
+
|
|
||||||
+ const int ith = params->ith;
|
|
||||||
+ const int nth = params->nth;
|
|
||||||
+
|
|
||||||
+ const int64_t nr = ggml_nrows(src0);
|
|
||||||
+
|
|
||||||
+ ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
|
|
||||||
+
|
|
||||||
+ for (int64_t i = ith; i < nr; i += nth) {
|
|
||||||
+ int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
|
||||||
+ const int32_t * src_data = (int32_t *)((char *) src0->data + i*nb01);
|
|
||||||
+
|
|
||||||
+ for (int64_t j = 0; j < ne0; j++) {
|
|
||||||
+ dst_data[j] = j;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // C doesn't have a functional sort, so we do a bubble sort instead
|
|
||||||
+ for (int64_t j = 0; j < ne0; j++) {
|
|
||||||
+ for (int64_t k = j + 1; k < ne0; k++) {
|
|
||||||
+ if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
|
|
||||||
+ (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
|
|
||||||
+ int32_t tmp = dst_data[j];
|
|
||||||
+ dst_data[j] = dst_data[k];
|
|
||||||
+ dst_data[k] = tmp;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void ggml_compute_forward_argsort(
|
|
||||||
const ggml_compute_params * params,
|
|
||||||
ggml_tensor * dst) {
|
|
||||||
@@ -6901,6 +6940,10 @@ void ggml_compute_forward_argsort(
|
|
||||||
{
|
|
||||||
ggml_compute_forward_argsort_f32(params, dst);
|
|
||||||
} break;
|
|
||||||
+ case GGML_TYPE_I32:
|
|
||||||
+ {
|
|
||||||
+ ggml_compute_forward_argsort_i32(params, dst);
|
|
||||||
+ } break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
GGML_ABORT("fatal error");
|
|
||||||
diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu
|
|
||||||
index 607ded85..53b02634 100644
|
|
||||||
--- a/ggml/src/ggml-cuda/argsort.cu
|
|
||||||
+++ b/ggml/src/ggml-cuda/argsort.cu
|
|
||||||
@@ -85,13 +85,107 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+
|
|
||||||
+template<ggml_sort_order order>
|
|
||||||
+static __global__ void k_argsort_i32_i32(const int32_t * x, int * dst, const int ncols, const int ncols_pad) {
|
|
||||||
+ extern __shared__ int shared_mem[];
|
|
||||||
+ int * indices = shared_mem;
|
|
||||||
+
|
|
||||||
+ const int tid = threadIdx.x;
|
|
||||||
+ const int row = blockIdx.y;
|
|
||||||
+
|
|
||||||
+ // Initialize all indices, handling the case where threads < ncols_pad
|
|
||||||
+ for (int i = tid; i < ncols_pad; i += blockDim.x) {
|
|
||||||
+ indices[i] = i < ncols ? i : 0; // Use 0 for padding indices
|
|
||||||
+ }
|
|
||||||
+ __syncthreads();
|
|
||||||
+
|
|
||||||
+ // Bitonic sort
|
|
||||||
+ for (int k = 2; k <= ncols_pad; k *= 2) {
|
|
||||||
+ for (int j = k/2; j > 0; j /= 2) {
|
|
||||||
+ for (int i = tid; i < ncols_pad; i += blockDim.x) {
|
|
||||||
+ const int ij = i ^ j;
|
|
||||||
+ if (ij > i) {
|
|
||||||
+ // Only compare values within the actual data range
|
|
||||||
+ if (i < ncols && ij < ncols) {
|
|
||||||
+ if ((i & k) == 0) {
|
|
||||||
+ if (order == GGML_SORT_ORDER_ASC) {
|
|
||||||
+ if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
|
|
||||||
+ int tmp = indices[i];
|
|
||||||
+ indices[i] = indices[ij];
|
|
||||||
+ indices[ij] = tmp;
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
|
|
||||||
+ int tmp = indices[i];
|
|
||||||
+ indices[i] = indices[ij];
|
|
||||||
+ indices[ij] = tmp;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ if (order == GGML_SORT_ORDER_ASC) {
|
|
||||||
+ if (x[row * ncols + indices[i]] < x[row * ncols + indices[ij]]) {
|
|
||||||
+ int tmp = indices[i];
|
|
||||||
+ indices[i] = indices[ij];
|
|
||||||
+ indices[ij] = tmp;
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ if (x[row * ncols + indices[i]] > x[row * ncols + indices[ij]]) {
|
|
||||||
+ int tmp = indices[i];
|
|
||||||
+ indices[i] = indices[ij];
|
|
||||||
+ indices[ij] = tmp;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ __syncthreads();
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // Write sorted indices to output, only threads handling valid data
|
|
||||||
+ for (int i = tid; i < ncols; i += blockDim.x) {
|
|
||||||
+ dst[row * ncols + i] = indices[i];
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void argsort_i32_i32_cuda(const int32_t * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) {
|
|
||||||
+ // Bitonic sort requires ncols to be power of 2
|
|
||||||
+ const int ncols_pad = next_power_of_2(ncols);
|
|
||||||
+
|
|
||||||
+ // Ensure thread count doesn't exceed maximum (typically 1024)
|
|
||||||
+ const int max_threads = 1024; // This is the typical max for most GPUs
|
|
||||||
+ const int threads_per_block = ncols_pad > max_threads ? max_threads : ncols_pad;
|
|
||||||
+
|
|
||||||
+ const dim3 block_dims(threads_per_block, 1, 1);
|
|
||||||
+ const dim3 block_nums(1, nrows, 1);
|
|
||||||
+ const size_t shared_mem = ncols_pad * sizeof(int);
|
|
||||||
+
|
|
||||||
+ // Check if shared memory size is within limits
|
|
||||||
+ const size_t max_shared_mem = ggml_cuda_info().devices[ggml_cuda_get_device()].smpb;
|
|
||||||
+
|
|
||||||
+ // Instead of logging an error, use GGML_ASSERT with a descriptive message
|
|
||||||
+ GGML_ASSERT(shared_mem <= max_shared_mem && "argsort: required shared memory exceeds device limit");
|
|
||||||
+
|
|
||||||
+ // Launch kernels with the updated thread configuration
|
|
||||||
+ if (order == GGML_SORT_ORDER_ASC) {
|
|
||||||
+ k_argsort_i32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
|
|
||||||
+ } else if (order == GGML_SORT_ORDER_DESC) {
|
|
||||||
+ k_argsort_i32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
|
|
||||||
+ } else {
|
|
||||||
+ GGML_ABORT("fatal error");
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+
|
|
||||||
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
||||||
const ggml_tensor * src0 = dst->src[0];
|
|
||||||
const float * src0_d = (const float *)src0->data;
|
|
||||||
float * dst_d = (float *)dst->data;
|
|
||||||
cudaStream_t stream = ctx.stream();
|
|
||||||
|
|
||||||
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
||||||
+ GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_I32);
|
|
||||||
GGML_ASSERT( dst->type == GGML_TYPE_I32);
|
|
||||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
||||||
|
|
||||||
@@ -100,5 +194,9 @@ void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
||||||
|
|
||||||
enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
|
|
||||||
|
|
||||||
- argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
|
||||||
+ if (src0->type == GGML_TYPE_I32) {
|
|
||||||
+ argsort_i32_i32_cuda((const int32_t *)src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
|
||||||
+ } else {
|
|
||||||
+ argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu
|
|
||||||
index 2d46176e..47383486 100644
|
|
||||||
--- a/ggml/src/ggml-cuda/cpy.cu
|
|
||||||
+++ b/ggml/src/ggml-cuda/cpy.cu
|
|
||||||
@@ -38,6 +38,13 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
|
|
||||||
*dsti = *xi;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static __device__ void cpy_1_i32_i32(const char * cxi, char * cdsti) {
|
|
||||||
+ const int32_t * xi = (const int32_t *) cxi;
|
|
||||||
+ int32_t * dsti = (int32_t *) cdsti;
|
|
||||||
+
|
|
||||||
+ *dsti = *xi;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
template <cpy_kernel_t cpy_1>
|
|
||||||
static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
|
|
||||||
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
|
|
||||||
@@ -68,6 +75,44 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
|
|
||||||
cpy_1(cx + x_offset, cdst + dst_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
+// First, add this template function after the other template functions
|
|
||||||
+template <cpy_kernel_t cpy_1>
|
|
||||||
+static __global__ void cpy_i32_i32(const char * cx, char * cdst, const int ne,
|
|
||||||
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
|
|
||||||
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
|
|
||||||
+ const int nb12, const int nb13) {
|
|
||||||
+ const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;
|
|
||||||
+
|
|
||||||
+ if (i >= ne) {
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ const int64_t i03 = i/(ne00 * ne01 * ne02);
|
|
||||||
+ const int64_t i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
|
|
||||||
+ const int64_t i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
|
|
||||||
+ const int64_t i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
|
|
||||||
+ const int64_t x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
|
|
||||||
+
|
|
||||||
+ const int64_t i13 = i/(ne10 * ne11 * ne12);
|
|
||||||
+ const int64_t i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
|
|
||||||
+ const int64_t i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
|
|
||||||
+ const int64_t i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
|
|
||||||
+ const int64_t dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13;
|
|
||||||
+
|
|
||||||
+ cpy_1(cx + x_offset, cdst + dst_offset);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+// Then modify the ggml_cpy_i32_i32_cuda function to use the new template
|
|
||||||
+static void ggml_cpy_i32_i32_cuda(
|
|
||||||
+ const char * cx, char * cdst, const int ne,
|
|
||||||
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
|
|
||||||
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream, char ** cdst_indirect, int graph_cpynode_index) {
|
|
||||||
+
|
|
||||||
+ const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;
|
|
||||||
+ cpy_i32_i32<cpy_1_i32_i32><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>
|
|
||||||
+ (cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
|
|
||||||
const float * xi = (const float *) cxi;
|
|
||||||
block_q8_0 * dsti = (block_q8_0 *) cdsti;
|
|
||||||
@@ -631,6 +676,8 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
|
|
||||||
ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
|
||||||
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
|
||||||
ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
|
||||||
+ } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
|
||||||
+ ggml_cpy_i32_i32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
|
||||||
} else {
|
|
||||||
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
|
||||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
|
||||||
@@ -686,6 +733,8 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
|
|
||||||
return (void*) cpy_f32_f16<cpy_1_f32_f16>;
|
|
||||||
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
|
||||||
return (void*) cpy_f32_f16<cpy_1_f16_f32>;
|
|
||||||
+ } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
|
||||||
+ return (void*) cpy_i32_i32<cpy_1_i32_i32>;
|
|
||||||
} else {
|
|
||||||
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
|
||||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
|
@ -119,21 +119,6 @@ type Context interface {
|
|||||||
Layer(int) Context
|
Layer(int) Context
|
||||||
}
|
}
|
||||||
|
|
||||||
// RopeOptions contains optional parameters for RoPE function
|
|
||||||
type RopeOptions struct {
|
|
||||||
OriginalContextLen uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
// RopeOption defines a function that modifies RopeOpts
|
|
||||||
type RopeOption func(*RopeOptions)
|
|
||||||
|
|
||||||
// WithContextLen sets a custom context length
|
|
||||||
func WithContextLen(len uint32) RopeOption {
|
|
||||||
return func(opts *RopeOptions) {
|
|
||||||
opts.OriginalContextLen = len
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type Tensor interface {
|
type Tensor interface {
|
||||||
Dim(n int) int
|
Dim(n int) int
|
||||||
Stride(n int) int
|
Stride(n int) int
|
||||||
@ -159,7 +144,7 @@ type Tensor interface {
|
|||||||
AvgPool2D(ctx Context, k, s int, p float32) Tensor
|
AvgPool2D(ctx Context, k, s int, p float32) Tensor
|
||||||
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
||||||
|
|
||||||
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32, options ...RopeOption) Tensor
|
RoPE(ctx Context, positionIDs, ropeFactors Tensor, dim, ropeType uint32, base, scale float32) Tensor
|
||||||
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
|
||||||
|
|
||||||
Sin(ctx Context) Tensor
|
Sin(ctx Context) Tensor
|
||||||
@ -187,7 +172,6 @@ type Tensor interface {
|
|||||||
Duplicate(ctx Context) Tensor
|
Duplicate(ctx Context) Tensor
|
||||||
|
|
||||||
TopK(ctx Context, k int) Tensor
|
TopK(ctx Context, k int) Tensor
|
||||||
Argsort(ctx Context) Tensor
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScaledDotProductAttention implements a fused attention
|
// ScaledDotProductAttention implements a fused attention
|
||||||
|
@ -1060,17 +1060,7 @@ const (
|
|||||||
ropeTypeVision C.int = 24
|
ropeTypeVision C.int = 24
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim, ropeType uint32, ropeBase, ropeScale float32, options ...ml.RopeOption) ml.Tensor {
|
func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDim, ropeType uint32, ropeBase, ropeScale float32) ml.Tensor {
|
||||||
// Default options
|
|
||||||
opts := &ml.RopeOptions{
|
|
||||||
OriginalContextLen: 131072,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply any provided options
|
|
||||||
for _, option := range options {
|
|
||||||
option(opts)
|
|
||||||
}
|
|
||||||
|
|
||||||
if ropeFactors == nil {
|
if ropeFactors == nil {
|
||||||
ropeFactors = &Tensor{b: t.b}
|
ropeFactors = &Tensor{b: t.b}
|
||||||
}
|
}
|
||||||
@ -1083,19 +1073,16 @@ func (t *Tensor) RoPE(ctx ml.Context, positionIDs, ropeFactors ml.Tensor, ropeDi
|
|||||||
return &Tensor{
|
return &Tensor{
|
||||||
b: t.b,
|
b: t.b,
|
||||||
t: C.ggml_rope_ext(
|
t: C.ggml_rope_ext(
|
||||||
ctx.(*Context).ctx,
|
ctx.(*Context).ctx, dequant, positionIDs.(*Tensor).t, ropeFactors.(*Tensor).t,
|
||||||
dequant,
|
|
||||||
positionIDs.(*Tensor).t,
|
|
||||||
ropeFactors.(*Tensor).t,
|
|
||||||
C.int(ropeDim),
|
C.int(ropeDim),
|
||||||
C.int(ropeType),
|
C.int(ropeType),
|
||||||
C.int(opts.OriginalContextLen),
|
131072, // YaRN n_ctx_train
|
||||||
C.float(ropeBase),
|
C.float(ropeBase),
|
||||||
C.float(ropeScale),
|
C.float(ropeScale),
|
||||||
C.float(0.0),
|
0., // YaRN ext_factor
|
||||||
C.float(1.0),
|
1., // YaRN attn_factor
|
||||||
C.float(32.0),
|
32., // YaRN beta_fast
|
||||||
C.float(1.0),
|
1., // YaRN beta_slow
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1189,10 +1176,3 @@ func (t *Tensor) TopK(ctx ml.Context, k int) ml.Tensor {
|
|||||||
t: C.ggml_top_k(ctx.(*Context).ctx, t.t, C.int(k)),
|
t: C.ggml_top_k(ctx.(*Context).ctx, t.t, C.int(k)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Tensor) Argsort(ctx ml.Context) ml.Tensor {
|
|
||||||
return &Tensor{
|
|
||||||
b: t.b,
|
|
||||||
t: C.ggml_argsort(ctx.(*Context).ctx, t.t, C.GGML_SORT_ORDER_ASC),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
43
ml/backend/ggml/ggml/src/ggml-cpu/ops.cpp
vendored
43
ml/backend/ggml/ggml/src/ggml-cpu/ops.cpp
vendored
@ -6822,45 +6822,6 @@ static void ggml_compute_forward_argsort_f32(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_compute_forward_argsort_i32(
|
|
||||||
const ggml_compute_params * params,
|
|
||||||
ggml_tensor * dst) {
|
|
||||||
|
|
||||||
const ggml_tensor * src0 = dst->src[0];
|
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS
|
|
||||||
|
|
||||||
GGML_ASSERT(nb0 == sizeof(int32_t));
|
|
||||||
|
|
||||||
const int ith = params->ith;
|
|
||||||
const int nth = params->nth;
|
|
||||||
|
|
||||||
const int64_t nr = ggml_nrows(src0);
|
|
||||||
|
|
||||||
ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
|
|
||||||
|
|
||||||
for (int64_t i = ith; i < nr; i += nth) {
|
|
||||||
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
|
||||||
const int32_t * src_data = (int32_t *)((char *) src0->data + i*nb01);
|
|
||||||
|
|
||||||
for (int64_t j = 0; j < ne0; j++) {
|
|
||||||
dst_data[j] = j;
|
|
||||||
}
|
|
||||||
|
|
||||||
// C doesn't have a functional sort, so we do a bubble sort instead
|
|
||||||
for (int64_t j = 0; j < ne0; j++) {
|
|
||||||
for (int64_t k = j + 1; k < ne0; k++) {
|
|
||||||
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
|
|
||||||
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
|
|
||||||
int32_t tmp = dst_data[j];
|
|
||||||
dst_data[j] = dst_data[k];
|
|
||||||
dst_data[k] = tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ggml_compute_forward_argsort(
|
void ggml_compute_forward_argsort(
|
||||||
const ggml_compute_params * params,
|
const ggml_compute_params * params,
|
||||||
ggml_tensor * dst) {
|
ggml_tensor * dst) {
|
||||||
@ -6872,10 +6833,6 @@ void ggml_compute_forward_argsort(
|
|||||||
{
|
{
|
||||||
ggml_compute_forward_argsort_f32(params, dst);
|
ggml_compute_forward_argsort_f32(params, dst);
|
||||||
} break;
|
} break;
|
||||||
case GGML_TYPE_I32:
|
|
||||||
{
|
|
||||||
ggml_compute_forward_argsort_i32(params, dst);
|
|
||||||
} break;
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
GGML_ABORT("fatal error");
|
GGML_ABORT("fatal error");
|
||||||
|
102
ml/backend/ggml/ggml/src/ggml-cuda/argsort.cu
vendored
102
ml/backend/ggml/ggml/src/ggml-cuda/argsort.cu
vendored
@ -85,107 +85,13 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Bitonic argsort of int32 rows: block (0, row) sorts row `row` of x and
// writes the ncols sorted column indices to the same row of dst.
//
// x          input values, rows of ncols contiguous int32
// dst        output indices, rows of ncols ints
// ncols      number of real columns per row
// ncols_pad  ncols rounded up to a power of two; the kernel needs
//            ncols_pad * sizeof(int) bytes of dynamic shared memory
//
// A bitonic network is only correct when every compare-exchange of the
// power-of-two network is executed. Slots >= ncols are therefore filled with
// their own (out-of-range) index, which marks them as padding, and padding is
// treated as ordering after every real element for both sort directions.
// Skipping the comparisons that touch padding instead leaves rows whose
// length is not a power of two only partially sorted.
template<ggml_sort_order order>
static __global__ void k_argsort_i32_i32(const int32_t * x, int * dst, const int ncols, const int ncols_pad) {
    extern __shared__ int shared_mem[];
    int * indices = shared_mem;

    const int tid = threadIdx.x;
    const int row = blockIdx.y;

    const int32_t * x_row = x + row * ncols;

    // Each thread strides over the slots so that blocks with fewer than
    // ncols_pad threads still cover the whole row.
    for (int i = tid; i < ncols_pad; i += blockDim.x) {
        indices[i] = i; // values >= ncols identify padding slots
    }
    __syncthreads();

    for (int k = 2; k <= ncols_pad; k *= 2) {
        for (int j = k/2; j > 0; j /= 2) {
            for (int i = tid; i < ncols_pad; i += blockDim.x) {
                const int ij = i ^ j;
                if (ij <= i) {
                    continue; // the pair is handled by the thread owning the lower slot
                }

                const int a = indices[i];
                const int b = indices[ij];

                bool do_swap;
                if ((i & k) == 0) {
                    // Sub-sequence sorted in the requested direction: padding moves to the higher slot.
                    do_swap = a >= ncols ||
                        (b < ncols && (order == GGML_SORT_ORDER_ASC ? x_row[a] > x_row[b] : x_row[a] < x_row[b]));
                } else {
                    // Sub-sequence sorted in the opposite direction: padding moves to the lower slot.
                    do_swap = b >= ncols ||
                        (a < ncols && (order == GGML_SORT_ORDER_ASC ? x_row[a] < x_row[b] : x_row[a] > x_row[b]));
                }

                if (do_swap) {
                    indices[i]  = b;
                    indices[ij] = a;
                }
            }
            // All exchanges of this stage must be visible before the next one starts.
            __syncthreads();
        }
    }

    // Padding has been pushed past slot ncols - 1, so only real indices are copied out.
    for (int i = tid; i < ncols; i += blockDim.x) {
        dst[row * ncols + i] = indices[i];
    }
}
|
|
||||||
|
|
||||||
// Launches k_argsort_i32_i32 on `stream` with one thread block per row.
//
// The column count is padded to the next power of two for the bitonic sort.
// The block uses min(ncols_pad, 1024) threads and ncols_pad ints of dynamic
// shared memory; the launch is refused (assert) if that exceeds the current
// device's shared-memory-per-block value. Any order other than ASC/DESC aborts.
static void argsort_i32_i32_cuda(const int32_t * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) {
    const int ncols_pad = next_power_of_2(ncols);

    // 1024 is the hard-coded thread ceiling per block used by this launcher.
    const int max_threads = 1024;
    const int n_threads   = ncols_pad < max_threads ? ncols_pad : max_threads;

    const dim3 block_dims(n_threads, 1, 1);
    const dim3 block_nums(1, nrows, 1);

    const size_t shared_mem     = ncols_pad * sizeof(int);
    const size_t max_shared_mem = ggml_cuda_info().devices[ggml_cuda_get_device()].smpb;

    GGML_ASSERT(shared_mem <= max_shared_mem && "argsort: required shared memory exceeds device limit");

    switch (order) {
        case GGML_SORT_ORDER_ASC:
            k_argsort_i32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
            break;
        case GGML_SORT_ORDER_DESC:
            k_argsort_i32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
            break;
        default:
            GGML_ABORT("fatal error");
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
const ggml_tensor * src0 = dst->src[0];
|
const ggml_tensor * src0 = dst->src[0];
|
||||||
const float * src0_d = (const float *)src0->data;
|
const float * src0_d = (const float *)src0->data;
|
||||||
float * dst_d = (float *)dst->data;
|
float * dst_d = (float *)dst->data;
|
||||||
cudaStream_t stream = ctx.stream();
|
cudaStream_t stream = ctx.stream();
|
||||||
|
|
||||||
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_I32);
|
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||||
GGML_ASSERT( dst->type == GGML_TYPE_I32);
|
GGML_ASSERT( dst->type == GGML_TYPE_I32);
|
||||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||||
|
|
||||||
@ -194,9 +100,5 @@ void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||||||
|
|
||||||
enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
|
enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
|
||||||
|
|
||||||
if (src0->type == GGML_TYPE_I32) {
|
argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
||||||
argsort_i32_i32_cuda((const int32_t *)src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
|
||||||
} else {
|
|
||||||
argsort_f32_i32_cuda(src0_d, (int *)dst_d, ncols, nrows, order, stream);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
49
ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu
vendored
49
ml/backend/ggml/ggml/src/ggml-cuda/cpy.cu
vendored
@ -38,13 +38,6 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
|
|||||||
*dsti = *xi;
|
*dsti = *xi;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copies a single int32 element from cxi to cdsti; both pointers are
// reinterpreted as int32_t addresses.
static __device__ void cpy_1_i32_i32(const char * cxi, char * cdsti) {
    const int32_t * src = (const int32_t *) cxi;
    int32_t       * out = (int32_t *) cdsti;

    out[0] = src[0];
}
|
|
||||||
|
|
||||||
template <cpy_kernel_t cpy_1>
|
template <cpy_kernel_t cpy_1>
|
||||||
static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
|
static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const int ne,
|
||||||
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
|
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
|
||||||
@ -75,44 +68,6 @@ static __global__ void cpy_f32_f16(const char * cx, char * cdst_direct, const in
|
|||||||
cpy_1(cx + x_offset, cdst + dst_offset);
|
cpy_1(cx + x_offset, cdst + dst_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Element-wise strided copy kernel: thread i copies logical element i of the
// source to logical element i of the destination using `cpy_1`.
//
// ne           total number of elements; threads with i >= ne do nothing
// ne00..ne02   source extents, nb00..nb03 source byte strides
// ne10..ne12   destination extents, nb10..nb13 destination byte strides
//
// The flat index is decomposed independently against the source and the
// destination extents, so the two tensors may have different layouts.
template <cpy_kernel_t cpy_1>
static __global__ void cpy_i32_i32(const char * cx, char * cdst, const int ne,
                                   const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
                                   const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
                                   const int nb12, const int nb13) {
    const int64_t i = blockDim.x*blockIdx.x + threadIdx.x;

    if (i >= ne) {
        return;
    }

    // Source coordinates: peel off the outermost dimension first.
    const int64_t i03 = i/(ne00 * ne01 * ne02);
    const int64_t r03 = i - i03*ne00*ne01*ne02;
    const int64_t i02 = r03 / (ne00*ne01);
    const int64_t r02 = r03 - i02*ne01*ne00;
    const int64_t i01 = r02 / ne00;
    const int64_t i00 = r02 - i01*ne00;
    const int64_t x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;

    // Destination coordinates, same scheme with the destination extents.
    const int64_t i13 = i/(ne10 * ne11 * ne12);
    const int64_t r13 = i - i13*ne10*ne11*ne12;
    const int64_t i12 = r13 / (ne10*ne11);
    const int64_t r12 = r13 - i12*ne10*ne11;
    const int64_t i11 = r12 / ne10;
    const int64_t i10 = r12 - i11*ne10;
    const int64_t dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13;

    cpy_1(cx + x_offset, cdst + dst_offset);
}
|
|
||||||
|
|
||||||
// Host-side launcher for the int32 -> int32 copy kernel.
//
// Launches cpy_i32_i32<cpy_1_i32_i32> on `stream` with CUDA_CPY_BLOCK_SIZE
// threads per block and enough blocks to cover all `ne` elements.
// NOTE: cdst_indirect and graph_cpynode_index are accepted but not used by
// this function; the kernel always writes through cdst directly.
static void ggml_cpy_i32_i32_cuda(
    const char * cx, char * cdst, const int ne,
    const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
    const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, cudaStream_t stream, char ** cdst_indirect, int graph_cpynode_index) {

    // Ceiling division so a partial final block is still launched.
    const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE;

    cpy_i32_i32<cpy_1_i32_i32><<<num_blocks, CUDA_CPY_BLOCK_SIZE, 0, stream>>>(
        cx, cdst, ne,
        ne00, ne01, ne02, nb00, nb01, nb02, nb03,
        ne10, ne11, ne12, nb10, nb11, nb12, nb13);
}
|
|
||||||
|
|
||||||
static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
|
static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
|
||||||
const float * xi = (const float *) cxi;
|
const float * xi = (const float *) cxi;
|
||||||
block_q8_0 * dsti = (block_q8_0 *) cdsti;
|
block_q8_0 * dsti = (block_q8_0 *) cdsti;
|
||||||
@ -678,8 +633,6 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
|
|||||||
ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
ggml_cpy_f16_f16_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
||||||
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
||||||
ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
ggml_cpy_f16_f32_cuda (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
||||||
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
|
||||||
ggml_cpy_i32_i32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
|
|
||||||
} else {
|
} else {
|
||||||
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
||||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||||
@ -735,8 +688,6 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
|
|||||||
return (void*) cpy_f32_f16<cpy_1_f32_f16>;
|
return (void*) cpy_f32_f16<cpy_1_f32_f16>;
|
||||||
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {
|
||||||
return (void*) cpy_f32_f16<cpy_1_f16_f32>;
|
return (void*) cpy_f32_f16<cpy_1_f16_f32>;
|
||||||
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
|
|
||||||
return (void*) cpy_i32_i32<cpy_1_i32_i32>;
|
|
||||||
} else {
|
} else {
|
||||||
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
|
||||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||||
|
@ -7,5 +7,4 @@ import (
|
|||||||
_ "github.com/ollama/ollama/model/models/llama4"
|
_ "github.com/ollama/ollama/model/models/llama4"
|
||||||
_ "github.com/ollama/ollama/model/models/mistral3"
|
_ "github.com/ollama/ollama/model/models/mistral3"
|
||||||
_ "github.com/ollama/ollama/model/models/mllama"
|
_ "github.com/ollama/ollama/model/models/mllama"
|
||||||
_ "github.com/ollama/ollama/model/models/qwen25vl"
|
|
||||||
)
|
)
|
||||||
|
@ -1,187 +0,0 @@
|
|||||||
package qwen25vl
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"image"
|
|
||||||
"slices"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
|
||||||
"github.com/ollama/ollama/kvcache"
|
|
||||||
"github.com/ollama/ollama/ml"
|
|
||||||
"github.com/ollama/ollama/model"
|
|
||||||
"github.com/ollama/ollama/model/input"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Model struct {
|
|
||||||
model.Base
|
|
||||||
model.BytePairEncoding
|
|
||||||
|
|
||||||
*TextModel
|
|
||||||
*VisionModel `gguf:"v,vision"`
|
|
||||||
|
|
||||||
ImageProcessor
|
|
||||||
}
|
|
||||||
|
|
||||||
// Implement MultimodalProcessor interface
|
|
||||||
var _ model.MultimodalProcessor = (*Model)(nil)
|
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
|
||||||
m := &Model{
|
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
|
||||||
c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
|
|
||||||
&model.Vocabulary{
|
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
|
||||||
BOS: int32(c.Uint("tokenizer.ggml.bos_token_id")),
|
|
||||||
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
|
|
||||||
EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")),
|
|
||||||
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
|
|
||||||
EOT: int32(c.Uint("tokenizer.ggml.eos_token_id")),
|
|
||||||
AddEOT: c.Bool("tokenizer.ggml.add_eos_token", false),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
TextModel: NewTextModel(c),
|
|
||||||
VisionModel: newVisionModel(c),
|
|
||||||
ImageProcessor: newImageProcessor(c),
|
|
||||||
}
|
|
||||||
|
|
||||||
m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
|
|
||||||
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Model) PixelValues(ctx ml.Context, multimodalData []byte) (ml.Tensor, *Grid, error) {
|
|
||||||
image, _, err := image.Decode(bytes.NewReader(multimodalData))
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
f32s, grid, err := m.ImageProcessor.ProcessImage(image)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate tensor dimensions
|
|
||||||
patchDim := m.ImageProcessor.numChannels * m.ImageProcessor.temporalPatchSize *
|
|
||||||
m.ImageProcessor.patchSize * m.ImageProcessor.patchSize
|
|
||||||
numPatches := grid.Temporal * grid.Height * grid.Width
|
|
||||||
|
|
||||||
pixelValues, err := ctx.Input().FromFloatSlice(f32s, patchDim, numPatches)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("failed to create tensor from image: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return pixelValues, grid, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
|
|
||||||
if len(m.VisionModel.Layers) == 0 {
|
|
||||||
return nil, model.ErrNoVisionModel
|
|
||||||
}
|
|
||||||
|
|
||||||
pixels, grid, err := m.PixelValues(ctx, multimodalData)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
visionOutputs := m.VisionModel.Forward(ctx, pixels, grid)
|
|
||||||
return &chunks{Model: m, Tensor: visionOutputs}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type chunks struct {
|
|
||||||
*Model
|
|
||||||
ml.Tensor
|
|
||||||
|
|
||||||
dataOnce sync.Once
|
|
||||||
data []float32
|
|
||||||
}
|
|
||||||
|
|
||||||
type chunk struct {
|
|
||||||
*chunks
|
|
||||||
s, n int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *chunk) floats() []float32 {
|
|
||||||
r.dataOnce.Do(func() {
|
|
||||||
temp := r.Backend().NewContext()
|
|
||||||
defer temp.Close()
|
|
||||||
temp.Forward(r.Tensor).Compute(r.Tensor)
|
|
||||||
r.data = r.Floats()
|
|
||||||
})
|
|
||||||
|
|
||||||
return r.data[r.s*r.Dim(0) : (r.s+r.n)*r.Dim(0)]
|
|
||||||
}
|
|
||||||
|
|
||||||
// PostTokenize arranges Qwen-2.5-VL's inputs for the forward pass
|
|
||||||
func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
|
|
||||||
var result []input.Input
|
|
||||||
|
|
||||||
var (
|
|
||||||
imageToken int32 = 151655
|
|
||||||
visionStartToken int32 = 151652
|
|
||||||
visionEndToken int32 = 151653
|
|
||||||
)
|
|
||||||
|
|
||||||
nImg := 0
|
|
||||||
for _, inp := range inputs {
|
|
||||||
if inp.Multimodal == nil {
|
|
||||||
// If not a multimodal input, add it to the result unchanged
|
|
||||||
result = append(result, inp)
|
|
||||||
} else {
|
|
||||||
// Adding the 'Picture' prefix is a hack, at the time of writing there is no way to prefix
|
|
||||||
// the image tokens with a prompt, so we add a prefix here
|
|
||||||
nImg++
|
|
||||||
pre, err := m.Encode(fmt.Sprintf(" Picture %d: ", nImg), true)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to encode image prompt: %w", err)
|
|
||||||
}
|
|
||||||
for i := range pre {
|
|
||||||
result = append(result, input.Input{Token: pre[i]})
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is an image token with multimodal data
|
|
||||||
chunksData := inp.Multimodal.(*chunks)
|
|
||||||
patchesPerChunk := chunksData.Dim(1)
|
|
||||||
|
|
||||||
// First add the vision start token
|
|
||||||
result = append(result, input.Input{Token: visionStartToken, SameBatch: patchesPerChunk + 2})
|
|
||||||
|
|
||||||
// Add the image token with the multimodal tensor data at the first position
|
|
||||||
// Create a chunk with proper s and n values
|
|
||||||
result = append(result, input.Input{
|
|
||||||
Token: imageToken,
|
|
||||||
Multimodal: &chunk{chunks: chunksData, s: 0, n: patchesPerChunk},
|
|
||||||
MultimodalHash: inp.MultimodalHash,
|
|
||||||
SameBatch: patchesPerChunk,
|
|
||||||
})
|
|
||||||
|
|
||||||
// Add the placeholder tokens for the remaining positions (tokensPerGrid-1)
|
|
||||||
result = append(result, slices.Repeat([]input.Input{{Token: imageToken}}, patchesPerChunk-1)...)
|
|
||||||
|
|
||||||
result = append(result, input.Input{Token: visionEndToken})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
|
||||||
positions, err := ctx.Input().FromIntSlice(batch.Positions, len(batch.Positions))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
outputs, err := ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return m.TextModel.Forward(ctx, batch.Inputs, positions, outputs, batch, m.Cache)
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
model.Register("qwen25vl", New)
|
|
||||||
}
|
|
@ -1,155 +0,0 @@
|
|||||||
package qwen25vl
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
|
||||||
"github.com/ollama/ollama/kvcache"
|
|
||||||
"github.com/ollama/ollama/ml"
|
|
||||||
"github.com/ollama/ollama/ml/nn"
|
|
||||||
"github.com/ollama/ollama/model/input"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TextOptions holds the text model's hyperparameters as read from the model
// configuration by NewTextModel.
type TextOptions struct {
	ctxLen     int // "context_length"
	hiddenSize int // "embedding_length"
	numHeads   int // "attention.head_count"
	numKVHeads int // "attention.head_count_kv"

	eps       float32 // "attention.layer_norm_rms_epsilon"
	ropeBase  float32 // "rope.freq_base"
	ropeScale float32 // "rope.freq_scale", default 1

	ropeDim           uint32 // "rope.dimension_count", default 128
	defaultContextLen uint32 // "context_length", default 128000
}
|
|
||||||
|
|
||||||
type TextModel struct {
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
|
||||||
Layers []Layer `gguf:"blk"`
|
|
||||||
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
|
||||||
Output *nn.Linear `gguf:"output,alt:token_embd"`
|
|
||||||
|
|
||||||
*TextOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewTextModel(c fs.Config) *TextModel {
|
|
||||||
m := TextModel{
|
|
||||||
Layers: make([]Layer, c.Uint("block_count")),
|
|
||||||
TextOptions: &TextOptions{
|
|
||||||
ctxLen: int(c.Uint("context_length")),
|
|
||||||
hiddenSize: int(c.Uint("embedding_length")),
|
|
||||||
numHeads: int(c.Uint("attention.head_count")),
|
|
||||||
numKVHeads: int(c.Uint("attention.head_count_kv")),
|
|
||||||
eps: c.Float("attention.layer_norm_rms_epsilon"),
|
|
||||||
ropeBase: c.Float("rope.freq_base"),
|
|
||||||
ropeScale: c.Float("rope.freq_scale", 1),
|
|
||||||
ropeDim: c.Uint("rope.dimension_count", 128),
|
|
||||||
defaultContextLen: c.Uint("context_length", 128000),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return &m
|
|
||||||
}
|
|
||||||
|
|
||||||
// SelfAttention implements the multi-head self-attention mechanism
|
|
||||||
// with separate projections for query, key, value and output transformations
|
|
||||||
type SelfAttention struct {
|
|
||||||
Query *nn.Linear `gguf:"attn_q"`
|
|
||||||
Key *nn.Linear `gguf:"attn_k"`
|
|
||||||
Value *nn.Linear `gguf:"attn_v"`
|
|
||||||
Output *nn.Linear `gguf:"attn_output"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
|
||||||
batchSize := hiddenState.Dim(1)
|
|
||||||
headDim := opts.hiddenSize / opts.numHeads
|
|
||||||
|
|
||||||
q := sa.Query.Forward(ctx, hiddenState)
|
|
||||||
q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
|
|
||||||
q = q.RoPE(ctx, positionIDs, nil, opts.ropeDim, 2, opts.ropeBase, opts.ropeScale, ml.WithContextLen(opts.defaultContextLen))
|
|
||||||
|
|
||||||
k := sa.Key.Forward(ctx, hiddenState)
|
|
||||||
k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
|
||||||
k = k.RoPE(ctx, positionIDs, nil, opts.ropeDim, 2, opts.ropeBase, opts.ropeScale, ml.WithContextLen(opts.defaultContextLen))
|
|
||||||
|
|
||||||
v := sa.Value.Forward(ctx, hiddenState)
|
|
||||||
v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
|
|
||||||
|
|
||||||
scaleFactor := 1.0 / math.Sqrt(float64(headDim))
|
|
||||||
kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
|
|
||||||
kqv = kqv.Reshape(ctx, opts.hiddenSize, batchSize)
|
|
||||||
|
|
||||||
return sa.Output.Forward(ctx, kqv)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Shift applies rotary position embeddings to the key tensor for causal attention caching
|
|
||||||
func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
|
|
||||||
return key.RoPE(ctx, shift, nil, m.ropeDim, 2, m.ropeBase, m.ropeScale, ml.WithContextLen(m.defaultContextLen)), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// MLP implements the feed-forward network component with SwiGLU activation
|
|
||||||
type MLP struct {
|
|
||||||
Up *nn.Linear `gguf:"ffn_up"`
|
|
||||||
Down *nn.Linear `gguf:"ffn_down"`
|
|
||||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextOptions) ml.Tensor {
|
|
||||||
// Apply SwiGLU activation gating
|
|
||||||
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenState))
|
|
||||||
// Project back to hidden dimension
|
|
||||||
return mlp.Down.Forward(ctx, hiddenState)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Layer represents a single transformer layer combining self-attention and feed-forward components
|
|
||||||
type Layer struct {
|
|
||||||
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
|
|
||||||
SelfAttention *SelfAttention
|
|
||||||
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
|
|
||||||
MLP *MLP
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *TextOptions) ml.Tensor {
|
|
||||||
// Self-attention branch with residual connection
|
|
||||||
residual := hiddenState
|
|
||||||
|
|
||||||
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
|
|
||||||
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)
|
|
||||||
|
|
||||||
// In the final layer (outputs != nil), optimize by pruning to just the token positions
|
|
||||||
// we need logits for.
|
|
||||||
if outputs != nil {
|
|
||||||
hiddenState = hiddenState.Rows(ctx, outputs)
|
|
||||||
residual = residual.Rows(ctx, outputs)
|
|
||||||
}
|
|
||||||
|
|
||||||
hiddenState = hiddenState.Add(ctx, residual)
|
|
||||||
// Feed-forward branch with residual connection
|
|
||||||
residual = hiddenState
|
|
||||||
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
|
|
||||||
hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
|
|
||||||
return hiddenState.Add(ctx, residual)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor, batch input.Batch, cache kvcache.Cache) (ml.Tensor, error) {
|
|
||||||
// Initial token embedding
|
|
||||||
hiddenStates := m.TokenEmbedding.Forward(ctx, inputs).Duplicate(ctx)
|
|
||||||
|
|
||||||
for _, mi := range batch.Multimodal {
|
|
||||||
f32s := mi.Multimodal.(*chunk).floats()
|
|
||||||
img, err := ctx.Input().FromFloatSlice(f32s, len(f32s)/m.hiddenSize, m.hiddenSize)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.Forward(img.Copy(ctx, hiddenStates.View(ctx, mi.Index*hiddenStates.Stride(1), img.Dim(0)*img.Dim(1))))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process through transformer layers
|
|
||||||
for i, layer := range m.Layers {
|
|
||||||
cache.SetLayer(i)
|
|
||||||
|
|
||||||
var lastLayerOutputs ml.Tensor
|
|
||||||
if i == len(m.Layers)-1 {
|
|
||||||
lastLayerOutputs = outputs
|
|
||||||
}
|
|
||||||
|
|
||||||
hiddenStates = layer.Forward(ctx, hiddenStates, positions, lastLayerOutputs, cache, m.TextOptions)
|
|
||||||
}
|
|
||||||
|
|
||||||
hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
|
|
||||||
return m.Output.Forward(ctx, hiddenStates), nil
|
|
||||||
}
|
|
@ -1,391 +0,0 @@
|
|||||||
package qwen25vl
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
"slices"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
|
||||||
"github.com/ollama/ollama/ml"
|
|
||||||
"github.com/ollama/ollama/ml/nn"
|
|
||||||
)
|
|
||||||
|
|
||||||
// batchSize is the batch dimension used when reshaping vision tensors; only
// a batch size of 1 is supported.
var batchSize = 1
|
|
||||||
|
|
||||||
func rotateHalf(ctx ml.Context, t ml.Tensor) ml.Tensor {
|
|
||||||
x1 := t.View(ctx, 0, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3))
|
|
||||||
x2 := t.View(ctx, t.Stride(0)*t.Dim(0)/2, t.Dim(0)/2, t.Stride(1), t.Dim(1), t.Stride(2), t.Dim(2), t.Stride(3), t.Dim(3)).Contiguous(ctx)
|
|
||||||
return x2.Neg(ctx).Concat(ctx, x1, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func applyRotaryPositionalEmbedding(ctx ml.Context, t, cos, sin ml.Tensor) ml.Tensor {
|
|
||||||
return t.Mul(ctx, cos).Add(ctx, rotateHalf(ctx, t).Mul(ctx, sin))
|
|
||||||
}
|
|
||||||
|
|
||||||
func blockDiagonalMask(ctx ml.Context, seqLength int, bounds []int, numHeads int) ml.Tensor {
|
|
||||||
// Create a flat slice for the mask (all -inf initially to block all attention)
|
|
||||||
flat := make([]float32, seqLength*seqLength)
|
|
||||||
for i := range flat {
|
|
||||||
flat[i] = float32(math.Inf(-1)) // Negative infinity to block attention
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill in the mask with zeros for tokens that CAN attend to each other
|
|
||||||
for i := 1; i < len(bounds); i++ {
|
|
||||||
start := bounds[i-1]
|
|
||||||
end := bounds[i]
|
|
||||||
|
|
||||||
// Enable attention within this sequence block by setting values to 0
|
|
||||||
for row := start; row < end; row++ {
|
|
||||||
for col := start; col < end; col++ {
|
|
||||||
idx := row*seqLength + col
|
|
||||||
flat[idx] = 0.0 // 0 allows attention, -inf blocks it
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
mask, err := ctx.Input().FromFloatSlice(flat, seqLength, seqLength)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
// Reshape to match [seqLength, seqLength, 1] for broadcasting
|
|
||||||
mask = mask.Reshape(ctx, seqLength, seqLength, 1)
|
|
||||||
|
|
||||||
return mask
|
|
||||||
}
|
|
||||||
|
|
||||||
type VisionSelfAttention struct {
|
|
||||||
Query *nn.Linear `gguf:"attn_q"`
|
|
||||||
Key *nn.Linear `gguf:"attn_k"`
|
|
||||||
Value *nn.Linear `gguf:"attn_v"`
|
|
||||||
Output *nn.Linear `gguf:"attn_out"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
|
||||||
query := sa.Query.Forward(ctx, hiddenStates)
|
|
||||||
key := sa.Key.Forward(ctx, hiddenStates)
|
|
||||||
value := sa.Value.Forward(ctx, hiddenStates)
|
|
||||||
|
|
||||||
query = query.Reshape(ctx, opts.headDim, opts.numHeads, query.Dim(1), batchSize)
|
|
||||||
key = key.Reshape(ctx, opts.headDim, opts.numHeads, key.Dim(1), batchSize)
|
|
||||||
value = value.Reshape(ctx, opts.headDim, opts.numHeads, value.Dim(1), batchSize)
|
|
||||||
|
|
||||||
query = applyRotaryPositionalEmbedding(ctx, query, cos, sin)
|
|
||||||
key = applyRotaryPositionalEmbedding(ctx, key, cos, sin)
|
|
||||||
|
|
||||||
// Scale factor for scaled dot-product attention
|
|
||||||
scale := 1.0 / math.Sqrt(float64(opts.headDim))
|
|
||||||
|
|
||||||
// Scaled dot-product attention
|
|
||||||
query = query.Permute(ctx, 0, 2, 1, 3)
|
|
||||||
key = key.Permute(ctx, 0, 2, 1, 3)
|
|
||||||
value = value.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
|
|
||||||
kq := key.MulmatFullPrec(ctx, query)
|
|
||||||
kq = kq.Scale(ctx, scale)
|
|
||||||
if mask != nil {
|
|
||||||
kq = kq.Add(ctx, mask)
|
|
||||||
}
|
|
||||||
kq = kq.Softmax(ctx)
|
|
||||||
kqv := value.Mulmat(ctx, kq)
|
|
||||||
attention := kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
|
||||||
attention = attention.Reshape(ctx, opts.hiddenSize, attention.Dim(2), batchSize)
|
|
||||||
|
|
||||||
return sa.Output.Forward(ctx, attention)
|
|
||||||
}
|
|
||||||
|
|
||||||
// VisionMLP implements the multi-layer perceptron
|
|
||||||
type VisionMLP struct {
|
|
||||||
Gate *nn.Linear `gguf:"ffn_gate"`
|
|
||||||
Up *nn.Linear `gguf:"ffn_up"`
|
|
||||||
Down *nn.Linear `gguf:"ffn_down"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
|
||||||
// Using activation as specified in config (likely GELU or SiLU/Swish)
|
|
||||||
gateOutput := mlp.Gate.Forward(ctx, hiddenStates)
|
|
||||||
upOutput := mlp.Up.Forward(ctx, hiddenStates)
|
|
||||||
hiddenStates = gateOutput.SILU(ctx).Mul(ctx, upOutput)
|
|
||||||
|
|
||||||
return mlp.Down.Forward(ctx, hiddenStates)
|
|
||||||
}
|
|
||||||
|
|
||||||
type VisionEncoderLayer struct {
|
|
||||||
Norm1 *nn.RMSNorm `gguf:"ln1"`
|
|
||||||
SelfAttention *VisionSelfAttention
|
|
||||||
Norm2 *nn.RMSNorm `gguf:"ln2"`
|
|
||||||
MLP *VisionMLP
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
|
||||||
residual := hiddenStates
|
|
||||||
hiddenStates = e.Norm1.Forward(ctx, hiddenStates, opts.eps)
|
|
||||||
hiddenStates = e.SelfAttention.Forward(ctx, hiddenStates, cos, sin, mask, opts)
|
|
||||||
hiddenStates = hiddenStates.Add(ctx, residual)
|
|
||||||
|
|
||||||
residual = hiddenStates
|
|
||||||
hiddenStates = e.Norm2.Forward(ctx, hiddenStates, opts.eps)
|
|
||||||
hiddenStates = e.MLP.Forward(ctx, hiddenStates, opts)
|
|
||||||
return hiddenStates.Add(ctx, residual)
|
|
||||||
}
|
|
||||||
|
|
||||||
// VisionModelOptions contains configuration options
|
|
||||||
type VisionModelOptions struct {
|
|
||||||
hiddenSize int
|
|
||||||
numHeads int
|
|
||||||
headDim int
|
|
||||||
patchSize int
|
|
||||||
numChannels int
|
|
||||||
eps float32
|
|
||||||
ropeTheta float32
|
|
||||||
spatialMergeSize int
|
|
||||||
windowSize int
|
|
||||||
fullAttnBlocks []int32
|
|
||||||
temporalPatchSize int
|
|
||||||
}
|
|
||||||
|
|
||||||
type PatchEmbedding struct {
|
|
||||||
PatchConv0 *nn.Conv2D `gguf:"patch_embd_0"`
|
|
||||||
PatchConv1 *nn.Conv2D `gguf:"patch_embd_1"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (pe *PatchEmbedding) Forward(ctx ml.Context, pixelValues ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
|
||||||
numPatches := pixelValues.Shape()[1]
|
|
||||||
|
|
||||||
// Reshape the input tensor to match the expected dimensions
|
|
||||||
pixelValues = pixelValues.Reshape(ctx, opts.patchSize*opts.patchSize, opts.temporalPatchSize, opts.numChannels, numPatches)
|
|
||||||
|
|
||||||
// Permute the tensor to bring the temporal dimension to the front
|
|
||||||
pixelValues = pixelValues.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
|
|
||||||
|
|
||||||
// Split the tensor into parts for the temporal convolutions
|
|
||||||
in0 := pixelValues.View(ctx, 0, 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
|
|
||||||
in0 = in0.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
|
|
||||||
in1 := pixelValues.View(ctx, pixelValues.Stride(0), 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
|
|
||||||
in1 = in1.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
|
|
||||||
|
|
||||||
s0, s1 := opts.patchSize, opts.patchSize // Use full stride
|
|
||||||
p0, p1 := 0, 0 // padding
|
|
||||||
d0, d1 := 1, 1 // dilation
|
|
||||||
out0 := pe.PatchConv0.Forward(ctx, in0, s0, s1, p0, p1, d0, d1)
|
|
||||||
out1 := pe.PatchConv1.Forward(ctx, in1, s0, s1, p0, p1, d0, d1)
|
|
||||||
|
|
||||||
// Add the outputs from the two temporal convolutions
|
|
||||||
out := out0.Add(ctx, out1)
|
|
||||||
|
|
||||||
// Reshape the output tensor to match the expected dimensions
|
|
||||||
return out.Reshape(ctx, opts.hiddenSize, numPatches)
|
|
||||||
}
|
|
||||||
|
|
||||||
// VisionPatchMerger implements patch merging for the Qwen vision model
|
|
||||||
type VisionPatchMerger struct {
|
|
||||||
LNQ *nn.RMSNorm `gguf:"ln_q"`
|
|
||||||
MLP0 *nn.Linear `gguf:"mlp.0"`
|
|
||||||
MLP2 *nn.Linear `gguf:"mlp.2"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Forward computes patch merging for the vision model
|
|
||||||
func (pm *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, opts *VisionModelOptions) ml.Tensor {
|
|
||||||
normalized := pm.LNQ.Forward(ctx, visionOutputs, opts.eps)
|
|
||||||
|
|
||||||
hiddenSize := visionOutputs.Dim(0) * (opts.spatialMergeSize * opts.spatialMergeSize)
|
|
||||||
|
|
||||||
// Reshape the normalized output to view the hidden size dimension
|
|
||||||
reshaped := normalized.Reshape(ctx, hiddenSize, normalized.Dim(1)/(opts.spatialMergeSize*opts.spatialMergeSize), batchSize)
|
|
||||||
hidden := pm.MLP0.Forward(ctx, reshaped)
|
|
||||||
activated := hidden.GELU(ctx)
|
|
||||||
|
|
||||||
output := pm.MLP2.Forward(ctx, activated)
|
|
||||||
|
|
||||||
return output
|
|
||||||
}
|
|
||||||
|
|
||||||
// VisionModel implements the Qwen vision model
|
|
||||||
type VisionModel struct {
|
|
||||||
PatchEmbedding *PatchEmbedding
|
|
||||||
Layers []VisionEncoderLayer `gguf:"blk"`
|
|
||||||
PatchMerger *VisionPatchMerger `gguf:"merger"`
|
|
||||||
|
|
||||||
*VisionModelOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
// Forward computes the vision model for an input tensor
|
|
||||||
func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid) ml.Tensor {
|
|
||||||
// Extract patch embeddings
|
|
||||||
hiddenStates := m.PatchEmbedding.Forward(ctx, pixelValues, m.VisionModelOptions)
|
|
||||||
|
|
||||||
positionEmbedding := m.PositionalEmbedding(ctx, grid)
|
|
||||||
|
|
||||||
windowIndex, bounds := m.WindowIndex(ctx, grid)
|
|
||||||
|
|
||||||
spatialMergeUnit := m.spatialMergeSize * m.spatialMergeSize
|
|
||||||
|
|
||||||
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0)*spatialMergeUnit, hiddenStates.Dim(1)/spatialMergeUnit)
|
|
||||||
hiddenStates = hiddenStates.Rows(ctx, windowIndex)
|
|
||||||
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0)/spatialMergeUnit, hiddenStates.Dim(1)*spatialMergeUnit)
|
|
||||||
|
|
||||||
positionEmbedding = positionEmbedding.Reshape(ctx, positionEmbedding.Dim(0)*spatialMergeUnit, positionEmbedding.Dim(1)/spatialMergeUnit)
|
|
||||||
positionEmbedding = positionEmbedding.Rows(ctx, windowIndex)
|
|
||||||
positionEmbedding = positionEmbedding.Reshape(ctx, positionEmbedding.Dim(0)/spatialMergeUnit, positionEmbedding.Dim(1)*spatialMergeUnit)
|
|
||||||
positionEmbedding = positionEmbedding.Concat(ctx, positionEmbedding, 0)
|
|
||||||
|
|
||||||
cos, sin := positionEmbedding.Cos(ctx), positionEmbedding.Sin(ctx)
|
|
||||||
cos = cos.Reshape(ctx, cos.Dim(0), 1, cos.Dim(1))
|
|
||||||
sin = sin.Reshape(ctx, sin.Dim(0), 1, sin.Dim(1))
|
|
||||||
|
|
||||||
mask := blockDiagonalMask(ctx, hiddenStates.Dim(1), bounds, m.VisionModelOptions.numHeads)
|
|
||||||
// Apply encoder layers
|
|
||||||
for i, layer := range m.Layers {
|
|
||||||
if slices.Contains(m.fullAttnBlocks, int32(i)) {
|
|
||||||
hiddenStates = layer.Forward(ctx, hiddenStates, cos, sin, nil, m.VisionModelOptions)
|
|
||||||
} else {
|
|
||||||
hiddenStates = layer.Forward(
|
|
||||||
ctx,
|
|
||||||
hiddenStates,
|
|
||||||
cos,
|
|
||||||
sin,
|
|
||||||
mask,
|
|
||||||
m.VisionModelOptions,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
hiddenStates = m.PatchMerger.Forward(ctx, hiddenStates, m.VisionModelOptions)
|
|
||||||
reverseWindowIndex := windowIndex.Argsort(ctx)
|
|
||||||
return hiddenStates.Rows(ctx, reverseWindowIndex)
|
|
||||||
}
|
|
||||||
|
|
||||||
// WindowIndex divides the grid into windows and returns:
|
|
||||||
// 1. A tensor containing flattened indices of all grid points organized by windows
|
|
||||||
// 2. A slice of boundaries that mark where each window's data begins and ends
|
|
||||||
// in the flattened representation, scaled by spatialMergeSize squared
|
|
||||||
//
|
|
||||||
// The boundaries slice always starts with 0 and contains cumulative ending
|
|
||||||
// positions for each window, allowing downstream processing to identify
|
|
||||||
// window boundaries in the tensor data.
|
|
||||||
func (m *VisionModel) WindowIndex(ctx ml.Context, grid *Grid) (ml.Tensor, []int) {
|
|
||||||
vitMergerWindowSize := m.windowSize / m.spatialMergeSize / m.patchSize
|
|
||||||
|
|
||||||
llmGridH := grid.Height / m.spatialMergeSize
|
|
||||||
llmGridW := grid.Width / m.spatialMergeSize
|
|
||||||
|
|
||||||
// Calculate window parameters
|
|
||||||
numWindowsH := int(math.Ceil(float64(llmGridH) / float64(vitMergerWindowSize)))
|
|
||||||
numWindowsW := int(math.Ceil(float64(llmGridW) / float64(vitMergerWindowSize)))
|
|
||||||
|
|
||||||
// Initialize index_new slice
|
|
||||||
var index []int32
|
|
||||||
|
|
||||||
// Initialize bounds with the first element as 0
|
|
||||||
bounds := []int{0}
|
|
||||||
totalSeqLen := 0
|
|
||||||
|
|
||||||
// Process each window without padding
|
|
||||||
for wh := range numWindowsH {
|
|
||||||
for ww := range numWindowsW {
|
|
||||||
// Calculate window boundaries
|
|
||||||
hStart := wh * vitMergerWindowSize
|
|
||||||
wStart := ww * vitMergerWindowSize
|
|
||||||
hEnd := min(hStart+vitMergerWindowSize, llmGridH)
|
|
||||||
wEnd := min(wStart+vitMergerWindowSize, llmGridW)
|
|
||||||
|
|
||||||
// Calculate sequence length for this window
|
|
||||||
seqLen := (hEnd - hStart) * (wEnd - wStart)
|
|
||||||
|
|
||||||
// Collect indices for this window
|
|
||||||
for h := hStart; h < hEnd; h++ {
|
|
||||||
for w := wStart; w < wEnd; w++ {
|
|
||||||
index = append(index, int32(h*llmGridW+w))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
totalSeqLen += seqLen
|
|
||||||
bounds = append(bounds, totalSeqLen*(m.spatialMergeSize*m.spatialMergeSize)+bounds[0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t, err := ctx.Input().FromIntSlice(index, len(index))
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return t, bounds
|
|
||||||
}
|
|
||||||
|
|
||||||
// PositionalEmbedding generates rotary position embeddings for attention mechanisms
|
|
||||||
func (m *VisionModel) PositionalEmbedding(ctx ml.Context, grid *Grid) ml.Tensor {
|
|
||||||
dim := m.headDim / 2
|
|
||||||
freq := dim / 2
|
|
||||||
theta := float64(m.ropeTheta)
|
|
||||||
merge := m.spatialMergeSize
|
|
||||||
|
|
||||||
// Create frequency patterns for position encoding
|
|
||||||
maxGridSize := max(grid.Height, grid.Width)
|
|
||||||
freqVals := make([]float32, freq*maxGridSize)
|
|
||||||
for i := range maxGridSize {
|
|
||||||
for j := range freq {
|
|
||||||
freqVals[i*freq+j] = float32(i) / float32(math.Pow(theta, float64(j*2)/float64(dim)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
freqs, err := ctx.Input().FromFloatSlice(freqVals, freq, maxGridSize)
|
|
||||||
if err != nil {
|
|
||||||
panic(fmt.Errorf("failed to create tensor from frequencies: %w", err))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create position coordinates (y,x pairs) for the grid
|
|
||||||
// In PyTorch: Equivalent to generating position ids with torch.arange()
|
|
||||||
coords := make([]int32, 0, grid.Height*grid.Width*2)
|
|
||||||
for y := range grid.Height {
|
|
||||||
for x := range grid.Width {
|
|
||||||
coords = append(coords, int32(y), int32(x))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pos, err := ctx.Input().FromIntSlice(coords, 2, grid.Width, grid.Height)
|
|
||||||
if err != nil {
|
|
||||||
panic(fmt.Errorf("failed to create tensor from positions: %w", err))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reshape and permute positions to match spatial merging pattern
|
|
||||||
pos = pos.Reshape(ctx, 2, grid.Width, merge, grid.Height/merge)
|
|
||||||
pos = pos.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
|
||||||
pos = pos.Reshape(ctx, 2, merge, merge, grid.Width/merge*grid.Height/merge)
|
|
||||||
pos = pos.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
|
||||||
pos = pos.Reshape(ctx, 2*merge*merge*grid.Width/merge*grid.Height/merge)
|
|
||||||
|
|
||||||
// Use position indices to look up corresponding frequency values
|
|
||||||
positionalEmbedding := freqs.Rows(ctx, pos)
|
|
||||||
positionalEmbedding = positionalEmbedding.Reshape(ctx, positionalEmbedding.Dim(0)*2, positionalEmbedding.Dim(1)/2)
|
|
||||||
return positionalEmbedding
|
|
||||||
}
|
|
||||||
|
|
||||||
// newVisionModel creates a new instance of the Qwen vision model
|
|
||||||
func newVisionModel(c fs.Config) *VisionModel {
|
|
||||||
patchSize := int(c.Uint("vision.patch_size", 14))
|
|
||||||
hiddenSize := int(c.Uint("vision.embedding_length", 1280))
|
|
||||||
numHeads := int(c.Uint("vision.attention.head_count", 16))
|
|
||||||
numChannels := int(c.Uint("vision.num_channels", 3))
|
|
||||||
eps := c.Float("vision.attention.layer_norm_epsilon", 1e-6)
|
|
||||||
ropeTheta := c.Float("vision.rope.freq_base", 10000.0)
|
|
||||||
spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
|
|
||||||
windowSize := int(c.Uint("vision.window_size", 112))
|
|
||||||
fullAttnBlocks := c.Ints("qwen25vl.vision.fullatt_block_indexes", []int32{7, 15, 23, 31})
|
|
||||||
temporalPatchSize := int(c.Uint("vision.temporal_patch_size", 2))
|
|
||||||
|
|
||||||
model := &VisionModel{
|
|
||||||
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count", 32)),
|
|
||||||
VisionModelOptions: &VisionModelOptions{
|
|
||||||
hiddenSize: hiddenSize,
|
|
||||||
numHeads: numHeads,
|
|
||||||
headDim: hiddenSize / numHeads,
|
|
||||||
patchSize: patchSize,
|
|
||||||
numChannels: numChannels,
|
|
||||||
eps: eps,
|
|
||||||
ropeTheta: ropeTheta,
|
|
||||||
spatialMergeSize: spatialMergeSize,
|
|
||||||
windowSize: windowSize,
|
|
||||||
temporalPatchSize: temporalPatchSize,
|
|
||||||
fullAttnBlocks: fullAttnBlocks,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return model
|
|
||||||
}
|
|
@ -1,184 +0,0 @@
|
|||||||
package qwen25vl
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"image"
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs"
|
|
||||||
"github.com/ollama/ollama/model/imageproc"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ImageProcessor contains configuration for the Qwen 2.5 VL image processing
|
|
||||||
type ImageProcessor struct {
|
|
||||||
numChannels int
|
|
||||||
patchSize int
|
|
||||||
temporalPatchSize int
|
|
||||||
mergeSize int
|
|
||||||
minPixels int
|
|
||||||
maxPixels int
|
|
||||||
factor int
|
|
||||||
rescaleFactor float32
|
|
||||||
imageMean []float32
|
|
||||||
imageStd []float32
|
|
||||||
}
|
|
||||||
|
|
||||||
// newImageProcessor creates a new image processor with default values
|
|
||||||
func newImageProcessor(c fs.Config) ImageProcessor {
|
|
||||||
patchSize := int(c.Uint("vision.patch_size", 14))
|
|
||||||
mergeSize := int(c.Uint("vision.spatial_merge_size", 2))
|
|
||||||
|
|
||||||
return ImageProcessor{
|
|
||||||
numChannels: int(c.Uint("vision.num_channels", 3)), // not set
|
|
||||||
patchSize: patchSize,
|
|
||||||
temporalPatchSize: 2,
|
|
||||||
mergeSize: mergeSize,
|
|
||||||
minPixels: 56 * 56,
|
|
||||||
maxPixels: int(c.Uint("vision.max_pixels", 28*28*1280)), // 1MP limit
|
|
||||||
factor: patchSize * mergeSize,
|
|
||||||
rescaleFactor: 1.0 / 255.0,
|
|
||||||
imageMean: imageproc.ClipDefaultMean[:],
|
|
||||||
imageStd: imageproc.ClipDefaultSTD[:],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SmartResize implements the smart resize algorithm
|
|
||||||
func (p *ImageProcessor) SmartResize(height, width int) (int, int) {
|
|
||||||
factor := p.factor
|
|
||||||
|
|
||||||
if height < factor || width < factor {
|
|
||||||
panic(fmt.Sprintf("height:%d or width:%d must be larger than factor:%d", height, width, factor))
|
|
||||||
} else if aspectRatio := max(height, width) / min(height, width); aspectRatio > 200 {
|
|
||||||
panic(fmt.Sprintf("absolute aspect ratio must be smaller than 200, got %v", aspectRatio))
|
|
||||||
}
|
|
||||||
|
|
||||||
round := func(x float64) int { return int(math.RoundToEven(x)) }
|
|
||||||
|
|
||||||
hBar := round(float64(height)/float64(factor)) * factor
|
|
||||||
wBar := round(float64(width)/float64(factor)) * factor
|
|
||||||
|
|
||||||
if hBar*wBar > p.maxPixels {
|
|
||||||
beta := math.Sqrt(float64(height*width) / float64(p.maxPixels))
|
|
||||||
|
|
||||||
hBar = int(math.Floor(float64(height)/beta/float64(factor))) * factor
|
|
||||||
wBar = int(math.Floor(float64(width)/beta/float64(factor))) * factor
|
|
||||||
} else if hBar*wBar < p.minPixels {
|
|
||||||
beta := math.Sqrt(float64(p.minPixels) / float64(height*width))
|
|
||||||
|
|
||||||
hBar = int(math.Ceil(float64(height)*beta/float64(factor))) * factor
|
|
||||||
wBar = int(math.Ceil(float64(width)*beta/float64(factor))) * factor
|
|
||||||
}
|
|
||||||
|
|
||||||
return hBar, wBar
|
|
||||||
}
|
|
||||||
|
|
||||||
type Grid struct {
|
|
||||||
Height int
|
|
||||||
Width int
|
|
||||||
Temporal int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, *Grid, error) {
|
|
||||||
origWidth := img.Bounds().Dx()
|
|
||||||
origHeight := img.Bounds().Dy()
|
|
||||||
|
|
||||||
// Calculate smart resize dimensions
|
|
||||||
resizedHeight, resizedWidth := p.SmartResize(origHeight, origWidth)
|
|
||||||
|
|
||||||
// Resize image using existing functions
|
|
||||||
resizedImg := imageproc.Resize(img, image.Point{X: resizedWidth, Y: resizedHeight}, imageproc.ResizeBilinear)
|
|
||||||
|
|
||||||
normalizedPixels := imageproc.Normalize(
|
|
||||||
resizedImg,
|
|
||||||
[3]float32{p.imageMean[0], p.imageMean[1], p.imageMean[2]},
|
|
||||||
[3]float32{p.imageStd[0], p.imageStd[1], p.imageStd[2]},
|
|
||||||
true, // rescale
|
|
||||||
true, // channelFirst
|
|
||||||
)
|
|
||||||
|
|
||||||
// Calculate grid dimensions
|
|
||||||
grid := &Grid{
|
|
||||||
Height: resizedHeight / p.patchSize,
|
|
||||||
Width: resizedWidth / p.patchSize,
|
|
||||||
Temporal: 1, // For single images, temporal dimension is 1
|
|
||||||
}
|
|
||||||
|
|
||||||
patches, err := p.createPatches(normalizedPixels, resizedHeight, resizedWidth, grid)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("failed to create patches: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return patches and grid dimensions
|
|
||||||
return patches, grid, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *ImageProcessor) createPatches(pixels []float32, height, width int, grid *Grid) ([]float32, error) {
|
|
||||||
channels := p.numChannels
|
|
||||||
patchSize := p.patchSize
|
|
||||||
mergeSize := p.mergeSize
|
|
||||||
temporalPatchSize := p.temporalPatchSize
|
|
||||||
|
|
||||||
// Calculate output dimensions
|
|
||||||
numPatches := grid.Temporal * grid.Height * grid.Width
|
|
||||||
patchDim := channels * temporalPatchSize * patchSize * patchSize
|
|
||||||
|
|
||||||
result := make([]float32, numPatches*patchDim)
|
|
||||||
patchIndex := 0
|
|
||||||
|
|
||||||
// Single temporal frame handling (copies to all frames)
|
|
||||||
for range grid.Temporal {
|
|
||||||
for h := 0; h < grid.Height; h += mergeSize {
|
|
||||||
for w := 0; w < grid.Width; w += mergeSize {
|
|
||||||
// Handle the 2x2 merged patches
|
|
||||||
for mh := range mergeSize {
|
|
||||||
for mw := range mergeSize {
|
|
||||||
baseOffset := patchIndex * patchDim
|
|
||||||
|
|
||||||
// Extract patch data for first temporal frame
|
|
||||||
for c := range channels {
|
|
||||||
channelOffset := baseOffset + (c * temporalPatchSize * patchSize * patchSize)
|
|
||||||
|
|
||||||
for py := range patchSize {
|
|
||||||
for px := range patchSize {
|
|
||||||
// Calculate source pixel coordinates
|
|
||||||
y := (h+mh)*patchSize + py
|
|
||||||
x := (w+mw)*patchSize + px
|
|
||||||
|
|
||||||
// Source index in input tensor (CHW format)
|
|
||||||
srcIdx := c*height*width + y*width + x
|
|
||||||
|
|
||||||
// Destination index in first temporal frame
|
|
||||||
dstIdx := channelOffset + (py * patchSize) + px
|
|
||||||
|
|
||||||
if srcIdx < len(pixels) && dstIdx < len(result) {
|
|
||||||
result[dstIdx] = pixels[srcIdx]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy first temporal frame to all other frames
|
|
||||||
if temporalPatchSize > 1 {
|
|
||||||
for c := range channels {
|
|
||||||
channelOffset := baseOffset + (c * temporalPatchSize * patchSize * patchSize)
|
|
||||||
firstFrameOffset := channelOffset
|
|
||||||
frameSize := patchSize * patchSize
|
|
||||||
|
|
||||||
// Copy first frame to all other frames
|
|
||||||
for tp := 1; tp < temporalPatchSize; tp++ {
|
|
||||||
currentFrameOffset := channelOffset + (tp * frameSize)
|
|
||||||
copy(result[currentFrameOffset:currentFrameOffset+frameSize],
|
|
||||||
result[firstFrameOffset:firstFrameOffset+frameSize])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
patchIndex++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result, nil
|
|
||||||
}
|
|
133
server/model.go
133
server/model.go
@ -10,9 +10,6 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"slices"
|
|
||||||
"strings"
|
|
||||||
"text/template/parse"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
@ -129,123 +126,19 @@ func detectContentType(r io.Reader) (string, error) {
|
|||||||
return "unknown", nil
|
return "unknown", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseObjects(s string) []map[string]any {
|
// func ToolTemplate(m *Model) (*gotmpl.Template, bool) {
|
||||||
var objs []map[string]any
|
// // create a subtree from the node that ranges over .ToolCalls
|
||||||
for offset := 0; offset < len(s); {
|
// tmpl := m.Template.Subtree(func(n parse.Node) bool {
|
||||||
var obj map[string]any
|
// if t, ok := n.(*parse.RangeNode); ok {
|
||||||
decoder := json.NewDecoder(strings.NewReader(s[offset:]))
|
// return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
|
||||||
if err := decoder.Decode(&obj); errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
// }
|
||||||
break
|
|
||||||
} else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
|
|
||||||
// skip over any syntax errors
|
|
||||||
offset += int(syntax.Offset)
|
|
||||||
} else if unmarshalType := &(json.UnmarshalTypeError{}); errors.As(err, &unmarshalType) {
|
|
||||||
// skip over any unmarshalable types
|
|
||||||
offset += int(unmarshalType.Offset)
|
|
||||||
} else if err != nil {
|
|
||||||
return nil
|
|
||||||
} else {
|
|
||||||
offset += int(decoder.InputOffset())
|
|
||||||
objs = append(objs, obj)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return objs
|
// return false
|
||||||
}
|
// })
|
||||||
|
|
||||||
// parseToolCalls attempts to parse a JSON string into a slice of ToolCalls.
|
// if tmpl == nil {
|
||||||
// mxyng: this only really works if the input contains tool calls in some JSON format
|
// return nil, false
|
||||||
func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
|
// }
|
||||||
// create a subtree from the node that ranges over .ToolCalls
|
|
||||||
tmpl := m.Template.Subtree(func(n parse.Node) bool {
|
|
||||||
if t, ok := n.(*parse.RangeNode); ok {
|
|
||||||
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
// return tmpl, true
|
||||||
})
|
// }
|
||||||
|
|
||||||
if tmpl == nil {
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
var b bytes.Buffer
|
|
||||||
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
|
|
||||||
"ToolCalls": {
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "@@name@@",
|
|
||||||
Arguments: api.ToolCallFunctionArguments{
|
|
||||||
"@@argument@@": 1,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}); err != nil {
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
templateObjects := parseObjects(b.String())
|
|
||||||
if len(templateObjects) == 0 {
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// find the keys that correspond to the name and arguments fields
|
|
||||||
var name, arguments string
|
|
||||||
for k, v := range templateObjects[0] {
|
|
||||||
switch v.(type) {
|
|
||||||
case string:
|
|
||||||
name = k
|
|
||||||
case map[string]any:
|
|
||||||
arguments = k
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if name == "" || arguments == "" {
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
responseObjects := parseObjects(s)
|
|
||||||
if len(responseObjects) == 0 {
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// collect all nested objects
|
|
||||||
var collect func(any) []map[string]any
|
|
||||||
collect = func(obj any) (all []map[string]any) {
|
|
||||||
switch o := obj.(type) {
|
|
||||||
case map[string]any:
|
|
||||||
all = append(all, o)
|
|
||||||
for _, v := range o {
|
|
||||||
all = append(all, collect(v)...)
|
|
||||||
}
|
|
||||||
case []any:
|
|
||||||
for _, v := range o {
|
|
||||||
all = append(all, collect(v)...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return all
|
|
||||||
}
|
|
||||||
|
|
||||||
var objs []map[string]any
|
|
||||||
for _, p := range responseObjects {
|
|
||||||
objs = append(objs, collect(p)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
var toolCalls []api.ToolCall
|
|
||||||
for _, kv := range objs {
|
|
||||||
n, nok := kv[name].(string)
|
|
||||||
a, aok := kv[arguments].(map[string]any)
|
|
||||||
if nok && aok {
|
|
||||||
toolCalls = append(toolCalls, api.ToolCall{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: n,
|
|
||||||
Arguments: a,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return toolCalls, len(toolCalls) > 0
|
|
||||||
}
|
|
||||||
|
@ -1,179 +1,185 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
// import (
|
||||||
"bytes"
|
// "testing"
|
||||||
"encoding/json"
|
// gotmpl "text/template"
|
||||||
"fmt"
|
// )
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
// func TestToolToken(t *testing.T) {
|
||||||
|
// cases := []struct {
|
||||||
|
// name string
|
||||||
|
// template string
|
||||||
|
// want string
|
||||||
|
// ok bool
|
||||||
|
// }{
|
||||||
|
// {
|
||||||
|
// name: "basic tool call with action prefix",
|
||||||
|
// template: "{{if .ToolCalls}}Action: ```json{{end}}",
|
||||||
|
// want: "Action:",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "incomplete functools bracket",
|
||||||
|
// template: "{{if .ToolCalls}}functools[{{end}}",
|
||||||
|
// want: "functools",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool call with angle brackets",
|
||||||
|
// template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
|
||||||
|
// want: "<tool_call>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "multiple tool call formats",
|
||||||
|
// template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
|
||||||
|
// want: "[tool_call]",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "single angle bracket tool call",
|
||||||
|
// template: "{{if .ToolCalls}}<tool_call>{{end}}",
|
||||||
|
// want: "<tool_call>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "incomplete angle bracket after tool call",
|
||||||
|
// template: "{{if .ToolCalls}}[tool_call] <{{end}}",
|
||||||
|
// want: "[tool_call]",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "angle bracket prefix with tool call",
|
||||||
|
// template: "{{if .ToolCalls}}> <tool_call>{{end}}",
|
||||||
|
// want: "<tool_call>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "uppercase tool call with incomplete bracket",
|
||||||
|
// template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
|
||||||
|
// want: "[TOOL_CALL]",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "uppercase tool call with adjacent bracket",
|
||||||
|
// template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
|
||||||
|
// want: "[TOOL_CALL]",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool call with pipe delimiters",
|
||||||
|
// template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
|
||||||
|
// want: "<|tool_call|>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// }
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
// for _, tt := range cases {
|
||||||
"github.com/ollama/ollama/template"
|
// t.Run(tt.name, func(t *testing.T) {
|
||||||
)
|
// tmpl, err := gotmpl.New("test").Parse(tt.template)
|
||||||
|
// if err != nil {
|
||||||
|
// t.Fatalf("failed to parse template: %v", err)
|
||||||
|
// }
|
||||||
|
// got, ok := ToolPrefix(tmpl)
|
||||||
|
// if got != tt.want {
|
||||||
|
// t.Errorf("ToolToken(%q) = %q; want %q", tt.template, got, tt.want)
|
||||||
|
// }
|
||||||
|
// if ok != tt.ok {
|
||||||
|
// t.Errorf("ToolToken(%q) = %v; want %v", tt.template, ok, tt.ok)
|
||||||
|
// }
|
||||||
|
// })
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
func readFile(t *testing.T, base, name string) *bytes.Buffer {
|
// func TestTextAfterToolCalls(t *testing.T) {
|
||||||
t.Helper()
|
// cases := []struct {
|
||||||
|
// name string
|
||||||
|
// template string
|
||||||
|
// want string
|
||||||
|
// ok bool
|
||||||
|
// }{
|
||||||
|
// {
|
||||||
|
// name: "basic tool call with text after",
|
||||||
|
// template: `{{if .ToolCalls}}tool response{{end}}`,
|
||||||
|
// want: "tool response",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool call with mixed content after",
|
||||||
|
// template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
|
||||||
|
// want: "<tool_call>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool call with no text after",
|
||||||
|
// template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
|
||||||
|
// want: "",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "nested tool call",
|
||||||
|
// template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
|
||||||
|
// want: "[TOOL_CALL]",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "no tool calls",
|
||||||
|
// template: `{{if .Something}}no tools here{{end}}`,
|
||||||
|
// want: "",
|
||||||
|
// ok: false,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "empty template",
|
||||||
|
// template: ``,
|
||||||
|
// want: "",
|
||||||
|
// ok: false,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "multiple tool calls sections",
|
||||||
|
// template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
|
||||||
|
// want: "first",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "range over tool calls",
|
||||||
|
// template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
|
||||||
|
// want: "",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool calls with pipe delimiters",
|
||||||
|
// template: `{{if .ToolCalls}}<|tool|>{{end}}`,
|
||||||
|
// want: "<|tool|>",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool calls with nested template",
|
||||||
|
// template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
|
||||||
|
// want: "",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// name: "tool calls with whitespace variations",
|
||||||
|
// template: `{{if .ToolCalls}} tool {{end}}`,
|
||||||
|
// want: " tool ",
|
||||||
|
// ok: true,
|
||||||
|
// },
|
||||||
|
// }
|
||||||
|
|
||||||
bts, err := os.ReadFile(filepath.Join(base, name))
|
// for _, tt := range cases {
|
||||||
if err != nil {
|
// t.Run(tt.name, func(t *testing.T) {
|
||||||
t.Fatal(err)
|
// tmpl, err := gotmpl.New("test").Parse(tt.template)
|
||||||
}
|
// if err != nil {
|
||||||
|
// t.Fatalf("failed to parse template: %v", err)
|
||||||
|
// }
|
||||||
|
|
||||||
return bytes.NewBuffer(bts)
|
// got, ok := extractToolCallsTemplate(tmpl)
|
||||||
}
|
// if got != tt.want {
|
||||||
|
// t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
|
||||||
func TestExecuteWithTools(t *testing.T) {
|
// }
|
||||||
p := filepath.Join("testdata", "tools")
|
// if ok != tt.ok {
|
||||||
cases := []struct {
|
// t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
|
||||||
model string
|
// }
|
||||||
output string
|
// })
|
||||||
ok bool
|
// }
|
||||||
}{
|
// }
|
||||||
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
|
|
||||||
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
|
|
||||||
|
|
||||||
The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`, true},
|
|
||||||
{"mistral", `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"To }]`, false},
|
|
||||||
{"mistral", `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
|
|
||||||
|
|
||||||
[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
|
|
||||||
{"mistral", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
|
|
||||||
{"command-r-plus", "Action: ```json" + `
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"tool_name": "get_current_weather",
|
|
||||||
"parameters": {
|
|
||||||
"format": "fahrenheit",
|
|
||||||
"location": "San Francisco, CA"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"tool_name": "get_current_weather",
|
|
||||||
"parameters": {
|
|
||||||
"format": "celsius",
|
|
||||||
"location": "Toronto, Canada"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
` + "```", true},
|
|
||||||
{"command-r-plus", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
|
|
||||||
{"firefunction", ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`, true},
|
|
||||||
{"firefunction", " The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.", false},
|
|
||||||
{"llama3-groq-tool-use", `<tool_call>
|
|
||||||
{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
|
|
||||||
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}
|
|
||||||
</tool_call>`, true},
|
|
||||||
{"xlam", `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`, true},
|
|
||||||
{"nemotron", `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]} </toolcall>`, true},
|
|
||||||
}
|
|
||||||
|
|
||||||
var tools []api.Tool
|
|
||||||
if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var messages []api.Message
|
|
||||||
if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
calls := []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_current_weather",
|
|
||||||
Arguments: api.ToolCallFunctionArguments{
|
|
||||||
"format": "fahrenheit",
|
|
||||||
"location": "San Francisco, CA",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_current_weather",
|
|
||||||
Arguments: api.ToolCallFunctionArguments{
|
|
||||||
"format": "celsius",
|
|
||||||
"location": "Toronto, Canada",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range cases {
|
|
||||||
t.Run(tt.model, func(t *testing.T) {
|
|
||||||
tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Run("template", func(t *testing.T) {
|
|
||||||
var actual bytes.Buffer
|
|
||||||
if err := tmpl.Execute(&actual, template.Values{Tools: tools, Messages: messages}); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
|
|
||||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("parse", func(t *testing.T) {
|
|
||||||
m := &Model{Template: tmpl}
|
|
||||||
actual, ok := m.parseToolCalls(tt.output)
|
|
||||||
if ok != tt.ok {
|
|
||||||
t.Fatalf("expected %t, got %t", tt.ok, ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tt.ok {
|
|
||||||
if diff := cmp.Diff(actual, calls); diff != "" {
|
|
||||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseObjects(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
input string
|
|
||||||
want []map[string]any
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
input: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
|
||||||
want: []map[string]any{
|
|
||||||
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
|
|
||||||
{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, Canada"}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall>`,
|
|
||||||
want: []map[string]any{
|
|
||||||
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
input: `<toolcall>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </toolcall> <toolcall>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, ON"}} </toolcall>`,
|
|
||||||
want: []map[string]any{
|
|
||||||
{"name": "get_current_weather", "arguments": map[string]any{"format": "fahrenheit", "location": "San Francisco, CA"}},
|
|
||||||
{"name": "get_current_weather", "arguments": map[string]any{"format": "celsius", "location": "Toronto, ON"}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
input: `{"name": "get_current_weather", "arguments": `,
|
|
||||||
want: nil,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.input, func(t *testing.T) {
|
|
||||||
got := parseObjects(tc.input)
|
|
||||||
|
|
||||||
if diff := cmp.Diff(got, tc.want); diff != "" {
|
|
||||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -38,6 +38,7 @@ import (
|
|||||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||||
"github.com/ollama/ollama/server/internal/registry"
|
"github.com/ollama/ollama/server/internal/registry"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
|
"github.com/ollama/ollama/tools"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
@ -1482,11 +1483,22 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slog.Debug("chat request", "images", len(images), "prompt", prompt)
|
||||||
|
|
||||||
|
var toolParser *tools.Parser
|
||||||
|
if len(req.Tools) > 0 {
|
||||||
|
toolParser, err = tools.NewParser(m.Template.Template)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("failed to create tool parser", "error", err)
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ch := make(chan any)
|
ch := make(chan any)
|
||||||
go func() {
|
go func() {
|
||||||
defer close(ch)
|
defer close(ch)
|
||||||
var sb strings.Builder
|
|
||||||
var toolCallIndex int = 0
|
|
||||||
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
|
||||||
Prompt: prompt,
|
Prompt: prompt,
|
||||||
Images: images,
|
Images: images,
|
||||||
@ -1512,37 +1524,21 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: tool call checking and filtering should be moved outside of this callback once streaming
|
if len(req.Tools) > 0 && !toolParser.Done {
|
||||||
// however this was a simple change for now without reworking streaming logic of this (and other)
|
toolCalls, content, err := toolParser.Add(r.Content)
|
||||||
// handlers
|
if err == nil {
|
||||||
if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
|
if len(content) > 0 {
|
||||||
ch <- res
|
res.Message.Content = content
|
||||||
return
|
slog.Debug("tools: setting content to", "content", content)
|
||||||
}
|
} else if len(toolCalls) > 0 {
|
||||||
|
res.Message.ToolCalls = toolCalls
|
||||||
// Streaming tool calls:
|
res.Message.Content = ""
|
||||||
// If tools are recognized, use a flag to track the sending of a tool downstream
|
} else {
|
||||||
// This ensures that content is cleared from the message on the last chunk sent
|
return
|
||||||
sb.WriteString(r.Content)
|
}
|
||||||
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
|
|
||||||
res.Message.ToolCalls = toolCalls
|
|
||||||
for i := range toolCalls {
|
|
||||||
toolCalls[i].Function.Index = toolCallIndex
|
|
||||||
toolCallIndex++
|
|
||||||
}
|
}
|
||||||
res.Message.Content = ""
|
|
||||||
sb.Reset()
|
|
||||||
ch <- res
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.Done {
|
|
||||||
// Send any remaining content if no tool calls were detected
|
|
||||||
if toolCallIndex == 0 {
|
|
||||||
res.Message.Content = sb.String()
|
|
||||||
}
|
|
||||||
ch <- res
|
|
||||||
}
|
}
|
||||||
|
ch <- res
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
ch <- gin.H{"error": err.Error()}
|
ch <- gin.H{"error": err.Error()}
|
||||||
}
|
}
|
||||||
@ -1551,11 +1547,15 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
if req.Stream != nil && !*req.Stream {
|
if req.Stream != nil && !*req.Stream {
|
||||||
var resp api.ChatResponse
|
var resp api.ChatResponse
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
|
var toolCalls []api.ToolCall
|
||||||
for rr := range ch {
|
for rr := range ch {
|
||||||
switch t := rr.(type) {
|
switch t := rr.(type) {
|
||||||
case api.ChatResponse:
|
case api.ChatResponse:
|
||||||
sb.WriteString(t.Message.Content)
|
sb.WriteString(t.Message.Content)
|
||||||
resp = t
|
resp = t
|
||||||
|
if len(req.Tools) > 0 {
|
||||||
|
toolCalls = append(toolCalls, t.Message.ToolCalls...)
|
||||||
|
}
|
||||||
case gin.H:
|
case gin.H:
|
||||||
msg, ok := t["error"].(string)
|
msg, ok := t["error"].(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -1571,12 +1571,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp.Message.Content = sb.String()
|
resp.Message.Content = sb.String()
|
||||||
|
if len(toolCalls) > 0 {
|
||||||
if len(req.Tools) > 0 {
|
resp.Message.ToolCalls = toolCalls
|
||||||
if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
|
|
||||||
resp.Message.ToolCalls = toolCalls
|
|
||||||
resp.Message.Content = ""
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c.JSON(http.StatusOK, resp)
|
c.JSON(http.StatusOK, resp)
|
||||||
|
44
tools/testdata/llama3.2.gotmpl
vendored
Normal file
44
tools/testdata/llama3.2.gotmpl
vendored
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
<|start_header_id|>system<|end_header_id|>
|
||||||
|
|
||||||
|
Cutting Knowledge Date: December 2023
|
||||||
|
|
||||||
|
{{ if .System }}{{ .System }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Tools }}When you receive a tool call response, use the output to format an answer to the orginal user question.
|
||||||
|
|
||||||
|
You are a helpful assistant with tool calling capabilities.
|
||||||
|
{{- end }}<|eot_id|>
|
||||||
|
{{- range $i, $_ := .Messages }}
|
||||||
|
{{- $last := eq (len (slice $.Messages $i)) 1 }}
|
||||||
|
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
|
||||||
|
{{- if and $.Tools $last }}
|
||||||
|
|
||||||
|
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
|
||||||
|
|
||||||
|
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
|
||||||
|
|
||||||
|
{{ range $.Tools }}
|
||||||
|
{{- . }}
|
||||||
|
{{ end }}
|
||||||
|
{{ .Content }}<|eot_id|>
|
||||||
|
{{- else }}
|
||||||
|
|
||||||
|
{{ .Content }}<|eot_id|>
|
||||||
|
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
|
||||||
|
|
||||||
|
{{ end }}
|
||||||
|
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
|
||||||
|
{{- if .ToolCalls }}
|
||||||
|
{{ range .ToolCalls }}
|
||||||
|
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
|
||||||
|
{{- else }}
|
||||||
|
|
||||||
|
{{ .Content }}
|
||||||
|
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
|
||||||
|
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
|
||||||
|
|
||||||
|
{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
|
||||||
|
|
||||||
|
{{ end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
24
tools/testdata/llama3.2.out
vendored
Normal file
24
tools/testdata/llama3.2.out
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<|start_header_id|>system<|end_header_id|>
|
||||||
|
|
||||||
|
Cutting Knowledge Date: December 2023
|
||||||
|
|
||||||
|
You are a knowledgeable assistant. You can answer questions and perform tasks.When you receive a tool call response, use the output to format an answer to the orginal user question.
|
||||||
|
|
||||||
|
You are a helpful assistant with tool calling capabilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||||
|
|
||||||
|
What's the weather like today in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||||
|
|
||||||
|
{"name": "get_current_weather", "parameters": {"format":"celsius","location":"Paris, France"}}<|eot_id|><|start_header_id|>ipython<|end_header_id|>
|
||||||
|
|
||||||
|
22<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||||
|
|
||||||
|
The current temperature in Paris, France is 22 degrees Celsius.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||||
|
|
||||||
|
Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
|
||||||
|
|
||||||
|
Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
|
||||||
|
|
||||||
|
{"type":"function","function":{"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
|
||||||
|
|
||||||
|
What's the weather like today in San Francisco and Toronto?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||||
|
|
51
tools/testdata/qwen2.5-coder.gotmpl
vendored
Normal file
51
tools/testdata/qwen2.5-coder.gotmpl
vendored
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{{- if .Suffix }}<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
|
||||||
|
{{- else if .Messages }}
|
||||||
|
{{- if or .System .Tools }}<|im_start|>system
|
||||||
|
{{- if .System }}
|
||||||
|
{{ .System }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Tools }}
|
||||||
|
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
You may call one or more functions to assist with the user query.
|
||||||
|
|
||||||
|
You are provided with function signatures within <tools></tools> XML tags:
|
||||||
|
<tools>
|
||||||
|
{{- range .Tools }}
|
||||||
|
{"type": "function", "function": {{ .Function }}}
|
||||||
|
{{- end }}
|
||||||
|
</tools>
|
||||||
|
|
||||||
|
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||||
|
<tool_call>
|
||||||
|
{"name": <function-name>, "arguments": <args-json-object>}
|
||||||
|
</tool_call>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- range $i, $_ := .Messages }}
|
||||||
|
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
|
||||||
|
{{- if eq .Role "user" }}<|im_start|>user
|
||||||
|
{{ .Content }}<|im_end|>
|
||||||
|
{{ else if eq .Role "assistant" }}<|im_start|>assistant
|
||||||
|
{{ if .Content }}{{ .Content }}
|
||||||
|
{{- else if .ToolCalls }}<tool_call>
|
||||||
|
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
|
||||||
|
{{ end }}</tool_call>
|
||||||
|
{{- end }}{{ if not $last }}<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- else if eq .Role "tool" }}<|im_start|>user
|
||||||
|
<tool_response>
|
||||||
|
{{ .Content }}
|
||||||
|
</tool_response><|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
|
||||||
|
{{ end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- else }}
|
||||||
|
{{- if .System }}<|im_start|>system
|
||||||
|
{{ .System }}<|im_end|>
|
||||||
|
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||||
|
{{ .Prompt }}<|im_end|>
|
||||||
|
{{ end }}<|im_start|>assistant
|
||||||
|
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}
|
31
tools/testdata/qwen2.5-coder.out
vendored
Normal file
31
tools/testdata/qwen2.5-coder.out
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
<|im_start|>system
|
||||||
|
You are a knowledgeable assistant. You can answer questions and perform tasks.
|
||||||
|
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
You may call one or more functions to assist with the user query.
|
||||||
|
|
||||||
|
You are provided with function signatures within <tools></tools> XML tags:
|
||||||
|
<tools>
|
||||||
|
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
|
||||||
|
</tools>
|
||||||
|
|
||||||
|
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||||
|
<tool_call>
|
||||||
|
{"name": <function-name>, "arguments": <args-json-object>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
What's the weather like today in Paris?<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
|
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
<tool_response>
|
||||||
|
22
|
||||||
|
</tool_response><|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
What's the weather like today in San Francisco and Toronto?<|im_end|>
|
||||||
|
<|im_start|>assistant
|
50
tools/testdata/qwen3.gotmpl
vendored
Normal file
50
tools/testdata/qwen3.gotmpl
vendored
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
{{- if .Messages }}
|
||||||
|
{{- if or .System .Tools }}<|im_start|>system
|
||||||
|
{{- if .System }}
|
||||||
|
{{ .System }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Tools }}
|
||||||
|
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
You may call one or more functions to assist with the user query.
|
||||||
|
|
||||||
|
You are provided with function signatures within <tools></tools> XML tags:
|
||||||
|
<tools>
|
||||||
|
{{- range .Tools }}
|
||||||
|
{"type": "function", "function": {{ .Function }}}
|
||||||
|
{{- end }}
|
||||||
|
</tools>
|
||||||
|
|
||||||
|
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||||
|
<tool_call>
|
||||||
|
{"name": <function-name>, "arguments": <args-json-object>}
|
||||||
|
</tool_call>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- range $i, $_ := .Messages }}
|
||||||
|
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
|
||||||
|
{{- if eq .Role "user" }}<|im_start|>user
|
||||||
|
{{ .Content }}<|im_end|>
|
||||||
|
{{ else if eq .Role "assistant" }}<|im_start|>assistant
|
||||||
|
{{ if .Content }}{{ .Content }}
|
||||||
|
{{- else if .ToolCalls }}<tool_call>
|
||||||
|
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
|
||||||
|
{{ end }}</tool_call>
|
||||||
|
{{- end }}{{ if not $last }}<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- else if eq .Role "tool" }}<|im_start|>user
|
||||||
|
<tool_response>
|
||||||
|
{{ .Content }}
|
||||||
|
</tool_response><|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
|
||||||
|
{{ end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- else }}
|
||||||
|
{{- if .System }}<|im_start|>system
|
||||||
|
{{ .System }}<|im_end|>
|
||||||
|
{{ end }}{{ if .Prompt }}<|im_start|>user
|
||||||
|
{{ .Prompt }}<|im_end|>
|
||||||
|
{{ end }}<|im_start|>assistant
|
||||||
|
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}
|
31
tools/testdata/qwen3.out
vendored
Normal file
31
tools/testdata/qwen3.out
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
<|im_start|>system
|
||||||
|
You are a knowledgeable assistant. You can answer questions and perform tasks.
|
||||||
|
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
You may call one or more functions to assist with the user query.
|
||||||
|
|
||||||
|
You are provided with function signatures within <tools></tools> XML tags:
|
||||||
|
<tools>
|
||||||
|
{"type": "function", "function": {"name":"get_current_weather","description":"Get the current weather","parameters":{"type":"object","required":["location","format"],"properties":{"format":{"type":"string","description":"The temperature unit to use. Infer this from the user's location.","enum":["celsius","fahrenheit"]},"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"}}}}}
|
||||||
|
</tools>
|
||||||
|
|
||||||
|
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||||
|
<tool_call>
|
||||||
|
{"name": <function-name>, "arguments": <args-json-object>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
What's the weather like today in Paris?<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
|
{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Paris, France"}}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
<tool_response>
|
||||||
|
22
|
||||||
|
</tool_response><|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
The current temperature in Paris, France is 22 degrees Celsius.<|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
What's the weather like today in San Francisco and Toronto?<|im_end|>
|
||||||
|
<|im_start|>assistant
|
242
tools/tools.go
Normal file
242
tools/tools.go
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"strings"
|
||||||
|
gotmpl "text/template"
|
||||||
|
|
||||||
|
jsonv2 "github.com/go-json-experiment/json"
|
||||||
|
jsontext "github.com/go-json-experiment/json/jsontext"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Parser incrementally extracts tool calls from streamed model output.
// Chunks are fed in through Add; the parser buffers them until it can decide
// whether they form a tool call or ordinary content.
type Parser struct {
	// Done is set by Add once parsing has finished for a template that has
	// no tool call prefix; callers can stop feeding the parser after that.
	Done bool

	// Template-derived configuration.
	tmpl      *gotmpl.Template // tool call sub-template found by NewParser
	prefix    string           // text that introduces a tool call; may be empty
	name      string           // JSON key holding the function name
	arguments string           // JSON key holding the function arguments

	// Streaming state.
	sb            *strings.Builder // input buffered across calls to Add
	index         int              // next tool call index to hand out
	greedyParse   bool             // try JSON parsing even before a prefix is seen
	prefixFound   bool             // the prefix has been seen at the start of the input
	prefixPartial bool             // a prefix was seen mid-input or overlapping its end
}
|
||||||
|
|
||||||
|
// parseJSONToolCalls attempts to parse a JSON string into a slice of ToolCalls.
|
||||||
|
// It first tries to incrementally decode the JSON to handle partial inputs.
|
||||||
|
// Returns:
|
||||||
|
// - []api.ToolCall: The parsed tool calls if successful
|
||||||
|
// - bool: True if JSON is incomplete and needs more input
|
||||||
|
func (p *Parser) parseJSONToolCalls(s string) ([]api.ToolCall, bool) {
|
||||||
|
// First try incremental decoding to handle partial JSON
|
||||||
|
dec := jsontext.NewDecoder(strings.NewReader(s))
|
||||||
|
if got, err := dec.ReadValue(); err == nil {
|
||||||
|
s = got.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt full unmarshal of the JSON
|
||||||
|
var resp any
|
||||||
|
err := jsonv2.Unmarshal([]byte(s), &resp)
|
||||||
|
if err != nil {
|
||||||
|
// Handle incomplete JSON cases
|
||||||
|
if errors.Is(err, io.ErrUnexpectedEOF) || err.Error() == "unexpected end of JSON input" {
|
||||||
|
slog.Debug("incomplete JSON detected", "input", s)
|
||||||
|
return nil, true
|
||||||
|
}
|
||||||
|
slog.Debug("failed to unmarshal response", "error", err)
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect all nested objects that could contain tool calls
|
||||||
|
var objs []map[string]any
|
||||||
|
objs = append(objs, collect(resp)...)
|
||||||
|
if len(objs) == 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
var toolCalls []api.ToolCall
|
||||||
|
for _, kv := range objs {
|
||||||
|
n, nok := kv[p.name].(string)
|
||||||
|
a, aok := kv[p.arguments].(map[string]any)
|
||||||
|
if nok && aok {
|
||||||
|
toolCalls = append(toolCalls, api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: n,
|
||||||
|
Arguments: a,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Valid JSON, no tool calls found
|
||||||
|
if len(toolCalls) == 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
return toolCalls, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkPrefix processes a string to find and handle a prefix pattern.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - The processed string with prefix removed if found
|
||||||
|
// - Whether the prefix was found at the start of the string
|
||||||
|
// - Whether to continue parsing
|
||||||
|
func (p *Parser) checkPrefix(s string) (string, bool, bool) {
|
||||||
|
// Keep original for overlap checks
|
||||||
|
original := s
|
||||||
|
s = strings.TrimSpace(s)
|
||||||
|
if s == "" {
|
||||||
|
return "", false, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no prefix defined, just return trimmed string
|
||||||
|
if p.prefix == "" {
|
||||||
|
return s, false, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for prefix at start of string
|
||||||
|
if processedStr, hasPrefix := strings.CutPrefix(s, p.prefix); hasPrefix {
|
||||||
|
// Found prefix at start - accumulate for potential tool
|
||||||
|
return processedStr, true, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if prefix overlaps end of string
|
||||||
|
if overlap := suffixOverlap(original, p.prefix); overlap > 0 {
|
||||||
|
p.prefixPartial = true
|
||||||
|
// Return everything except overlapping portion
|
||||||
|
p.sb.Reset()
|
||||||
|
p.sb.WriteString(original[len(original)-overlap:])
|
||||||
|
return original[0 : len(original)-overlap], false, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if prefix appears in middle of string
|
||||||
|
if idx := strings.Index(original, p.prefix); idx != -1 {
|
||||||
|
p.prefixPartial = true
|
||||||
|
// Save remainder starting at prefix for next pass
|
||||||
|
p.sb.Reset()
|
||||||
|
p.sb.WriteString(strings.TrimSpace(original[idx:]))
|
||||||
|
// Return everything before prefix
|
||||||
|
return original[:idx], false, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// No prefix found
|
||||||
|
p.prefixPartial = false
|
||||||
|
return s, false, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add processes a string input to parse tool calls and content.
|
||||||
|
// It handles prefix detection and JSON parsing to extract tool calls.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - tools: Any parsed tool calls
|
||||||
|
// - content: Non-tool call content
|
||||||
|
// - err: Error if parsing failed
|
||||||
|
func (p *Parser) Add(s string) (tools []api.ToolCall, content string, err error) {
|
||||||
|
p.sb.WriteString(s)
|
||||||
|
s = p.sb.String()
|
||||||
|
if len(s) == 0 {
|
||||||
|
return nil, "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for prefix pattern in input
|
||||||
|
s, prefixFound, shouldContinue := p.checkPrefix(s)
|
||||||
|
if !shouldContinue {
|
||||||
|
if s != "" {
|
||||||
|
// Return content before prefix
|
||||||
|
return nil, s, nil
|
||||||
|
}
|
||||||
|
// Need more input to complete prefix
|
||||||
|
return nil, "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update prefix found state
|
||||||
|
if prefixFound {
|
||||||
|
p.prefixFound = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exit if prefix exists in template, greedy parsing is off, and prefix not found
|
||||||
|
if !p.greedyParse && !p.prefixFound {
|
||||||
|
p.sb.Reset()
|
||||||
|
return nil, "", errors.New("prefix not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
toolCalls, isPartial := p.parseJSONToolCalls(s)
|
||||||
|
if isPartial {
|
||||||
|
// Need more input to complete JSON
|
||||||
|
return nil, "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not try greedy parsing if partial JSON not found
|
||||||
|
p.greedyParse = false
|
||||||
|
|
||||||
|
// Handle invalid tool call format
|
||||||
|
if len(toolCalls) == 0 {
|
||||||
|
p.sb.Reset()
|
||||||
|
if p.prefix == "" {
|
||||||
|
p.Done = true
|
||||||
|
}
|
||||||
|
if p.prefixFound {
|
||||||
|
// Drop tokens since prefix was found
|
||||||
|
return nil, "", nil
|
||||||
|
}
|
||||||
|
return nil, s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range toolCalls {
|
||||||
|
tc.Function.Index = p.index
|
||||||
|
p.index++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark as done if no prefix needed
|
||||||
|
if p.prefix == "" {
|
||||||
|
p.Done = true
|
||||||
|
}
|
||||||
|
|
||||||
|
p.sb.Reset()
|
||||||
|
return toolCalls, "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewParser creates a new tool call parser from a template. It extracts the tool call format,
|
||||||
|
// prefix, and field names from the template to use for parsing tool calls from model output.
|
||||||
|
//
|
||||||
|
// Returns an error if the template does not contain valid tool call formatting.
|
||||||
|
func NewParser(templateToProcess *gotmpl.Template) (*Parser, error) {
|
||||||
|
parsed, err := template.Parse(templateToProcess.Root.String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if parsed == nil {
|
||||||
|
return nil, errors.New("failed to parse template")
|
||||||
|
}
|
||||||
|
|
||||||
|
tt, tc := toolTemplate(parsed)
|
||||||
|
if !tc {
|
||||||
|
return nil, errors.New("failed to find tool calls in template")
|
||||||
|
}
|
||||||
|
if tt == nil {
|
||||||
|
return nil, errors.New("failed to find tool template")
|
||||||
|
}
|
||||||
|
|
||||||
|
tp := toolPrefix(templateToProcess)
|
||||||
|
tp = strings.TrimSpace(tp)
|
||||||
|
|
||||||
|
name, arguments, err := extractToolArgs(tt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Parser{
|
||||||
|
tmpl: tt,
|
||||||
|
sb: &strings.Builder{},
|
||||||
|
prefix: tp,
|
||||||
|
greedyParse: true,
|
||||||
|
name: name,
|
||||||
|
arguments: arguments,
|
||||||
|
}, nil
|
||||||
|
}
|
482
tools/tools_test.go
Normal file
482
tools/tools_test.go
Normal file
@ -0,0 +1,482 @@
|
|||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
// readFile loads the file name located under base into a buffer. Any read
// failure aborts the calling test immediately.
func readFile(t *testing.T, base, name string) *bytes.Buffer {
	t.Helper()

	path := filepath.Join(base, name)
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}
	return bytes.NewBuffer(data)
}
|
||||||
|
|
||||||
|
func TestParseToolCalls(t *testing.T) {
|
||||||
|
p := filepath.Join("testdata")
|
||||||
|
t1 := api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "get_current_weather",
|
||||||
|
Arguments: api.ToolCallFunctionArguments{
|
||||||
|
"format": "fahrenheit",
|
||||||
|
"location": "San Francisco, CA",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
t2 := api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "get_current_weather",
|
||||||
|
Arguments: api.ToolCallFunctionArguments{
|
||||||
|
"format": "celsius",
|
||||||
|
"location": "Toronto, Canada",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
output string
|
||||||
|
expectedToolCall []api.ToolCall
|
||||||
|
expectedTokens string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "mistral malformed json with tool calls prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_curren}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral multiple tool calls without prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral tool calls with text between no prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
|
||||||
|
model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: `model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral valid json with tool calls prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral multiple tool calls with text between and prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]
|
||||||
|
model outputs more tokens here and then [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2, t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral incomplete json with tool calls prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, `,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral invalid tool call with explanatory text no prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function:
|
||||||
|
|
||||||
|
[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `I'm not aware of that information. However, I can suggest searching for the weather using the "get_current_weather" function: [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mistral tool calls without prefix",
|
||||||
|
model: "mistral",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "command r plus tool calls with json block format",
|
||||||
|
model: "command-r-plus",
|
||||||
|
output: "Action: ```json" + `
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"tool_name": "get_current_weather",
|
||||||
|
"parameters": {
|
||||||
|
"format": "fahrenheit",
|
||||||
|
"location": "San Francisco, CA"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tool_name": "get_current_weather",
|
||||||
|
"parameters": {
|
||||||
|
"format": "celsius",
|
||||||
|
"location": "Toronto, Canada"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
` + "```",
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "firefunction tool calls with functools prefix",
|
||||||
|
model: "firefunction",
|
||||||
|
output: ` functools[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "llama3 groq single tool call with xml tags",
|
||||||
|
model: "llama3-groq-tool-use",
|
||||||
|
output: `<tool_call>
|
||||||
|
{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}
|
||||||
|
</tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "xlam tool calls with wrapper object",
|
||||||
|
model: "xlam",
|
||||||
|
output: `{"tool_calls": [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}},{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]}`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5-coder single tool call with prefix",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `<tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5-coder multiple tool calls with and without prefix",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} <tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call> <tool_call>{"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}</tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5-coder multiple tool calls without prefix",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5-coder plain text response no tool calls",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: "The weather in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5-coder tool calls with trailing text",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] some tokens after call`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "some tokens after call",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5 tool calls with prefix and trailing text",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `<tool_call> [{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}] </tool_call> some tokens after call`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5 tool calls without prefix and valid tool call",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `[{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}, {"name": "get_current_weather", "arguments": {"format":"celsius","location":"Toronto, Canada"}}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1, t2},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5 tool calls without prefix and invalid tool call",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `[{"options": "foo"}]`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `[{"options": "foo"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen2.5 tool calls with prefix and invalid tool call",
|
||||||
|
model: "qwen2.5-coder",
|
||||||
|
output: `<tool_call> [{"options": "foo"}] </tool_call> `,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: ``,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 tool call with think prefix and tool prefix (sent as a single token)",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think>Okay, let me think what tool we should use...</think><tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}}</tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: "<think>Okay, let me think what tool we should use...</think>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 tool call with think prefix, tool prefix, and whitespace (sent as separate tokens)",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think>Okay, let me think what tool we should use...</think> <tool_call> {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: "<think>Okay, let me think what tool we should use...</think>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 empty think prefix without tool prefix and invalid tool call",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think></think>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `<think></think>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 empty think prefix with tool prefix and valid tool call",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think></think><tool_call>{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: `<think></think>`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 invalid tool call with fake tool prefix (single rune suffix match)",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think></think>< fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `<think></think>< fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 invalid tool call with partial tool prefix (multiple rune suffix match)",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think></think><tool_c fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `<think></think><tool_c fakeout{"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "qwen3 invalid tool call with malformed tool prefix",
|
||||||
|
model: "qwen3",
|
||||||
|
output: `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `<think></think><tool_cfakeout {"name": "get_current_weather", "arguments": {"format":"fahrenheit","location":"San Francisco, CA"}} </tool_call>`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "llama3.2 valid tool call without prefix",
|
||||||
|
model: "llama3.2",
|
||||||
|
output: `{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
|
||||||
|
expectedToolCall: []api.ToolCall{t1},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "llama3.2 incomplete tool call without prefix",
|
||||||
|
model: "llama3.2",
|
||||||
|
output: `{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, `,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "llama3.2 tool call with leading text",
|
||||||
|
model: "llama3.2",
|
||||||
|
output: `some non json text{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `some non json text{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "llama3.2 tool call with invalid tool prefix (no prefix in template)",
|
||||||
|
model: "llama3.2",
|
||||||
|
output: `<tool_call>{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
|
||||||
|
expectedToolCall: []api.ToolCall{},
|
||||||
|
expectedTokens: `<tool_call>{"name": "get_current_weather", "parameters": {"format":"fahrenheit","location":"San Francisco, CA"}}`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var tools []api.Tool
|
||||||
|
if err := json.Unmarshal(readFile(t, p, "tools.json").Bytes(), &tools); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var messages []api.Message
|
||||||
|
if err := json.Unmarshal(readFile(t, p, "messages.json").Bytes(), &messages); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
tmpl, err := template.Parse(readFile(t, p, fmt.Sprintf("%s.gotmpl", tt.model)).String())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("template", func(t *testing.T) {
|
||||||
|
actual := &bytes.Buffer{} // Create new buffer for each test
|
||||||
|
if err := tmpl.Execute(actual, template.Values{Tools: tools, Messages: messages}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(actual.String(), readFile(t, p, fmt.Sprintf("%s.out", tt.model)).String()); diff != "" {
|
||||||
|
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("parse", func(t *testing.T) {
|
||||||
|
// fmt.Printf("tmpl: %s\n", tmpl.Root.String())
|
||||||
|
tp, err := NewParser(tmpl.Template)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
got := []api.ToolCall{}
|
||||||
|
var gotTokens strings.Builder
|
||||||
|
|
||||||
|
var add bool
|
||||||
|
tokens := strings.Fields(tt.output)
|
||||||
|
for _, tok := range tokens {
|
||||||
|
s := " " + tok
|
||||||
|
|
||||||
|
add = true
|
||||||
|
if !tp.Done {
|
||||||
|
toolCalls, content, err := tp.Add(s)
|
||||||
|
if err == nil {
|
||||||
|
if content != "" {
|
||||||
|
fmt.Printf("content: %q\n", content)
|
||||||
|
gotTokens.WriteString(content)
|
||||||
|
add = false
|
||||||
|
} else if len(toolCalls) > 0 {
|
||||||
|
got = append(got, toolCalls...)
|
||||||
|
add = false
|
||||||
|
} else {
|
||||||
|
add = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if add {
|
||||||
|
gotTokens.WriteString(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare tool calls if we expect any
|
||||||
|
if diff := cmp.Diff(got, tt.expectedToolCall); diff != "" {
|
||||||
|
t.Errorf("tool calls mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare tokens if we expect any
|
||||||
|
stripped := strings.TrimSpace(gotTokens.String())
|
||||||
|
if diff := cmp.Diff(stripped, tt.expectedTokens); diff != "" {
|
||||||
|
t.Log("actualTokens", stripped, "expectedTokens", tt.expectedTokens)
|
||||||
|
t.Errorf("tokens mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseJSONToolCalls(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
parser *Parser
|
||||||
|
wantToolCalls []api.ToolCall
|
||||||
|
wantPartial bool
|
||||||
|
wantValid bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid single tool call",
|
||||||
|
input: `{"name": "test_tool", "arguments": {"arg1": "value1"}}`,
|
||||||
|
parser: &Parser{name: "name", arguments: "arguments"},
|
||||||
|
wantToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "test_tool",
|
||||||
|
Arguments: map[string]any{
|
||||||
|
"arg1": "value1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantPartial: false,
|
||||||
|
wantValid: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "incomplete JSON",
|
||||||
|
input: `{"name": "test_tool", "arguments": {"arg1": `,
|
||||||
|
parser: &Parser{name: "name", arguments: "arguments"},
|
||||||
|
wantToolCalls: nil,
|
||||||
|
wantPartial: true,
|
||||||
|
wantValid: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid JSON",
|
||||||
|
input: `not json at all`,
|
||||||
|
parser: &Parser{name: "name", arguments: "arguments"},
|
||||||
|
wantToolCalls: nil,
|
||||||
|
wantPartial: false,
|
||||||
|
wantValid: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing required fields",
|
||||||
|
input: `{"other": "field"}`,
|
||||||
|
parser: &Parser{name: "name", arguments: "arguments"},
|
||||||
|
wantToolCalls: nil,
|
||||||
|
wantPartial: false,
|
||||||
|
wantValid: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple tool calls in array",
|
||||||
|
input: `[
|
||||||
|
{"name": "tool1", "arguments": {"arg1": 1}},
|
||||||
|
{"name": "tool2", "arguments": {"arg2": "value"}}
|
||||||
|
]`,
|
||||||
|
parser: &Parser{name: "name", arguments: "arguments"},
|
||||||
|
wantToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "tool1",
|
||||||
|
Arguments: map[string]any{
|
||||||
|
"arg1": float64(1),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "tool2",
|
||||||
|
Arguments: map[string]any{
|
||||||
|
"arg2": "value",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantPartial: false,
|
||||||
|
wantValid: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
gotCalls, gotPartial := tt.parser.parseJSONToolCalls(tt.input)
|
||||||
|
|
||||||
|
if gotPartial != tt.wantPartial {
|
||||||
|
t.Errorf("parseJSONToolCalls() partial = %v, want %v", gotPartial, tt.wantPartial)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(gotCalls) != 0 != tt.wantValid {
|
||||||
|
t.Errorf("parseJSONToolCalls() valid = %v, want %v", len(gotCalls) == 0, tt.wantValid)
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(gotCalls, tt.wantToolCalls); diff != "" {
|
||||||
|
t.Errorf("parseJSONToolCalls() tool calls mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
257
tools/utils.go
Normal file
257
tools/utils.go
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"log/slog"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
gotmpl "text/template"
|
||||||
|
"text/template/parse"
|
||||||
|
|
||||||
|
jsonv2 "github.com/go-json-experiment/json"
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractToolCallsFormat traverses a template AST to find text that follows a ".ToolCalls" condition.
|
||||||
|
// It walks the template nodes looking for if-statements containing ".ToolCalls" and extracts any
|
||||||
|
// immediate text nodes that follow. This is used to identify tool call prefixes and formatting.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - string: The extracted text following the first ".ToolCalls" condition found
|
||||||
|
// - bool: Whether a ".ToolCalls" condition was found in the template
|
||||||
|
func extractToolCallsFormat(tmpl *gotmpl.Template) (string, bool) {
|
||||||
|
if tmpl == nil || tmpl.Tree == nil {
|
||||||
|
slog.Debug("TextAfterToolCalls: template or tree is nil")
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
var result string
|
||||||
|
var found bool
|
||||||
|
|
||||||
|
var walk func(nodes []parse.Node)
|
||||||
|
walk = func(nodes []parse.Node) {
|
||||||
|
for _, node := range nodes {
|
||||||
|
if found {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
switch n := node.(type) {
|
||||||
|
case *parse.IfNode:
|
||||||
|
if isToolCallsNode(n) {
|
||||||
|
// Collect immediate TextNode(s) at start of IfNode's list
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, innerNode := range n.List.Nodes {
|
||||||
|
if tn, ok := innerNode.(*parse.TextNode); ok {
|
||||||
|
sb.Write(tn.Text)
|
||||||
|
} else {
|
||||||
|
// Stop at first non-text node
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result = sb.String()
|
||||||
|
found = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Recurse into child nodes
|
||||||
|
walk(n.List.Nodes)
|
||||||
|
if n.ElseList != nil {
|
||||||
|
walk(n.ElseList.Nodes)
|
||||||
|
}
|
||||||
|
case *parse.ListNode:
|
||||||
|
walk(n.Nodes)
|
||||||
|
case *parse.RangeNode:
|
||||||
|
walk(n.List.Nodes)
|
||||||
|
if n.ElseList != nil {
|
||||||
|
walk(n.ElseList.Nodes)
|
||||||
|
}
|
||||||
|
case *parse.WithNode:
|
||||||
|
walk(n.List.Nodes)
|
||||||
|
if n.ElseList != nil {
|
||||||
|
walk(n.ElseList.Nodes)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Continue to next node
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if found {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
walk(tmpl.Tree.Root.Nodes)
|
||||||
|
return result, found
|
||||||
|
}
|
||||||
|
|
||||||
|
// isToolCallsNode reports whether any argument in the if-node's condition
// pipeline is a field reference whose identifier chain contains "ToolCalls".
func isToolCallsNode(n *parse.IfNode) bool {
	return slices.ContainsFunc(n.Pipe.Cmds, func(cmd *parse.CommandNode) bool {
		return slices.ContainsFunc(cmd.Args, func(arg parse.Node) bool {
			field, ok := arg.(*parse.FieldNode)
			return ok && slices.Contains(field.Ident, "ToolCalls")
		})
	})
}
|
||||||
|
|
||||||
|
// TODO(parthsareen): get full prefix from the template instead of just the first token
|
||||||
|
|
||||||
|
// toolPrefix returns the prefix for the tool call if it exists from a template
|
||||||
|
func toolPrefix(tmpl *gotmpl.Template) string {
|
||||||
|
tokenText, ok := extractToolCallsFormat(tmpl)
|
||||||
|
if !ok {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
tokenText = strings.TrimSpace(tokenText)
|
||||||
|
if tokenText == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
first := strings.Fields(tokenText)[0]
|
||||||
|
|
||||||
|
start := -1
|
||||||
|
end := -1
|
||||||
|
for i, r := range tokenText {
|
||||||
|
if r == '<' || r == '[' {
|
||||||
|
start = i
|
||||||
|
}
|
||||||
|
if (r == '>' || r == ']') && start != -1 {
|
||||||
|
end = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if start != -1 && end != -1 {
|
||||||
|
// return the token including the [ or < and the ] or >
|
||||||
|
return tokenText[start : end+1]
|
||||||
|
} else if start != -1 {
|
||||||
|
// get until the [ or < - in the case tag was not closed
|
||||||
|
return tokenText[:start]
|
||||||
|
} else if end != -1 {
|
||||||
|
// get after the ] or > - in the case tag was not opened
|
||||||
|
return tokenText[end+1:]
|
||||||
|
}
|
||||||
|
return first
|
||||||
|
}
|
||||||
|
|
||||||
|
// toolTemplate creates a subtree from the node that ranges over .ToolCalls
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - *gotmpl.Template: The subtree containing the .ToolCalls range
|
||||||
|
// - bool: Whether a .ToolCalls range was found in the template
|
||||||
|
func toolTemplate(t *template.Template) (*gotmpl.Template, bool) {
|
||||||
|
tmpl := t.Subtree(func(n parse.Node) bool {
|
||||||
|
if t, ok := n.(*parse.RangeNode); ok {
|
||||||
|
return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
if tmpl == nil {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
return tmpl, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// suffixOverlap returns the length of the longest suffix of s that is also a
// prefix of delim.
//
// Returns:
//   - int: The length of the longest overlap, or 0 if s does not end with any
//     non-empty prefix of delim
func suffixOverlap(s, delim string) int {
	// Try the longest possible overlap first and shrink until one matches.
	for n := min(len(delim), len(s)); n > 0; n-- {
		if s[len(s)-n:] == delim[:n] {
			return n
		}
	}
	return 0
}
|
||||||
|
|
||||||
|
// extractToolArgs executes a template with a known tool call format to extract the name and arguments
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - string: The name of the tool call
|
||||||
|
// - string: The arguments of the tool call
|
||||||
|
// - error: Error if parsing failed
|
||||||
|
func extractToolArgs(tmpl *gotmpl.Template) (name, arguments string, err error) {
|
||||||
|
var b bytes.Buffer
|
||||||
|
if err := tmpl.Execute(&b, map[string][]api.ToolCall{
|
||||||
|
"ToolCalls": {
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "@@name@@",
|
||||||
|
Arguments: api.ToolCallFunctionArguments{
|
||||||
|
"@@argument@@": 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}); err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var obj any
|
||||||
|
err = jsonv2.Unmarshal(b.Bytes(), &obj)
|
||||||
|
if err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var objs []map[string]any
|
||||||
|
switch v := obj.(type) {
|
||||||
|
case map[string]any:
|
||||||
|
objs = []map[string]any{v}
|
||||||
|
case []map[string]any:
|
||||||
|
objs = v
|
||||||
|
case []any:
|
||||||
|
objs = collect(v)
|
||||||
|
}
|
||||||
|
if len(objs) == 0 {
|
||||||
|
return "", "", errors.New("no template objects found")
|
||||||
|
}
|
||||||
|
|
||||||
|
// find the keys that correspond to the name and arguments fields
|
||||||
|
for k, v := range objs[0] {
|
||||||
|
switch v.(type) {
|
||||||
|
case string:
|
||||||
|
name = k
|
||||||
|
case map[string]any:
|
||||||
|
arguments = k
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if name == "" || arguments == "" {
|
||||||
|
slog.Debug("missing required fields in tool call template", "name", name, "arguments", arguments)
|
||||||
|
return "", "", errors.New("missing required fields in tool call template")
|
||||||
|
}
|
||||||
|
|
||||||
|
return name, arguments, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// collect walks obj depth-first and gathers every map[string]any it meets.
//
// A map is emitted before the maps nested in its values; slices of type []any
// are descended into but not emitted themselves. Any other value yields nil.
//
// Returns:
//   - []map[string]any: A slice of all nested maps found in the object
func collect(obj any) []map[string]any {
	if m, ok := obj.(map[string]any); ok {
		found := []map[string]any{m}
		for _, child := range m {
			found = append(found, collect(child)...)
		}
		return found
	}

	items, ok := obj.([]any)
	if !ok {
		return nil
	}

	var found []map[string]any
	for _, item := range items {
		found = append(found, collect(item)...)
	}
	return found
}
|
464
tools/utils_test.go
Normal file
464
tools/utils_test.go
Normal file
@ -0,0 +1,464 @@
|
|||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
gotmpl "text/template"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExtractToolCallsFormat(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
template string
|
||||||
|
want string
|
||||||
|
found bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil template",
|
||||||
|
template: "",
|
||||||
|
want: "",
|
||||||
|
found: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "basic tool call with text",
|
||||||
|
template: "{{if .ToolCalls}}Hello world{{end}}",
|
||||||
|
want: "Hello world",
|
||||||
|
found: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with json format",
|
||||||
|
template: "{{if .ToolCalls}}```json\n{{end}}",
|
||||||
|
want: "```json\n",
|
||||||
|
found: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call in range",
|
||||||
|
template: "{{range .ToolCalls}}tool: {{.}}{{end}}",
|
||||||
|
want: "",
|
||||||
|
found: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with multiple text nodes",
|
||||||
|
template: "{{if .ToolCalls}}First text{{if .Something}}inner{{end}}Second text{{end}}",
|
||||||
|
want: "First text",
|
||||||
|
found: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested if without tool calls",
|
||||||
|
template: "{{if .Something}}{{if .OtherThing}}text{{end}}{{end}}",
|
||||||
|
want: "",
|
||||||
|
found: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
tmpl, err := gotmpl.New("test").Parse(tc.template)
|
||||||
|
if err != nil && tc.template != "" {
|
||||||
|
t.Fatalf("failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, found := extractToolCallsFormat(tmpl)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("got text %q, want %q", got, tc.want)
|
||||||
|
}
|
||||||
|
if found != tc.found {
|
||||||
|
t.Errorf("got found %v, want %v", found, tc.found)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestToolPrefix(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
template string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "basic tool call with action prefix",
|
||||||
|
template: "{{if .ToolCalls}}Action: ```json{{end}}",
|
||||||
|
want: "Action:",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "incomplete functools bracket",
|
||||||
|
template: "{{if .ToolCalls}}functools[{{end}}",
|
||||||
|
want: "functools",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with angle brackets",
|
||||||
|
template: "{{if .ToolCalls}}Hello, world! <tool_call>{{end}}",
|
||||||
|
want: "<tool_call>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple tool call formats",
|
||||||
|
template: "{{if .ToolCalls}}[tool_call] <tool_call>{{end}}",
|
||||||
|
want: "[tool_call]",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single angle bracket tool call",
|
||||||
|
template: "{{if .ToolCalls}}<tool_call>{{end}}",
|
||||||
|
want: "<tool_call>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "incomplete angle bracket after tool call",
|
||||||
|
template: "{{if .ToolCalls}}[tool_call] <{{end}}",
|
||||||
|
want: "[tool_call]",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "angle bracket prefix with tool call",
|
||||||
|
template: "{{if .ToolCalls}}> <tool_call>{{end}}",
|
||||||
|
want: "<tool_call>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "uppercase tool call with incomplete bracket",
|
||||||
|
template: "{{if .ToolCalls}}[TOOL_CALL] [{{end}}",
|
||||||
|
want: "[TOOL_CALL]",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "uppercase tool call with adjacent bracket",
|
||||||
|
template: "{{if .ToolCalls}}[TOOL_CALL][{{end}}",
|
||||||
|
want: "[TOOL_CALL]",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with pipe delimiters",
|
||||||
|
template: "{{if .ToolCalls}}<|tool_call|>{{end}}",
|
||||||
|
want: "<|tool_call|>",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool with no prefix",
|
||||||
|
template: "{{if .ToolCalls}}{{end}}",
|
||||||
|
want: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
tmpl, err := gotmpl.New("test").Parse(tt.template)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
got := toolPrefix(tmpl)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("ToolToken(%q) = %q; want %q", tt.template, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestToolTemplate(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
template string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "basic tool call range",
|
||||||
|
template: "{{range .ToolCalls}}test{{end}}",
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no tool calls",
|
||||||
|
template: "{{range .Other}}test{{end}}",
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested tool calls",
|
||||||
|
template: "{{range .Outer}}{{range .ToolCalls}}test{{end}}{{end}}",
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty template",
|
||||||
|
template: "",
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool calls in if statement",
|
||||||
|
template: "{{if .ToolCalls}}test{{end}}",
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
tmpl, err := gotmpl.New("test").Parse(tt.template)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
parsed, err := template.Parse(tmpl.Root.String())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, got := toolTemplate(parsed)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("toolTemplate() = %v; want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSuffixOverlap(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
s string
|
||||||
|
d string
|
||||||
|
want int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no overlap",
|
||||||
|
s: "hello world",
|
||||||
|
d: "",
|
||||||
|
want: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "full overlap",
|
||||||
|
s: "<tool_call>",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 11,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "partial overlap",
|
||||||
|
s: "text <tool_call>",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 11,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "delimiter longer than string",
|
||||||
|
s: "<tool>",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty string",
|
||||||
|
s: "",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty delimiter",
|
||||||
|
s: "<tool_call>",
|
||||||
|
d: "",
|
||||||
|
want: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "single char overlap",
|
||||||
|
s: "test<",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "partial tool call",
|
||||||
|
s: "hello <tool_",
|
||||||
|
d: "<tool_call>",
|
||||||
|
want: 6,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := suffixOverlap(tt.s, tt.d)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("suffixOverlap(%q, %q) = %d; want %d", tt.s, tt.d, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractToolArgs(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
template string
|
||||||
|
want string
|
||||||
|
ok bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "basic tool call with text after",
|
||||||
|
template: `{{if .ToolCalls}}tool response{{end}}`,
|
||||||
|
want: "tool response",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with mixed content after",
|
||||||
|
template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
|
||||||
|
want: "<tool_call>",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool call with no text after",
|
||||||
|
template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
|
||||||
|
want: "",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested tool call",
|
||||||
|
template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
|
||||||
|
want: "[TOOL_CALL]",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no tool calls",
|
||||||
|
template: `{{if .Something}}no tools here{{end}}`,
|
||||||
|
want: "",
|
||||||
|
ok: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty template",
|
||||||
|
template: ``,
|
||||||
|
want: "",
|
||||||
|
ok: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple tool calls sections",
|
||||||
|
template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
|
||||||
|
want: "first",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "range over tool calls",
|
||||||
|
template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
|
||||||
|
want: "",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool calls with pipe delimiters",
|
||||||
|
template: `{{if .ToolCalls}}<|tool|>{{end}}`,
|
||||||
|
want: "<|tool|>",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool calls with nested template",
|
||||||
|
template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
|
||||||
|
want: "",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool calls with whitespace variations",
|
||||||
|
template: `{{if .ToolCalls}} tool {{end}}`,
|
||||||
|
want: " tool ",
|
||||||
|
ok: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
tmpl, err := gotmpl.New("test").Parse(tt.template)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, ok := extractToolCallsFormat(tmpl)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
|
||||||
|
}
|
||||||
|
if ok != tt.ok {
|
||||||
|
t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCollect(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
obj any
|
||||||
|
want []map[string]any
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "simple map",
|
||||||
|
obj: map[string]any{
|
||||||
|
"key": "value",
|
||||||
|
},
|
||||||
|
want: []map[string]any{
|
||||||
|
{"key": "value"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested map",
|
||||||
|
obj: map[string]any{
|
||||||
|
"outer": map[string]any{
|
||||||
|
"inner": "value",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: []map[string]any{
|
||||||
|
{"outer": map[string]any{"inner": "value"}},
|
||||||
|
{"inner": "value"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "array of maps",
|
||||||
|
obj: []any{
|
||||||
|
map[string]any{"key1": "val1"},
|
||||||
|
map[string]any{"key2": "val2"},
|
||||||
|
},
|
||||||
|
want: []map[string]any{
|
||||||
|
{"key1": "val1"},
|
||||||
|
{"key2": "val2"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "deeply nested",
|
||||||
|
obj: map[string]any{
|
||||||
|
"l1": map[string]any{
|
||||||
|
"l2": map[string]any{
|
||||||
|
"l3": "value",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: []map[string]any{
|
||||||
|
{"l1": map[string]any{"l2": map[string]any{"l3": "value"}}},
|
||||||
|
{"l2": map[string]any{"l3": "value"}},
|
||||||
|
{"l3": "value"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-map value",
|
||||||
|
obj: "string",
|
||||||
|
want: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range cases {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := collect(tt.obj)
|
||||||
|
if len(got) != len(tt.want) {
|
||||||
|
t.Errorf("collect() got %d maps, want %d", len(got), len(tt.want))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare each map in the result
|
||||||
|
for i := range tt.want {
|
||||||
|
if !mapsEqual(got[i], tt.want[i]) {
|
||||||
|
t.Errorf("collect() map[%d] = %v, want %v", i, got[i], tt.want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mapsEqual compares two maps for deep equality.
//
// Nested map[string]any values are compared recursively and []any values are
// compared element by element, which covers every container produced by
// decoding JSON into any. All other values are compared with ==.
func mapsEqual(m1, m2 map[string]any) bool {
	if len(m1) != len(m2) {
		return false
	}

	// equal compares two arbitrary values, recursing into maps and slices.
	// Using == directly on two []any values would panic at runtime because
	// slices are not comparable.
	var equal func(a, b any) bool
	equal = func(a, b any) bool {
		switch av := a.(type) {
		case map[string]any:
			bv, ok := b.(map[string]any)
			return ok && mapsEqual(av, bv)
		case []any:
			bv, ok := b.([]any)
			if !ok || len(av) != len(bv) {
				return false
			}
			for i := range av {
				if !equal(av[i], bv[i]) {
					return false
				}
			}
			return true
		default:
			return a == b
		}
	}

	for k, v1 := range m1 {
		v2, ok := m2[k]
		if !ok || !equal(v1, v2) {
			return false
		}
	}
	return true
}
|
Loading…
x
Reference in New Issue
Block a user