Compare commits

..

2 Commits

Author SHA1 Message Date
ParthSareen
23e8ac9428 wip? 2025-05-07 19:00:44 -07:00
ParthSareen
611d3a17ed server: add python tool parsing logic 2025-05-02 16:23:54 -07:00
9 changed files with 544 additions and 12 deletions

View File

@@ -1407,6 +1407,7 @@ func NewCLI() *cobra.Command {
envVars["OLLAMA_LLM_LIBRARY"], envVars["OLLAMA_LLM_LIBRARY"],
envVars["OLLAMA_GPU_OVERHEAD"], envVars["OLLAMA_GPU_OVERHEAD"],
envVars["OLLAMA_LOAD_TIMEOUT"], envVars["OLLAMA_LOAD_TIMEOUT"],
envVars["OLLAMA_CONTEXT_LENGTH"],
}) })
default: default:
appendEnvDocs(cmd, envs) appendEnvDocs(cmd, envs)

View File

@@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md).
## How can I specify the context window size? ## How can I specify the context window size?
By default, Ollama uses a context window size of 4096 tokens. By default, Ollama uses a context window size of 4096 tokens, unless you have a single GPU with <= 4 GB of VRAM, in which case it will default to 2048 tokens.
This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use: This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
@@ -31,7 +31,7 @@ OLLAMA_CONTEXT_LENGTH=8192 ollama serve
To change this when using `ollama run`, use `/set parameter`: To change this when using `ollama run`, use `/set parameter`:
```shell ```shell
/set parameter num_ctx 4096 /set parameter num_ctx 8192
``` ```
When using the API, specify the `num_ctx` parameter: When using the API, specify the `num_ctx` parameter:
@@ -41,7 +41,7 @@ curl http://localhost:11434/api/generate -d '{
"model": "llama3.2", "model": "llama3.2",
"prompt": "Why is the sky blue?", "prompt": "Why is the sky blue?",
"options": { "options": {
"num_ctx": 4096 "num_ctx": 8192
} }
}' }'
``` ```

View File

@@ -169,7 +169,7 @@ var (
// Enable the new Ollama engine // Enable the new Ollama engine
NewEngine = Bool("OLLAMA_NEW_ENGINE") NewEngine = Bool("OLLAMA_NEW_ENGINE")
// ContextLength sets the default context length // ContextLength sets the default context length
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096) ContextLength = Int64("OLLAMA_CONTEXT_LENGTH", -1)
) )
func String(s string) func() string { func String(s string) func() string {
@@ -227,6 +227,20 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
} }
} }
// Int64 returns a lazy getter for the environment variable key, parsed as a
// base-10 int64. Unset or unparsable values fall back to defaultValue (a
// warning is logged for unparsable ones).
func Int64(key string, defaultValue int64) func() int64 {
	return func() int64 {
		raw := Var(key)
		if raw == "" {
			return defaultValue
		}
		parsed, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			slog.Warn("invalid environment variable, using default", "key", key, "value", raw, "default", defaultValue)
			return defaultValue
		}
		return parsed
	}
}
// Set aside VRAM per GPU // Set aside VRAM per GPU
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0) var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
@@ -255,7 +269,7 @@ func AsMap() map[string]EnvVar {
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"}, "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"}, "OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"}, "OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default 4096 or 2048 with low VRAM)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"}, "OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
// Informational // Informational

View File

@@ -278,9 +278,9 @@ func TestVar(t *testing.T) {
} }
func TestContextLength(t *testing.T) { func TestContextLength(t *testing.T) {
cases := map[string]uint{ cases := map[string]int64{
"": 4096, "": -1,
"2048": 2048, "4096": 4096,
} }
for k, v := range cases { for k, v := range cases {

226
server/python_tools.go Normal file
View File

@@ -0,0 +1,226 @@
package server
import (
"fmt"
"regexp"
"strconv"
"strings"
"github.com/ollama/ollama/api"
)
var (
	// pythonFuncRegex matches one Python-style call: an identifier followed
	// by a parenthesized argument list. The body is non-greedy, so matching
	// stops at the first ')' after the opening parenthesis.
	pythonFuncRegex = regexp.MustCompile(`(\w+)\((.*?)\)`)

	// braces maps each opening delimiter to its closing counterpart. It is
	// used as a stack alphabet when scanning argument lists so that commas
	// nested inside lists/dicts/tuples/strings do not split arguments.
	// Quote characters map to themselves.
	braces = map[rune]rune{
		'[': ']',
		'{': '}',
		'(': ')',
		'"': '"',
		'\'': '\'',
	}
)
// indexTopLevel returns the byte offset of the first occurrence of sep in s
// that is not nested inside brackets, braces, parentheses, or string
// quotes, or -1 if there is none. sep must be a single-byte (ASCII) rune.
func indexTopLevel(s string, sep rune) int {
	var stack []rune
	for i, ch := range s {
		// Inside a string literal only the matching closing quote is
		// significant; brackets and separators there are plain text.
		if n := len(stack); n > 0 && (stack[n-1] == '"' || stack[n-1] == '\'') {
			if ch == stack[n-1] {
				stack = stack[:n-1]
			}
			continue
		}
		switch ch {
		case '[', '{', '(', '"', '\'':
			stack = append(stack, ch)
		case ']', '}', ')':
			if len(stack) > 0 {
				stack = stack[:len(stack)-1]
			}
		default:
			if ch == sep && len(stack) == 0 {
				return i
			}
		}
	}
	return -1
}

// splitTopLevel splits s on each top-level occurrence of sep (see
// indexTopLevel). Segments are returned untrimmed; an empty s yields no
// segments, so "[]" parses to an empty (nil) list.
func splitTopLevel(s string, sep rune) []string {
	var parts []string
	for {
		i := indexTopLevel(s, sep)
		if i < 0 {
			break
		}
		parts = append(parts, s[:i])
		s = s[i+1:]
	}
	if len(parts) > 0 || s != "" {
		parts = append(parts, s)
	}
	return parts
}

// parsePythonValue converts a Python literal to its closest Go
// representation: quoted strings to string, True/False to bool, None to
// nil, integers to int, floats to float64, lists and sets to []any, and
// dicts to map[any]any. Anything unrecognized returns an error.
func parsePythonValue(value string) (any, error) {
	value = strings.TrimSpace(value)

	// Quoted string: strip the surrounding quotes. The length check guards
	// against a lone quote character, which previously sliced out of range.
	if len(value) >= 2 &&
		((strings.HasPrefix(value, "\"") && strings.HasSuffix(value, "\"")) ||
			(strings.HasPrefix(value, "'") && strings.HasSuffix(value, "'"))) {
		return value[1 : len(value)-1], nil
	}

	// Booleans and None, case-insensitively (models emit true/True etc.).
	switch strings.ToLower(value) {
	case "true":
		return true, nil
	case "false":
		return false, nil
	case "none":
		return nil, nil
	}

	// Numbers: try int before float so "42" stays an int.
	if i, err := strconv.Atoi(value); err == nil {
		return i, nil
	}
	if f, err := strconv.ParseFloat(value, 64); err == nil {
		return f, nil
	}

	// List literal: parse each top-level comma-separated element.
	if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
		var list []any
		for _, part := range splitTopLevel(value[1:len(value)-1], ',') {
			item := strings.TrimSpace(part)
			v, err := parsePythonValue(item)
			if err != nil {
				return nil, fmt.Errorf("invalid list item: %s", item)
			}
			list = append(list, v)
		}
		return list, nil
	}

	// Dict or set literal: a top-level ':' distinguishes a dict from a set,
	// so colons inside quoted elements do not misclassify a set as a dict.
	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
		inner := value[1 : len(value)-1]
		parts := splitTopLevel(inner, ',')

		if indexTopLevel(inner, ':') < 0 {
			// Set: represented as a list, since Go has no set literal.
			var set []any
			for _, part := range parts {
				item := strings.TrimSpace(part)
				v, err := parsePythonValue(item)
				if err != nil {
					return nil, fmt.Errorf("invalid set item: %s", item)
				}
				set = append(set, v)
			}
			return set, nil
		}

		dict := make(map[any]any)
		for _, part := range parts {
			item := strings.TrimSpace(part)
			sep := indexTopLevel(item, ':')
			if sep < 0 {
				return nil, fmt.Errorf("invalid dictionary key-value pair: %s", item)
			}
			key, err := parsePythonValue(strings.TrimSpace(item[:sep]))
			if err != nil {
				return nil, fmt.Errorf("invalid dictionary key: %s", item[:sep])
			}
			val, err := parsePythonValue(strings.TrimSpace(item[sep+1:]))
			if err != nil {
				return nil, fmt.Errorf("invalid dictionary value: %s", item[sep+1:])
			}
			dict[key] = val
		}
		return dict, nil
	}

	return nil, fmt.Errorf("invalid Python value: %s", value)
}
// parsePythonToolCall parses Python-style function calls from a string,
// e.g. `get_weather(location="Paris", days=3)`. Only keyword arguments are
// supported; multiple calls may appear in one string. An error is returned
// if no call is found or any argument cannot be parsed.
//
// NOTE(review): pythonFuncRegex is non-greedy, so an unquoted ')' inside an
// argument value truncates the argument list — nested calls/tuples are not
// supported.
func parsePythonToolCall(s string) ([]api.ToolCall, error) {
	matches := pythonFuncRegex.FindAllStringSubmatchIndex(s, -1)
	if len(matches) == 0 {
		return nil, fmt.Errorf("no Python function calls found")
	}

	var toolCalls []api.ToolCall
	for _, match := range matches {
		name := s[match[2]:match[3]]
		args := s[match[4]:match[5]]

		// A call with no arguments yields a ToolCall with only a name.
		if len(args) == 0 {
			toolCalls = append(toolCalls, api.ToolCall{
				Function: api.ToolCallFunction{
					Name: name,
				},
			})
			continue
		}

		// The map must be initialized before use: api.ToolCallFunctionArguments
		// is a map type and assigning into a nil map panics.
		arguments := make(api.ToolCallFunctionArguments)
		start := 0
		stack := []rune{}
		for i, char := range args {
			// Track nesting so commas inside lists/dicts/strings don't split
			// arguments. While inside a string literal only its matching
			// closing quote is significant; brackets there are plain text.
			if n := len(stack); n > 0 && (stack[n-1] == '"' || stack[n-1] == '\'') {
				if char == stack[n-1] {
					stack = stack[:n-1]
				}
			} else if n > 0 && char == braces[stack[n-1]] {
				stack = stack[:n-1]
			} else if _, ok := braces[char]; ok {
				stack = append(stack, char)
			}
			if len(stack) == 0 && (char == ',' || i == len(args)-1) {
				// A top-level ',' or the final character closes an argument.
				end := i
				if i == len(args)-1 {
					end = i + 1
				}
				kv := strings.SplitN(args[start:end], "=", 2)
				if len(kv) != 2 {
					// Positional arguments are not supported.
					return nil, fmt.Errorf("invalid argument format: %q", args[start:end])
				}
				key := strings.TrimSpace(kv[0])
				value, err := parsePythonValue(strings.TrimSpace(kv[1]))
				if err != nil {
					return nil, fmt.Errorf("failed to parse value for key %q: %v", key, err)
				}
				arguments[key] = value
				start = i + 1
			}
		}

		if len(arguments) > 0 {
			toolCalls = append(toolCalls, api.ToolCall{
				Function: api.ToolCallFunction{
					Name:      name,
					Arguments: arguments,
				},
			})
		}
	}

	if len(toolCalls) > 0 {
		return toolCalls, nil
	}
	return nil, fmt.Errorf("failed to parse any valid tool calls")
}

269
server/python_tools_test.go Normal file
View File

@@ -0,0 +1,269 @@
package server
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
// TestParsePythonFunctionCall exercises parsePythonToolCall end to end:
// single and multiple calls, keyword arguments of assorted types, and
// malformed inputs that must fail.
func TestParsePythonFunctionCall(t *testing.T) {
	// Shared expected-ToolCall fixtures used by several cases below.
	t1 := api.ToolCall{
		Function: api.ToolCallFunction{
			Name: "get_current_weather",
			Arguments: api.ToolCallFunctionArguments{
				"location": "San Francisco, CA",
				"format":   "fahrenheit",
			},
		},
	}
	t2 := api.ToolCall{
		Function: api.ToolCallFunction{
			Name: "get_forecast",
			Arguments: api.ToolCallFunctionArguments{
				"days":     5,
				"location": "Seattle",
			},
		},
	}
	t3 := api.ToolCall{
		Function: api.ToolCallFunction{
			Name: "get_current_weather",
			Arguments: api.ToolCallFunctionArguments{
				"list":   []any{1, 2, 3},
				"int":    -1,
				"float":  1.23,
				"string": "hello",
			},
		},
	}
	// t4 has no Arguments: an argument-less call produces a name-only ToolCall.
	t4 := api.ToolCall{
		Function: api.ToolCallFunction{
			Name: "get_current_weather",
		},
	}
	cases := []struct {
		name  string
		input string
		want  []api.ToolCall
		err   bool // whether parsePythonToolCall is expected to fail
	}{
		{
			name:  "malformed function call - missing closing paren",
			input: "get_current_weather(location=\"San Francisco\"",
			err:   true,
		},
		{
			name:  "empty function call",
			input: "get_current_weather()",
			want:  []api.ToolCall{t4},
			err:   false,
		},
		{
			name:  "single valid function call",
			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\")",
			want:  []api.ToolCall{t1},
		},
		{
			name:  "multiple valid function calls",
			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\") get_forecast(days=5, location=\"Seattle\")",
			want:  []api.ToolCall{t1, t2},
		},
		{
			name:  "multiple valid function calls with list",
			input: "get_current_weather(list=[1,2,3], int=-1, float=1.23, string=\"hello\")",
			want:  []api.ToolCall{t3},
		},
		{
			name:  "positional arguments not supported",
			input: "get_current_weather(1, 2, 3)",
			err:   true,
		},
		{
			name:  "invalid argument format without equals",
			input: "get_current_weather(\"San Francisco\")",
			err:   true,
		},
		{
			name:  "nested lists",
			input: "get_current_weather(data=[[1,2],[3,4]])",
			want: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name: "get_current_weather",
					Arguments: api.ToolCallFunctionArguments{
						"data": []any{[]any{1, 2}, []any{3, 4}},
					},
				},
			}},
		},
		{
			name:  "boolean and none values",
			input: "get_current_weather(active=true, enabled=false, value=None)",
			want: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name: "get_current_weather",
					Arguments: api.ToolCallFunctionArguments{
						"active":  true,
						"enabled": false,
						"value":   nil,
					},
				},
			}},
		},
		{
			name:  "single vs double quotes",
			input: "get_current_weather(str1='single', str2=\"double\")",
			want: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name: "get_current_weather",
					Arguments: api.ToolCallFunctionArguments{
						"str1": "single",
						"str2": "double",
					},
				},
			}},
		},
		{
			name:  "whitespace handling",
			input: "get_current_weather( location = \"San Francisco\" , temp = 72 )",
			want: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name: "get_current_weather",
					Arguments: api.ToolCallFunctionArguments{
						"location": "San Francisco",
						"temp":     72,
					},
				},
			}},
		},
	}
	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			got, err := parsePythonToolCall(tt.input)
			if (err != nil) != tt.err {
				t.Fatalf("expected error: %v, got error: %v", tt.err, err)
			}
			if tt.err {
				// Error cases carry no expected value to compare.
				return
			}
			if diff := cmp.Diff(got, tt.want); diff != "" {
				t.Errorf("mismatch (-got +want):\n%s", diff)
			}
		})
	}
}
// TestParsePythonValue covers parsePythonValue for every supported literal
// kind (strings, bools, None, ints, floats, lists, dicts, sets) plus
// malformed inputs that must return an error.
func TestParsePythonValue(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  any
		err   bool // whether parsePythonValue is expected to fail
	}{
		{
			name:  "string with double quotes",
			input: "\"hello\"",
			want:  "hello",
		},
		{
			name:  "string with single quotes",
			input: "'world'",
			want:  "world",
		},
		{
			name:  "integer",
			input: "42",
			want:  42,
		},
		{
			name:  "float",
			input: "3.14",
			want:  3.14,
		},
		{
			name:  "boolean true",
			input: "True",
			want:  true,
		},
		{
			name:  "boolean false",
			input: "False",
			want:  false,
		},
		{
			name:  "none/null",
			input: "None",
			want:  nil,
		},
		{
			name:  "simple list",
			input: "[1, 2, 3]",
			want:  []any{1, 2, 3},
		},
		{
			name:  "nested list",
			input: "[1, [2, 3], 4]",
			want:  []any{1, []any{2, 3}, 4},
		},
		{
			name:  "mixed type list",
			input: "[1, \"two\", 3.0, true]",
			want:  []any{1, "two", 3.0, true},
		},
		{
			name:  "invalid list",
			input: "[1, 2,",
			want:  nil,
			err:   true,
		},
		{
			name:  "dictionaries",
			input: "{'a': 1, 'b': 2}",
			want:  map[any]any{"a": 1, "b": 2},
			err:   false,
		},
		{
			name:  "int dictionary",
			input: "{1: 2}",
			want:  map[any]any{1: 2},
			err:   false,
		},
		{
			name:  "mixed type dictionary",
			input: "{'a': 1, 'b': 2.0, 'c': True}",
			want:  map[any]any{"a": 1, "b": 2.0, "c": true},
			err:   false,
		},
		{
			name:  "invalid dictionary - missing closing brace",
			input: "{'a': 1, 'b': 2",
			want:  nil,
			err:   true,
		},
		{
			// Sets are returned as []any since Go has no set literal.
			name:  "sets",
			input: "{1, 2, 3}",
			want:  []any{1, 2, 3},
			err:   false,
		},
	}
	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			got, err := parsePythonValue(tt.input)
			if (err != nil) != tt.err {
				t.Fatalf("expected error: %v, got error: %v", tt.err, err)
			}
			if tt.err {
				// Error cases carry no expected value to compare.
				return
			}
			if diff := cmp.Diff(got, tt.want); diff != "" {
				t.Errorf("mismatch (-got +want):\n%s", diff)
			}
		})
	}
}

View File

@@ -299,6 +299,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Hello!"}, {Role: "user", Content: "Hello!"},
}, },
Stream: &stream, Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
}) })
if w.Code != http.StatusOK { if w.Code != http.StatusOK {
@@ -321,6 +324,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Hello!"}, {Role: "user", Content: "Hello!"},
}, },
Stream: &stream, Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
}) })
if w.Code != http.StatusOK { if w.Code != http.StatusOK {
@@ -344,6 +350,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Help me write tests."}, {Role: "user", Content: "Help me write tests."},
}, },
Stream: &stream, Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
}) })
if w.Code != http.StatusOK { if w.Code != http.StatusOK {

View File

@@ -81,10 +81,6 @@ func InitScheduler(ctx context.Context) *Scheduler {
// context must be canceled to decrement ref count and release the runner // context must be canceled to decrement ref count and release the runner
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) { func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) {
if opts.NumCtx < 4 {
opts.NumCtx = 4
}
req := &LlmRequest{ req := &LlmRequest{
ctx: c, ctx: c,
model: model, model: model,
@@ -114,6 +110,11 @@ func (s *Scheduler) Run(ctx context.Context) {
}() }()
} }
// Context-window defaults applied when the request did not set num_ctx
// explicitly (origNumCtx == -1).
const (
	// defaultContextLength is the normal default context window.
	defaultContextLength = 4096
	// smallGpuContextLength is used instead when a single non-CPU GPU with
	// <= 4 GiB of total memory is detected, to reduce memory pressure.
	smallGpuContextLength = 2048
)
func (s *Scheduler) processPending(ctx context.Context) { func (s *Scheduler) processPending(ctx context.Context) {
for { for {
select { select {
@@ -166,6 +167,17 @@ func (s *Scheduler) processPending(ctx context.Context) {
gpus = s.getGpuFn() gpus = s.getGpuFn()
} }
if pending.origNumCtx == -1 {
if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 {
slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength)
pending.opts.NumCtx = smallGpuContextLength
pending.origNumCtx = smallGpuContextLength
} else {
pending.opts.NumCtx = defaultContextLength
pending.origNumCtx = defaultContextLength
}
}
if envconfig.MaxRunners() <= 0 { if envconfig.MaxRunners() <= 0 {
// No user specified MaxRunners, so figure out what automatic setting to use // No user specified MaxRunners, so figure out what automatic setting to use
// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs // If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs

View File

@@ -148,6 +148,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
successCh: make(chan *runnerRef, 1), successCh: make(chan *runnerRef, 1),
errCh: make(chan error, 1), errCh: make(chan error, 1),
} }
b.req.opts.NumCtx = 4096
b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}} b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}}
return b return b
} }