lower default NUM_PARALLEL to 2

This is in part to "pay" for #10452, which doubled the default context length. The combination isn't fully neutral, though: even though the old 4x2k limit and the new 2x4k limit are memory-equivalent, the 1x fallback is larger with 4k.
Merge pull request #10452 from ollama/drifkin/4096-context-length
2025-04-29 02:03:51 -07:00 · 2025-04-28 17:13:51 -07:00 · 2025-04-28 17:03:27 -07:00 · 2025-04-28 17:02:10 -07:00 · 2025-04-28 16:54:11 -07:00
9 changed files with 12 additions and 544 deletions
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1407,7 +1407,6 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_LLM_LIBRARY"],
 				envVars["OLLAMA_GPU_OVERHEAD"],
 				envVars["OLLAMA_LOAD_TIMEOUT"],
 				envVars["OLLAMA_CONTEXT_LENGTH"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md).
 ## How can I specify the context window size?
-By default, Ollama uses a context window size of 4096 tokens, unless you have a single GPU with <= 4 GB of VRAM, in which case it will default to 2048 tokens. 
+By default, Ollama uses a context window size of 4096 tokens. 
 This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use: 
@@ -31,7 +31,7 @@ OLLAMA_CONTEXT_LENGTH=8192 ollama serve
 To change this when using `ollama run`, use `/set parameter`:
 ```shell
-/set parameter num_ctx 8192
+/set parameter num_ctx 4096
 ```
 When using the API, specify the `num_ctx` parameter:
@@ -41,7 +41,7 @@ curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Why is the sky blue?",
  "options": {
-    "num_ctx": 8192
+    "num_ctx": 4096
  }
 }'
 ```
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -169,7 +169,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Int64("OLLAMA_CONTEXT_LENGTH", -1)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 )
 func String(s string) func() string {
@@ -227,20 +227,6 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 	}
 }
 // Int64 returns an accessor for the environment variable named key, parsed as
 // a base-10 signed 64-bit integer. The variable is looked up through Var each
 // time the accessor is called. defaultValue is returned when the lookup yields
 // an empty string or when the text cannot be parsed; the parse failure is also
 // logged as a warning.
 func Int64(key string, defaultValue int64) func() int64 {
 	return func() int64 {
 		raw := Var(key)
 		if raw == "" {
 			return defaultValue
 		}
 		parsed, err := strconv.ParseInt(raw, 10, 64)
 		if err != nil {
 			slog.Warn("invalid environment variable, using default", "key", key, "value", raw, "default", defaultValue)
 			return defaultValue
 		}
 		return parsed
 	}
 }
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
@@ -269,7 +255,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default 4096 or 2048 with low VRAM)"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 		// Informational
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -278,9 +278,9 @@ func TestVar(t *testing.T) {
 }
 func TestContextLength(t *testing.T) {
-	cases := map[string]int64{
+	cases := map[string]uint{
-		"":     -1,
+		"":     4096,
-		"4096": 4096,
+		"2048": 2048,
 	}
 	for k, v := range cases {
--- a/server/python_tools.go
+++ b/server/python_tools.go
@@ -1,226 +0,0 @@
 package server
 import (
 	"fmt"
 	"regexp"
 	"strconv"
 	"strings"
 	"github.com/ollama/ollama/api"
 )
 var (
 	// pythonFuncRegex matches an identifier followed by a parenthesized
 	// argument string: group 1 is the function name, group 2 the raw arguments.
 	pythonFuncRegex = regexp.MustCompile(`(\w+)\((.*?)\)`)
 	// braces maps each opening delimiter to the delimiter that closes it.
 	// Quote characters close themselves.
 	braces          = map[rune]rune{
 		'[':  ']',
 		'{':  '}',
 		'(':  ')',
 		'"':  '"',
 		'\'': '\'',
 	}
 )
 // parsePythonValue converts a Python literal string to its appropriate Go type.
 //
 // Surrounding whitespace is ignored. The supported forms are:
 //   - single- or double-quoted strings -> string (quotes stripped, no unescaping)
 //   - true / false, case-insensitive   -> bool
 //   - none, case-insensitive           -> nil
 //   - integers                         -> int
 //   - floats                           -> float64
 //   - [a, b, ...]                      -> []any
 //   - {k: v, ...}                      -> map[any]any
 //   - {a, b, ...}                      -> []any (sets are stored as lists)
 //
 // Anything else, including unbalanced brackets or quotes, yields an error.
 func parsePythonValue(value string) (any, error) {
 	value = strings.TrimSpace(value)
 	// string: at least two bytes are required, because a lone quote character
 	// is both prefix and suffix and would otherwise be sliced out of range
 	if len(value) >= 2 &&
 		((strings.HasPrefix(value, "\"") && strings.HasSuffix(value, "\"")) ||
 			(strings.HasPrefix(value, "'") && strings.HasSuffix(value, "'"))) {
 		return value[1 : len(value)-1], nil
 	}
 	// bool and None
 	switch strings.ToLower(value) {
 	case "true":
 		return true, nil
 	case "false":
 		return false, nil
 	case "none":
 		return nil, nil
 	}
 	// int
 	if i, err := strconv.Atoi(value); err == nil {
 		return i, nil
 	}
 	// float
 	if f, err := strconv.ParseFloat(value, 64); err == nil {
 		return f, nil
 	}
 	// list
 	if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
 		list, err := parsePythonSequence(value[1:len(value)-1], "list")
 		if err != nil {
 			return nil, err
 		}
 		return list, nil
 	}
 	// dictionary
 	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") && strings.Contains(value, ":") {
 		dict, err := parsePythonDict(value[1 : len(value)-1])
 		if err != nil {
 			return nil, err
 		}
 		return dict, nil
 	}
 	// sets (stored as lists)
 	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
 		list, err := parsePythonSequence(value[1:len(value)-1], "set")
 		if err != nil {
 			return nil, err
 		}
 		return list, nil
 	}
 	return nil, fmt.Errorf("invalid Python value: %s", value)
 }
 // parsePythonSequence parses the comma separated body of a list or set (the
 // text between the outer brackets). kind is only used in error messages.
 func parsePythonSequence(inner, kind string) ([]any, error) {
 	items, err := splitPythonItems(inner)
 	if err != nil {
 		return nil, fmt.Errorf("invalid %s: %w", kind, err)
 	}
 	var list []any
 	for _, item := range items {
 		val, err := parsePythonValue(item)
 		if err != nil {
 			return nil, fmt.Errorf("invalid %s item: %s", kind, item)
 		}
 		list = append(list, val)
 	}
 	return list, nil
 }
 // parsePythonDict parses the comma separated key: value pairs of a dictionary
 // body (the text between the outer braces).
 func parsePythonDict(inner string) (map[any]any, error) {
 	items, err := splitPythonItems(inner)
 	if err != nil {
 		return nil, fmt.Errorf("invalid dictionary: %w", err)
 	}
 	dict := make(map[any]any, len(items))
 	for _, item := range items {
 		// Split on the first colon that is outside quotes and brackets so
 		// that keys such as 'a:b' stay intact.
 		colons, err := pythonTopLevelIndexes(item, ':')
 		if err != nil || len(colons) == 0 {
 			return nil, fmt.Errorf("invalid dictionary key-value pair: %s", item)
 		}
 		rawKey, rawVal := item[:colons[0]], item[colons[0]+1:]
 		key, err := parsePythonValue(rawKey)
 		if err != nil {
 			return nil, fmt.Errorf("invalid dictionary key: %s", rawKey)
 		}
 		// Slices and maps are not valid Go map keys; inserting one would
 		// panic at run time, so reject them up front.
 		switch key.(type) {
 		case []any, map[any]any:
 			return nil, fmt.Errorf("unhashable dictionary key: %s", rawKey)
 		}
 		val, err := parsePythonValue(rawVal)
 		if err != nil {
 			return nil, fmt.Errorf("invalid dictionary value: %s", rawVal)
 		}
 		dict[key] = val
 	}
 	return dict, nil
 }
 // splitPythonItems splits s on top-level commas and trims each piece. A
 // trailing comma (or an entirely blank s) does not produce an empty last item.
 func splitPythonItems(s string) ([]string, error) {
 	commas, err := pythonTopLevelIndexes(s, ',')
 	if err != nil {
 		return nil, err
 	}
 	items := make([]string, 0, len(commas)+1)
 	start := 0
 	for _, i := range commas {
 		items = append(items, strings.TrimSpace(s[start:i]))
 		start = i + 1
 	}
 	if tail := strings.TrimSpace(s[start:]); tail != "" {
 		items = append(items, tail)
 	}
 	return items, nil
 }
 // pythonTopLevelIndexes returns the byte offsets of every sep in s that is not
 // nested inside a bracket pair or a quoted string. It returns an error when a
 // bracket or quote is still open at the end of s.
 func pythonTopLevelIndexes(s string, sep rune) ([]int, error) {
 	var (
 		indexes []int
 		stack   []rune
 	)
 	for i, r := range s {
 		if n := len(stack); n > 0 {
 			top := stack[n-1]
 			if r == braces[top] {
 				stack = stack[:n-1]
 				continue
 			}
 			if top == '"' || top == '\'' {
 				// Inside a string literal every other character is plain text.
 				continue
 			}
 		}
 		if _, ok := braces[r]; ok {
 			stack = append(stack, r)
 			continue
 		}
 		if r == sep && len(stack) == 0 {
 			indexes = append(indexes, i)
 		}
 	}
 	if len(stack) != 0 {
 		return nil, fmt.Errorf("unbalanced %q in %q", string(stack[len(stack)-1]), s)
 	}
 	return indexes, nil
 }
 // parsePythonToolCall parses Python function calls from a string.
 //
 // Every match of pythonFuncRegex in s is treated as one call, so a single
 // string may contain several calls. Only keyword arguments (name=value) are
 // supported; a positional argument or a value that parsePythonValue rejects
 // makes the whole parse fail. A call with an empty argument list produces a
 // tool call without arguments.
 //
 // It returns an error when s contains no call at all or when no call produced
 // a usable tool call.
 func parsePythonToolCall(s string) ([]api.ToolCall, error) {
 	matches := pythonFuncRegex.FindAllStringSubmatchIndex(s, -1)
 	if len(matches) == 0 {
 		return nil, fmt.Errorf("no Python function calls found")
 	}
 	var toolCalls []api.ToolCall
 	for _, match := range matches {
 		// match[2:4] bounds the function name, match[4:6] the raw argument text.
 		name := s[match[2]:match[3]]
 		args := s[match[4]:match[5]]
 		if len(args) == 0 {
 			toolCalls = append(toolCalls, api.ToolCall{
 				Function: api.ToolCallFunction{
 					Name: name,
 				},
 			})
 			continue
 		}
 		// The map has to be allocated before the first assignment below;
 		// writing to a nil map panics.
 		arguments := make(api.ToolCallFunctionArguments)
 		start := 0
 		stack := []rune{}
 		for i, char := range args {
 			// Track open brackets and quotes so that commas nested inside
 			// them are not treated as argument separators.
 			if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
 				stack = stack[:len(stack)-1]
 			} else if _, ok := braces[char]; ok {
 				stack = append(stack, char)
 			}
 			if len(stack) == 0 && (char == ',' || i == len(args)-1) {
 				end := i
 				if i == len(args)-1 {
 					// The final argument has no trailing comma; include its last byte.
 					end = i + 1
 				}
 				kv := strings.SplitN(args[start:end], "=", 2)
 				if len(kv) == 2 {
 					key := strings.TrimSpace(kv[0])
 					valueStr := strings.TrimSpace(kv[1])
 					// Parse the value into appropriate type
 					value, err := parsePythonValue(valueStr)
 					if err != nil {
 						return nil, fmt.Errorf("failed to parse value for key %q: %w", key, err)
 					}
 					arguments[key] = value
 				} else {
 					return nil, fmt.Errorf("invalid argument format: %q", args[start:end])
 				}
 				start = i + 1
 			}
 		}
 		if len(arguments) > 0 {
 			toolCalls = append(toolCalls, api.ToolCall{
 				Function: api.ToolCallFunction{
 					Name:      name,
 					Arguments: arguments,
 				},
 			})
 		}
 	}
 	if len(toolCalls) > 0 {
 		return toolCalls, nil
 	}
 	return nil, fmt.Errorf("failed to parse any valid tool calls")
 }
--- a/server/python_tools_test.go
+++ b/server/python_tools_test.go
@@ -1,269 +0,0 @@
 package server
 import (
 	"testing"
 	"github.com/google/go-cmp/cmp"
 	"github.com/ollama/ollama/api"
 )
 // TestParsePythonFunctionCall runs parsePythonToolCall over a table of inputs
 // and checks, per case, either that an error is reported or that the parsed
 // tool calls equal the expected ones.
 func TestParsePythonFunctionCall(t *testing.T) {
 	// call builds the expected api.ToolCall for a function name and its arguments.
 	call := func(name string, args api.ToolCallFunctionArguments) api.ToolCall {
 		return api.ToolCall{
 			Function: api.ToolCallFunction{
 				Name:      name,
 				Arguments: args,
 			},
 		}
 	}
 	sfWeather := call("get_current_weather", api.ToolCallFunctionArguments{
 		"location": "San Francisco, CA",
 		"format":   "fahrenheit",
 	})
 	seattleForecast := call("get_forecast", api.ToolCallFunctionArguments{
 		"days":     5,
 		"location": "Seattle",
 	})
 	mixedTypes := call("get_current_weather", api.ToolCallFunctionArguments{
 		"list":   []any{1, 2, 3},
 		"int":    -1,
 		"float":  1.23,
 		"string": "hello",
 	})
 	// A call with an empty argument list carries no Arguments map at all.
 	noArgs := call("get_current_weather", nil)
 	tests := []struct {
 		name  string
 		input string
 		want  []api.ToolCall
 		err   bool
 	}{
 		{
 			name:  "malformed function call - missing closing paren",
 			input: "get_current_weather(location=\"San Francisco\"",
 			err:   true,
 		},
 		{
 			name:  "empty function call",
 			input: "get_current_weather()",
 			want:  []api.ToolCall{noArgs},
 		},
 		{
 			name:  "single valid function call",
 			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\")",
 			want:  []api.ToolCall{sfWeather},
 		},
 		{
 			name:  "multiple valid function calls",
 			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\") get_forecast(days=5, location=\"Seattle\")",
 			want:  []api.ToolCall{sfWeather, seattleForecast},
 		},
 		{
 			name:  "multiple valid function calls with list",
 			input: "get_current_weather(list=[1,2,3], int=-1, float=1.23, string=\"hello\")",
 			want:  []api.ToolCall{mixedTypes},
 		},
 		{
 			name:  "positional arguments not supported",
 			input: "get_current_weather(1, 2, 3)",
 			err:   true,
 		},
 		{
 			name:  "invalid argument format without equals",
 			input: "get_current_weather(\"San Francisco\")",
 			err:   true,
 		},
 		{
 			name:  "nested lists",
 			input: "get_current_weather(data=[[1,2],[3,4]])",
 			want: []api.ToolCall{
 				call("get_current_weather", api.ToolCallFunctionArguments{
 					"data": []any{[]any{1, 2}, []any{3, 4}},
 				}),
 			},
 		},
 		{
 			name:  "boolean and none values",
 			input: "get_current_weather(active=true, enabled=false, value=None)",
 			want: []api.ToolCall{
 				call("get_current_weather", api.ToolCallFunctionArguments{
 					"active":  true,
 					"enabled": false,
 					"value":   nil,
 				}),
 			},
 		},
 		{
 			name:  "single vs double quotes",
 			input: "get_current_weather(str1='single', str2=\"double\")",
 			want: []api.ToolCall{
 				call("get_current_weather", api.ToolCallFunctionArguments{
 					"str1": "single",
 					"str2": "double",
 				}),
 			},
 		},
 		{
 			name:  "whitespace handling",
 			input: "get_current_weather( location = \"San Francisco\" , temp = 72 )",
 			want: []api.ToolCall{
 				call("get_current_weather", api.ToolCallFunctionArguments{
 					"location": "San Francisco",
 					"temp":     72,
 				}),
 			},
 		},
 	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			got, err := parsePythonToolCall(tc.input)
 			if failed := err != nil; failed != tc.err {
 				t.Fatalf("expected error: %v, got error: %v", tc.err, err)
 			}
 			// Nothing further to compare once the expected failure was observed.
 			if tc.err {
 				return
 			}
 			diff := cmp.Diff(got, tc.want)
 			if diff != "" {
 				t.Errorf("mismatch (-got +want):\n%s", diff)
 			}
 		})
 	}
 }
 // TestParsePythonValue feeds individual Python literals to parsePythonValue
 // and checks the resulting Go value, or that an error is reported for
 // malformed input.
 func TestParsePythonValue(t *testing.T) {
 	tests := []struct {
 		name  string
 		input string
 		want  any
 		err   bool
 	}{
 		// scalars
 		{name: "string with double quotes", input: "\"hello\"", want: "hello"},
 		{name: "string with single quotes", input: "'world'", want: "world"},
 		{name: "integer", input: "42", want: 42},
 		{name: "float", input: "3.14", want: 3.14},
 		{name: "boolean true", input: "True", want: true},
 		{name: "boolean false", input: "False", want: false},
 		{name: "none/null", input: "None", want: nil},
 		// lists
 		{name: "simple list", input: "[1, 2, 3]", want: []any{1, 2, 3}},
 		{name: "nested list", input: "[1, [2, 3], 4]", want: []any{1, []any{2, 3}, 4}},
 		{name: "mixed type list", input: "[1, \"two\", 3.0, true]", want: []any{1, "two", 3.0, true}},
 		{name: "invalid list", input: "[1, 2,", err: true},
 		// dictionaries
 		{name: "dictionaries", input: "{'a': 1, 'b': 2}", want: map[any]any{"a": 1, "b": 2}},
 		{name: "int dictionary", input: "{1: 2}", want: map[any]any{1: 2}},
 		{name: "mixed type dictionary", input: "{'a': 1, 'b': 2.0, 'c': True}", want: map[any]any{"a": 1, "b": 2.0, "c": true}},
 		{name: "invalid dictionary - missing closing brace", input: "{'a': 1, 'b': 2", err: true},
 		// sets come back as lists
 		{name: "sets", input: "{1, 2, 3}", want: []any{1, 2, 3}},
 	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			got, err := parsePythonValue(tc.input)
 			if failed := err != nil; failed != tc.err {
 				t.Fatalf("expected error: %v, got error: %v", tc.err, err)
 			}
 			// Nothing further to compare once the expected failure was observed.
 			if tc.err {
 				return
 			}
 			diff := cmp.Diff(got, tc.want)
 			if diff != "" {
 				t.Errorf("mismatch (-got +want):\n%s", diff)
 			}
 		})
 	}
 }
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -299,9 +299,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Hello!"},
 			},
 			Stream: &stream,
 			Options: map[string]any{
 				"num_ctx": 1024,
 			},
 		})
 		if w.Code != http.StatusOK {
@@ -324,9 +321,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Hello!"},
 			},
 			Stream: &stream,
 			Options: map[string]any{
 				"num_ctx": 1024,
 			},
 		})
 		if w.Code != http.StatusOK {
@@ -350,9 +344,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Help me write tests."},
 			},
 			Stream: &stream,
 			Options: map[string]any{
 				"num_ctx": 1024,
 			},
 		})
 		if w.Code != http.StatusOK {
--- a/server/sched.go
+++ b/server/sched.go
@@ -81,6 +81,10 @@ func InitScheduler(ctx context.Context) *Scheduler {
 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) {
 	if opts.NumCtx < 4 {
 		opts.NumCtx = 4
 	}
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
@@ -110,11 +114,6 @@ func (s *Scheduler) Run(ctx context.Context) {
 	}()
 }
 const (
 	// defaultContextLength is the context window size that processPending
 	// assigns to a pending request whose origNumCtx is -1, unless the
 	// small-GPU case applies.
 	defaultContextLength  = 4096
 	// smallGpuContextLength is the context window size processPending uses
 	// instead when there is exactly one GPU, its Library is not "cpu", and
 	// its TotalMemory is at most 4096*1024*1024.
 	smallGpuContextLength = 2048
 )
 func (s *Scheduler) processPending(ctx context.Context) {
 	for {
 		select {
@@ -167,17 +166,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						gpus = s.getGpuFn()
 					}
 					if pending.origNumCtx == -1 {
 						if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 {
 							slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength)
 							pending.opts.NumCtx = smallGpuContextLength
 							pending.origNumCtx = smallGpuContextLength
 						} else {
 							pending.opts.NumCtx = defaultContextLength
 							pending.origNumCtx = defaultContextLength
 						}
 					}
 					if envconfig.MaxRunners() <= 0 {
 						// No user specified MaxRunners, so figure out what automatic setting to use
 						// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -148,7 +148,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
 	}
 	b.req.opts.NumCtx = 4096
 	b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}}
 	return b
 }
Author	SHA1	Message	Date
Devon Rifkin	67335dede2	lower default NUM_PARALLEL to 2 this is in part to "pay" for #10452, which doubled the default context length. The combination isn't fully neutral though, because even though the old 4x2k limit and the new 2x4k limit are memory equivalent, the 1x fallback is larger with 4k	2025-04-29 02:03:51 -07:00
Devon Rifkin	6ec71d8fb6	Merge pull request #10452 from ollama/drifkin/4096-context-length config: update default context length to 4096	2025-04-28 17:13:51 -07:00
Devon Rifkin	44b466eeb2	config: update default context length to 4096	2025-04-28 17:03:27 -07:00
Devon Rifkin	a25f3f8260	Merge pull request #10451 from ollama/revert-10364-drifkin/context-length Revert "increase default context length to 4096"	2025-04-28 17:02:10 -07:00
Devon Rifkin	dd93e1af85	Revert "increase default context length to 4096 (#10364 )" This reverts commit `424f648632`.	2025-04-28 16:54:11 -07:00