lower default NUM_PARALLEL to 2

This is in part to "pay" for #10452, which doubled the default context length. The combination isn't fully memory-neutral, though: the old 4x2k limit and the new 2x4k limit are memory equivalent, but the 1x fallback is larger with 4k than it was with 2k.
Merge pull request #10452 from ollama/drifkin/4096-context-length
2025-04-29 02:03:51 -07:00 · 2025-04-28 17:13:51 -07:00 · 2025-04-28 17:03:27 -07:00 · 2025-04-28 17:02:10 -07:00 · 2025-04-28 16:54:11 -07:00
9 changed files with 12 additions and 544 deletions
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1407,7 +1407,6 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_LLM_LIBRARY"],
 				envVars["OLLAMA_GPU_OVERHEAD"],
 				envVars["OLLAMA_LOAD_TIMEOUT"],
-				envVars["OLLAMA_CONTEXT_LENGTH"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md).

 ## How can I specify the context window size?

-By default, Ollama uses a context window size of 4096 tokens, unless you have a single GPU with <= 4 GB of VRAM, in which case it will default to 2048 tokens. 
+By default, Ollama uses a context window size of 4096 tokens. 

 This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use: 

@@ -31,7 +31,7 @@ OLLAMA_CONTEXT_LENGTH=8192 ollama serve
 To change this when using `ollama run`, use `/set parameter`:

 ```shell
-/set parameter num_ctx 8192
+/set parameter num_ctx 4096
 ```

 When using the API, specify the `num_ctx` parameter:
@@ -41,7 +41,7 @@ curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Why is the sky blue?",
  "options": {
-    "num_ctx": 8192
+    "num_ctx": 4096
  }
 }'
 ```
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -169,7 +169,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Int64("OLLAMA_CONTEXT_LENGTH", -1)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 )

 func String(s string) func() string {
@@ -227,20 +227,6 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 	}
 }

-func Int64(key string, defaultValue int64) func() int64 {
-	return func() int64 {
-		if s := Var(key); s != "" {
-			if n, err := strconv.ParseInt(s, 10, 64); err != nil {
-				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
-			} else {
-				return n
-			}
-		}
-
-		return defaultValue
-	}
-}
-
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)

@@ -269,7 +255,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default 4096 or 2048 with low VRAM)"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},

 		// Informational
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -278,9 +278,9 @@ func TestVar(t *testing.T) {
 }

 func TestContextLength(t *testing.T) {
-	cases := map[string]int64{
-		"":     -1,
-		"4096": 4096,
+	cases := map[string]uint{
+		"":     4096,
+		"2048": 2048,
 	}

 	for k, v := range cases {
--- a/server/python_tools.go
+++ b/server/python_tools.go
@@ -1,226 +0,0 @@
-package server
-
-import (
-	"fmt"
-	"regexp"
-	"strconv"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-var (
-	pythonFuncRegex = regexp.MustCompile(`(\w+)\((.*?)\)`)
-	braces          = map[rune]rune{
-		'[':  ']',
-		'{':  '}',
-		'(':  ')',
-		'"':  '"',
-		'\'': '\'',
-	}
-)
-
-// parsePythonValue converts a Python value string to its appropriate Go type
-func parsePythonValue(value string) (any, error) {
-	value = strings.TrimSpace(value)
-
-	// string
-	if (strings.HasPrefix(value, "\"") && strings.HasSuffix(value, "\"")) ||
-		(strings.HasPrefix(value, "'") && strings.HasSuffix(value, "'")) {
-		// Remove quotes
-		result := value[1 : len(value)-1]
-		return result, nil
-	}
-
-	// bool
-	switch strings.ToLower(value) {
-	case "true":
-		return true, nil
-	case "false":
-		return false, nil
-	case "none":
-		return nil, nil
-	}
-
-	// int
-	if i, err := strconv.Atoi(value); err == nil {
-		return i, nil
-	}
-
-	// float
-	if f, err := strconv.ParseFloat(value, 64); err == nil {
-		return f, nil
-	}
-
-	// list
-	if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
-		listStr := value[1 : len(value)-1]
-		var list []any
-		stack := []rune{}
-		start := 0
-
-		for i, char := range listStr {
-			if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
-				stack = stack[:len(stack)-1]
-			} else if _, ok := braces[char]; ok {
-				stack = append(stack, char)
-			}
-
-			if len(stack) == 0 && (char == ',' || i == len(listStr)-1) {
-				end := i
-				if i == len(listStr)-1 {
-					end = i + 1
-				}
-				item := strings.TrimSpace(listStr[start:end])
-				if val, err := parsePythonValue(item); err == nil {
-					list = append(list, val)
-				} else {
-					return nil, fmt.Errorf("invalid list item: %s", item)
-				}
-				start = i + 1
-			}
-		}
-		return list, nil
-	}
-
-	// dictionary
-	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") && strings.Contains(value, ":") {
-		dictStr := value[1 : len(value)-1]
-		dict := make(map[any]any)
-		stack := []rune{}
-		start := 0
-		for i, char := range dictStr {
-			if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
-				stack = stack[:len(stack)-1]
-			} else if _, ok := braces[char]; ok {
-				stack = append(stack, char)
-			}
-			if len(stack) == 0 && (char == ',' || i == len(dictStr)-1) {
-				end := i
-				if i == len(dictStr)-1 {
-					end = i + 1
-				}
-				item := strings.TrimSpace(dictStr[start:end])
-				kv := strings.SplitN(item, ":", 2)
-				if len(kv) != 2 {
-					return nil, fmt.Errorf("invalid dictionary key-value pair: %s", item)
-				}
-
-				key, err := parsePythonValue(strings.TrimSpace(kv[0]))
-				if err != nil {
-					return nil, fmt.Errorf("invalid dictionary key: %s", kv[0])
-				}
-
-				val, err := parsePythonValue(strings.TrimSpace(kv[1]))
-				if err != nil {
-					return nil, fmt.Errorf("invalid dictionary value: %s", kv[1])
-				}
-
-				dict[key] = val
-				start = i + 1
-			}
-		}
-		return dict, nil
-	}
-
-	// sets (stored as lists)
-	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
-		setStr := value[1 : len(value)-1]
-		var list []any
-		stack := []rune{}
-		start := 0
-		for i, char := range setStr {
-			if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
-				stack = stack[:len(stack)-1]
-			} else if _, ok := braces[char]; ok {
-				stack = append(stack, char)
-			}
-			if len(stack) == 0 && (char == ',' || i == len(setStr)-1) {
-				end := i
-				if i == len(setStr)-1 {
-					end = i + 1
-				}
-				item := strings.TrimSpace(setStr[start:end])
-				if val, err := parsePythonValue(item); err == nil {
-					list = append(list, val)
-				} else {
-					return nil, fmt.Errorf("invalid set item: %s", item)
-				}
-				start = i + 1
-			}
-		}
-		return list, nil
-	}
-
-	return nil, fmt.Errorf("invalid Python value: %s", value)
-}
-
-// parsePythonToolCall parses Python function calls from a string
-// it supports keyword arguments, as well as multiple functions in a single string
-func parsePythonToolCall(s string) ([]api.ToolCall, error) {
-	matches := pythonFuncRegex.FindAllStringSubmatchIndex(s, -1)
-	if len(matches) == 0 {
-		return nil, fmt.Errorf("no Python function calls found")
-	}
-
-	var toolCalls []api.ToolCall
-	for _, match := range matches {
-		name := s[match[2]:match[3]]
-		args := s[match[4]:match[5]]
-		var arguments api.ToolCallFunctionArguments
-		if len(args) == 0 {
-			toolCalls = append(toolCalls, api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: name,
-				},
-			})
-			continue
-		}
-
-		start := 0
-		stack := []rune{}
-		for i, char := range args {
-			if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
-				stack = stack[:len(stack)-1]
-			} else if _, ok := braces[char]; ok {
-				stack = append(stack, char)
-			}
-			if len(stack) == 0 && (char == ',' || i == len(args)-1) {
-				end := i
-				if i == len(args)-1 {
-					end = i + 1
-				}
-				kv := strings.SplitN(args[start:end], "=", 2)
-				if len(kv) == 2 {
-					key := strings.TrimSpace(kv[0])
-					valueStr := strings.TrimSpace(kv[1])
-
-					// Parse the value into appropriate type
-					value, err := parsePythonValue(valueStr)
-					if err != nil {
-						return nil, fmt.Errorf("failed to parse value for key %q: %v", key, err)
-					}
-
-					arguments[key] = value
-				} else {
-					return nil, fmt.Errorf("invalid argument format: %q", args[start:end])
-				}
-				start = i + 1
-			}
-		}
-
-		if len(arguments) > 0 {
-			toolCalls = append(toolCalls, api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      name,
-					Arguments: arguments,
-				},
-			})
-		}
-	}
-
-	if len(toolCalls) > 0 {
-		return toolCalls, nil
-	}
-	return nil, fmt.Errorf("failed to parse any valid tool calls")
-}
--- a/server/python_tools_test.go
+++ b/server/python_tools_test.go
@@ -1,269 +0,0 @@
-package server
-
-import (
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/ollama/ollama/api"
-)
-
-func TestParsePythonFunctionCall(t *testing.T) {
-	t1 := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name: "get_current_weather",
-			Arguments: api.ToolCallFunctionArguments{
-				"location": "San Francisco, CA",
-				"format":   "fahrenheit",
-			},
-		},
-	}
-
-	t2 := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name: "get_forecast",
-			Arguments: api.ToolCallFunctionArguments{
-				"days":     5,
-				"location": "Seattle",
-			},
-		},
-	}
-
-	t3 := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name: "get_current_weather",
-			Arguments: api.ToolCallFunctionArguments{
-				"list":   []any{1, 2, 3},
-				"int":    -1,
-				"float":  1.23,
-				"string": "hello",
-			},
-		},
-	}
-	t4 := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name: "get_current_weather",
-		},
-	}
-
-	cases := []struct {
-		name  string
-		input string
-		want  []api.ToolCall
-		err   bool
-	}{
-		{
-			name:  "malformed function call - missing closing paren",
-			input: "get_current_weather(location=\"San Francisco\"",
-			err:   true,
-		},
-		{
-			name:  "empty function call",
-			input: "get_current_weather()",
-			want:  []api.ToolCall{t4},
-			err:   false,
-		},
-		{
-			name:  "single valid function call",
-			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\")",
-			want:  []api.ToolCall{t1},
-		},
-		{
-			name:  "multiple valid function calls",
-			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\") get_forecast(days=5, location=\"Seattle\")",
-			want:  []api.ToolCall{t1, t2},
-		},
-		{
-			name:  "multiple valid function calls with list",
-			input: "get_current_weather(list=[1,2,3], int=-1, float=1.23, string=\"hello\")",
-			want:  []api.ToolCall{t3},
-		},
-		{
-			name:  "positional arguments not supported",
-			input: "get_current_weather(1, 2, 3)",
-			err:   true,
-		},
-		{
-			name:  "invalid argument format without equals",
-			input: "get_current_weather(\"San Francisco\")",
-			err:   true,
-		},
-		{
-			name:  "nested lists",
-			input: "get_current_weather(data=[[1,2],[3,4]])",
-			want: []api.ToolCall{{
-				Function: api.ToolCallFunction{
-					Name: "get_current_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"data": []any{[]any{1, 2}, []any{3, 4}},
-					},
-				},
-			}},
-		},
-		{
-			name:  "boolean and none values",
-			input: "get_current_weather(active=true, enabled=false, value=None)",
-			want: []api.ToolCall{{
-				Function: api.ToolCallFunction{
-					Name: "get_current_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"active":  true,
-						"enabled": false,
-						"value":   nil,
-					},
-				},
-			}},
-		},
-		{
-			name:  "single vs double quotes",
-			input: "get_current_weather(str1='single', str2=\"double\")",
-			want: []api.ToolCall{{
-				Function: api.ToolCallFunction{
-					Name: "get_current_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"str1": "single",
-						"str2": "double",
-					},
-				},
-			}},
-		},
-		{
-			name:  "whitespace handling",
-			input: "get_current_weather( location = \"San Francisco\" , temp = 72 )",
-			want: []api.ToolCall{{
-				Function: api.ToolCallFunction{
-					Name: "get_current_weather",
-					Arguments: api.ToolCallFunctionArguments{
-						"location": "San Francisco",
-						"temp":     72,
-					},
-				},
-			}},
-		},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := parsePythonToolCall(tt.input)
-			if (err != nil) != tt.err {
-				t.Fatalf("expected error: %v, got error: %v", tt.err, err)
-			}
-			if tt.err {
-				return
-			}
-			if diff := cmp.Diff(got, tt.want); diff != "" {
-				t.Errorf("mismatch (-got +want):\n%s", diff)
-			}
-		})
-	}
-}
-
-func TestParsePythonValue(t *testing.T) {
-	cases := []struct {
-		name  string
-		input string
-		want  any
-		err   bool
-	}{
-		{
-			name:  "string with double quotes",
-			input: "\"hello\"",
-			want:  "hello",
-		},
-		{
-			name:  "string with single quotes",
-			input: "'world'",
-			want:  "world",
-		},
-		{
-			name:  "integer",
-			input: "42",
-			want:  42,
-		},
-		{
-			name:  "float",
-			input: "3.14",
-			want:  3.14,
-		},
-		{
-			name:  "boolean true",
-			input: "True",
-			want:  true,
-		},
-		{
-			name:  "boolean false",
-			input: "False",
-			want:  false,
-		},
-		{
-			name:  "none/null",
-			input: "None",
-			want:  nil,
-		},
-		{
-			name:  "simple list",
-			input: "[1, 2, 3]",
-			want:  []any{1, 2, 3},
-		},
-		{
-			name:  "nested list",
-			input: "[1, [2, 3], 4]",
-			want:  []any{1, []any{2, 3}, 4},
-		},
-		{
-			name:  "mixed type list",
-			input: "[1, \"two\", 3.0, true]",
-			want:  []any{1, "two", 3.0, true},
-		},
-		{
-			name:  "invalid list",
-			input: "[1, 2,",
-			want:  nil,
-			err:   true,
-		},
-		{
-			name:  "dictionaries",
-			input: "{'a': 1, 'b': 2}",
-			want:  map[any]any{"a": 1, "b": 2},
-			err:   false,
-		},
-		{
-			name:  "int dictionary",
-			input: "{1: 2}",
-			want:  map[any]any{1: 2},
-			err:   false,
-		},
-		{
-			name:  "mixed type dictionary",
-			input: "{'a': 1, 'b': 2.0, 'c': True}",
-			want:  map[any]any{"a": 1, "b": 2.0, "c": true},
-			err:   false,
-		},
-		{
-			name:  "invalid dictionary - missing closing brace",
-			input: "{'a': 1, 'b': 2",
-			want:  nil,
-			err:   true,
-		},
-		{
-			name:  "sets",
-			input: "{1, 2, 3}",
-			want:  []any{1, 2, 3},
-			err:   false,
-		},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := parsePythonValue(tt.input)
-			if (err != nil) != tt.err {
-				t.Fatalf("expected error: %v, got error: %v", tt.err, err)
-			}
-			if tt.err {
-				return
-			}
-			if diff := cmp.Diff(got, tt.want); diff != "" {
-				t.Errorf("mismatch (-got +want):\n%s", diff)
-			}
-		})
-	}
-}
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -299,9 +299,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Hello!"},
 			},
 			Stream: &stream,
-			Options: map[string]any{
-				"num_ctx": 1024,
-			},
 		})

 		if w.Code != http.StatusOK {
@@ -324,9 +321,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Hello!"},
 			},
 			Stream: &stream,
-			Options: map[string]any{
-				"num_ctx": 1024,
-			},
 		})

 		if w.Code != http.StatusOK {
@@ -350,9 +344,6 @@ func TestGenerateChat(t *testing.T) {
 				{Role: "user", Content: "Help me write tests."},
 			},
 			Stream: &stream,
-			Options: map[string]any{
-				"num_ctx": 1024,
-			},
 		})

 		if w.Code != http.StatusOK {
--- a/server/sched.go
+++ b/server/sched.go
@@ -81,6 +81,10 @@ func InitScheduler(ctx context.Context) *Scheduler {

 // context must be canceled to decrement ref count and release the runner
 func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) {
+	if opts.NumCtx < 4 {
+		opts.NumCtx = 4
+	}
+
 	req := &LlmRequest{
 		ctx:             c,
 		model:           model,
@@ -110,11 +114,6 @@ func (s *Scheduler) Run(ctx context.Context) {
 	}()
 }

-const (
-	defaultContextLength  = 4096
-	smallGpuContextLength = 2048
-)
-
 func (s *Scheduler) processPending(ctx context.Context) {
 	for {
 		select {
@@ -167,17 +166,6 @@ func (s *Scheduler) processPending(ctx context.Context) {
 						gpus = s.getGpuFn()
 					}

-					if pending.origNumCtx == -1 {
-						if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 {
-							slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength)
-							pending.opts.NumCtx = smallGpuContextLength
-							pending.origNumCtx = smallGpuContextLength
-						} else {
-							pending.opts.NumCtx = defaultContextLength
-							pending.origNumCtx = defaultContextLength
-						}
-					}
-
 					if envconfig.MaxRunners() <= 0 {
 						// No user specified MaxRunners, so figure out what automatic setting to use
 						// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -148,7 +148,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 		successCh:       make(chan *runnerRef, 1),
 		errCh:           make(chan error, 1),
 	}
-	b.req.opts.NumCtx = 4096
 	b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}}
 	return b
 }
Author	SHA1	Message	Date
Devon Rifkin	67335dede2	lower default NUM_PARALLEL to 2 — this is in part to "pay" for #10452, which doubled the default context length. The combination isn't fully neutral though, because even though the old 4x2k limit and the new 2x4k limit are memory equivalent, the 1x fallback is larger with 4k.	2025-04-29 02:03:51 -07:00
Devon Rifkin	6ec71d8fb6	Merge pull request #10452 from ollama/drifkin/4096-context-length config: update default context length to 4096	2025-04-28 17:13:51 -07:00
Devon Rifkin	44b466eeb2	config: update default context length to 4096	2025-04-28 17:03:27 -07:00
Devon Rifkin	a25f3f8260	Merge pull request #10451 from ollama/revert-10364-drifkin/context-length Revert "increase default context length to 4096"	2025-04-28 17:02:10 -07:00
Devon Rifkin	dd93e1af85	Revert "increase default context length to 4096 (#10364)" — This reverts commit `424f648632`.	2025-04-28 16:54:11 -07:00