remove context shifting with max tokens and update docs
This commit is contained in:
parent 5c2f35d846
commit 16abd181a9
@@ -94,6 +94,20 @@ except Exception as e:
     print(f"Error: {e}")
 ```
 
+#### Experimental
+
+- The `num_ctx` parameter can be used to set the context window for the model
+- The OpenAI Python SDK does not support setting the context window size; however, it can be set for Ollama through the `extra_body` parameter
+- The recommended way to control this is through the [Ollama Python SDK](https://github.com/ollama/ollama-python) with the `options` parameter
+
+```py
+completion = client.beta.chat.completions.create(
+    model="llama3.1:8b",
+    messages=[{"role": "user", "content": "Say this is a test"}],
+    extra_body={"num_ctx": 4096},
+)
+```
+
 ### OpenAI JavaScript library
 
 ```javascript
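The bullet above recommends the [Ollama Python SDK](https://github.com/ollama/ollama-python) route without showing it. A minimal sketch, assuming the `ollama` package's top-level `chat` helper; the model name and `num_ctx` value are illustrative, not part of this commit:

```py
# Sketch: explicit context window via the Ollama Python SDK's
# `options` parameter; model and num_ctx value are illustrative.
from ollama import chat

response = chat(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    options={"num_ctx": 4096},
)
print(response["message"]["content"])
```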
@@ -142,6 +156,21 @@ const embedding = await openai.embeddings.create({
 })
 ```
 
+#### Experimental
+
+- The `num_ctx` parameter can be used to set the context window for the model
+- The OpenAI JS SDK does not support setting the context window size; however, it can be set for Ollama by passing `num_ctx` directly, with a `@ts-expect-error`, as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
+- The recommended way to control this is through the [Ollama JS SDK](https://github.com/ollama/ollama-js) with the `options` parameter
+
+```js
+const chatCompletion = await openai.chat.completions.create({
+    messages: [{ role: 'user', content: 'Say this is a test' }],
+    model: 'llama3.2',
+    // @ts-expect-error num_ctx is not officially supported
+    num_ctx: 4096,
+})
+```
+
 ### `curl`
 
 ``` shell
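Whichever client library is used, the `options` field is what ultimately reaches the server. As a hedged cross-check, the same explicit context window can be set against Ollama's native `/api/chat` endpoint, sketched here in Python with `requests`; model name and values are illustrative:

```py
# Sketch: setting num_ctx through Ollama's native REST API instead of
# the OpenAI-compatible endpoint; model name and values are illustrative.
import requests

resp = requests.post(
    "http://localhost:11434/api/chat",
    json={
        "model": "llama3.2",
        "messages": [{"role": "user", "content": "Say this is a test"}],
        "options": {"num_ctx": 4096},
        "stream": False,
    },
    timeout=60,
)
print(resp.json()["message"]["content"])
```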
@@ -213,6 +242,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Chat completions
 - [x] Streaming
 - [x] JSON mode
+- [x] Structured outputs
 - [x] Reproducible outputs
 - [x] Vision
 - [x] Tools
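The newly checked `Structured outputs` item corresponds to the OpenAI `response_format` field. A minimal sketch with the OpenAI Python client pointed at Ollama; the base URL follows the compatibility docs, while the model name and schema are illustrative assumptions, not taken from this diff:

```py
# Sketch: structured outputs via the OpenAI-compatible endpoint using a
# JSON-schema response_format; model and schema are illustrative.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Name a country and its capital."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "capital",
            "schema": {
                "type": "object",
                "properties": {
                    "country": {"type": "string"},
                    "capital": {"type": "string"},
                },
                "required": ["country", "capital"],
            },
        },
    },
)
print(completion.choices[0].message.content)
```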
@@ -477,24 +477,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}
 
+	if r.NumCtx != nil {
+		options["num_ctx"] = *r.NumCtx
+	}
+
 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
 	if r.MaxTokens != nil {
 		r.MaxCompletionTokens = r.MaxTokens
 	}
 
-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
-	}
-
-	DEFAULT_NUM_CTX := 2048
-	// set num_ctx to max_completion_tokens if it's greater than num_ctx
 	if r.MaxCompletionTokens != nil {
 		options["num_predict"] = *r.MaxCompletionTokens
-		if r.NumCtx != nil && *r.MaxCompletionTokens > *r.NumCtx {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		} else if *r.MaxCompletionTokens > DEFAULT_NUM_CTX {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		}
 	}
 
 	if r.Temperature != nil {
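The net effect of this hunk: `num_ctx` now comes only from the request itself, and a large `max_completion_tokens` no longer enlarges the context window behind the caller's back. A hedged sketch of what a client should now do instead, reusing the OpenAI client from the docs above; values are illustrative:

```py
# Sketch: after this change, request a larger context window explicitly;
# it is no longer inferred when max_completion_tokens exceeds the default.
completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Write a long story."}],
    max_completion_tokens=4096,
    extra_body={"num_ctx": 8192},  # explicit, since the server no longer auto-bumps it
)
```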
@@ -81,7 +81,7 @@ func TestChatMiddleware(t *testing.T) {
 				{"role": "user", "content": "Hello"}
 			],
 			"stream": true,
-			"max_completion_tokens": 999,
+			"max_tokens": 999,
 			"seed": 123,
 			"stop": ["\n", "stop"],
 			"temperature": 3.0,
@@ -333,7 +333,7 @@ func TestChatMiddleware(t *testing.T) {
 				},
 			},
 		},
 		{
-			name: "chat handler with max_completion_tokens < num_ctx",
+			name: "chat handler with max_completion_tokens",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
@@ -350,25 +350,6 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
-		{
-			name: "chat handler with max_completion_tokens > num_ctx",
-			body: `{
-				"model": "test-model",
-				"messages": [{"role": "user", "content": "Hello"}],
-				"max_completion_tokens": 4096
-			}`,
-			req: api.ChatRequest{
-				Model:    "test-model",
-				Messages: []api.Message{{Role: "user", Content: "Hello"}},
-				Options: map[string]any{
-					"num_predict": 4096.0, // float because JSON doesn't distinguish between float and int
-					"num_ctx":     4096.0,
-					"temperature": 1.0,
-					"top_p":       1.0,
-				},
-				Stream: &False,
-			},
-		},
 		{
 			name: "chat handler error forwarding",
 			body: `{