From 61a5254115d7f9684bfe57ccda82a616c02ad07a Mon Sep 17 00:00:00 2001
From: ParthSareen
Date: Wed, 5 Feb 2025 11:26:55 -0800
Subject: [PATCH] context_window and addressing comments

---
 docs/openai.md        | 24 ++++++++++++------------
 openai/openai.go      |  8 ++++----
 openai/openai_test.go |  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/docs/openai.md b/docs/openai.md
index c8ef02cc2..22bb82f78 100644
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -204,31 +204,31 @@ curl http://localhost:11434/v1/embeddings \
   }'
 ```
 
-## Extra Arguments
+## Extra arguments
 
-### Setting Context Window Size
-- `num_ctx` parameter can be used to set the context window for the model
+### Setting context window size
+- The `context_window` parameter can be used to set the context window for the model
 
-#### OpenAI Python SDK
-- OpenAI Python SDK does not support setting context window size, however this can be set for Ollama through the `extra_body` parameter
+#### OpenAI Python library
+- The OpenAI Python library does not support setting the context window size; however, it can be set for Ollama through the `extra_body` parameter
 
 ```py
-completion = client.beta.chat.completions.create(
+completion = client.chat.completions.create(
     model="llama3.1:8b",
     messages=[{"role": "user", "content": "Say this is a test"}],
-    extra_body={"num_ctx": 4096},
+    extra_body={"context_window": 4096},
 )
 ```
 
-#### OpenAI JS SDK
-- OpenAI JS SDK does not support setting context window size, however this can be set for Ollama by passing `num_ctx` directly with a `@ts-expect-error` as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
+#### OpenAI JavaScript library
+- The OpenAI JavaScript library does not support setting the context window size; however, it can be set for Ollama by passing `context_window` directly with a `@ts-expect-error` as an undocumented parameter. [See the OpenAI JavaScript library documentation here](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
 
 ```ts
 const chatCompletion = await openai.chat.completions.create({
     messages: [{ role: 'user', content: 'Say this is a test' }],
     model: 'llama3.2',
-    // @ts-expect-error num_ctx is not officially supported
-    num_ctx: 4096,
+    // @ts-expect-error context_window is an additional parameter
+    context_window: 4096,
 })
 ```
 
@@ -239,7 +239,7 @@ curl http://localhost:11434/v1/chat/completions \
   -d '{
     "model": "llama3.2",
     "messages": [{"role": "user", "content": "Say this is a test"}],
-    "num_ctx": 4096
+    "context_window": 4096
   }'
 ```
 
diff --git a/openai/openai.go b/openai/openai.go
index fc0b1e150..ebb74c9f6 100644
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -86,7 +86,7 @@ type ChatCompletionRequest struct {
 	StreamOptions       *StreamOptions `json:"stream_options"`
 	MaxCompletionTokens *int           `json:"max_completion_tokens"`
 	// Deprecated: Use [ChatCompletionRequest.MaxCompletionTokens]
-	MaxTokens *int `json:"max_tokens" deprecated:"use max_completion_tokens instead"`
+	MaxTokens *int `json:"max_tokens"`
 	Seed      *int `json:"seed"`
 	Stop      any  `json:"stop"`
 	Temperature *float64 `json:"temperature"`
@@ -95,7 +95,7 @@ type ChatCompletionRequest struct {
 	TopP           *float64        `json:"top_p"`
 	ResponseFormat *ResponseFormat `json:"response_format"`
 	Tools          []api.Tool      `json:"tools"`
-	NumCtx *int `json:"num_ctx"`
+	ContextWindow *int `json:"context_window"`
 }
 
 type ChatCompletion struct {
@@ -478,8 +478,8 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}
 
-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
+	if r.ContextWindow != nil {
+		options["num_ctx"] = *r.ContextWindow
 	}
 
 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
diff --git a/openai/openai_test.go b/openai/openai_test.go
index 00be4e426..8162db3d1 100644
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -315,11 +315,11 @@ func TestChatMiddleware(t *testing.T) {
 			},
 		},
 		{
-			name: "chat handler with num_ctx",
+			name: "chat handler with context_window",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
-				"num_ctx": 4096
+				"context_window": 4096
 			}`,
 			req: api.ChatRequest{
 				Model: "test-model",
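
---

For review, a minimal, self-contained sketch of the mapping this patch implements: the `context_window` field accepted by the OpenAI-compatible endpoint is translated into Ollama's native `num_ctx` option, as `fromChatRequest` does above. The trimmed-down struct here is illustrative only; the real `ChatCompletionRequest` in `openai/openai.go` carries many more fields.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// chatCompletionRequest is a cut-down stand-in for the
// ChatCompletionRequest type in openai/openai.go.
type chatCompletionRequest struct {
	Model         string `json:"model"`
	ContextWindow *int   `json:"context_window"`
}

func main() {
	// The body a client would POST to /v1/chat/completions.
	body := `{"model": "llama3.2", "context_window": 4096}`

	var req chatCompletionRequest
	if err := json.Unmarshal([]byte(body), &req); err != nil {
		panic(err)
	}

	// Mirrors fromChatRequest: the OpenAI-compat field
	// context_window maps onto Ollama's native num_ctx option.
	options := map[string]any{}
	if req.ContextWindow != nil {
		options["num_ctx"] = *req.ContextWindow
	}

	fmt.Println(options) // map[num_ctx:4096]
}
```

One behavioral consequence worth noting: since the struct's JSON tag changes from `num_ctx` to `context_window`, clients that still send a top-level `num_ctx` to the OpenAI-compatible endpoint will have it silently ignored by `encoding/json` after this patch.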