context_window and addressing comments

ParthSareen 2025-02-05 11:26:55 -08:00
parent 53d2cf37d2
commit 61a5254115
3 changed files with 20 additions and 18 deletions

View File

@@ -204,31 +204,31 @@ curl http://localhost:11434/v1/embeddings \
   }'
 ```

-## Extra Arguments
+## Extra arguments

-### Setting Context Window Size
+### Setting context window size
-- `num_ctx` parameter can be used to set the context window for the model
+- `context_window` parameter can be used to set the context window for the model

-#### OpenAI Python SDK
+#### OpenAI python library
-- OpenAI Python SDK does not support setting context window size, however this can be set for Ollama through the `extra_body` parameter
+- OpenAI python library does not support setting context window size, however this can be set for Ollama through the `extra_body` parameter

 ```py
-completion = client.beta.chat.completions.create(
+completion = client.chat.completions.create(
     model="llama3.1:8b",
     messages=[{"role": "user", "content": "Say this is a test"}],
-    extra_body={"num_ctx": 4096},
+    extra_body={"context_window": 4096},
 )
 ```

-#### OpenAI JS SDK
+#### OpenAI JavaScript library
-- OpenAI JS SDK does not support setting context window size, however this can be set for Ollama by passing `num_ctx` directly with a `@ts-expect-error` as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
+- OpenAI JavaScript library does not support setting context window size, however this can be set for Ollama by passing `num_ctx` directly with a `@ts-expect-error` as an undocumented parameter in the OpenAI JavaScript library. [See documentation here](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)

 ```ts
 const chatCompletion = await openai.chat.completions.create({
     messages: [{ role: 'user', content: 'Say this is a test' }],
     model: 'llama3.2',
-    // @ts-expect-error num_ctx is not officially supported
-    num_ctx: 4096,
+    // @ts-expect-error context_window is an additional parameter
+    context_window: 4096,
 })
 ```

@@ -239,7 +239,7 @@ curl http://localhost:11434/v1/chat/completions \
   -d '{
     "model": "llama3.2",
     "messages": [{"role": "user", "content": "Say this is a test"}],
-    "num_ctx": 4096
+    "context_window": 4096
   }'
 ```
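For reference, the compat layer translates `context_window` into Ollama's internal `num_ctx` option (see the Go change in the next file). Below is a minimal sketch of the same setting through Ollama's native Go client; it assumes the `github.com/ollama/ollama/api` package and a local server on the default port, and the model name is illustrative:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	// Reads OLLAMA_HOST, defaulting to http://127.0.0.1:11434.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3.2",
		Messages: []api.Message{{Role: "user", Content: "Say this is a test"}},
		// The native API keeps the internal option name; the OpenAI-compatible
		// endpoint above maps context_window onto this same option.
		Options: map[string]any{"num_ctx": 4096},
	}

	// Chat streams response chunks to the callback until the reply completes.
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		fmt.Print(resp.Message.Content)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}
```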

View File

@@ -86,7 +86,7 @@ type ChatCompletionRequest struct {
 	StreamOptions       *StreamOptions `json:"stream_options"`
 	MaxCompletionTokens *int           `json:"max_completion_tokens"`
 	// Deprecated: Use [ChatCompletionRequest.MaxCompletionTokens]
-	MaxTokens   *int     `json:"max_tokens" deprecated:"use max_completion_tokens instead"`
+	MaxTokens   *int     `json:"max_tokens"`
 	Seed        *int     `json:"seed"`
 	Stop        any      `json:"stop"`
 	Temperature *float64 `json:"temperature"`

@@ -95,7 +95,7 @@ type ChatCompletionRequest struct {
 	TopP           *float64        `json:"top_p"`
 	ResponseFormat *ResponseFormat `json:"response_format"`
 	Tools          []api.Tool      `json:"tools"`
-	NumCtx         *int            `json:"num_ctx"`
+	ContextWindow  *int            `json:"context_window"`
 }

 type ChatCompletion struct {

@@ -478,8 +478,9 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}

-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
+	if r.ContextWindow != nil {
+		slog.Info("context_window in if", "context_window", *r.ContextWindow)
+		options["num_ctx"] = *r.ContextWindow
 	}

 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead

@@ -974,6 +975,7 @@ func ChatMiddleware() gin.HandlerFunc {
 			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))
 			return
 		}

+		slog.Info("num_ctx", "num_ctx", chatReq.Options["num_ctx"])
 		if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))
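Taken together, the two renamed hunks in this file implement a decode-and-map pattern: a pointer field so an omitted `context_window` stays `nil`, then a nil check copies the value into the options map under the internal `num_ctx` key. A self-contained sketch of just that pattern, using illustrative names rather than the actual middleware types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// request mirrors only the renamed field from ChatCompletionRequest above.
type request struct {
	ContextWindow *int `json:"context_window"`
}

func main() {
	var r request
	if err := json.Unmarshal([]byte(`{"context_window": 4096}`), &r); err != nil {
		panic(err)
	}

	options := map[string]any{}
	if r.ContextWindow != nil { // nil means the client omitted the field entirely
		options["num_ctx"] = *r.ContextWindow
	}
	fmt.Println(options) // map[num_ctx:4096]
}
```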

View File

@@ -315,11 +315,11 @@ func TestChatMiddleware(t *testing.T) {
 			},
 		},
 		{
-			name: "chat handler with num_ctx",
+			name: "chat handler with context_window",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
-				"num_ctx": 4096
+				"context_window": 4096
 			}`,
 			req: api.ChatRequest{
 				Model: "test-model",