context_window and addressing comments

parent 53d2cf37d2
commit 61a5254115
@@ -204,31 +204,31 @@ curl http://localhost:11434/v1/embeddings \
 }'
 ```

-## Extra Arguments
+## Extra arguments

-### Setting Context Window Size
-- `num_ctx` parameter can be used to set the context window for the model
+### Setting context window size
+- The `context_window` parameter can be used to set the context window for the model

-#### OpenAI Python SDK
-- OpenAI Python SDK does not support setting context window size, however this can be set for Ollama through the `extra_body` parameter
+#### OpenAI Python library
+- The OpenAI Python library does not support setting the context window size; however, this can be set for Ollama through the `extra_body` parameter

 ```py
-completion = client.beta.chat.completions.create(
+completion = client.chat.completions.create(
     model="llama3.1:8b",
     messages=[{"role": "user", "content": "Say this is a test"}],
-    extra_body={"num_ctx": 4096},
+    extra_body={"context_window": 4096},
 )
 ```
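For reference, a complete, runnable version of the Python call above might look like the following. This is a minimal sketch rather than part of the commit: it assumes a local Ollama server on the default port and the `openai` package installed; the `base_url` and placeholder `api_key` follow Ollama's OpenAI-compatibility setup.

```py
from openai import OpenAI

# Ollama serves an OpenAI-compatible API under /v1. The client requires an
# api_key value, but Ollama ignores it, so any placeholder works.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    # extra_body fields are merged into the top level of the request JSON,
    # so the server receives "context_window" next to "model" and "messages".
    extra_body={"context_window": 4096},
)
print(completion.choices[0].message.content)
```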
-#### OpenAI JS SDK
-- OpenAI JS SDK does not support setting context window size, however this can be set for Ollama by passing `num_ctx` directly with a `@ts-expect-error` as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
+#### OpenAI JavaScript library
+- The OpenAI JavaScript library does not support setting the context window size; however, this can be set for Ollama by passing `context_window` directly, with a `@ts-expect-error`, as an undocumented parameter. [See the documentation here](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)

 ```ts
 const chatCompletion = await openai.chat.completions.create({
   messages: [{ role: 'user', content: 'Say this is a test' }],
   model: 'llama3.2',
-  // @ts-expect-error num_ctx is not officially supported
-  num_ctx: 4096,
+  // @ts-expect-error context_window is an additional, undocumented parameter
+  context_window: 4096,
 })
 ```

@@ -239,7 +239,7 @@ curl http://localhost:11434/v1/chat/completions \
   -d '{
     "model": "llama3.2",
     "messages": [{"role": "user", "content": "Say this is a test"}],
-    "num_ctx": 4096
+    "context_window": 4096
   }'
 ```

@@ -86,7 +86,7 @@ type ChatCompletionRequest struct {
 	StreamOptions       *StreamOptions `json:"stream_options"`
 	MaxCompletionTokens *int           `json:"max_completion_tokens"`
 	// Deprecated: Use [ChatCompletionRequest.MaxCompletionTokens]
-	MaxTokens *int `json:"max_tokens" deprecated:"use max_completion_tokens instead"`
+	MaxTokens *int `json:"max_tokens"`
 	Seed        *int     `json:"seed"`
 	Stop        any      `json:"stop"`
 	Temperature *float64 `json:"temperature"`
@@ -95,7 +95,7 @@ type ChatCompletionRequest struct {
 	TopP           *float64        `json:"top_p"`
 	ResponseFormat *ResponseFormat `json:"response_format"`
 	Tools          []api.Tool      `json:"tools"`
-	NumCtx *int `json:"num_ctx"`
+	ContextWindow *int `json:"context_window"`
 }

 type ChatCompletion struct {
@@ -478,8 +478,9 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}

-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
+	if r.ContextWindow != nil {
+		slog.Info("context_window in if", "context_window", *r.ContextWindow)
+		options["num_ctx"] = *r.ContextWindow
 	}

 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
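Taken together, the struct field and the mapping above define the wire contract: the compatibility endpoint accepts a top-level `context_window` key and copies it into Ollama's native `num_ctx` option. A quick end-to-end check outside of any SDK, as an illustrative sketch (not part of the commit; it assumes a local Ollama server and the third-party `requests` package):

```py
import requests

# POST an OpenAI-style body directly; "context_window" rides at the top
# level of the JSON and is translated server-side into options["num_ctx"].
resp = requests.post(
    "http://localhost:11434/v1/chat/completions",
    json={
        "model": "llama3.2",
        "messages": [{"role": "user", "content": "Say this is a test"}],
        "context_window": 4096,
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```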
@@ -974,6 +975,7 @@ func ChatMiddleware() gin.HandlerFunc {
 			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))
 			return
 		}
+		slog.Info("num_ctx", "num_ctx", chatReq.Options["num_ctx"])

 		if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))
@@ -315,11 +315,11 @@ func TestChatMiddleware(t *testing.T) {
 				},
 			},
 		},
 		{
-			name: "chat handler with num_ctx",
+			name: "chat handler with context_window",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
-				"num_ctx": 4096
+				"context_window": 4096
 			}`,
 			req: api.ChatRequest{
 				Model: "test-model",