Compare commits

10 Commits

drifkin/ar...parth/set-

| SHA1 |
|---|
| b4de2e9189 |
| 61a5254115 |
| 53d2cf37d2 |
| 75f88e7aac |
| 4982089c84 |
| 8c231b0826 |
| 16abd181a9 |
| 5c2f35d846 |
| 6de3227841 |
| 35e97db03b |
@@ -204,6 +204,45 @@ curl http://localhost:11434/v1/embeddings \
    }'
```

## Extra arguments

### Setting context length
- The `context_length` parameter can be used to set the context length for the model.

#### OpenAI Python library
- The OpenAI Python library does not support setting the context length directly; however, it can be set for Ollama through the `extra_body` parameter.

```py
completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    extra_body={"context_length": 4096},
)
```

#### OpenAI JavaScript library
- The OpenAI JavaScript library does not support setting the context length directly; however, it can be set for Ollama by passing `context_length` as an undocumented parameter with a `@ts-expect-error` comment. [See the documentation here](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests).

```ts
const chatCompletion = await openai.chat.completions.create({
    messages: [{ role: 'user', content: 'Say this is a test' }],
    model: 'llama3.2',
    // @ts-expect-error context_length is an additional parameter
    context_length: 4096,
})
```

#### `curl`
```shell
curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "llama3.2",
        "messages": [{"role": "user", "content": "Say this is a test"}],
        "context_length": 4096
    }'
```
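For comparison, the Go change further down translates `context_length` into Ollama's native `num_ctx` option, so a rough equivalent against Ollama's native `/api/chat` endpoint would look something like the following sketch (model name and value are placeholders):

```shell
# Native Ollama API: the context size is passed as num_ctx inside "options"
curl http://localhost:11434/api/chat \
    -d '{
        "model": "llama3.2",
        "messages": [{"role": "user", "content": "Say this is a test"}],
        "options": {"num_ctx": 4096},
        "stream": false
    }'
```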

## Endpoints

### `/v1/chat/completions`
@@ -213,6 +252,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] Chat completions
- [x] Streaming
- [x] JSON mode
- [x] Structured outputs
- [x] Reproducible outputs
- [x] Vision
- [x] Tools
@@ -339,27 +379,3 @@ curl http://localhost:11434/v1/chat/completions \
    }'
```

### Setting the context size

The OpenAI API does not have a way of setting the context size for a model. If you need to change the context size, create a `Modelfile` which looks like:

```modelfile
FROM <some model>
PARAMETER num_ctx <context size>
```

Use the `ollama create mymodel` command to create a new model with the updated context size.
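A minimal sketch of that step, assuming the `Modelfile` above is saved in the current directory:

```shell
# Build a new model named "mymodel" from the Modelfile with the larger num_ctx
ollama create mymodel -f Modelfile
```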
Call the API with the updated model name:

```shell
curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "mymodel",
        "messages": [
            {
                "role": "user",
                "content": "Hello!"
            }
        ]
    }'
```

@@ -84,6 +84,8 @@ type ChatCompletionRequest struct {
	Messages            []Message      `json:"messages"`
	Stream              bool           `json:"stream"`
	StreamOptions       *StreamOptions `json:"stream_options"`
	MaxCompletionTokens *int           `json:"max_completion_tokens"`
	// Deprecated: Use [ChatCompletionRequest.MaxCompletionTokens]
	MaxTokens        *int            `json:"max_tokens"`
	Seed             *int            `json:"seed"`
	Stop             any             `json:"stop"`
@@ -93,6 +95,7 @@ type ChatCompletionRequest struct {
	TopP             *float64        `json:"top_p"`
	ResponseFormat   *ResponseFormat `json:"response_format"`
	Tools            []api.Tool      `json:"tools"`
	ContextLength    *int            `json:"context_length"`
}

type ChatCompletion struct {
@@ -475,8 +478,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
		options["stop"] = stops
	}

	if r.ContextLength != nil {
		options["num_ctx"] = *r.ContextLength
	}

	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
		r.MaxCompletionTokens = r.MaxTokens
	}

	if r.MaxCompletionTokens != nil {
		options["num_predict"] = *r.MaxCompletionTokens
	}

	if r.Temperature != nil {
@@ -962,6 +974,7 @@ func ChatMiddleware() gin.HandlerFunc {
			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))
			return
		}
		slog.Info("num_ctx", "num_ctx", chatReq.Options["num_ctx"])

		if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))

@@ -7,7 +7,6 @@ import (
	"io"
	"net/http"
	"net/http/httptest"
	"reflect"
	"strings"
	"testing"
	"time"
@@ -315,6 +314,42 @@ func TestChatMiddleware(t *testing.T) {
				Stream: &True,
			},
		},
		{
			name: "chat handler with context_length",
			body: `{
				"model": "test-model",
				"messages": [{"role": "user", "content": "Hello"}],
				"context_length": 4096
			}`,
			req: api.ChatRequest{
				Model:    "test-model",
				Messages: []api.Message{{Role: "user", Content: "Hello"}},
				Options: map[string]any{
					"num_ctx":     4096.0, // float because JSON doesn't distinguish between float and int
					"temperature": 1.0,
					"top_p":       1.0,
				},
				Stream: &False,
			},
		},
		{
			name: "chat handler with max_completion_tokens",
			body: `{
				"model": "test-model",
				"messages": [{"role": "user", "content": "Hello"}],
				"max_completion_tokens": 2
			}`,
			req: api.ChatRequest{
				Model:    "test-model",
				Messages: []api.Message{{Role: "user", Content: "Hello"}},
				Options: map[string]any{
					"num_predict": 2.0, // float because JSON doesn't distinguish between float and int
					"temperature": 1.0,
					"top_p":       1.0,
				},
				Stream: &False,
			},
		},
		{
			name: "chat handler error forwarding",
			body: `{
@@ -359,7 +394,7 @@ func TestChatMiddleware(t *testing.T) {
			return
		}
		if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
			t.Fatalf("requests did not match: %+v", diff)
			t.Fatalf("requests did not match (-want +got):\n%s", diff)
		}
		if diff := cmp.Diff(tc.err, errResp); diff != "" {
			t.Fatalf("errors did not match for %s:\n%s", tc.name, diff)
@@ -493,12 +528,14 @@ func TestCompletionsMiddleware(t *testing.T) {
			}
		}

		if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
			t.Fatal("requests did not match")
		if capturedRequest != nil {
			if diff := cmp.Diff(tc.req, *capturedRequest); diff != "" {
				t.Fatalf("requests did not match (-want +got):\n%s", diff)
			}
		}

		if !reflect.DeepEqual(tc.err, errResp) {
			t.Fatal("errors did not match")
		if diff := cmp.Diff(tc.err, errResp); diff != "" {
			t.Fatalf("errors did not match (-want +got):\n%s", diff)
		}

		capturedRequest = nil
@@ -577,12 +614,14 @@ func TestEmbeddingsMiddleware(t *testing.T) {
			}
		}

		if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
			t.Fatal("requests did not match")
		if capturedRequest != nil {
			if diff := cmp.Diff(tc.req, *capturedRequest); diff != "" {
				t.Fatalf("requests did not match (-want +got):\n%s", diff)
			}
		}

		if !reflect.DeepEqual(tc.err, errResp) {
			t.Fatal("errors did not match")
		if diff := cmp.Diff(tc.err, errResp); diff != "" {
			t.Fatalf("errors did not match (-want +got):\n%s", diff)
		}

		capturedRequest = nil
@@ -656,8 +695,8 @@ func TestListMiddleware(t *testing.T) {
			t.Fatalf("failed to unmarshal actual response: %v", err)
		}

		if !reflect.DeepEqual(expected, actual) {
			t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
		if diff := cmp.Diff(expected, actual); diff != "" {
			t.Errorf("responses did not match (-want +got):\n%s", diff)
		}
	}
}
@@ -722,8 +761,8 @@ func TestRetrieveMiddleware(t *testing.T) {
			t.Fatalf("failed to unmarshal actual response: %v", err)
		}

		if !reflect.DeepEqual(expected, actual) {
			t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
		if diff := cmp.Diff(expected, actual); diff != "" {
			t.Errorf("responses did not match (-want +got):\n%s", diff)
		}
	}
}