Compare commits
9 Commits
v0.4.6...brucemacd/
| Author | SHA1 | Date |
|---|---|---|
| | 8f8aac9cd3 | |
| | 2b82c5a8a1 | |
| | 55c3efa900 | |
| | 1aedffad93 | |
| | ff6c2d6dc8 | |
| | d543b282a7 | |
| | 5f8051180e | |
| | 39e29ae5dd | |
| | 30a9f063c9 | |
@@ -346,6 +346,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Web management](https://github.com/lemonit-eric-mao/ollama-web-management) (Web management page)
 - [Promptery](https://github.com/promptery/promptery) (desktop client for Ollama.)
 - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
+- [SpaceLlama](https://github.com/tcsenpai/spacellama) (Firefox and Chrome extension to quickly summarize web pages with ollama in a sidebar)
+- [YouLama](https://github.com/tcsenpai/youlama) (Webapp to quickly summarize any YouTube video, supporting Invidious as well)
+- [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface)
 - [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol)
 - [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app)
 - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
@@ -356,6 +359,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama)
 - [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application avaiable for Mac/Windows/Linux)
 - [Abbey](https://github.com/US-Artificial-Intelligence/abbey) (A configurable AI interface server with notebooks, document storage, and YouTube support)
+- [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow)

 ### Cloud

@@ -146,6 +146,7 @@ type ToolCall struct {
 }

 type ToolCallFunction struct {
+	Index     int                       `json:"index,omitempty"`
 	Name      string                    `json:"name"`
 	Arguments ToolCallFunctionArguments `json:"arguments"`
 }
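For reference, the `omitempty` tag means the new index only shows up in JSON once it has been assigned. A minimal sketch of the resulting serialization, using local stand-ins for the `api` types (the map type behind `Arguments` is an assumption here, not part of this diff):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Local stand-ins for the api types in the hunk above, for illustration only.
type ToolCallFunctionArguments map[string]any

type ToolCallFunction struct {
	Index     int                       `json:"index,omitempty"`
	Name      string                    `json:"name"`
	Arguments ToolCallFunctionArguments `json:"arguments"`
}

func main() {
	fn := ToolCallFunction{
		Index:     1,
		Name:      "get_weather",
		Arguments: ToolCallFunctionArguments{"location": "Paris"},
	}
	b, _ := json.Marshal(fn)
	fmt.Println(string(b))
	// {"index":1,"name":"get_weather","arguments":{"location":"Paris"}}
	// With Index left at its zero value, omitempty drops the field entirely.
}
```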
@@ -8,7 +8,6 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
-	"path/filepath"
 	"strings"
 	"testing"

@@ -180,18 +179,14 @@ Weigh anchor!

 	t.Run("license", func(t *testing.T) {
 		var b bytes.Buffer
-		license, err := os.ReadFile(filepath.Join("..", "LICENSE"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
+		license := "MIT License\nCopyright (c) Ollama\n"
 		if err := showInfo(&api.ShowResponse{
 			Details: api.ModelDetails{
 				Family:            "test",
 				ParameterSize:     "7B",
 				QuantizationLevel: "FP16",
 			},
-			License: string(license),
+			License: license,
 		}, &b); err != nil {
 			t.Fatal(err)
 		}
@@ -49,10 +49,10 @@ Advanced parameters (optional):
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `system`: system message to (overrides what is defined in the `Modelfile`)
 - `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
-- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+- `context` (deprecated): the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory

 #### JSON mode

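With `context` demoted to a deprecated parameter, a typical `/api/generate` call simply omits it. A minimal Go sketch against a local Ollama server on the default port; the model name is a placeholder:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Placeholder model name; substitute any model that is pulled locally.
	body, _ := json.Marshal(map[string]any{
		"model":  "llama3.2",
		"prompt": "Why is the sky blue?",
		"stream": false, // single response object instead of a stream
		// "context" is deprecated; use /api/chat with a messages history
		// for multi-turn conversations instead.
	})

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out["response"])
}
```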
@@ -63,7 +63,7 @@ SYSTEM You are Mario from super mario bros, acting as an assistant.
 To use this:

 1. Save it as a file (e.g. `Modelfile`)
-2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>'`
+2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>`
 3. `ollama run choose-a-model-name`
 4. Start using the model!

@@ -156,7 +156,7 @@ PARAMETER <parameter> <parametervalue>
 | seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
 | stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
 | tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
-| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
+| num_predict | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation) | int | num_predict 42 |
 | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
 | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
 | min_p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 |
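These Modelfile parameters can also be overridden per request through the API's `options` field. A small sketch that only builds such an options payload; the values are illustrative, not recommendations:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Per-request overrides for the Modelfile parameters in the table above.
	options := map[string]any{
		"num_predict": 42, // cap generation length; -1 means infinite generation (the new default)
		"seed":        42, // fixed seed -> repeatable output for the same prompt
		"top_k":       40,
		"top_p":       0.9,
		"stop":        []string{"AI assistant:"},
	}
	b, _ := json.MarshalIndent(map[string]any{"options": options}, "", "  ")
	fmt.Println(string(b)) // body fragment for /api/generate or /api/chat
}
```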
@@ -105,7 +105,7 @@ make apply-patches

 **Pin to new base commit**

-To update to a newer base commit, select the upstream git tag or commit and update `llama/vendoring.env`
+To update to a newer base commit, select the upstream git tag or commit and update `llama/vendoring`

 #### Applying patches

@@ -1,21 +1,35 @@
 import * as fs from 'fs'
-import { exec as cbExec } from 'child_process'
+import { spawn } from 'child_process'
 import * as path from 'path'
-import { promisify } from 'util'

 const app = process && process.type === 'renderer' ? require('@electron/remote').app : require('electron').app
 const ollama = app.isPackaged ? path.join(process.resourcesPath, 'ollama') : path.resolve(process.cwd(), '..', 'ollama')
-const exec = promisify(cbExec)
 const symlinkPath = '/usr/local/bin/ollama'

-export function installed() {
+export function installed(): boolean {
   return fs.existsSync(symlinkPath) && fs.readlinkSync(symlinkPath) === ollama
 }

-export async function install() {
-  const command = `do shell script "mkdir -p ${path.dirname(
-    symlinkPath
-  )} && ln -F -s \\"${ollama}\\" \\"${symlinkPath}\\"" with administrator privileges`
-
-  await exec(`osascript -e '${command}'`)
+function validPath(targetPath: string): boolean {
+  const normalized = path.normalize(targetPath)
+  return !(/[;&|`$(){}[\]<>]/.test(normalized) || normalized.includes('..'))
+}
+
+export async function install(): Promise<void> {
+  if (!validPath(ollama) || !validPath(symlinkPath)) {
+    throw new Error('Invalid path format')
+  }
+
+  await fs.promises.mkdir(path.dirname(symlinkPath), { recursive: true })
+    .catch(err => err.code === 'EEXIST' ? null : Promise.reject(err))
+
+  const process = spawn('osascript', [
+    '-e',
+    `do shell script "ln -F -s '${path.normalize(ollama)}' '${path.normalize(symlinkPath)}'" with administrator privileges`
+  ])
+
+  await new Promise<void>((resolve, reject) => {
+    process.on('error', reject)
+    process.on('close', code => code === 0 ? resolve() : reject(new Error(`Failed with code ${code}`)))
+  })
 }
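The rewritten installer avoids splicing unchecked strings into an AppleScript shell command: paths are validated before being passed to `spawn`. A Go sketch of the same validation idea (not part of this change set, just an illustration of the check):

```go
package main

import (
	"fmt"
	"path/filepath"
	"regexp"
	"strings"
)

// validPath mirrors the TypeScript validPath above: reject shell metacharacters
// and parent-directory traversal before a path is spliced into a shell command.
func validPath(target string) bool {
	normalized := filepath.Clean(target)
	meta := regexp.MustCompile("[;&|`$(){}\\[\\]<>]")
	return !meta.MatchString(normalized) && !strings.Contains(normalized, "..")
}

func main() {
	fmt.Println(validPath("/usr/local/bin/ollama")) // true
	fmt.Println(validPath("/tmp/evil; rm -rf /"))   // false: shell metacharacter
	fmt.Println(validPath("../../etc/passwd"))      // false: path traversal
}
```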
@@ -140,6 +140,7 @@ type CompletionChunk struct {

 type ToolCall struct {
 	ID       string `json:"id"`
+	Index    int    `json:"index"`
 	Type     string `json:"type"`
 	Function struct {
 		Name string `json:"name"`
@@ -206,6 +207,7 @@ func toToolCalls(tc []api.ToolCall) []ToolCall {
 		toolCalls[i].ID = toolCallId()
 		toolCalls[i].Type = "function"
 		toolCalls[i].Function.Name = tc.Function.Name
+		toolCalls[i].Index = tc.Function.Index

 		args, err := json.Marshal(tc.Function.Arguments)
 		if err != nil {
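Clients of the OpenAI-compatible endpoint use `index` to tell streamed tool calls apart. A generic client-side sketch of grouping deltas by index; it illustrates the OpenAI streaming convention rather than Ollama's exact chunking, and the types are local stand-ins:

```go
package main

import "fmt"

// toolCallDelta is a local stand-in for a streamed tool-call fragment.
type toolCallDelta struct {
	Index     int
	Name      string
	Arguments string
}

func main() {
	// Hypothetical deltas arriving across several chunks.
	chunks := []toolCallDelta{
		{Index: 0, Name: "get_weather", Arguments: `{"location":`},
		{Index: 0, Arguments: `"Paris"}`},
		{Index: 1, Name: "get_time", Arguments: `{"timezone":"CET"}`},
	}

	// Aggregate fragments belonging to the same tool call by index.
	calls := map[int]*toolCallDelta{}
	for _, c := range chunks {
		agg, ok := calls[c.Index]
		if !ok {
			agg = &toolCallDelta{Index: c.Index}
			calls[c.Index] = agg
		}
		if c.Name != "" {
			agg.Name = c.Name
		}
		agg.Arguments += c.Arguments
	}

	for i := 0; i < len(calls); i++ {
		fmt.Printf("call %d: %s(%s)\n", i, calls[i].Name, calls[i].Arguments)
	}
}
```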
@@ -195,7 +195,86 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
+		{
+			name: "chat handler with streaming tools",
+			body: `{
+				"model": "test-model",
+				"messages": [
+					{"role": "user", "content": "What's the weather like in Paris?"}
+				],
+				"stream": true,
+				"tools": [{
+					"type": "function",
+					"function": {
+						"name": "get_weather",
+						"description": "Get the current weather",
+						"parameters": {
+							"type": "object",
+							"required": ["location"],
+							"properties": {
+								"location": {
+									"type": "string",
+									"description": "The city and state"
+								},
+								"unit": {
+									"type": "string",
+									"enum": ["celsius", "fahrenheit"]
+								}
+							}
+						}
+					}
+				}]
+			}`,
+			req: api.ChatRequest{
+				Model: "test-model",
+				Messages: []api.Message{
+					{
+						Role:    "user",
+						Content: "What's the weather like in Paris?",
+					},
+				},
+				Tools: []api.Tool{
+					{
+						Type: "function",
+						Function: api.ToolFunction{
+							Name:        "get_weather",
+							Description: "Get the current weather",
+							Parameters: struct {
+								Type       string   `json:"type"`
+								Required   []string `json:"required"`
+								Properties map[string]struct {
+									Type        string   `json:"type"`
+									Description string   `json:"description"`
+									Enum        []string `json:"enum,omitempty"`
+								} `json:"properties"`
+							}{
+								Type:     "object",
+								Required: []string{"location"},
+								Properties: map[string]struct {
+									Type        string   `json:"type"`
+									Description string   `json:"description"`
+									Enum        []string `json:"enum,omitempty"`
+								}{
+									"location": {
+										Type:        "string",
+										Description: "The city and state",
+									},
+									"unit": {
+										Type: "string",
+										Enum: []string{"celsius", "fahrenheit"},
+									},
+								},
+							},
+						},
+					},
+				},
+				Options: map[string]any{
+					"temperature": 1.0,
+					"top_p":       1.0,
+				},
+				Stream: &True,
+			},
+		},
 		{
 			name: "chat handler error forwarding",
 			body: `{
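The new test case corresponds to requests like the following sketch against the OpenAI-compatible endpoint, with streaming and a tool definition enabled; the model name is a placeholder and the default local port is assumed:

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Placeholder model name; any locally pulled model with tool support will do.
	body, _ := json.Marshal(map[string]any{
		"model":    "llama3.2",
		"stream":   true,
		"messages": []map[string]string{{"role": "user", "content": "What's the weather like in Paris?"}},
		"tools": []map[string]any{{
			"type": "function",
			"function": map[string]any{
				"name":        "get_weather",
				"description": "Get the current weather",
				"parameters": map[string]any{
					"type":     "object",
					"required": []string{"location"},
					"properties": map[string]any{
						"location": map[string]any{"type": "string", "description": "The city and state"},
					},
				},
			},
		}},
	})

	resp, err := http.Post("http://localhost:11434/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Streamed responses arrive as server-sent events: "data: {...}" lines.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
			fmt.Println(strings.TrimPrefix(line, "data: "))
		}
	}
}
```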
@@ -251,6 +251,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {

 	var b bytes.Buffer
 	if req.Context != nil {
+		slog.Warn("the context field is deprecated and will be removed in a future version of Ollama")
 		s, err := r.Detokenize(c.Request.Context(), req.Context)
 		if err != nil {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -1469,7 +1470,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 	go func() {
 		defer close(ch)
 		var sb strings.Builder
-		var hasToolCalls bool
+		var toolCallIndex int = 0
 		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
 			Prompt: prompt,
 			Images: images,
@@ -1509,16 +1510,19 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			sb.WriteString(r.Content)
 			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
 				res.Message.ToolCalls = toolCalls
+				for i := range toolCalls {
+					toolCalls[i].Function.Index = toolCallIndex
+					toolCallIndex++
+				}
 				res.Message.Content = ""
 				sb.Reset()
-				hasToolCalls = true
 				ch <- res
 				return
 			}

 			if r.Done {
 				// Send any remaining content if no tool calls were detected
-				if !hasToolCalls {
+				if toolCallIndex == 0 {
 					res.Message.Content = sb.String()
 				}
 				ch <- res
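A standalone sketch of the counter pattern introduced above: `toolCallIndex` outlives each parse, so indices keep increasing across every batch of tool calls detected in a single streamed response, and a zero count doubles as the old `hasToolCalls` check. Types and batch contents are illustrative:

```go
package main

import "fmt"

// toolCall is a local stand-in for the parsed tool calls in the hunk above.
type toolCall struct {
	Name  string
	Index int
}

func main() {
	batches := [][]toolCall{
		{{Name: "get_weather"}, {Name: "get_time"}}, // first chunk parses two calls
		{{Name: "get_news"}},                        // a later chunk parses one more
	}

	toolCallIndex := 0
	for _, calls := range batches {
		// Assign a unique, monotonically increasing index to every call.
		for i := range calls {
			calls[i].Index = toolCallIndex
			toolCallIndex++
		}
		fmt.Println(calls)
	}

	// If toolCallIndex is still zero when the stream finishes, no tool calls
	// were detected and the accumulated text content is sent instead.
	fmt.Println("tool calls seen:", toolCallIndex)
}
```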