Compare commits: v0.6.7 ... parth/pyth
2 commits:
- 23e8ac9428
- 611d3a17ed
README.md (35 lines changed)
@@ -285,7 +285,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Saddle](https://github.com/jikkuatwork/saddle)
- [TagSpaces](https://www.tagspaces.org) (A platform for file-based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions)
- [TagSpaces](https://www.tagspaces.org) (A platform for file based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
@@ -325,14 +325,14 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama)
- [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
- [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support, and multiple large language models.)
- [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support and multiple large language models.)
- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
- [Shinkai Desktop](https://github.com/dcSpark/shinkai-apps) (Two click install Local AI using Ollama + Files + RAG)
- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord)
- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in discord )
- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
- [R2R](https://github.com/SciPhi-AI/R2R) (Open-source RAG engine)
- [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy-to-use GUI with sample custom LLM for Drivers Education)
- [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy to use GUI with sample custom LLM for Drivers Education)
- [OpenGPA](https://opengpa.org) (Open-source offline-first Enterprise Agentic Application)
- [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
@@ -341,16 +341,16 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
- [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows, and Mac)
- [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for Linux and macOS made with GTK4 and Adwaita)
- [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows and Mac)
- [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for linux and macos made with GTK4 and Adwaita)
- [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) (AutoGPT Ollama integration)
- [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
- [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot, and Ollama4j
- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j
- [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
- [Cline](https://github.com/cline/cline) - Formerly known as Claude Dev is a VSCode extension for multi-file/whole-repo coding
- [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
- [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama)
@@ -368,7 +368,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface)
- [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol)
- [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app)
- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard, and said in the meetings)
- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
@@ -386,7 +386,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)
- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
- [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models)
@@ -399,7 +399,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).)
- [Writeopia](https://github.com/Writeopia/Writeopia) (Text editor with integration with Ollama)
- [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
- [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)

### Cloud

@@ -441,7 +440,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama
- [DeepShell](https://github.com/Abyss-c0re/deepshell) Your self-hosted AI assistant. Interactive Shell, Files and Folders analysis.
- [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal.
- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull and download models from Ollama Registry in your terminal.
- [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform)

### Apple Vision Pro

@@ -516,7 +515,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/)
- [GoLamify](https://github.com/prasad89/golamify)
- [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API)
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
- [Ollama for Zig](https://github.com/dravenk/ollama-zig)
- [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)
@@ -525,11 +524,11 @@ See the [API documentation](./docs/api.md) for all endpoints.

### Mobile

- [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS, and iPad)
- [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS and iPad)
- [Enchanted](https://github.com/AugustDev/enchanted)
- [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
- [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) (No need for Termux, start the Ollama service with one click on an Android device)
- [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)

@@ -553,7 +552,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot)
- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
@@ -563,8 +562,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
- [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467)
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server)
- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front-end Open WebUI service.)
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.)
- [node-red-contrib-ollama](https://github.com/jakubburkiewicz/node-red-contrib-ollama)
- [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. Includes secure storage for user prompts.)
- [vnc-lm](https://github.com/jake83741/vnc-lm) (Discord bot for messaging with LLMs through Ollama and LiteLLM. Seamlessly move between local and flagship models.)

@@ -1407,6 +1407,7 @@ func NewCLI() *cobra.Command {
envVars["OLLAMA_LLM_LIBRARY"],
envVars["OLLAMA_GPU_OVERHEAD"],
envVars["OLLAMA_LOAD_TIMEOUT"],
envVars["OLLAMA_CONTEXT_LENGTH"],
})
default:
appendEnvDocs(cmd, envs)

@@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md).

## How can I specify the context window size?

By default, Ollama uses a context window size of 4096 tokens.
By default, Ollama uses a context window size of 4096 tokens, unless you have a single GPU with <= 4 GB of VRAM, in which case it will default to 2048 tokens.

This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:

@@ -31,7 +31,7 @@ OLLAMA_CONTEXT_LENGTH=8192 ollama serve
To change this when using `ollama run`, use `/set parameter`:

```shell
/set parameter num_ctx 4096
/set parameter num_ctx 8192
```

When using the API, specify the `num_ctx` parameter:
@@ -41,7 +41,7 @@ curl http://localhost:11434/api/generate -d '{
"model": "llama3.2",
"prompt": "Why is the sky blue?",
"options": {
"num_ctx": 4096
"num_ctx": 8192
}
}'
```

@@ -169,7 +169,7 @@ var (
// Enable the new Ollama engine
NewEngine = Bool("OLLAMA_NEW_ENGINE")
// ContextLength sets the default context length
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
ContextLength = Int64("OLLAMA_CONTEXT_LENGTH", -1)
)

func String(s string) func() string {
@@ -227,6 +227,20 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
}
}

func Int64(key string, defaultValue int64) func() int64 {
return func() int64 {
if s := Var(key); s != "" {
if n, err := strconv.ParseInt(s, 10, 64); err != nil {
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
} else {
return n
}
}

return defaultValue
}
}

// Set aside VRAM per GPU
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)

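A brief aside on the sentinel: with the default changed from 4096 to -1, an unset `OLLAMA_CONTEXT_LENGTH` can be told apart from an explicit value, and the scheduler (see the sched.go hunks further down) resolves -1 to 4096, or to 2048 on a single small GPU, at load time. A minimal, hypothetical sketch of how the accessor behaves on this branch, assuming the `github.com/ollama/ollama/envconfig` import path used elsewhere in the repository:

```go
package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Unset: Int64 falls through to the sentinel default (-1); the scheduler
	// later resolves that to 4096, or 2048 on a single small GPU.
	os.Unsetenv("OLLAMA_CONTEXT_LENGTH")
	fmt.Println(envconfig.ContextLength()) // -1

	// Set to a parseable integer: that value is returned as-is.
	os.Setenv("OLLAMA_CONTEXT_LENGTH", "8192")
	fmt.Println(envconfig.ContextLength()) // 8192

	// Unparseable values log a warning and fall back to the default.
	os.Setenv("OLLAMA_CONTEXT_LENGTH", "two thousand")
	fmt.Println(envconfig.ContextLength()) // -1
}
```
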
@@ -255,7 +269,7 @@ func AsMap() map[string]EnvVar {
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default 4096 or 2048 with low VRAM)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},

// Informational

@@ -278,9 +278,9 @@ func TestVar(t *testing.T) {
}

func TestContextLength(t *testing.T) {
cases := map[string]uint{
"": 4096,
"2048": 2048,
cases := map[string]int64{
"": -1,
"4096": 4096,
}

for k, v := range cases {

@@ -531,12 +531,11 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {

var s uint64
for _, t := range ts {
t.Offset = s
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
if err := ggufWriteTensorInfo(ws, t); err != nil {
return err
}
s += t.Size()
s += uint64(ggufPadding(int64(s), int64(alignment)))
}

for _, t := range ts {

@@ -1,63 +0,0 @@
package ggml

import (
"bytes"
"os"
"slices"
"testing"

"github.com/google/go-cmp/cmp"
)

func TestWriteGGUF(t *testing.T) {
w, err := os.CreateTemp(t.TempDir(), "*.bin")
if err != nil {
t.Fatal(err)
}
defer w.Close()

if err := WriteGGUF(w, KV{
"general.alignment": uint32(16),
}, []Tensor{
{Name: "test.0", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.1", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.2", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.3", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.4", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
{Name: "test.5", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
}); err != nil {
t.Fatal(err)
}

r, err := os.Open(w.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()

ff, _, err := Decode(r, 0)
if err != nil {
t.Fatal(err)
}

if diff := cmp.Diff(ff.KV(), KV{
"general.alignment": uint32(16),
"general.parameter_count": uint64(36),
}); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}

if diff := cmp.Diff(ff.Tensors(), Tensors{
Offset: 336,
items: []*Tensor{
{Name: "test.0", Offset: 0, Shape: []uint64{2, 3}},
{Name: "test.1", Offset: 32, Shape: []uint64{2, 3}},
{Name: "test.2", Offset: 64, Shape: []uint64{2, 3}},
{Name: "test.3", Offset: 96, Shape: []uint64{2, 3}},
{Name: "test.4", Offset: 128, Shape: []uint64{2, 3}},
{Name: "test.5", Offset: 160, Shape: []uint64{2, 3}},
},
}, cmp.AllowUnexported(Tensors{})); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}
}

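For reference, the offsets asserted in the TestWriteGGUF quoted above follow directly from the alignment arithmetic: each test tensor holds 2×3 float32 values (24 bytes), and with `general.alignment` set to 16 every subsequent tensor starts at the next multiple of 16. A small illustrative sketch of that arithmetic (not part of the diff):

```go
package main

import "fmt"

// alignUp rounds n up to the next multiple of align.
func alignUp(n, align uint64) uint64 {
	return (n + align - 1) / align * align
}

func main() {
	const size, align = 2 * 3 * 4, 16 // 24-byte tensors, 16-byte alignment

	var offset uint64
	for i := 0; i < 6; i++ {
		fmt.Printf("test.%d at offset %d\n", i, offset) // 0, 32, 64, 96, 128, 160
		offset = alignUp(offset+size, align)
	}
}
```
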
go.mod (12 lines changed)
@@ -11,7 +11,7 @@ require (
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.9.0
github.com/x448/float16 v0.8.4
golang.org/x/sync v0.12.0
golang.org/x/sync v0.11.0
)

require (
@@ -70,12 +70,12 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.36.0
golang.org/x/crypto v0.33.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/net v0.38.0 // indirect
golang.org/x/sys v0.31.0
golang.org/x/term v0.30.0
golang.org/x/text v0.23.0
golang.org/x/net v0.35.0 // indirect
golang.org/x/sys v0.30.0
golang.org/x/term v0.29.0
golang.org/x/text v0.22.0
google.golang.org/protobuf v1.34.1
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go.sum (24 lines changed)
@@ -214,8 +214,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -257,8 +257,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -268,8 +268,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -285,17 +285,17 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

@@ -34,15 +34,13 @@ func cosineSimilarity[V float32 | float64](v1, v2 []V) V {
func TestAllMiniLMEmbeddings(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

req := api.EmbeddingRequest{
Model: "all-minilm",
Prompt: "why is the sky blue?",
}

res, err := embeddingTestHelper(ctx, client, t, req)
res, err := embeddingTestHelper(ctx, t, req)

if err != nil {
t.Fatalf("error: %v", err)
@@ -64,15 +62,13 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
func TestAllMiniLMEmbed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

req := api.EmbedRequest{
Model: "all-minilm",
Input: "why is the sky blue?",
}

res, err := embedTestHelper(ctx, client, t, req)
res, err := embedTestHelper(ctx, t, req)

if err != nil {
t.Fatalf("error: %v", err)
@@ -102,15 +98,13 @@ func TestAllMiniLMEmbed(t *testing.T) {
func TestAllMiniLMBatchEmbed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

req := api.EmbedRequest{
Model: "all-minilm",
Input: []string{"why is the sky blue?", "why is the grass green?"},
}

res, err := embedTestHelper(ctx, client, t, req)
res, err := embedTestHelper(ctx, t, req)

if err != nil {
t.Fatalf("error: %v", err)
@@ -150,8 +144,6 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
func TestAllMiniLMEmbedTruncate(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()

truncTrue, truncFalse := true, false

@@ -190,7 +182,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
res := make(map[string]*api.EmbedResponse)

for _, req := range reqs {
response, err := embedTestHelper(ctx, client, t, req.Request)
response, err := embedTestHelper(ctx, t, req.Request)
if err != nil {
t.Fatalf("error: %v", err)
}
@@ -206,7 +198,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
}

// check that truncate set to false returns an error if context length is exceeded
_, err := embedTestHelper(ctx, client, t, api.EmbedRequest{
_, err := embedTestHelper(ctx, t, api.EmbedRequest{
Model: "all-minilm",
Input: "why is the sky blue?",
Truncate: &truncFalse,
@@ -218,7 +210,9 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
}
}

func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("failed to pull model %s: %v", req.Model, err)
}
@@ -232,7 +226,9 @@ func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T,
return response, nil
}

func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("failed to pull model %s: %v", req.Model, err)
}

@@ -329,13 +329,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
}

ggmlPaths := []string{discover.LibOllamaPath}
if len(compatible) > 0 {
c := compatible[0]
if libpath, ok := libs[c]; ok {
slog.Debug("adding gpu library", "path", libpath)
libraryPaths = append(libraryPaths, libpath)
ggmlPaths = append(ggmlPaths, libpath)
}
}

@@ -371,8 +369,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
s.cmd.Stderr = s.status
s.cmd.SysProcAttr = LlamaServerSysProcAttr

s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator)))

envWorkarounds := [][2]string{}
for _, gpu := range gpus {
envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...)
@@ -410,8 +406,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
if envconfig.Debug() {
filteredEnv := []string{}
for _, ev := range s.cmd.Env {
if strings.HasPrefix(ev, "OLLAMA_") ||
strings.HasPrefix(ev, "CUDA_") ||
if strings.HasPrefix(ev, "CUDA_") ||
strings.HasPrefix(ev, "ROCR_") ||
strings.HasPrefix(ev, "ROCM_") ||
strings.HasPrefix(ev, "HIP_") ||

@@ -57,20 +57,26 @@ var OnceLoad = sync.OnceFunc(func() {
exe = "."
}

var value string
// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often
// set by the parent process, however, use a default value
// if the environment variable is not set.
var name, value string
switch runtime.GOOS {
case "darwin":
// On macOS, DYLD_LIBRARY_PATH is often not set, so
// we use the directory of the executable as the default.
name = "DYLD_LIBRARY_PATH"
value = filepath.Dir(exe)
case "windows":
name = "PATH"
value = filepath.Join(filepath.Dir(exe), "lib", "ollama")
default:
name = "LD_LIBRARY_PATH"
value = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
}

// Avoid potentially loading incompatible GGML libraries
paths, ok := os.LookupEnv("OLLAMA_LIBRARY_PATH")
paths, ok := os.LookupEnv(name)
if !ok {
slog.Debug("OLLAMA_LIBRARY_PATH not set, falling back to default", "search", value)
paths = value
}

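To make the fallback above concrete: when the variable named for the current OS is unset, the default search path is derived from the executable's location. A purely illustrative sketch, assuming a hypothetical install at /usr/local/bin/ollama (the paths below are examples, not values from the diff):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Assume, for illustration only, that the server binary lives here:
	exeDir := filepath.Dir("/usr/local/bin/ollama") // "/usr/local/bin"

	// darwin: DYLD_LIBRARY_PATH defaults to the executable's directory.
	fmt.Println(exeDir) // /usr/local/bin

	// windows: PATH defaults to <exe dir>/lib/ollama.
	fmt.Println(filepath.Join(exeDir, "lib", "ollama")) // /usr/local/bin/lib/ollama

	// linux and others: LD_LIBRARY_PATH defaults to <exe dir>/../lib/ollama,
	// which filepath.Join resolves to the sibling lib directory.
	fmt.Println(filepath.Join(exeDir, "..", "lib", "ollama")) // /usr/local/lib/ollama
}
```
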
@@ -723,9 +723,7 @@ func (m *multiLPath) String() string {
return strings.Join(*m, ", ")
}

// TODO(jessegross): This is causing tensor allocation failures with large batches when not offloaded
// to the GPU
/*func (s *Server) reserveWorstCaseGraph() error {
func (s *Server) reserveWorstCaseGraph() error {
ctx := s.model.Backend().NewContext()
defer ctx.Close()

@@ -768,7 +766,7 @@ func (m *multiLPath) String() string {
}

return nil
}*/
}

func (s *Server) loadModel(
ctx context.Context,
@@ -805,10 +803,10 @@ func (s *Server) loadModel(
s.seqs = make([]*Sequence, s.parallel)
s.seqsSem = semaphore.NewWeighted(int64(s.parallel))

/*err = s.reserveWorstCaseGraph()
err = s.reserveWorstCaseGraph()
if err != nil {
panic(err)
}*/
}

s.status = llm.ServerStatusReady
s.ready.Done()

server/python_tools.go (new file, 226 lines)
@@ -0,0 +1,226 @@
package server

import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/ollama/ollama/api"
)

var (
pythonFuncRegex = regexp.MustCompile(`(\w+)\((.*?)\)`)
braces = map[rune]rune{
'[': ']',
'{': '}',
'(': ')',
'"': '"',
'\'': '\'',
}
)

// parsePythonValue converts a Python value string to its appropriate Go type
func parsePythonValue(value string) (any, error) {
value = strings.TrimSpace(value)

// string
if (strings.HasPrefix(value, "\"") && strings.HasSuffix(value, "\"")) ||
(strings.HasPrefix(value, "'") && strings.HasSuffix(value, "'")) {
// Remove quotes
result := value[1 : len(value)-1]
return result, nil
}

// bool
switch strings.ToLower(value) {
case "true":
return true, nil
case "false":
return false, nil
case "none":
return nil, nil
}

// int
if i, err := strconv.Atoi(value); err == nil {
return i, nil
}

// float
if f, err := strconv.ParseFloat(value, 64); err == nil {
return f, nil
}

// list
if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
listStr := value[1 : len(value)-1]
var list []any
stack := []rune{}
start := 0

for i, char := range listStr {
if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
stack = stack[:len(stack)-1]
} else if _, ok := braces[char]; ok {
stack = append(stack, char)
}

if len(stack) == 0 && (char == ',' || i == len(listStr)-1) {
end := i
if i == len(listStr)-1 {
end = i + 1
}
item := strings.TrimSpace(listStr[start:end])
if val, err := parsePythonValue(item); err == nil {
list = append(list, val)
} else {
return nil, fmt.Errorf("invalid list item: %s", item)
}
start = i + 1
}
}
return list, nil
}

// dictionary
if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") && strings.Contains(value, ":") {
dictStr := value[1 : len(value)-1]
dict := make(map[any]any)
stack := []rune{}
start := 0
for i, char := range dictStr {
if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
stack = stack[:len(stack)-1]
} else if _, ok := braces[char]; ok {
stack = append(stack, char)
}
if len(stack) == 0 && (char == ',' || i == len(dictStr)-1) {
end := i
if i == len(dictStr)-1 {
end = i + 1
}
item := strings.TrimSpace(dictStr[start:end])
kv := strings.SplitN(item, ":", 2)
if len(kv) != 2 {
return nil, fmt.Errorf("invalid dictionary key-value pair: %s", item)
}

key, err := parsePythonValue(strings.TrimSpace(kv[0]))
if err != nil {
return nil, fmt.Errorf("invalid dictionary key: %s", kv[0])
}

val, err := parsePythonValue(strings.TrimSpace(kv[1]))
if err != nil {
return nil, fmt.Errorf("invalid dictionary value: %s", kv[1])
}

dict[key] = val
start = i + 1
}
}
return dict, nil
}

// sets (stored as lists)
if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
setStr := value[1 : len(value)-1]
var list []any
stack := []rune{}
start := 0
for i, char := range setStr {
if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
stack = stack[:len(stack)-1]
} else if _, ok := braces[char]; ok {
stack = append(stack, char)
}
if len(stack) == 0 && (char == ',' || i == len(setStr)-1) {
end := i
if i == len(setStr)-1 {
end = i + 1
}
item := strings.TrimSpace(setStr[start:end])
if val, err := parsePythonValue(item); err == nil {
list = append(list, val)
} else {
return nil, fmt.Errorf("invalid set item: %s", item)
}
start = i + 1
}
}
return list, nil
}

return nil, fmt.Errorf("invalid Python value: %s", value)
}

// parsePythonToolCall parses Python function calls from a string
// it supports keyword arguments, as well as multiple functions in a single string
func parsePythonToolCall(s string) ([]api.ToolCall, error) {
matches := pythonFuncRegex.FindAllStringSubmatchIndex(s, -1)
if len(matches) == 0 {
return nil, fmt.Errorf("no Python function calls found")
}

var toolCalls []api.ToolCall
for _, match := range matches {
name := s[match[2]:match[3]]
args := s[match[4]:match[5]]
var arguments api.ToolCallFunctionArguments
if len(args) == 0 {
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: name,
},
})
continue
}

start := 0
stack := []rune{}
for i, char := range args {
if len(stack) != 0 && char == braces[stack[len(stack)-1]] {
stack = stack[:len(stack)-1]
} else if _, ok := braces[char]; ok {
stack = append(stack, char)
}
if len(stack) == 0 && (char == ',' || i == len(args)-1) {
end := i
if i == len(args)-1 {
end = i + 1
}
kv := strings.SplitN(args[start:end], "=", 2)
if len(kv) == 2 {
key := strings.TrimSpace(kv[0])
valueStr := strings.TrimSpace(kv[1])

// Parse the value into appropriate type
value, err := parsePythonValue(valueStr)
if err != nil {
return nil, fmt.Errorf("failed to parse value for key %q: %v", key, err)
}

arguments[key] = value
} else {
return nil, fmt.Errorf("invalid argument format: %q", args[start:end])
}
start = i + 1
}
}

if len(arguments) > 0 {
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: name,
Arguments: arguments,
},
})
}
}

if len(toolCalls) > 0 {
return toolCalls, nil
}
return nil, fmt.Errorf("failed to parse any valid tool calls")
}

server/python_tools_test.go (new file, 269 lines)
@@ -0,0 +1,269 @@
package server

import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)

func TestParsePythonFunctionCall(t *testing.T) {
t1 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "San Francisco, CA",
"format": "fahrenheit",
},
},
}

t2 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_forecast",
Arguments: api.ToolCallFunctionArguments{
"days": 5,
"location": "Seattle",
},
},
}

t3 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"list": []any{1, 2, 3},
"int": -1,
"float": 1.23,
"string": "hello",
},
},
}
t4 := api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_current_weather",
},
}

cases := []struct {
name string
input string
want []api.ToolCall
err bool
}{
{
name: "malformed function call - missing closing paren",
input: "get_current_weather(location=\"San Francisco\"",
err: true,
},
{
name: "empty function call",
input: "get_current_weather()",
want: []api.ToolCall{t4},
err: false,
},
{
name: "single valid function call",
input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\")",
want: []api.ToolCall{t1},
},
{
name: "multiple valid function calls",
input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\") get_forecast(days=5, location=\"Seattle\")",
want: []api.ToolCall{t1, t2},
},
{
name: "multiple valid function calls with list",
input: "get_current_weather(list=[1,2,3], int=-1, float=1.23, string=\"hello\")",
want: []api.ToolCall{t3},
},
{
name: "positional arguments not supported",
input: "get_current_weather(1, 2, 3)",
err: true,
},
{
name: "invalid argument format without equals",
input: "get_current_weather(\"San Francisco\")",
err: true,
},
{
name: "nested lists",
input: "get_current_weather(data=[[1,2],[3,4]])",
want: []api.ToolCall{{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"data": []any{[]any{1, 2}, []any{3, 4}},
},
},
}},
},
{
name: "boolean and none values",
input: "get_current_weather(active=true, enabled=false, value=None)",
want: []api.ToolCall{{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"active": true,
"enabled": false,
"value": nil,
},
},
}},
},
{
name: "single vs double quotes",
input: "get_current_weather(str1='single', str2=\"double\")",
want: []api.ToolCall{{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"str1": "single",
"str2": "double",
},
},
}},
},
{
name: "whitespace handling",
input: "get_current_weather( location = \"San Francisco\" , temp = 72 )",
want: []api.ToolCall{{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "San Francisco",
"temp": 72,
},
},
}},
},
}

for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got, err := parsePythonToolCall(tt.input)
if (err != nil) != tt.err {
t.Fatalf("expected error: %v, got error: %v", tt.err, err)
}
if tt.err {
return
}
if diff := cmp.Diff(got, tt.want); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

func TestParsePythonValue(t *testing.T) {
cases := []struct {
name string
input string
want any
err bool
}{
{
name: "string with double quotes",
input: "\"hello\"",
want: "hello",
},
{
name: "string with single quotes",
input: "'world'",
want: "world",
},
{
name: "integer",
input: "42",
want: 42,
},
{
name: "float",
input: "3.14",
want: 3.14,
},
{
name: "boolean true",
input: "True",
want: true,
},
{
name: "boolean false",
input: "False",
want: false,
},
{
name: "none/null",
input: "None",
want: nil,
},
{
name: "simple list",
input: "[1, 2, 3]",
want: []any{1, 2, 3},
},
{
name: "nested list",
input: "[1, [2, 3], 4]",
want: []any{1, []any{2, 3}, 4},
},
{
name: "mixed type list",
input: "[1, \"two\", 3.0, true]",
want: []any{1, "two", 3.0, true},
},
{
name: "invalid list",
input: "[1, 2,",
want: nil,
err: true,
},
{
name: "dictionaries",
input: "{'a': 1, 'b': 2}",
want: map[any]any{"a": 1, "b": 2},
err: false,
},
{
name: "int dictionary",
input: "{1: 2}",
want: map[any]any{1: 2},
err: false,
},
{
name: "mixed type dictionary",
input: "{'a': 1, 'b': 2.0, 'c': True}",
want: map[any]any{"a": 1, "b": 2.0, "c": true},
err: false,
},
{
name: "invalid dictionary - missing closing brace",
input: "{'a': 1, 'b': 2",
want: nil,
err: true,
},
{
name: "sets",
input: "{1, 2, 3}",
want: []any{1, 2, 3},
err: false,
},
}

for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got, err := parsePythonValue(tt.input)
if (err != nil) != tt.err {
t.Fatalf("expected error: %v, got error: %v", tt.err, err)
}
if tt.err {
return
}
if diff := cmp.Diff(got, tt.want); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

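For orientation, a hypothetical sketch of how the new parser might be driven from code in the same package, assuming the parsePythonToolCall function and the api types shown in the diff above; it is not part of the change itself:

```go
package server

import "fmt"

// demoPythonToolCalls shows the intended flow: take the raw text a model
// emitted, parse any Python-style calls out of it, and hand each one on.
func demoPythonToolCalls(raw string) error {
	calls, err := parsePythonToolCall(raw)
	if err != nil {
		return err // e.g. no function-call pattern was found in the text
	}
	for _, call := range calls {
		fmt.Println(call.Function.Name, call.Function.Arguments)
	}
	return nil
}
```

Feeding it `get_current_weather(location="San Francisco, CA", format="fahrenheit")` should produce the single tool call asserted in the first valid test case above.
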
@@ -18,7 +18,6 @@ import (
"os"
"os/signal"
"path/filepath"
"regexp"
"slices"
"strings"
"syscall"
@@ -1513,7 +1512,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
if req.Messages[0].Role != "system" && m.System != "" {
msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...)
}
msgs = filterThinkTags(msgs, m)

prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
if err != nil {
@@ -1642,23 +1640,3 @@ func handleScheduleError(c *gin.Context, name string, err error) {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
}
}

var thinkTagRegexp = regexp.MustCompile(`<think>(?s).*?</think>(\n)*`)

func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
if m.Config.ModelFamily == "qwen3" || model.ParseName(m.Name).Model == "deepseek-r1" {
finalUserIndex := -1
for i, msg := range msgs {
if msg.Role == "user" {
finalUserIndex = i
}
}

for i, msg := range msgs {
if msg.Role == "assistant" && i < finalUserIndex {
msgs[i].Content = thinkTagRegexp.ReplaceAllString(msg.Content, "")
}
}
}
return msgs
}

@@ -299,6 +299,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Hello!"},
},
Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
})

if w.Code != http.StatusOK {
@@ -321,6 +324,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Hello!"},
},
Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
})

if w.Code != http.StatusOK {
@@ -344,6 +350,9 @@ func TestGenerateChat(t *testing.T) {
{Role: "user", Content: "Help me write tests."},
},
Stream: &stream,
Options: map[string]any{
"num_ctx": 1024,
},
})

if w.Code != http.StatusOK {

@@ -15,7 +15,6 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"sort"
"strings"
"testing"
@@ -747,128 +746,3 @@ func TestNormalize(t *testing.T) {
})
}
}

func TestFilterThinkTags(t *testing.T) {
type testCase struct {
msgs []api.Message
want []api.Message
model *Model
}
testCases := []testCase{
{
msgs: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"},
{Role: "user", Content: "What is the answer?"},
},
want: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "abc"},
{Role: "user", Content: "What is the answer?"},
},
model: &Model{
Config: ConfigV2{
ModelFamily: "qwen3",
},
},
},
// with newlines inside the think tag aned newlines after
{
msgs: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking... \n\nabout \nthe answer</think>\n\nabc\ndef"},
{Role: "user", Content: "What is the answer?"},
},
want: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "abc\ndef"},
{Role: "user", Content: "What is the answer?"},
},
model: &Model{
Config: ConfigV2{
ModelFamily: "qwen3",
},
},
},
// should leave thinking tags if it's after the last user message
{
msgs: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking...</think>after"},
{Role: "user", Content: "What is the answer?"},
{Role: "assistant", Content: "<think>thinking again</think>hjk"},
{Role: "assistant", Content: "<think>thinking yet again</think>hjk"},
},
want: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "after"},
{Role: "user", Content: "What is the answer?"},
{Role: "assistant", Content: "<think>thinking again</think>hjk"},
{Role: "assistant", Content: "<think>thinking yet again</think>hjk"},
},
model: &Model{
Config: ConfigV2{
ModelFamily: "qwen3",
},
},
},
{
// shouldn't strip anything because the model family isn't one of the hardcoded ones
msgs: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"},
{Role: "user", Content: "What is the answer?"},
},
want: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"},
{Role: "user", Content: "What is the answer?"},
},
model: &Model{
Config: ConfigV2{
ModelFamily: "llama3",
},
},
},
{
// deepseek-r1:-prefixed model
msgs: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"},
{Role: "user", Content: "What is the answer?"},
},
want: []api.Message{
{Role: "user", Content: "Hello, world!"},
{Role: "assistant", Content: "abc"},
{Role: "user", Content: "What is the answer?"},
},
model: &Model{
Name: "registry.ollama.ai/library/deepseek-r1:latest",
ShortName: "deepseek-r1:7b",
Config: ConfigV2{},
},
},
}

for i, tc := range testCases {
filtered := filterThinkTags(tc.msgs, tc.model)

if !reflect.DeepEqual(filtered, tc.want) {
t.Errorf("messages differ for case %d:", i)
for i := range tc.want {
if i >= len(filtered) {
t.Errorf(" missing message %d: %+v", i, tc.want[i])
continue
}
if !reflect.DeepEqual(filtered[i], tc.want[i]) {
t.Errorf(" message %d:\n want: %+v\n got: %+v", i, tc.want[i], filtered[i])
}
}
if len(filtered) > len(tc.want) {
for i := len(tc.want); i < len(filtered); i++ {
t.Errorf(" extra message %d: %+v", i, filtered[i])
}
}
}
}
}

@@ -81,10 +81,6 @@ func InitScheduler(ctx context.Context) *Scheduler {

// context must be canceled to decrement ref count and release the runner
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) {
if opts.NumCtx < 4 {
opts.NumCtx = 4
}

req := &LlmRequest{
ctx: c,
model: model,
@@ -114,6 +110,11 @@ func (s *Scheduler) Run(ctx context.Context) {
}()
}

const (
defaultContextLength = 4096
smallGpuContextLength = 2048
)

func (s *Scheduler) processPending(ctx context.Context) {
for {
select {
@@ -166,6 +167,17 @@ func (s *Scheduler) processPending(ctx context.Context) {
gpus = s.getGpuFn()
}

if pending.origNumCtx == -1 {
if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 {
slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength)
pending.opts.NumCtx = smallGpuContextLength
pending.origNumCtx = smallGpuContextLength
} else {
pending.opts.NumCtx = defaultContextLength
pending.origNumCtx = defaultContextLength
}
}

if envconfig.MaxRunners() <= 0 {
// No user specified MaxRunners, so figure out what automatic setting to use
// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs

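Put differently: when a request arrives with the sentinel NumCtx of -1, this is where it gets resolved; a single non-CPU GPU with at most 4096*1024*1024 bytes (4 GiB) of total VRAM gets the 2048-token default, everything else gets 4096. A hypothetical, standalone condensation of that branch (the helper and the gpu struct below are stand-ins for illustration, not names from the diff):

```go
package main

import "fmt"

const (
	defaultContextLength  = 4096
	smallGpuContextLength = 2048
)

// gpu mirrors just the two fields the scheduler checks.
type gpu struct {
	Library     string
	TotalMemory uint64
}

// resolveDefaultNumCtx applies the same rule as the hunk above.
func resolveDefaultNumCtx(gpus []gpu) int {
	if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4<<30 { // 4 GiB
		return smallGpuContextLength
	}
	return defaultContextLength
}

func main() {
	fmt.Println(resolveDefaultNumCtx([]gpu{{Library: "cuda", TotalMemory: 4 << 30}}))  // 2048
	fmt.Println(resolveDefaultNumCtx([]gpu{{Library: "cuda", TotalMemory: 24 << 30}})) // 4096
}
```
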
@@ -441,9 +453,10 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoLis
estimatedVRAM: llama.EstimatedVRAM(),
estimatedTotal: llama.EstimatedTotal(),
loading: true,
refCount: 1,
}
runner.numParallel = numParallel
runner.refMu.Lock() // hold lock until running or aborted
runner.refMu.Lock()

s.loadedMu.Lock()
s.loaded[req.model.ModelPath] = runner
@@ -454,13 +467,13 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoLis
defer runner.refMu.Unlock()
if err = llama.WaitUntilRunning(req.ctx); err != nil {
slog.Error("error loading llama server", "error", err)
runner.refCount--
req.errCh <- err
slog.Debug("triggering expiration for failed load", "model", runner.modelPath)
s.expiredCh <- runner
return
}
slog.Debug("finished setting up runner", "model", req.model.ModelPath)
runner.refCount++
runner.loading = false
go func() {
<-req.ctx.Done()
@@ -478,12 +491,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) {
}
predMap := map[predKey]uint64{} // Sum up the total predicted usage per GPU for all runners
s.loadedMu.Lock()
runners := make([]*runnerRef, 0, len(s.loaded))
for _, r := range s.loaded {
runners = append(runners, r)
}
s.loadedMu.Unlock()
for _, r := range runners {
r.refMu.Lock()
if r.llama != nil {
for _, gpu := range allGpus {
@@ -494,6 +502,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) {
}
r.refMu.Unlock()
}
s.loadedMu.Unlock()

// Now that we've summed up all the GPU usage predictions across all the loaded runners, update the gpu list
for i := range allGpus {
@@ -540,8 +549,10 @@ func (s *Scheduler) filterGPUsWithoutLoadingModels(allGpus discover.GpuInfoList)

// TODO consolidate sched_types.go
type runnerRef struct {
refMu sync.Mutex
refMu sync.Mutex
// refCond sync.Cond // Signaled on transition from 1 -> 0 refCount
refCount uint // prevent unloading if > 0
// unloading bool // set to true when we are trying to unload the runner

llama llm.LlamaServer
loading bool // True only during initial load, then false forever
@@ -812,8 +823,8 @@ func (s *Scheduler) unloadAllRunners() {

func (s *Scheduler) expireRunner(model *Model) {
s.loadedMu.Lock()
defer s.loadedMu.Unlock()
runner, ok := s.loaded[model.ModelPath]
s.loadedMu.Unlock()
if ok {
runner.refMu.Lock()
runner.expiresAt = time.Now()

@@ -148,6 +148,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
successCh: make(chan *runnerRef, 1),
errCh: make(chan error, 1),
}
b.req.opts.NumCtx = 4096
b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}}
return b
}