Compare commits
	
		
			2 Commits
		
	
	
		
			v0.6.7-rc2
			...
			parth/pyth
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 23e8ac9428 | ||
|   | 611d3a17ed | 
							
								
								
									
										35
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										35
									
								
								README.md
									
									
									
									
									
								
							| @@ -285,7 +285,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt) | ||||
| - [HTML UI](https://github.com/rtcfirefly/ollama-ui) | ||||
| - [Saddle](https://github.com/jikkuatwork/saddle) | ||||
| - [TagSpaces](https://www.tagspaces.org) (A platform for file-based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions) | ||||
| - [TagSpaces](https://www.tagspaces.org) (A platform for file based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions) | ||||
| - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama) | ||||
| - [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui) | ||||
| - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file) | ||||
| @@ -325,14 +325,14 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama) | ||||
| - [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models) | ||||
| - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama) | ||||
| - [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support, and multiple large language models.) | ||||
| - [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support and multiple large language models.) | ||||
| - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS) | ||||
| - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama) | ||||
| - [Shinkai Desktop](https://github.com/dcSpark/shinkai-apps) (Two click install Local AI using Ollama + Files + RAG) | ||||
| - [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord) | ||||
| - [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in discord ) | ||||
| - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama) | ||||
| - [R2R](https://github.com/SciPhi-AI/R2R) (Open-source RAG engine) | ||||
| - [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy-to-use GUI with sample custom LLM for Drivers Education) | ||||
| - [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy to use GUI with sample custom LLM for Drivers Education) | ||||
| - [OpenGPA](https://opengpa.org) (Open-source offline-first Enterprise Agentic Application) | ||||
| - [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations) | ||||
| - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS) | ||||
| @@ -341,16 +341,16 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows) | ||||
| - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac) | ||||
| - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend) | ||||
| - [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows, and Mac) | ||||
| - [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for Linux and macOS made with GTK4 and Adwaita) | ||||
| - [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows and Mac) | ||||
| - [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for linux and macos made with GTK4 and Adwaita) | ||||
| - [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) (AutoGPT Ollama integration) | ||||
| - [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang) | ||||
| - [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery) | ||||
| - [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot, and Ollama4j | ||||
| - [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j | ||||
| - [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models. | ||||
| - [Cline](https://github.com/cline/cline) - Formerly known as Claude Dev is a VSCode extension for multi-file/whole-repo coding | ||||
| - [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support) | ||||
| - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption) | ||||
| - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption) | ||||
| - [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library) | ||||
| - [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama) | ||||
| - [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama) | ||||
| @@ -368,7 +368,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface) | ||||
| - [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol) | ||||
| - [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app) | ||||
| - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard, and said in the meetings) | ||||
| - [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings) | ||||
| - [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder) | ||||
| - [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation) | ||||
| - [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI) | ||||
| @@ -386,7 +386,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints) | ||||
| - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI) | ||||
| - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models) | ||||
| - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally) | ||||
| - [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally) | ||||
| - [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot) | ||||
| - [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot) | ||||
| - [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models) | ||||
| @@ -399,7 +399,6 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).) | ||||
| - [Writeopia](https://github.com/Writeopia/Writeopia) (Text editor with integration with Ollama) | ||||
| - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable) | ||||
| - [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers) | ||||
|  | ||||
| ### Cloud | ||||
|  | ||||
| @@ -441,7 +440,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama | ||||
| - [DeepShell](https://github.com/Abyss-c0re/deepshell) Your self-hosted AI assistant. Interactive Shell, Files and Folders analysis. | ||||
| - [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama. | ||||
| - [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal. | ||||
| - [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull and download models from Ollama Registry in your terminal. | ||||
| - [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform) | ||||
|  | ||||
| ### Apple Vision Pro | ||||
| @@ -516,7 +515,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/) | ||||
| - [GoLamify](https://github.com/prasad89/golamify) | ||||
| - [Ollama for Haskell](https://github.com/tusharad/ollama-haskell) | ||||
| - [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API) | ||||
| - [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API) | ||||
| - [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs) | ||||
| - [Ollama for Zig](https://github.com/dravenk/ollama-zig) | ||||
| - [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider) | ||||
| @@ -525,11 +524,11 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
|  | ||||
| ### Mobile | ||||
|  | ||||
| - [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS, and iPad) | ||||
| - [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS and iPad) | ||||
| - [Enchanted](https://github.com/AugustDev/enchanted) | ||||
| - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid) | ||||
| - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama) | ||||
| - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption) | ||||
| - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption) | ||||
| - [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) (No need for Termux, start the Ollama service with one click on an Android device) | ||||
| - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.) | ||||
|  | ||||
| @@ -553,7 +552,7 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt) | ||||
| - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama) | ||||
| - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama) | ||||
| - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot) | ||||
| - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot) | ||||
| - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama) | ||||
| - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face) | ||||
| - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) | ||||
| @@ -563,8 +562,8 @@ See the [API documentation](./docs/api.md) for all endpoints. | ||||
| - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation) | ||||
| - [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467) | ||||
| - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities. | ||||
| - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server) | ||||
| - [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front-end Open WebUI service.) | ||||
| - [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server) | ||||
| - [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.) | ||||
| - [node-red-contrib-ollama](https://github.com/jakubburkiewicz/node-red-contrib-ollama) | ||||
| - [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. Includes secure storage for user prompts.) | ||||
| - [vnc-lm](https://github.com/jake83741/vnc-lm) (Discord bot for messaging with LLMs through Ollama and LiteLLM. Seamlessly move between local and flagship models.) | ||||
|   | ||||
| @@ -1407,6 +1407,7 @@ func NewCLI() *cobra.Command { | ||||
| 				envVars["OLLAMA_LLM_LIBRARY"], | ||||
| 				envVars["OLLAMA_GPU_OVERHEAD"], | ||||
| 				envVars["OLLAMA_LOAD_TIMEOUT"], | ||||
| 				envVars["OLLAMA_CONTEXT_LENGTH"], | ||||
| 			}) | ||||
| 		default: | ||||
| 			appendEnvDocs(cmd, envs) | ||||
|   | ||||
| @@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md). | ||||
|  | ||||
| ## How can I specify the context window size? | ||||
|  | ||||
| By default, Ollama uses a context window size of 4096 tokens.  | ||||
| By default, Ollama uses a context window size of 4096 tokens, unless you have a single GPU with <= 4 GB of VRAM, in which case it will default to 2048 tokens.  | ||||
|  | ||||
| This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:  | ||||
|  | ||||
| @@ -31,7 +31,7 @@ OLLAMA_CONTEXT_LENGTH=8192 ollama serve | ||||
| To change this when using `ollama run`, use `/set parameter`: | ||||
|  | ||||
| ```shell | ||||
| /set parameter num_ctx 4096 | ||||
| /set parameter num_ctx 8192 | ||||
| ``` | ||||
|  | ||||
| When using the API, specify the `num_ctx` parameter: | ||||
| @@ -41,7 +41,7 @@ curl http://localhost:11434/api/generate -d '{ | ||||
|   "model": "llama3.2", | ||||
|   "prompt": "Why is the sky blue?", | ||||
|   "options": { | ||||
|     "num_ctx": 4096 | ||||
|     "num_ctx": 8192 | ||||
|   } | ||||
| }' | ||||
| ``` | ||||
|   | ||||
| @@ -169,7 +169,7 @@ var ( | ||||
| 	// Enable the new Ollama engine | ||||
| 	NewEngine = Bool("OLLAMA_NEW_ENGINE") | ||||
| 	// ContextLength sets the default context length | ||||
| 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096) | ||||
| 	ContextLength = Int64("OLLAMA_CONTEXT_LENGTH", -1) | ||||
| ) | ||||
|  | ||||
| func String(s string) func() string { | ||||
| @@ -227,6 +227,20 @@ func Uint64(key string, defaultValue uint64) func() uint64 { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func Int64(key string, defaultValue int64) func() int64 { | ||||
| 	return func() int64 { | ||||
| 		if s := Var(key); s != "" { | ||||
| 			if n, err := strconv.ParseInt(s, 10, 64); err != nil { | ||||
| 				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue) | ||||
| 			} else { | ||||
| 				return n | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return defaultValue | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Set aside VRAM per GPU | ||||
| var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0) | ||||
|  | ||||
| @@ -255,7 +269,7 @@ func AsMap() map[string]EnvVar { | ||||
| 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"}, | ||||
| 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, | ||||
| 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"}, | ||||
| 		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"}, | ||||
| 		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default 4096 or 2048 with low VRAM)"}, | ||||
| 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"}, | ||||
|  | ||||
| 		// Informational | ||||
|   | ||||
| @@ -278,9 +278,9 @@ func TestVar(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func TestContextLength(t *testing.T) { | ||||
| 	cases := map[string]uint{ | ||||
| 		"":     4096, | ||||
| 		"2048": 2048, | ||||
| 	cases := map[string]int64{ | ||||
| 		"":     -1, | ||||
| 		"4096": 4096, | ||||
| 	} | ||||
|  | ||||
| 	for k, v := range cases { | ||||
|   | ||||
							
								
								
									
										12
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								go.mod
									
									
									
									
									
								
							| @@ -11,7 +11,7 @@ require ( | ||||
| 	github.com/spf13/cobra v1.7.0 | ||||
| 	github.com/stretchr/testify v1.9.0 | ||||
| 	github.com/x448/float16 v0.8.4 | ||||
| 	golang.org/x/sync v0.12.0 | ||||
| 	golang.org/x/sync v0.11.0 | ||||
| ) | ||||
|  | ||||
| require ( | ||||
| @@ -70,12 +70,12 @@ require ( | ||||
| 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect | ||||
| 	github.com/ugorji/go/codec v1.2.12 // indirect | ||||
| 	golang.org/x/arch v0.8.0 // indirect | ||||
| 	golang.org/x/crypto v0.36.0 | ||||
| 	golang.org/x/crypto v0.33.0 | ||||
| 	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa | ||||
| 	golang.org/x/net v0.38.0 // indirect | ||||
| 	golang.org/x/sys v0.31.0 | ||||
| 	golang.org/x/term v0.30.0 | ||||
| 	golang.org/x/text v0.23.0 | ||||
| 	golang.org/x/net v0.35.0 // indirect | ||||
| 	golang.org/x/sys v0.30.0 | ||||
| 	golang.org/x/term v0.29.0 | ||||
| 	golang.org/x/text v0.22.0 | ||||
| 	google.golang.org/protobuf v1.34.1 | ||||
| 	gopkg.in/yaml.v3 v3.0.1 // indirect | ||||
| ) | ||||
|   | ||||
							
								
								
									
										24
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								go.sum
									
									
									
									
									
								
							| @@ -214,8 +214,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk | ||||
| golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= | ||||
| golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= | ||||
| golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= | ||||
| golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= | ||||
| golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= | ||||
| golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= | ||||
| golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= | ||||
| golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= | ||||
| golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= | ||||
| golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= | ||||
| @@ -257,8 +257,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R | ||||
| golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= | ||||
| golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= | ||||
| golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= | ||||
| golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= | ||||
| golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= | ||||
| golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= | ||||
| golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= | ||||
| golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= | ||||
| golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= | ||||
| golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||||
| @@ -268,8 +268,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ | ||||
| golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||||
| golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||||
| golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= | ||||
| golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= | ||||
| golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= | ||||
| golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= | ||||
| golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= | ||||
| golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | ||||
| golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | ||||
| @@ -285,17 +285,17 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc | ||||
| golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= | ||||
| golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= | ||||
| golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= | ||||
| golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||||
| golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= | ||||
| golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= | ||||
| golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= | ||||
| golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= | ||||
| golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= | ||||
| golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | ||||
| golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | ||||
| golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | ||||
| golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= | ||||
| golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= | ||||
| golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= | ||||
| golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= | ||||
| golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= | ||||
| golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= | ||||
| golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= | ||||
| golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= | ||||
|   | ||||
| @@ -34,15 +34,13 @@ func cosineSimilarity[V float32 | float64](v1, v2 []V) V { | ||||
| func TestAllMiniLMEmbeddings(t *testing.T) { | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) | ||||
| 	defer cancel() | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
|  | ||||
| 	req := api.EmbeddingRequest{ | ||||
| 		Model:  "all-minilm", | ||||
| 		Prompt: "why is the sky blue?", | ||||
| 	} | ||||
|  | ||||
| 	res, err := embeddingTestHelper(ctx, client, t, req) | ||||
| 	res, err := embeddingTestHelper(ctx, t, req) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("error: %v", err) | ||||
| @@ -64,15 +62,13 @@ func TestAllMiniLMEmbeddings(t *testing.T) { | ||||
| func TestAllMiniLMEmbed(t *testing.T) { | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) | ||||
| 	defer cancel() | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
|  | ||||
| 	req := api.EmbedRequest{ | ||||
| 		Model: "all-minilm", | ||||
| 		Input: "why is the sky blue?", | ||||
| 	} | ||||
|  | ||||
| 	res, err := embedTestHelper(ctx, client, t, req) | ||||
| 	res, err := embedTestHelper(ctx, t, req) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("error: %v", err) | ||||
| @@ -102,15 +98,13 @@ func TestAllMiniLMEmbed(t *testing.T) { | ||||
| func TestAllMiniLMBatchEmbed(t *testing.T) { | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) | ||||
| 	defer cancel() | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
|  | ||||
| 	req := api.EmbedRequest{ | ||||
| 		Model: "all-minilm", | ||||
| 		Input: []string{"why is the sky blue?", "why is the grass green?"}, | ||||
| 	} | ||||
|  | ||||
| 	res, err := embedTestHelper(ctx, client, t, req) | ||||
| 	res, err := embedTestHelper(ctx, t, req) | ||||
|  | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("error: %v", err) | ||||
| @@ -150,8 +144,6 @@ func TestAllMiniLMBatchEmbed(t *testing.T) { | ||||
| func TestAllMiniLMEmbedTruncate(t *testing.T) { | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) | ||||
| 	defer cancel() | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
|  | ||||
| 	truncTrue, truncFalse := true, false | ||||
|  | ||||
| @@ -190,7 +182,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) { | ||||
| 	res := make(map[string]*api.EmbedResponse) | ||||
|  | ||||
| 	for _, req := range reqs { | ||||
| 		response, err := embedTestHelper(ctx, client, t, req.Request) | ||||
| 		response, err := embedTestHelper(ctx, t, req.Request) | ||||
| 		if err != nil { | ||||
| 			t.Fatalf("error: %v", err) | ||||
| 		} | ||||
| @@ -206,7 +198,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) { | ||||
| 	} | ||||
|  | ||||
| 	// check that truncate set to false returns an error if context length is exceeded | ||||
| 	_, err := embedTestHelper(ctx, client, t, api.EmbedRequest{ | ||||
| 	_, err := embedTestHelper(ctx, t, api.EmbedRequest{ | ||||
| 		Model:    "all-minilm", | ||||
| 		Input:    "why is the sky blue?", | ||||
| 		Truncate: &truncFalse, | ||||
| @@ -218,7 +210,9 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) { | ||||
| func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) { | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
| 	if err := PullIfMissing(ctx, client, req.Model); err != nil { | ||||
| 		t.Fatalf("failed to pull model %s: %v", req.Model, err) | ||||
| 	} | ||||
| @@ -232,7 +226,9 @@ func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, | ||||
| 	return response, nil | ||||
| } | ||||
|  | ||||
| func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) { | ||||
| func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) { | ||||
| 	client, _, cleanup := InitServerConnection(ctx, t) | ||||
| 	defer cleanup() | ||||
| 	if err := PullIfMissing(ctx, client, req.Model); err != nil { | ||||
| 		t.Fatalf("failed to pull model %s: %v", req.Model, err) | ||||
| 	} | ||||
|   | ||||
| @@ -329,13 +329,11 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a | ||||
| 			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...) | ||||
| 		} | ||||
|  | ||||
| 		ggmlPaths := []string{discover.LibOllamaPath} | ||||
| 		if len(compatible) > 0 { | ||||
| 			c := compatible[0] | ||||
| 			if libpath, ok := libs[c]; ok { | ||||
| 				slog.Debug("adding gpu library", "path", libpath) | ||||
| 				libraryPaths = append(libraryPaths, libpath) | ||||
| 				ggmlPaths = append(ggmlPaths, libpath) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| @@ -371,8 +369,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a | ||||
| 		s.cmd.Stderr = s.status | ||||
| 		s.cmd.SysProcAttr = LlamaServerSysProcAttr | ||||
|  | ||||
| 		s.cmd.Env = append(s.cmd.Env, "OLLAMA_LIBRARY_PATH="+strings.Join(ggmlPaths, string(filepath.ListSeparator))) | ||||
|  | ||||
| 		envWorkarounds := [][2]string{} | ||||
| 		for _, gpu := range gpus { | ||||
| 			envWorkarounds = append(envWorkarounds, gpu.EnvWorkarounds...) | ||||
| @@ -410,8 +406,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a | ||||
| 		if envconfig.Debug() { | ||||
| 			filteredEnv := []string{} | ||||
| 			for _, ev := range s.cmd.Env { | ||||
| 				if strings.HasPrefix(ev, "OLLAMA_") || | ||||
| 					strings.HasPrefix(ev, "CUDA_") || | ||||
| 				if strings.HasPrefix(ev, "CUDA_") || | ||||
| 					strings.HasPrefix(ev, "ROCR_") || | ||||
| 					strings.HasPrefix(ev, "ROCM_") || | ||||
| 					strings.HasPrefix(ev, "HIP_") || | ||||
|   | ||||
| @@ -57,20 +57,26 @@ var OnceLoad = sync.OnceFunc(func() { | ||||
| 		exe = "." | ||||
| 	} | ||||
|  | ||||
| 	var value string | ||||
| 	// PATH, LD_LIBRARY_PATH, and DYLD_LIBRARY_PATH are often | ||||
| 	// set by the parent process, however, use a default value | ||||
| 	// if the environment variable is not set. | ||||
| 	var name, value string | ||||
| 	switch runtime.GOOS { | ||||
| 	case "darwin": | ||||
| 		// On macOS, DYLD_LIBRARY_PATH is often not set, so | ||||
| 		// we use the directory of the executable as the default. | ||||
| 		name = "DYLD_LIBRARY_PATH" | ||||
| 		value = filepath.Dir(exe) | ||||
| 	case "windows": | ||||
| 		name = "PATH" | ||||
| 		value = filepath.Join(filepath.Dir(exe), "lib", "ollama") | ||||
| 	default: | ||||
| 		name = "LD_LIBRARY_PATH" | ||||
| 		value = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama") | ||||
| 	} | ||||
|  | ||||
| 	// Avoid potentially loading incompatible GGML libraries | ||||
| 	paths, ok := os.LookupEnv("OLLAMA_LIBRARY_PATH") | ||||
| 	paths, ok := os.LookupEnv(name) | ||||
| 	if !ok { | ||||
| 		slog.Debug("OLLAMA_LIBRARY_PATH not set, falling back to default", "search", value) | ||||
| 		paths = value | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -723,9 +723,7 @@ func (m *multiLPath) String() string { | ||||
| 	return strings.Join(*m, ", ") | ||||
| } | ||||
|  | ||||
| // TODO(jessegross): This is causing tensor allocation failures with large batches when not offloaded | ||||
| // to the GPU | ||||
| /*func (s *Server) reserveWorstCaseGraph() error { | ||||
| func (s *Server) reserveWorstCaseGraph() error { | ||||
| 	ctx := s.model.Backend().NewContext() | ||||
| 	defer ctx.Close() | ||||
|  | ||||
| @@ -768,7 +766,7 @@ func (m *multiLPath) String() string { | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| }*/ | ||||
| } | ||||
|  | ||||
| func (s *Server) loadModel( | ||||
| 	ctx context.Context, | ||||
| @@ -805,10 +803,10 @@ func (s *Server) loadModel( | ||||
| 	s.seqs = make([]*Sequence, s.parallel) | ||||
| 	s.seqsSem = semaphore.NewWeighted(int64(s.parallel)) | ||||
|  | ||||
| 	/*err = s.reserveWorstCaseGraph() | ||||
| 	err = s.reserveWorstCaseGraph() | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	}*/ | ||||
| 	} | ||||
|  | ||||
| 	s.status = llm.ServerStatusReady | ||||
| 	s.ready.Done() | ||||
|   | ||||
							
								
								
									
										226
									
								
								server/python_tools.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								server/python_tools.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | ||||
| package server | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"regexp" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/ollama/ollama/api" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	pythonFuncRegex = regexp.MustCompile(`(\w+)\((.*?)\)`) | ||||
| 	braces          = map[rune]rune{ | ||||
| 		'[':  ']', | ||||
| 		'{':  '}', | ||||
| 		'(':  ')', | ||||
| 		'"':  '"', | ||||
| 		'\'': '\'', | ||||
| 	} | ||||
| ) | ||||
|  | ||||
// parsePythonValue converts the textual form of a Python literal into a Go
// value. Leading and trailing whitespace is ignored.
//
// Supported literals and the Go value they produce:
//   - quoted strings ('...' or "...") -> string; the quotes are stripped and
//     backslash escapes are kept verbatim (they are not decoded)
//   - true / false, any letter case -> bool
//   - none, any letter case -> nil
//   - integers -> int
//   - floats -> float64
//   - lists [a, b] -> []any
//   - dictionaries {k: v} -> map[any]any; {} is the empty dictionary
//   - sets {a, b} -> []any, in source order and without de-duplication
//
// A trailing comma inside a list, dictionary or set is accepted. An error is
// returned for any other input, including unbalanced brackets, unterminated
// strings and dictionary keys that are lists or dictionaries.
func parsePythonValue(value string) (any, error) {
	value = strings.TrimSpace(value)

	// string: both quotes must be present, so a lone quote character is not
	// a string (slicing it as one would panic).
	if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
		return value[1 : n-1], nil
	}

	// bool and None
	switch strings.ToLower(value) {
	case "true":
		return true, nil
	case "false":
		return false, nil
	case "none":
		return nil, nil
	}

	// int
	if i, err := strconv.Atoi(value); err == nil {
		return i, nil
	}

	// float
	if f, err := strconv.ParseFloat(value, 64); err == nil {
		return f, nil
	}

	// list
	if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
		items, err := splitPythonItems(value[1 : len(value)-1])
		if err != nil {
			return nil, fmt.Errorf("invalid list: %w", err)
		}
		list, err := parsePythonSequence(items, "list")
		if err != nil {
			return nil, err
		}
		return list, nil
	}

	// dictionary or set
	if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
		items, err := splitPythonItems(value[1 : len(value)-1])
		if err != nil {
			return nil, fmt.Errorf("invalid dictionary or set: %w", err)
		}
		if len(items) == 0 {
			// In Python, {} is an empty dictionary, not an empty set.
			return map[any]any{}, nil
		}

		// The first item decides the kind: a colon outside of any string
		// or nested bracket means "key: value".
		colons, err := pythonTopLevelSeps(items[0], ':')
		if err != nil {
			return nil, fmt.Errorf("invalid dictionary or set: %w", err)
		}
		if len(colons) > 0 {
			dict, err := parsePythonDict(items)
			if err != nil {
				return nil, err
			}
			return dict, nil
		}

		// sets (stored as lists)
		set, err := parsePythonSequence(items, "set")
		if err != nil {
			return nil, err
		}
		return set, nil
	}

	return nil, fmt.Errorf("invalid Python value: %s", value)
}

// parsePythonSequence parses every item with parsePythonValue and collects the
// results in order. kind ("list" or "set") is only used in error messages.
func parsePythonSequence(items []string, kind string) ([]any, error) {
	var list []any
	for _, item := range items {
		val, err := parsePythonValue(item)
		if err != nil {
			return nil, fmt.Errorf("invalid %s item: %s", kind, item)
		}
		list = append(list, val)
	}
	return list, nil
}

// parsePythonDict parses "key: value" items into a map. Keys that parse to a
// list or dictionary are rejected because they cannot be used as Go map keys.
func parsePythonDict(items []string) (map[any]any, error) {
	dict := make(map[any]any, len(items))
	for _, item := range items {
		colons, err := pythonTopLevelSeps(item, ':')
		if err != nil || len(colons) == 0 {
			return nil, fmt.Errorf("invalid dictionary key-value pair: %s", item)
		}
		keyStr, valStr := item[:colons[0]], item[colons[0]+1:]

		key, err := parsePythonValue(keyStr)
		if err != nil {
			return nil, fmt.Errorf("invalid dictionary key: %s", keyStr)
		}
		switch key.(type) {
		case []any, map[any]any:
			// Inserting an unhashable key would panic at runtime.
			return nil, fmt.Errorf("invalid dictionary key: %s", keyStr)
		}

		val, err := parsePythonValue(valStr)
		if err != nil {
			return nil, fmt.Errorf("invalid dictionary value: %s", valStr)
		}

		dict[key] = val
	}
	return dict, nil
}

// splitPythonItems splits the inside of a list, dictionary or set at its
// top-level commas and trims each item. Blank input yields no items, and an
// empty segment after a trailing comma is dropped.
func splitPythonItems(body string) ([]string, error) {
	if strings.TrimSpace(body) == "" {
		return nil, nil
	}

	commas, err := pythonTopLevelSeps(body, ',')
	if err != nil {
		return nil, err
	}

	items := make([]string, 0, len(commas)+1)
	start := 0
	for _, i := range commas {
		items = append(items, strings.TrimSpace(body[start:i]))
		start = i + 1
	}
	if last := strings.TrimSpace(body[start:]); last != "" {
		items = append(items, last)
	}
	return items, nil
}

// pythonTopLevelSeps returns the byte offsets of every sep in s that lies
// outside string literals and outside any (), [] or {} nesting. It reports an
// error when a string is left open or brackets do not balance.
func pythonTopLevelSeps(s string, sep rune) ([]int, error) {
	var (
		seps    []int
		closers []rune // closing bracket expected for each open bracket
		quote   rune   // active quote character, 0 outside a string
		escaped bool   // previous rune inside a string was a backslash
	)

	for i, r := range s {
		if quote != 0 {
			// Inside a string only the matching, unescaped quote matters.
			switch {
			case escaped:
				escaped = false
			case r == '\\':
				escaped = true
			case r == quote:
				quote = 0
			}
			continue
		}

		switch r {
		case '"', '\'':
			quote = r
		case '[':
			closers = append(closers, ']')
		case '{':
			closers = append(closers, '}')
		case '(':
			closers = append(closers, ')')
		case ']', '}', ')':
			if len(closers) == 0 || closers[len(closers)-1] != r {
				return nil, fmt.Errorf("unexpected %q", r)
			}
			closers = closers[:len(closers)-1]
		default:
			if r == sep && len(closers) == 0 {
				seps = append(seps, i)
			}
		}
	}

	if quote != 0 {
		return nil, fmt.Errorf("unterminated string")
	}
	if len(closers) != 0 {
		return nil, fmt.Errorf("missing %q", closers[len(closers)-1])
	}
	return seps, nil
}
|  | ||||
| // parsePythonToolCall parses Python function calls from a string | ||||
| // it supports keyword arguments, as well as multiple functions in a single string | ||||
| func parsePythonToolCall(s string) ([]api.ToolCall, error) { | ||||
| 	matches := pythonFuncRegex.FindAllStringSubmatchIndex(s, -1) | ||||
| 	if len(matches) == 0 { | ||||
| 		return nil, fmt.Errorf("no Python function calls found") | ||||
| 	} | ||||
|  | ||||
| 	var toolCalls []api.ToolCall | ||||
| 	for _, match := range matches { | ||||
| 		name := s[match[2]:match[3]] | ||||
| 		args := s[match[4]:match[5]] | ||||
| 		var arguments api.ToolCallFunctionArguments | ||||
| 		if len(args) == 0 { | ||||
| 			toolCalls = append(toolCalls, api.ToolCall{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name: name, | ||||
| 				}, | ||||
| 			}) | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		start := 0 | ||||
| 		stack := []rune{} | ||||
| 		for i, char := range args { | ||||
| 			if len(stack) != 0 && char == braces[stack[len(stack)-1]] { | ||||
| 				stack = stack[:len(stack)-1] | ||||
| 			} else if _, ok := braces[char]; ok { | ||||
| 				stack = append(stack, char) | ||||
| 			} | ||||
| 			if len(stack) == 0 && (char == ',' || i == len(args)-1) { | ||||
| 				end := i | ||||
| 				if i == len(args)-1 { | ||||
| 					end = i + 1 | ||||
| 				} | ||||
| 				kv := strings.SplitN(args[start:end], "=", 2) | ||||
| 				if len(kv) == 2 { | ||||
| 					key := strings.TrimSpace(kv[0]) | ||||
| 					valueStr := strings.TrimSpace(kv[1]) | ||||
|  | ||||
| 					// Parse the value into appropriate type | ||||
| 					value, err := parsePythonValue(valueStr) | ||||
| 					if err != nil { | ||||
| 						return nil, fmt.Errorf("failed to parse value for key %q: %v", key, err) | ||||
| 					} | ||||
|  | ||||
| 					arguments[key] = value | ||||
| 				} else { | ||||
| 					return nil, fmt.Errorf("invalid argument format: %q", args[start:end]) | ||||
| 				} | ||||
| 				start = i + 1 | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if len(arguments) > 0 { | ||||
| 			toolCalls = append(toolCalls, api.ToolCall{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name:      name, | ||||
| 					Arguments: arguments, | ||||
| 				}, | ||||
| 			}) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if len(toolCalls) > 0 { | ||||
| 		return toolCalls, nil | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("failed to parse any valid tool calls") | ||||
| } | ||||
							
								
								
									
										269
									
								
								server/python_tools_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								server/python_tools_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,269 @@ | ||||
| package server | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/google/go-cmp/cmp" | ||||
| 	"github.com/ollama/ollama/api" | ||||
| ) | ||||
|  | ||||
| func TestParsePythonFunctionCall(t *testing.T) { | ||||
| 	t1 := api.ToolCall{ | ||||
| 		Function: api.ToolCallFunction{ | ||||
| 			Name: "get_current_weather", | ||||
| 			Arguments: api.ToolCallFunctionArguments{ | ||||
| 				"location": "San Francisco, CA", | ||||
| 				"format":   "fahrenheit", | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	t2 := api.ToolCall{ | ||||
| 		Function: api.ToolCallFunction{ | ||||
| 			Name: "get_forecast", | ||||
| 			Arguments: api.ToolCallFunctionArguments{ | ||||
| 				"days":     5, | ||||
| 				"location": "Seattle", | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	t3 := api.ToolCall{ | ||||
| 		Function: api.ToolCallFunction{ | ||||
| 			Name: "get_current_weather", | ||||
| 			Arguments: api.ToolCallFunctionArguments{ | ||||
| 				"list":   []any{1, 2, 3}, | ||||
| 				"int":    -1, | ||||
| 				"float":  1.23, | ||||
| 				"string": "hello", | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 	t4 := api.ToolCall{ | ||||
| 		Function: api.ToolCallFunction{ | ||||
| 			Name: "get_current_weather", | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	cases := []struct { | ||||
| 		name  string | ||||
| 		input string | ||||
| 		want  []api.ToolCall | ||||
| 		err   bool | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:  "malformed function call - missing closing paren", | ||||
| 			input: "get_current_weather(location=\"San Francisco\"", | ||||
| 			err:   true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "empty function call", | ||||
| 			input: "get_current_weather()", | ||||
| 			want:  []api.ToolCall{t4}, | ||||
| 			err:   false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "single valid function call", | ||||
| 			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\")", | ||||
| 			want:  []api.ToolCall{t1}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "multiple valid function calls", | ||||
| 			input: "get_current_weather(location=\"San Francisco, CA\", format=\"fahrenheit\") get_forecast(days=5, location=\"Seattle\")", | ||||
| 			want:  []api.ToolCall{t1, t2}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "multiple valid function calls with list", | ||||
| 			input: "get_current_weather(list=[1,2,3], int=-1, float=1.23, string=\"hello\")", | ||||
| 			want:  []api.ToolCall{t3}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "positional arguments not supported", | ||||
| 			input: "get_current_weather(1, 2, 3)", | ||||
| 			err:   true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "invalid argument format without equals", | ||||
| 			input: "get_current_weather(\"San Francisco\")", | ||||
| 			err:   true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "nested lists", | ||||
| 			input: "get_current_weather(data=[[1,2],[3,4]])", | ||||
| 			want: []api.ToolCall{{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name: "get_current_weather", | ||||
| 					Arguments: api.ToolCallFunctionArguments{ | ||||
| 						"data": []any{[]any{1, 2}, []any{3, 4}}, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "boolean and none values", | ||||
| 			input: "get_current_weather(active=true, enabled=false, value=None)", | ||||
| 			want: []api.ToolCall{{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name: "get_current_weather", | ||||
| 					Arguments: api.ToolCallFunctionArguments{ | ||||
| 						"active":  true, | ||||
| 						"enabled": false, | ||||
| 						"value":   nil, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "single vs double quotes", | ||||
| 			input: "get_current_weather(str1='single', str2=\"double\")", | ||||
| 			want: []api.ToolCall{{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name: "get_current_weather", | ||||
| 					Arguments: api.ToolCallFunctionArguments{ | ||||
| 						"str1": "single", | ||||
| 						"str2": "double", | ||||
| 					}, | ||||
| 				}, | ||||
| 			}}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "whitespace handling", | ||||
| 			input: "get_current_weather( location = \"San Francisco\" , temp = 72 )", | ||||
| 			want: []api.ToolCall{{ | ||||
| 				Function: api.ToolCallFunction{ | ||||
| 					Name: "get_current_weather", | ||||
| 					Arguments: api.ToolCallFunctionArguments{ | ||||
| 						"location": "San Francisco", | ||||
| 						"temp":     72, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tt := range cases { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			got, err := parsePythonToolCall(tt.input) | ||||
| 			if (err != nil) != tt.err { | ||||
| 				t.Fatalf("expected error: %v, got error: %v", tt.err, err) | ||||
| 			} | ||||
| 			if tt.err { | ||||
| 				return | ||||
| 			} | ||||
| 			if diff := cmp.Diff(got, tt.want); diff != "" { | ||||
| 				t.Errorf("mismatch (-got +want):\n%s", diff) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestParsePythonValue(t *testing.T) { | ||||
| 	cases := []struct { | ||||
| 		name  string | ||||
| 		input string | ||||
| 		want  any | ||||
| 		err   bool | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:  "string with double quotes", | ||||
| 			input: "\"hello\"", | ||||
| 			want:  "hello", | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "string with single quotes", | ||||
| 			input: "'world'", | ||||
| 			want:  "world", | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "integer", | ||||
| 			input: "42", | ||||
| 			want:  42, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "float", | ||||
| 			input: "3.14", | ||||
| 			want:  3.14, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "boolean true", | ||||
| 			input: "True", | ||||
| 			want:  true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "boolean false", | ||||
| 			input: "False", | ||||
| 			want:  false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "none/null", | ||||
| 			input: "None", | ||||
| 			want:  nil, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "simple list", | ||||
| 			input: "[1, 2, 3]", | ||||
| 			want:  []any{1, 2, 3}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "nested list", | ||||
| 			input: "[1, [2, 3], 4]", | ||||
| 			want:  []any{1, []any{2, 3}, 4}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "mixed type list", | ||||
| 			input: "[1, \"two\", 3.0, true]", | ||||
| 			want:  []any{1, "two", 3.0, true}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "invalid list", | ||||
| 			input: "[1, 2,", | ||||
| 			want:  nil, | ||||
| 			err:   true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "dictionaries", | ||||
| 			input: "{'a': 1, 'b': 2}", | ||||
| 			want:  map[any]any{"a": 1, "b": 2}, | ||||
| 			err:   false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "int dictionary", | ||||
| 			input: "{1: 2}", | ||||
| 			want:  map[any]any{1: 2}, | ||||
| 			err:   false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "mixed type dictionary", | ||||
| 			input: "{'a': 1, 'b': 2.0, 'c': True}", | ||||
| 			want:  map[any]any{"a": 1, "b": 2.0, "c": true}, | ||||
| 			err:   false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "invalid dictionary - missing closing brace", | ||||
| 			input: "{'a': 1, 'b': 2", | ||||
| 			want:  nil, | ||||
| 			err:   true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:  "sets", | ||||
| 			input: "{1, 2, 3}", | ||||
| 			want:  []any{1, 2, 3}, | ||||
| 			err:   false, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tt := range cases { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			got, err := parsePythonValue(tt.input) | ||||
| 			if (err != nil) != tt.err { | ||||
| 				t.Fatalf("expected error: %v, got error: %v", tt.err, err) | ||||
| 			} | ||||
| 			if tt.err { | ||||
| 				return | ||||
| 			} | ||||
| 			if diff := cmp.Diff(got, tt.want); diff != "" { | ||||
| 				t.Errorf("mismatch (-got +want):\n%s", diff) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| @@ -18,7 +18,6 @@ import ( | ||||
| 	"os" | ||||
| 	"os/signal" | ||||
| 	"path/filepath" | ||||
| 	"regexp" | ||||
| 	"slices" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| @@ -1513,7 +1512,6 @@ func (s *Server) ChatHandler(c *gin.Context) { | ||||
| 	if req.Messages[0].Role != "system" && m.System != "" { | ||||
| 		msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...) | ||||
| 	} | ||||
| 	msgs = filterThinkTags(msgs, m) | ||||
|  | ||||
| 	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools) | ||||
| 	if err != nil { | ||||
| @@ -1642,23 +1640,3 @@ func handleScheduleError(c *gin.Context, name string, err error) { | ||||
| 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| var thinkTagRegexp = regexp.MustCompile(`<think>(?s).*?</think>(\n)*`) | ||||
|  | ||||
| func filterThinkTags(msgs []api.Message, m *Model) []api.Message { | ||||
| 	if m.Config.ModelFamily == "qwen3" || model.ParseName(m.Name).Model == "deepseek-r1" { | ||||
| 		finalUserIndex := -1 | ||||
| 		for i, msg := range msgs { | ||||
| 			if msg.Role == "user" { | ||||
| 				finalUserIndex = i | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		for i, msg := range msgs { | ||||
| 			if msg.Role == "assistant" && i < finalUserIndex { | ||||
| 				msgs[i].Content = thinkTagRegexp.ReplaceAllString(msg.Content, "") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return msgs | ||||
| } | ||||
|   | ||||
| @@ -299,6 +299,9 @@ func TestGenerateChat(t *testing.T) { | ||||
| 				{Role: "user", Content: "Hello!"}, | ||||
| 			}, | ||||
| 			Stream: &stream, | ||||
| 			Options: map[string]any{ | ||||
| 				"num_ctx": 1024, | ||||
| 			}, | ||||
| 		}) | ||||
|  | ||||
| 		if w.Code != http.StatusOK { | ||||
| @@ -321,6 +324,9 @@ func TestGenerateChat(t *testing.T) { | ||||
| 				{Role: "user", Content: "Hello!"}, | ||||
| 			}, | ||||
| 			Stream: &stream, | ||||
| 			Options: map[string]any{ | ||||
| 				"num_ctx": 1024, | ||||
| 			}, | ||||
| 		}) | ||||
|  | ||||
| 		if w.Code != http.StatusOK { | ||||
| @@ -344,6 +350,9 @@ func TestGenerateChat(t *testing.T) { | ||||
| 				{Role: "user", Content: "Help me write tests."}, | ||||
| 			}, | ||||
| 			Stream: &stream, | ||||
| 			Options: map[string]any{ | ||||
| 				"num_ctx": 1024, | ||||
| 			}, | ||||
| 		}) | ||||
|  | ||||
| 		if w.Code != http.StatusOK { | ||||
|   | ||||
| @@ -15,7 +15,6 @@ import ( | ||||
| 	"net/http/httptest" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"reflect" | ||||
| 	"sort" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| @@ -747,128 +746,3 @@ func TestNormalize(t *testing.T) { | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestFilterThinkTags(t *testing.T) { | ||||
| 	type testCase struct { | ||||
| 		msgs  []api.Message | ||||
| 		want  []api.Message | ||||
| 		model *Model | ||||
| 	} | ||||
| 	testCases := []testCase{ | ||||
| 		{ | ||||
| 			msgs: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			want: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			model: &Model{ | ||||
| 				Config: ConfigV2{ | ||||
| 					ModelFamily: "qwen3", | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		// with newlines inside the think tag aned newlines after | ||||
| 		{ | ||||
| 			msgs: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking... \n\nabout \nthe answer</think>\n\nabc\ndef"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			want: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "abc\ndef"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			model: &Model{ | ||||
| 				Config: ConfigV2{ | ||||
| 					ModelFamily: "qwen3", | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		// should leave thinking tags if it's after the last user message | ||||
| 		{ | ||||
| 			msgs: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking...</think>after"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 				{Role: "assistant", Content: "<think>thinking again</think>hjk"}, | ||||
| 				{Role: "assistant", Content: "<think>thinking yet again</think>hjk"}, | ||||
| 			}, | ||||
| 			want: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "after"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 				{Role: "assistant", Content: "<think>thinking again</think>hjk"}, | ||||
| 				{Role: "assistant", Content: "<think>thinking yet again</think>hjk"}, | ||||
| 			}, | ||||
| 			model: &Model{ | ||||
| 				Config: ConfigV2{ | ||||
| 					ModelFamily: "qwen3", | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			// shouldn't strip anything because the model family isn't one of the hardcoded ones | ||||
| 			msgs: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			want: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			model: &Model{ | ||||
| 				Config: ConfigV2{ | ||||
| 					ModelFamily: "llama3", | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			// deepseek-r1:-prefixed model | ||||
| 			msgs: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "<think>Thinking... about the answer</think>abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			want: []api.Message{ | ||||
| 				{Role: "user", Content: "Hello, world!"}, | ||||
| 				{Role: "assistant", Content: "abc"}, | ||||
| 				{Role: "user", Content: "What is the answer?"}, | ||||
| 			}, | ||||
| 			model: &Model{ | ||||
| 				Name:      "registry.ollama.ai/library/deepseek-r1:latest", | ||||
| 				ShortName: "deepseek-r1:7b", | ||||
| 				Config:    ConfigV2{}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for i, tc := range testCases { | ||||
| 		filtered := filterThinkTags(tc.msgs, tc.model) | ||||
|  | ||||
| 		if !reflect.DeepEqual(filtered, tc.want) { | ||||
| 			t.Errorf("messages differ for case %d:", i) | ||||
| 			for i := range tc.want { | ||||
| 				if i >= len(filtered) { | ||||
| 					t.Errorf("  missing message %d: %+v", i, tc.want[i]) | ||||
| 					continue | ||||
| 				} | ||||
| 				if !reflect.DeepEqual(filtered[i], tc.want[i]) { | ||||
| 					t.Errorf("  message %d:\n    want: %+v\n    got:  %+v", i, tc.want[i], filtered[i]) | ||||
| 				} | ||||
| 			} | ||||
| 			if len(filtered) > len(tc.want) { | ||||
| 				for i := len(tc.want); i < len(filtered); i++ { | ||||
| 					t.Errorf("  extra message %d: %+v", i, filtered[i]) | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -81,10 +81,6 @@ func InitScheduler(ctx context.Context) *Scheduler { | ||||
|  | ||||
| // context must be canceled to decrement ref count and release the runner | ||||
| func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration *api.Duration) (chan *runnerRef, chan error) { | ||||
| 	if opts.NumCtx < 4 { | ||||
| 		opts.NumCtx = 4 | ||||
| 	} | ||||
|  | ||||
| 	req := &LlmRequest{ | ||||
| 		ctx:             c, | ||||
| 		model:           model, | ||||
| @@ -114,6 +110,11 @@ func (s *Scheduler) Run(ctx context.Context) { | ||||
| 	}() | ||||
| } | ||||
|  | ||||
| const ( | ||||
| 	defaultContextLength  = 4096 | ||||
| 	smallGpuContextLength = 2048 | ||||
| ) | ||||
|  | ||||
| func (s *Scheduler) processPending(ctx context.Context) { | ||||
| 	for { | ||||
| 		select { | ||||
| @@ -166,6 +167,17 @@ func (s *Scheduler) processPending(ctx context.Context) { | ||||
| 						gpus = s.getGpuFn() | ||||
| 					} | ||||
|  | ||||
| 					if pending.origNumCtx == -1 { | ||||
| 						if len(gpus) == 1 && gpus[0].Library != "cpu" && gpus[0].TotalMemory <= 4096*1024*1024 { | ||||
| 							slog.Info("GPU is small, limiting default context window", "num_ctx", smallGpuContextLength) | ||||
| 							pending.opts.NumCtx = smallGpuContextLength | ||||
| 							pending.origNumCtx = smallGpuContextLength | ||||
| 						} else { | ||||
| 							pending.opts.NumCtx = defaultContextLength | ||||
| 							pending.origNumCtx = defaultContextLength | ||||
| 						} | ||||
| 					} | ||||
|  | ||||
| 					if envconfig.MaxRunners() <= 0 { | ||||
| 						// No user specified MaxRunners, so figure out what automatic setting to use | ||||
| 						// If all GPUs have reliable free memory reporting, defaultModelsPerGPU * the number of GPUs | ||||
| @@ -441,9 +453,10 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoLis | ||||
| 		estimatedVRAM:   llama.EstimatedVRAM(), | ||||
| 		estimatedTotal:  llama.EstimatedTotal(), | ||||
| 		loading:         true, | ||||
| 		refCount:        1, | ||||
| 	} | ||||
| 	runner.numParallel = numParallel | ||||
| 	runner.refMu.Lock() // hold lock until running or aborted | ||||
| 	runner.refMu.Lock() | ||||
|  | ||||
| 	s.loadedMu.Lock() | ||||
| 	s.loaded[req.model.ModelPath] = runner | ||||
| @@ -454,13 +467,13 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoLis | ||||
| 		defer runner.refMu.Unlock() | ||||
| 		if err = llama.WaitUntilRunning(req.ctx); err != nil { | ||||
| 			slog.Error("error loading llama server", "error", err) | ||||
| 			runner.refCount-- | ||||
| 			req.errCh <- err | ||||
| 			slog.Debug("triggering expiration for failed load", "model", runner.modelPath) | ||||
| 			s.expiredCh <- runner | ||||
| 			return | ||||
| 		} | ||||
| 		slog.Debug("finished setting up runner", "model", req.model.ModelPath) | ||||
| 		runner.refCount++ | ||||
| 		runner.loading = false | ||||
| 		go func() { | ||||
| 			<-req.ctx.Done() | ||||
| @@ -478,12 +491,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) { | ||||
| 	} | ||||
| 	predMap := map[predKey]uint64{} // Sum up the total predicted usage per GPU for all runners | ||||
| 	s.loadedMu.Lock() | ||||
| 	runners := make([]*runnerRef, 0, len(s.loaded)) | ||||
| 	for _, r := range s.loaded { | ||||
| 		runners = append(runners, r) | ||||
| 	} | ||||
| 	s.loadedMu.Unlock() | ||||
| 	for _, r := range runners { | ||||
| 		r.refMu.Lock() | ||||
| 		if r.llama != nil { | ||||
| 			for _, gpu := range allGpus { | ||||
| @@ -494,6 +502,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) { | ||||
| 		} | ||||
| 		r.refMu.Unlock() | ||||
| 	} | ||||
| 	s.loadedMu.Unlock() | ||||
|  | ||||
| 	// Now that we've summed up all the GPU usage predictions across all the loaded runners, update the gpu list | ||||
| 	for i := range allGpus { | ||||
| @@ -541,7 +550,9 @@ func (s *Scheduler) filterGPUsWithoutLoadingModels(allGpus discover.GpuInfoList) | ||||
| // TODO consolidate sched_types.go | ||||
| type runnerRef struct { | ||||
| 	refMu sync.Mutex | ||||
| 	// refCond   sync.Cond // Signaled on transition from 1 -> 0 refCount | ||||
| 	refCount uint // prevent unloading if > 0 | ||||
| 	// unloading bool      // set to true when we are trying to unload the runner | ||||
|  | ||||
| 	llama          llm.LlamaServer | ||||
| 	loading        bool                 // True only during initial load, then false forever | ||||
| @@ -812,8 +823,8 @@ func (s *Scheduler) unloadAllRunners() { | ||||
|  | ||||
| func (s *Scheduler) expireRunner(model *Model) { | ||||
| 	s.loadedMu.Lock() | ||||
| 	defer s.loadedMu.Unlock() | ||||
| 	runner, ok := s.loaded[model.ModelPath] | ||||
| 	s.loadedMu.Unlock() | ||||
| 	if ok { | ||||
| 		runner.refMu.Lock() | ||||
| 		runner.expiresAt = time.Now() | ||||
|   | ||||
| @@ -148,6 +148,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est | ||||
| 		successCh:       make(chan *runnerRef, 1), | ||||
| 		errCh:           make(chan error, 1), | ||||
| 	} | ||||
| 	b.req.opts.NumCtx = 4096 | ||||
| 	b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}} | ||||
| 	return b | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user