Compare commits
39 Commits
949fc4eafa
0a4f21c0a7
9abb66254a
1d0ebe67e8
a1b2d95f96
c0b1bf7537
cdfeb165ca
92d454ec5f
9333b0cc82
9771b1ec51
76db4a49cf
4aa0976a2e
92c20fdae6
c951da7096
24d82a23a2
f40b3de758
5f4008c296
6ae33d8141
c5664c1fef
958a5a8184
8608eb4760
a2b210130f
ed20837f9a
1db2a61dd0
2ded8ab206
e6b3648bbf
0625e805f0
c38ec5befb
c577721a43
29c056ea39
9fc3bba9cf
7774ed4ae6
11f920f209
6e6b655956
110ae89a6c
5e388f931e
d5ad41dd7b
d294a11bc9
93d887e4bc
Dockerfile (18 lines changed)
@@ -1,9 +1,8 @@
ARG CUDA_VERSION=12.2.0
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG TARGETARCH
ARG VERSION=0.0.0
ARG GOFLAGS="'-ldflags=-w -s'"
WORKDIR /go/src/github.com/jmorganca/ollama
RUN apt-get update && apt-get install -y git build-essential cmake
@@ -13,19 +12,12 @@ RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
COPY . .
ENV GOARCH=$TARGETARCH
RUN /usr/local/go/bin/go generate ./... \
&& /usr/local/go/bin/go build -ldflags "-linkmode=external -extldflags='-static' -X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
&& /usr/local/go/bin/go build .
FROM ubuntu:22.04
ENV OLLAMA_HOST 0.0.0.0
RUN apt-get update && apt-get install -y ca-certificates
ARG USER=ollama
ARG GROUP=ollama
RUN groupadd $GROUP && useradd -m -g $GROUP $USER
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
USER $USER:$GROUP
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
@@ -1,4 +1,3 @@
ARG VERSION=0.0.0
# centos7 amd64 dependencies
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-devel-centos7 AS base-amd64
@@ -23,7 +22,11 @@ RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
# build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
ENV GOOS=linux
ENV GOARCH=$TARGETARCH
ARG VERSION=0.0.0
ARG GOFLAGS="'-ldflags -w -s'"
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build -ldflags "-X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
/usr/local/go/bin/go build .
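Both Dockerfile builds now stamp the release version into the binary with Go's `-ldflags "-X=..."` flag rather than compiling a default. As a toy illustration of what that linker flag does (the real variable lives in the `github.com/jmorganca/ollama/version` package; the `main.Version` name below is only for this self-contained sketch):

```go
package main

import "fmt"

// Version is overridden by the linker at build time, e.g.:
//   go build -ldflags "-X=main.Version=$VERSION"
// The Dockerfiles above do the same for
// github.com/jmorganca/ollama/version.Version.
var Version = "0.0.0"

func main() {
	fmt.Println("version:", Version)
}
```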
README.md (197 lines changed)
@@ -9,19 +9,27 @@
[](https://discord.gg/ollama)
Run, create, and share large language models (LLMs).
Get up and running with large language models locally.
> Note: Ollama is in early preview. Please report any issues you find.
### macOS
## Download
[Download](https://ollama.ai/download/Ollama-darwin.zip)
- [Download](https://ollama.ai/download) for macOS
- Download for Windows and Linux (coming soon)
- Build [from source](#building)
### Linux & WSL2
```
curl https://ollama.ai/install.sh | sh
```
[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)
### Windows
coming soon
## Quickstart
To run and chat with [Llama 2](https://ai.meta.com/llama), the new model by Meta:
To run and chat with [Llama 2](https://ollama.ai/library/llama2):
```
ollama run llama2
@@ -29,87 +37,50 @@ ollama run llama2
## Model library
Ollama supports a list of open-source models available on [ollama.ai/library](https://ollama.ai/library 'ollama model library')
Ollama supports a list of open-source models available on [ollama.ai/library](https://ollama.ai/library "ollama model library")
Here are some example open-source models that can be downloaded:
| Model | Parameters | Size | Download |
| ------------------------ | ---------- | ----- | ------------------------------- |
| Llama2 | 7B | 3.8GB | `ollama pull llama2` |
| Llama2 13B | 13B | 7.3GB | `ollama pull llama2:13b` |
| Llama2 70B | 70B | 39GB | `ollama pull llama2:70b` |
| Llama2 Uncensored | 7B | 3.8GB | `ollama pull llama2-uncensored` |
| Code Llama | 7B | 3.8GB | `ollama pull codellama` |
| Orca Mini | 3B | 1.9GB | `ollama pull orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama pull vicuna` |
| Nous-Hermes | 7B | 3.8GB | `ollama pull nous-hermes` |
| Nous-Hermes 13B | 13B | 7.3GB | `ollama pull nous-hermes:13b` |
| Wizard Vicuna Uncensored | 13B | 7.3GB | `ollama pull wizard-vicuna` |
| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Llama 2 | 7B | 3.8GB | `ollama run llama2` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
## Examples
## Customize your own model
### Pull a public model
### Import from GGUF or GGML
```
ollama pull llama2
```
Ollama supports importing GGUF and GGML file formats in the Modelfile. This means if you have a model that is not in the Ollama library, you can create it, iterate on it, and upload it to the Ollama library to share with others when you are ready.
> This command can also be used to update a local model. Only updated changes will be pulled.
1. Create a file named Modelfile, and add a `FROM` instruction with the local filepath to the model you want to import.
### Run a model interactively
```
FROM ./vicuna-33b.Q4_0.gguf
```
```
ollama run llama2
>>> hi
Hello! How can I help you today?
```
3. Create the model in Ollama
For multiline input, you can wrap text with `"""`:
```
ollama create name -f path_to_modelfile
```
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
5. Run the model
### Run a model non-interactively
```
ollama run name
```
```
$ ollama run llama2 'tell me a joke'
Sure! Here's a quick one:
Why did the scarecrow win an award? Because he was outstanding in his field!
```
### Customize a prompt
```
$ cat <<EOF >prompts.txt
tell me a joke about llamas
tell me another one
EOF
$ ollama run llama2 <prompts.txt
>>> tell me a joke about llamas
Why did the llama refuse to play hide-and-seek?
nobody likes to be hided!
>>> tell me another one
Sure, here's another one:
Why did the llama go to the bar?
To have a hay-often good time!
```
### Run a model on contents of a text file
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### Customize a model
Pull a base model:
Models from the Ollama library can be customized with a prompt. The example
```
ollama pull llama2
@@ -138,30 +109,61 @@ ollama run mario
Hello! It's your friend Mario.
```
For more examples, see the [examples](./examples) directory. For more information on creating a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
For more examples, see the [examples](./examples) directory. For more information on working with a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
### Listing local models
## CLI Reference
### Create a model
`ollama create` is used to create a model from a Modelfile.
### Pull a model
```
ollama list
ollama pull llama2
```
### Removing local models
> This command can also be used to update a local model. Only the diff will be pulled.
### Remove a model
```
ollama rm llama2
```
## Model packages
### Copy a model
### Overview
```
ollama cp llama2 my-llama2
```
Ollama bundles model weights, configurations, and data into a single package, defined by a [Modelfile](./docs/modelfile.md).
### Multiline input
<picture>
<source media="(prefers-color-scheme: dark)" height="480" srcset="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
<img alt="logo" height="480" src="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
</picture>
For multiline input, you can wrap text with `"""`:
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
### Pass in prompt as arguments
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### List models on your computer
```
ollama list
```
### Start Ollama
`ollama serve` is used when you want to start ollama without running the desktop application.
## Building
@@ -204,19 +206,18 @@ curl -X POST http://localhost:11434/api/generate -d '{
}'
```
## Community Projects using Ollama
## Community Integrations
| Project | Description |
| --------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [LangChain][1] and [LangChain.js][2] | Also, there is a question-answering [example][3]. |
| [Continue](https://github.com/continuedev/continue) | Embeds Ollama inside Visual Studio Code. The extension lets you highlight code to add to the prompt, ask questions in the sidebar, and generate code inline. |
| [LiteLLM](https://github.com/BerriAI/litellm) | Lightweight Python package to simplify LLM API calls. |
| [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) | Interact with Ollama as a chatbot on Discord. |
| [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) | Raycast extension to use Ollama for local llama inference on Raycast. |
| [Simple HTML UI](https://github.com/rtcfirefly/ollama-ui) | Also, there is a Chrome extension. |
| [Ollama-GUI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file) | 🖥️ Mac Chat Interface ⚡️ |
| [Emacs client](https://github.com/zweifisch/ollama) | |
[1]: https://python.langchain.com/docs/integrations/llms/ollama
[2]: https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama
[3]: https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
- [Continue](https://github.com/continuedev/continue)
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)
- [Emacs client](https://github.com/zweifisch/ollama)
api/types.go (26 lines changed)
@@ -31,6 +31,22 @@ func (e StatusError) Error() string {
}
}
// /api/chat
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
}
type ChatResponse struct {
CreatedAt time.Time `json:"created_at"`
Message Message `json:"message"`
}
type GenerateRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -81,22 +97,18 @@ type CopyRequest struct {
type PullRequest struct {
Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
}
type ProgressResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int `json:"total,omitempty"`
Completed int `json:"completed,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}
type PushRequest struct {
Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
}
type ListResponse struct {
@@ -106,7 +118,7 @@ type ListResponse struct {
type ModelResponse struct {
Name string `json:"name"`
ModifiedAt time.Time `json:"modified_at"`
Size int `json:"size"`
Size int64 `json:"size"`
Digest string `json:"digest"`
}
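The new `Message`, `ChatRequest`, and `ChatResponse` types back the `/api/chat` route registered later in this comparison (`r.POST("/api/chat", ChatModelHandler)`). A rough client-side sketch is below; the struct definitions simply mirror the fields in the diff, and the `localhost:11434` address follows the port exposed in the Dockerfile and used in the README's curl example.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Local mirrors of the api package types added in the diff above.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type ChatRequest struct {
	Model    string    `json:"model"`
	Messages []Message `json:"messages"`
}

func main() {
	// Build a chat request with a system message and one user turn.
	body, _ := json.Marshal(ChatRequest{
		Model: "llama2",
		Messages: []Message{
			{Role: "system", Content: "You are a helpful assistant."},
			{Role: "user", Content: "hi"},
		},
	})

	// POST it to a locally running server (default port 11434).
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// ChatModelHandler in this comparison replies with a single JSON ChatResponse.
	var out struct {
		Message Message `json:"message"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Message.Content)
}
```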
@@ -5,7 +5,7 @@ import winston from 'winston'
import 'winston-daily-rotate-file'
import * as path from 'path'
import { analytics, id } from './telemetry'
import { v4 as uuidv4 } from 'uuid'
import { installed } from './install'
require('@electron/remote/main').initialize()
@@ -164,11 +164,11 @@ app.on('before-quit', () => {
function init() {
if (app.isPackaged) {
heartbeat()
autoUpdater.checkForUpdates()
setInterval(() => {
heartbeat()
autoUpdater.checkForUpdates()
if (!updateAvailable) {
autoUpdater.checkForUpdates()
}
}, 60 * 60 * 1000)
}
@@ -234,28 +234,26 @@ app.on('window-all-closed', () => {
}
})
// In this file you can include the rest of your app's specific main process
// code. You can also put them in separate files and import them here.
let aid = ''
try {
aid = id()
} catch (e) {}
function id(): string {
const id = store.get('id') as string
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${process.arch}&version=${app.getVersion()}&id=${aid}`,
})
if (id) {
return id
}
async function heartbeat() {
analytics.track({
anonymousId: aid,
event: 'heartbeat',
properties: {
version: app.getVersion(),
},
})
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`,
})
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)
console.error(`update check failed - ${e.message}`)
})
@@ -1,19 +0,0 @@
import { Analytics } from '@segment/analytics-node'
import { v4 as uuidv4 } from 'uuid'
import Store from 'electron-store'
const store = new Store()
export const analytics = new Analytics({ writeKey: process.env.TELEMETRY_WRITE_KEY || '<empty>' })
export function id(): string {
const id = store.get('id') as string
if (id) {
return id
}
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}
cmd/cmd.go (69 lines changed)
@@ -13,9 +13,11 @@ import (
"net"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/dustin/go-humanize"
@@ -78,18 +80,18 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
currentDigest = resp.Digest
switch {
case strings.Contains(resp.Status, "embeddings"):
bar = progressbar.Default(int64(resp.Total), resp.Status)
bar.Set(resp.Completed)
bar = progressbar.Default(resp.Total, resp.Status)
bar.Set64(resp.Completed)
default:
// pulling
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
resp.Status,
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
}
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
@@ -127,13 +129,9 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}
modelName, modelTag, ok := strings.Cut(args[0], ":")
if !ok {
modelTag = "latest"
}
canonicalModelPath := server.ParseModelPath(args[0])
for _, model := range models.Models {
if model.Name == strings.Join([]string{modelName, modelTag}, ":") {
if model.Name == canonicalModelPath.GetShortTagname() {
return RunGenerate(cmd, args)
}
}
@@ -164,13 +162,13 @@ func PushHandler(cmd *cobra.Command, args []string) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -353,13 +351,13 @@ func pull(model string, insecure bool) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -430,6 +428,19 @@ func generate(cmd *cobra.Command, model, prompt string) error {
wrapTerm = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
var abort bool
go func() {
<-sigChan
cancel()
abort = true
}()
var currentLineLength int
var wordBuffer string
@@ -469,7 +480,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
return nil
}
if err := client.Generate(context.Background(), &request, fn); err != nil {
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "failed to load model") {
// tell the user to check the server log, if it exists locally
home, nestedErr := os.UserHomeDir()
@@ -481,6 +492,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
if _, nestedErr := os.Stat(logPath); nestedErr == nil {
err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
}
} else if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err
}
@@ -490,6 +504,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
}
if !latest.Done {
if abort {
return nil
}
return errors.New("unexpected end of response")
}
@@ -572,7 +589,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
return nil
fmt.Println("Use Ctrl-D or /bye to exit.")
}
continue
@@ -639,6 +656,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
} else {
usage()
}
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usage()
@@ -649,6 +668,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
resp, err := server.GetModelInfo(model)
if err != nil {
fmt.Println("error: couldn't get model")
return err
}
switch args[1] {
@@ -663,7 +683,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "template":
fmt.Println(resp.Template)
default:
fmt.Println("error: unknown command")
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usage()
@@ -725,6 +745,15 @@ func RunServer(cmd *cobra.Command, _ []string) error {
if err := server.PruneLayers(); err != nil {
return err
}
manifestsPath, err := server.GetManifestPath()
if err != nil {
return err
}
if err := server.PruneDirectory(manifestsPath); err != nil {
return err
}
}
return server.Serve(ln, origins)
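The `generate` changes above wire a SIGINT handler into a cancellable context so that Ctrl-C stops the in-flight `client.Generate` call instead of killing the process mid-stream. A minimal standalone sketch of the same pattern, with a timed sleep standing in for the generate call:

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// Cancellable context shared with the long-running call.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Translate the first SIGINT into a context cancellation.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	var abort bool
	go func() {
		<-sigChan
		abort = true // recorded before cancel() so the main goroutine sees it
		cancel()
	}()

	// Stand-in for client.Generate: returns as soon as ctx is cancelled.
	err := func(ctx context.Context) error {
		select {
		case <-time.After(10 * time.Second):
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}(ctx)

	// Mirror the CLI behaviour: a cancellation the user asked for is not an error.
	if err != nil && abort {
		fmt.Println("aborted by user")
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println("done")
}
```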
@@ -23,6 +23,10 @@ Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` a
All durations are returned in nanoseconds.
### Streaming responses
Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
## Generate a completion
```shell
@@ -14,4 +14,6 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
## Where are models stored?
Raw model data is stored under `~/.ollama/models`.
* macOS: Raw model data is stored under `~/.ollama/models`.
* Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
@@ -94,6 +94,7 @@ This bin file location should be specified as an absolute path or relative to th
### EMBED
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
FROM <model name>:<tag>
EMBED <file path>.txt
@@ -118,13 +119,14 @@ PARAMETER <parameter> <parametervalue>
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 |
| num_gpu | The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 1 |
| num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 |
| num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| stop | Sets the stop sequences to use. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
llm/llama.go (17 lines changed)
@@ -64,27 +64,29 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
for _, r := range runners {
// find all the files in the runner's bin directory
files, err := fs.Glob(llamaCppEmbed, filepath.Join(filepath.Dir(r), "*"))
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*"))
if err != nil {
// this is expected, ollama may be compiled without all runners packed in
log.Printf("%s runner not found: %v", r, err)
continue
}
runnerAvailable = true
for _, f := range files {
runnerAvailable = true
srcFile, err := llamaCppEmbed.Open(f)
if err != nil {
log.Fatalf("read llama runner %s: %v", f, err)
}
defer srcFile.Close()
// create the directory in case it does not exist
// create the directory in case it does not exist, filepath.Dir() converts the file path to the OS's format
destPath := filepath.Join(workDir, filepath.Dir(f))
if err := os.MkdirAll(destPath, 0o755); err != nil {
log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
}
// create the path to the destination file, filepath.Base() converts the file path to the OS's format
destFile := filepath.Join(destPath, filepath.Base(f))
_, err = os.Stat(destFile)
@@ -111,7 +113,8 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
// return the runners to try in priority order
localRunnersByPriority := []ModelRunner{}
for _, r := range runners {
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(workDir, r)})
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))})
}
return localRunnersByPriority
@@ -187,7 +190,7 @@ type llama struct {
var errNoGPU = errors.New("nvidia-smi command failed")
// CheckVRAM returns the available VRAM in MiB on Linux machines with NVIDIA GPUs
func CheckVRAM() (int, error) {
func CheckVRAM() (int64, error) {
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits")
var stdout bytes.Buffer
cmd.Stdout = &stdout
@@ -196,11 +199,11 @@ func CheckVRAM() (int, error) {
return 0, errNoGPU
}
var total int
var total int64
scanner := bufio.NewScanner(&stdout)
for scanner.Scan() {
line := scanner.Text()
vram, err := strconv.Atoi(line)
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
}
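The `chooseRunners` hunk swaps `filepath.Join`/`filepath.Dir` for `path.Join`/`path.Dir` when globbing inside the embedded filesystem, and keeps `filepath` only for paths written to disk: `fs.FS` (including `embed.FS`) always uses forward slashes, while `filepath` follows the host OS separator. A small sketch of that split, using `fstest.MapFS` as a stand-in for the embedded runner tree (the file names below are made up for illustration):

```go
package main

import (
	"fmt"
	"io/fs"
	"path"          // slash-separated paths: what fs.FS implementations expect
	"path/filepath" // OS-specific paths: what os.* expects
	"testing/fstest"
)

func main() {
	// Stand-in for the embedded llama.cpp runner tree (the real code uses embed.FS).
	fsys := fstest.MapFS{
		"llama.cpp/build/cpu/bin/server":  {Data: []byte("binary")},
		"llama.cpp/build/cuda/bin/server": {Data: []byte("binary")},
	}

	runner := "llama.cpp/build/cpu/bin/server"

	// Globbing inside an fs.FS always uses forward slashes, so use package path.
	files, err := fs.Glob(fsys, path.Join(path.Dir(runner), "*"))
	if err != nil {
		panic(err)
	}

	for _, f := range files {
		// Paths written to the local disk go through filepath so the
		// separators match the host OS (backslashes on Windows).
		dest := filepath.Join("workdir", filepath.Dir(f), filepath.Base(f))
		fmt.Println(f, "->", dest)
	}
}
```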
scripts/build.sh (new file, 21 lines)
@@ -0,0 +1,21 @@
#!/bin/sh
set -eu
usage() {
echo "usage: $(basename $0) VERSION"
exit 1
}
[ "$#" -eq 1 ] || usage
export VERSION="$1"
# build universal MacOS binary
sh $(dirname $0)/build_darwin.sh
# # build arm64 and amd64 Linux binaries
sh $(dirname $0)/build_linux.sh
# # build arm64 and amd64 Docker images
sh $(dirname $0)/build_docker.sh
@@ -1,29 +1,30 @@
#!/bin/bash
#!/bin/sh
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
mkdir -p dist
GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION"
GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
done
# build universal binary
GOARCH=arm64 go generate ./...
GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
rm -rf llm/llama.cpp/*/build/*/bin
GOARCH=amd64 go generate ./...
GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
lipo -create -output dist/ollama dist/ollama-darwin-*
rm -f dist/ollama-darwin-*
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
chmod +x dist/ollama
# build and sign the mac app
npm install --prefix app
npm run --prefix app make:sign
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-${VERSION:-0.0.0}.zip dist/Ollama-darwin.zip
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
ditto -c -k --keepParent dist/ollama dist/temp.zip
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
mv dist/ollama dist/ollama-darwin
rm dist/temp.zip
rm -f dist/temp.zip
scripts/build_docker.sh (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/bin/sh
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
docker buildx build \
--load \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
-f Dockerfile \
-t ollama \
.
@@ -1,12 +1,15 @@
#!/bin/bash
#!/bin/sh
set -e
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
mkdir -p dist
for ARCH in arm64 amd64; do
docker buildx build --platform=linux/$ARCH -f Dockerfile.build . -t builder:$ARCH --load
docker create --platform linux/$ARCH --name builder builder:$ARCH
docker cp builder:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$ARCH
docker rm builder
for TARGETARCH in arm64 amd64; do
docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
docker rm builder-$TARGETARCH
done
@@ -52,11 +52,15 @@ if [ -n "$NEEDS" ]; then
fi
status "Downloading ollama..."
$SUDO curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"
status "Installing ollama to /usr/bin..."
$SUDO install -o0 -g0 -m755 -d /usr/bin
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama /usr/bin/ollama
for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue
done
status "Installing ollama to $BINDIR..."
$SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
install_success() { status 'Install complete. Run "ollama" from the command line.'; }
trap install_success EXIT
@@ -76,12 +80,13 @@ Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/bin/ollama serve
ExecStart=$BINDIR/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/usr/share/ollama"
Environment="PATH=$PATH"
[Install]
WantedBy=default.target
@@ -92,7 +97,9 @@ EOF
status "Enabling and starting ollama service..."
$SUDO systemctl daemon-reload
$SUDO systemctl enable ollama
$SUDO systemctl restart ollama
start_service() { $SUDO systemctl restart ollama; }
trap start_service EXIT
;;
esac
}
@@ -114,6 +121,11 @@ check_gpu() {
esac
}
if check_gpu nvidia-smi; then
status "NVIDIA GPU installed."
exit 0
fi
if ! check_gpu lspci && ! check_gpu lshw; then
warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
exit 0
@@ -208,7 +220,8 @@ fi
if ! lsmod | grep -q nvidia; then
KERNEL_RELEASE="$(uname -r)"
case $OS_NAME in
centos|rhel|rocky|fedora|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
*) exit ;;
esac
@@ -219,9 +232,12 @@ if ! lsmod | grep -q nvidia; then
fi
if lsmod | grep -q nouveau; then
status "Removing nouveau..."
$SUDO rmmod nouveau
status 'Reboot to complete NVIDIA CUDA driver install.'
exit 0
fi
$SUDO modprobe nvidia
fi
status "NVIDIA CUDA drivers installed."
@@ -46,8 +46,8 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
// we already have the file, so return
opts.fn(api.ProgressResponse{
Digest: opts.digest,
Total: int(fi.Size()),
Completed: int(fi.Size()),
Total: fi.Size(),
Completed: fi.Size(),
})
return nil
@@ -93,8 +93,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
// successful download while monitoring
opts.fn(api.ProgressResponse{
Digest: f.Digest,
Total: int(fi.Size()),
Completed: int(fi.Size()),
Total: fi.Size(),
Completed: fi.Size(),
})
return true, false, nil
}
@@ -109,8 +109,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", f.Digest),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
return false, false, nil
}()
@@ -129,8 +129,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
}
var (
chunkSize = 1024 * 1024 // 1 MiB in bytes
errDownload = fmt.Errorf("download failed")
chunkSize int64 = 1024 * 1024 // 1 MiB in bytes
errDownload = fmt.Errorf("download failed")
)
// doDownload downloads a blob from the registry and stores it in the blobs directory
@@ -147,7 +147,7 @@ func doDownload(ctx context.Context, opts downloadOpts, f *FileDownload) error {
default:
size = fi.Size()
// Ensure the size is divisible by the chunk size by removing excess bytes
size -= size % int64(chunkSize)
size -= size % chunkSize
err := os.Truncate(f.FilePath+"-partial", size)
if err != nil {
@@ -200,8 +200,8 @@ outerLoop:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", f.Digest),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
if f.Completed >= f.Total {
@@ -213,8 +213,8 @@ outerLoop:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("error renaming file: %v", err),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
return err
}
@@ -223,7 +223,7 @@ outerLoop:
}
}
n, err := io.CopyN(out, resp.Body, int64(chunkSize))
n, err := io.CopyN(out, resp.Body, chunkSize)
if err != nil && !errors.Is(err, io.EOF) {
return fmt.Errorf("%w: %w", errDownload, err)
}
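The `doDownload` hunk resumes a partial blob by truncating it down to a whole number of chunks before continuing with `io.CopyN`. A rough standalone sketch of that resume logic; the file name and sizes below are illustrative only, and a real download would read from `resp.Body` with a Range request starting at the truncated size:

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"os"
	"strings"
)

const chunkSize int64 = 1024 * 1024 // 1 MiB, matching the diff

func main() {
	// Pretend this is the "-partial" file left by an interrupted download.
	partial := "blob-partial"
	if err := os.WriteFile(partial, make([]byte, 2_500_000), 0o644); err != nil {
		panic(err)
	}
	defer os.Remove(partial)

	fi, err := os.Stat(partial)
	if err != nil {
		panic(err)
	}

	// Drop the trailing incomplete chunk so the download resumes on a clean
	// chunk boundary (2,500,000 bytes -> 2,097,152 bytes).
	size := fi.Size()
	size -= size % chunkSize
	if err := os.Truncate(partial, size); err != nil {
		panic(err)
	}

	// Copy the remainder chunk by chunk, stopping at EOF.
	src := strings.NewReader(strings.Repeat("x", 3*1024*1024))
	out, err := os.OpenFile(partial, os.O_APPEND|os.O_WRONLY, 0o644)
	if err != nil {
		panic(err)
	}
	defer out.Close()

	for {
		n, err := io.CopyN(out, src, chunkSize)
		if err != nil && !errors.Is(err, io.EOF) {
			panic(err)
		}
		fmt.Println("copied", n, "bytes")
		if errors.Is(err, io.EOF) {
			break
		}
	}
}
```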
@@ -54,6 +54,54 @@ type Model struct {
Embeddings []vector.Embedding
}
func (m *Model) ChatPrompt(messages []api.Message) (string, error) {
tmpl, err := template.New("").Parse(m.Template)
if err != nil {
return "", err
}
var vars struct {
System string
Prompt string
First bool
}
vars.First = true
var sb strings.Builder
flush := func() {
tmpl.Execute(&sb, vars)
vars.System = ""
vars.Prompt = ""
}
// build the chat history from messages
for _, m := range messages {
if m.Role == "system" {
if vars.System != "" {
flush()
}
vars.System = m.Content
}
if m.Role == "user" {
if vars.Prompt != "" {
flush()
}
vars.Prompt = m.Content
}
if m.Role == "assistant" {
flush()
sb.Write([]byte(m.Content))
}
}
flush()
return sb.String(), nil
}
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
t := m.Template
if request.Template != "" {
@@ -103,7 +151,7 @@ type ManifestV2 struct {
type Layer struct {
MediaType string `json:"mediaType"`
Digest string `json:"digest"`
Size int `json:"size"`
Size int64 `json:"size"`
From string `json:"from,omitempty"`
}
@@ -129,11 +177,11 @@ type RootFS struct {
DiffIDs []string `json:"diff_ids"`
}
func (m *ManifestV2) GetTotalSize() int {
var total int
func (m *ManifestV2) GetTotalSize() (total int64) {
for _, layer := range m.Layers {
total += layer.Size
}
total += m.Config.Size
return total
}
@@ -649,8 +697,8 @@ func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error)
e.fn(api.ProgressResponse{
Status: fmt.Sprintf("creating embeddings for file %s", filePath),
Digest: fileDigest,
Total: len(data) - 1,
Completed: i,
Total: int64(len(data) - 1),
Completed: int64(i),
})
if len(existing[d]) > 0 {
// already have an embedding for this line
@@ -675,7 +723,7 @@ func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error)
Layer: Layer{
MediaType: "application/vnd.ollama.image.embed",
Digest: digest,
Size: r.Len(),
Size: r.Size(),
},
Reader: r,
}
@@ -1005,6 +1053,39 @@ func PruneLayers() error {
return nil
}
func PruneDirectory(path string) error {
info, err := os.Lstat(path)
if err != nil {
return err
}
if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
entries, err := os.ReadDir(path)
if err != nil {
return err
}
for _, entry := range entries {
if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
return err
}
}
entries, err = os.ReadDir(path)
if err != nil {
return err
}
if len(entries) > 0 {
return nil
}
return os.Remove(path)
}
return nil
}
func DeleteModel(name string) error {
mp := ParseModelPath(name)
manifest, _, err := GetManifest(mp)
@@ -1356,14 +1437,14 @@ func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
}
// GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer
func GetSHA256Digest(r io.Reader) (string, int) {
func GetSHA256Digest(r io.Reader) (string, int64) {
h := sha256.New()
n, err := io.Copy(h, r)
if err != nil {
log.Fatal(err)
}
return fmt.Sprintf("sha256:%x", h.Sum(nil)), int(n)
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
}
// Function to check if a blob already exists in the Docker registry
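`ChatPrompt` above renders the model's prompt template once per user turn, flushing the accumulated `System`/`Prompt` values and appending assistant replies verbatim. A small standalone illustration of that flush pattern follows; the template text is made up for this sketch, since the real template ships with each model's Modelfile.

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

func main() {
	// Hypothetical model template; real ones come from the Modelfile.
	tmpl := template.Must(template.New("").Parse(
		"{{if .System}}[INST] <<SYS>>{{.System}}<</SYS>>\n{{end}}{{.Prompt}} [/INST] "))

	var vars struct {
		System string
		Prompt string
		First  bool
	}
	vars.First = true

	var sb strings.Builder
	flush := func() {
		// Render whatever has accumulated, then reset for the next turn.
		tmpl.Execute(&sb, vars)
		vars.System = ""
		vars.Prompt = ""
	}

	// A short fake conversation in the role/content shape of api.Message.
	messages := []struct{ Role, Content string }{
		{"system", "You are a pirate."},
		{"user", "hi"},
		{"assistant", "Ahoy!"},
		{"user", "tell me a joke"},
	}

	for _, m := range messages {
		switch m.Role {
		case "system":
			if vars.System != "" {
				flush()
			}
			vars.System = m.Content
		case "user":
			if vars.Prompt != "" {
				flush()
			}
			vars.Prompt = m.Content
		case "assistant":
			// Assistant turns are appended as-is after rendering the pending prompt.
			flush()
			sb.WriteString(m.Content)
		}
	}
	flush()

	fmt.Println(sb.String())
}
```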
@@ -4,9 +4,9 @@ import "testing"
func TestParseModelPath(t *testing.T) {
tests := []struct {
name string
arg string
want ModelPath
name string
arg string
want ModelPath
}{
{
"full path https",
@@ -156,6 +156,54 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
return nil
}
func ChatModelHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
var req api.ChatRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
model, err := GetModel(req.Model)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
prompt, err := model.ChatPrompt(req.Messages)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
var response string
fn := func(r api.GenerateResponse) {
response += r.Response
}
workDir := c.GetString("workDir")
if err := load(c.Request.Context(), workDir, model, nil, defaultSessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
fmt.Println(prompt)
if err := loaded.llm.Predict(c.Request.Context(), []int{}, prompt, fn); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
}
c.JSON(http.StatusOK, api.ChatResponse{
Message: api.Message{
Role: "assistant",
Content: response,
},
CreatedAt: time.Now().UTC(),
})
}
func GenerateHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
@@ -292,8 +340,6 @@ func PullModelHandler(c *gin.Context) {
regOpts := &RegistryOptions{
Insecure: req.Insecure,
Username: req.Username,
Password: req.Password,
}
ctx, cancel := context.WithCancel(c.Request.Context())
@@ -323,8 +369,6 @@ func PushModelHandler(c *gin.Context) {
regOpts := &RegistryOptions{
Insecure: req.Insecure,
Username: req.Username,
Password: req.Password,
}
ctx := context.Background()
@@ -378,6 +422,18 @@ func DeleteModelHandler(c *gin.Context) {
}
return
}
manifestsPath, err := GetManifestPath()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := PruneDirectory(manifestsPath); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, nil)
}
@@ -544,6 +600,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
},
)
r.POST("/api/chat", ChatModelHandler)
r.POST("/api/pull", PullModelHandler)
r.POST("/api/generate", GenerateHandler)
r.POST("/api/embeddings", EmbeddingHandler)
@@ -602,6 +659,7 @@ func streamResponse(c *gin.Context, ch chan any) {
return false
}
// Delineate chunks with new-line delimiter
bts = append(bts, '\n')
if _, err := w.Write(bts); err != nil {
log.Printf("streamResponse: w.Write failed with %s", err)
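`streamResponse` now appends a `'\n'` after each JSON chunk, matching the "Streaming responses" note added to the API docs earlier in this comparison. Reading such a stream from Go is just a matter of decoding one JSON object per line; a minimal sketch (the `/api/generate` request body and the `response`/`done` field names follow the docs and types shown above, everything else is illustrative):

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Stream a generation; the server writes one JSON object per line.
	resp, err := http.Post("http://localhost:11434/api/generate",
		"application/json",
		strings.NewReader(`{"model":"llama2","prompt":"why is the sky blue?"}`))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		// Each line is a self-contained progress/response object.
		var chunk struct {
			Response string `json:"response"`
			Done     bool   `json:"done"`
		}
		if err := json.Unmarshal(scanner.Bytes(), &chunk); err != nil {
			panic(err)
		}
		fmt.Print(chunk.Response)
		if chunk.Done {
			break
		}
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}
```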
@@ -15,8 +15,8 @@ import (
)
const (
redirectChunkSize = 1024 * 1024 * 1024
regularChunkSize = 95 * 1024 * 1024
redirectChunkSize int64 = 1024 * 1024 * 1024
regularChunkSize int64 = 95 * 1024 * 1024
)
func startUpload(ctx context.Context, mp ModelPath, layer *Layer, regOpts *RegistryOptions) (*url.URL, int64, error) {
@@ -48,7 +48,7 @@ func startUpload(ctx context.Context, mp ModelPath, layer *Layer, regOpts *Regis
return nil, 0, err
}
return locationURL, int64(chunkSize), nil
return locationURL, chunkSize, nil
}
func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSize int64, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
@@ -73,10 +73,10 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
fn: fn,
}
for offset := int64(0); offset < int64(layer.Size); {
chunk := int64(layer.Size) - offset
if chunk > int64(chunkSize) {
chunk = int64(chunkSize)
for offset := int64(0); offset < layer.Size; {
chunk := layer.Size - offset
if chunk > chunkSize {
chunk = chunkSize
}
resp, err := uploadBlobChunk(ctx, http.MethodPatch, requestURL, f, offset, chunk, regOpts, &pw)
@@ -85,7 +85,7 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
Status: fmt.Sprintf("error uploading chunk: %v", err),
Digest: layer.Digest,
Total: layer.Size,
Completed: int(offset),
Completed: offset,
})
return err
@@ -127,7 +127,7 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
}
func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r io.ReaderAt, offset, limit int64, opts *RegistryOptions, pw *ProgressWriter) (*http.Response, error) {
sectionReader := io.NewSectionReader(r, int64(offset), limit)
sectionReader := io.NewSectionReader(r, offset, limit)
headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream")
@@ -152,7 +152,7 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
return nil, err
}
pw.completed = int(offset)
pw.completed = offset
if _, err := uploadBlobChunk(ctx, http.MethodPut, location, r, offset, limit, nil, pw); err != nil {
// retry
log.Printf("retrying redirected upload: %v", err)
@@ -170,7 +170,7 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
opts.Token = token
pw.completed = int(offset)
pw.completed = offset
sectionReader = io.NewSectionReader(r, offset, limit)
continue
case resp.StatusCode >= http.StatusBadRequest:
@@ -187,19 +187,19 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
type ProgressWriter struct {
status string
digest string
bucket int
completed int
total int
bucket int64
completed int64
total int64
fn func(api.ProgressResponse)
}
func (pw *ProgressWriter) Write(b []byte) (int, error) {
n := len(b)
pw.bucket += n
pw.completed += n
pw.bucket += int64(n)
// throttle status updates to not spam the client
if pw.bucket >= 1024*1024 || pw.completed >= pw.total {
if pw.bucket >= 1024*1024 || pw.completed+pw.bucket >= pw.total {
pw.completed += pw.bucket
pw.fn(api.ProgressResponse{
Status: pw.status,
Digest: pw.digest,