Compare commits
	
		
			190 Commits
		
	
	
		
			matt/strea
			...
			mattw/howt
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 4522109b11 | ||
|   | b2974a7095 | ||
|   | 3c975f898f | ||
|   | 9245c8a1df | ||
|   | 7a537cdca9 | ||
|   | 257ffeb997 | ||
|   | 9b513bb6b1 | ||
|   | 042100f797 | ||
|   | 7804b8fab9 | ||
|   | 56497663c8 | ||
|   | e1afcb8af2 | ||
|   | 385eeea357 | ||
|   | 8a41b244e8 | ||
|   | 92578798bb | ||
|   | 788637918a | ||
|   | c413a55093 | ||
|   | 630bb75d2a | ||
|   | a2055a1e93 | ||
|   | b599946b74 | ||
|   | aca2d65b82 | ||
|   | b5e08e3373 | ||
|   | 274d5a5fdf | ||
|   | fc6b49be32 | ||
|   | 77295f716e | ||
|   | 615f7d1dea | ||
|   | cdf5e106ae | ||
|   | a85329f59a | ||
|   | f2ba1311aa | ||
|   | 65dcd0ce35 | ||
|   | 0040f543a2 | ||
|   | 767f9bdbbb | ||
|   | f7f5169c94 | ||
|   | 2cfffea02e | ||
|   | f6e98334e4 | ||
|   | ab0668293c | ||
|   | af4cf55884 | ||
|   | d6786f2945 | ||
|   | 38dc2f79bc | ||
|   | cb961c87ca | ||
|   | 0560b28a8d | ||
|   | 10199c5987 | ||
|   | 288814d3e4 | ||
|   | 04733438da | ||
|   | 711e891f0f | ||
|   | 090d08422b | ||
|   | 5b84404c64 | ||
|   | 8544edca21 | ||
|   | 5d22319a2c | ||
|   | 2130c0708b | ||
|   | 61ff1946e6 | ||
|   | d06bc0cb6e | ||
|   | d104b7e997 | ||
|   | 9e2de1bd2c | ||
|   | dc87e9c9ae | ||
|   | 367cb68dc1 | ||
|   | c02c0cd483 | ||
|   | 1852755154 | ||
|   | 6f2ce74231 | ||
|   | 6edcc5c79f | ||
|   | b1f7123301 | ||
|   | 1fbf3585d6 | ||
|   | 99d5161e8a | ||
|   | ea8380be45 | ||
|   | 4f25092dc1 | ||
|   | 4fc10acce9 | ||
|   | 0a4f21c0a7 | ||
|   | 9abb66254a | ||
|   | 1d0ebe67e8 | ||
|   | a1b2d95f96 | ||
|   | c0b1bf7537 | ||
|   | cdfeb165ca | ||
|   | 92d454ec5f | ||
|   | 9333b0cc82 | ||
|   | 9771b1ec51 | ||
|   | 76db4a49cf | ||
|   | 4aa0976a2e | ||
|   | 92c20fdae6 | ||
|   | c951da7096 | ||
|   | 24d82a23a2 | ||
|   | f40b3de758 | ||
|   | 5f4008c296 | ||
|   | 6ae33d8141 | ||
|   | c5664c1fef | ||
|   | 958a5a8184 | ||
|   | 8608eb4760 | ||
|   | a2b210130f | ||
|   | ed20837f9a | ||
|   | 1db2a61dd0 | ||
|   | 2ded8ab206 | ||
|   | e6b3648bbf | ||
|   | 0625e805f0 | ||
|   | c38ec5befb | ||
|   | c577721a43 | ||
|   | 29c056ea39 | ||
|   | 9fc3bba9cf | ||
|   | 7774ed4ae6 | ||
|   | 11f920f209 | ||
|   | 6e6b655956 | ||
|   | 110ae89a6c | ||
|   | 5e388f931e | ||
|   | d5ad41dd7b | ||
|   | d294a11bc9 | ||
|   | 93d887e4bc | ||
|   | 5306b0269d | ||
|   | 7de0c8345d | ||
|   | 1b9dcab3ab | ||
|   | 86279f4ae3 | ||
|   | b934bf23e6 | ||
|   | 2b8ef455ad | ||
|   | 0c5f47177c | ||
|   | 1210db2924 | ||
|   | d0854bf1e6 | ||
|   | 8396463255 | ||
|   | a027bbf4d7 | ||
|   | ed94a3dd02 | ||
|   | f14f62ab3b | ||
|   | 0fb5268496 | ||
|   | c65edb1506 | ||
|   | 1605af32ec | ||
|   | ee3032ad89 | ||
|   | 5b7a27281d | ||
|   | d2a784e33e | ||
|   | 413a2e4f91 | ||
|   | a92fdff620 | ||
|   | b5614f3ebc | ||
|   | 8b2ba9cab8 | ||
|   | e29662ab5c | ||
|   | cbc40aa996 | ||
|   | 5cb82540c9 | ||
|   | d7849a1dc9 | ||
|   | 01c44d687e | ||
|   | 9b12a511ca | ||
|   | e20362e0d5 | ||
|   | c928ceb927 | ||
|   | e1a0846483 | ||
|   | f997e29e45 | ||
|   | 87d9efb364 | ||
|   | 93d3a2568d | ||
|   | 5a81390b24 | ||
|   | a89ef99aed | ||
|   | dc0c725ceb | ||
|   | 5d71bda478 | ||
|   | 88897a90e4 | ||
|   | 9df31c3518 | ||
|   | 2044f9d4da | ||
|   | 0d186f3b33 | ||
|   | 82f5b66c01 | ||
|   | c986694367 | ||
|   | 058d0cd04b | ||
|   | ee1c994d15 | ||
|   | 4cba75efc5 | ||
|   | 8c83701e9f | ||
|   | 6137b12799 | ||
|   | 1fabba474b | ||
|   | 765770efdb | ||
|   | 9297ff8330 | ||
|   | ee4fd16f2c | ||
|   | a9ed7cc6aa | ||
|   | 6c6a31a1e8 | ||
|   | fc6ec356fc | ||
|   | 1255bc9b45 | ||
|   | 084e4c782a | ||
|   | 58ffa03d8b | ||
|   | 637f8bc6a5 | ||
|   | 499e9007a5 | ||
|   | b9bb5ca288 | ||
|   | 4e8be787c7 | ||
|   | aa45d7c1df | ||
|   | e35565c567 | ||
|   | a5520bfb42 | ||
|   | 2627c464ba | ||
|   | b58d5d16b0 | ||
|   | 24580df958 | ||
|   | 80dd44e80a | ||
|   | 94e1d96b29 | ||
|   | 66003e1d05 | ||
|   | c345053a8b | ||
|   | 08d7c2a944 | ||
|   | bc9573dcb1 | ||
|   | e53bc57d4d | ||
|   | f0b398d17f | ||
|   | 8efbc5df55 | ||
|   | ccc3e9ac6d | ||
|   | daa4f096f9 | ||
|   | 3ee85f1c6c | ||
|   | 2540c9181c | ||
|   | 83ffb154bc | ||
|   | 9aa192c812 | ||
|   | fc8707686f | ||
|   | e6881cabd0 | 
| @@ -1,5 +1,8 @@ | ||||
| .vscode | ||||
| ollama | ||||
| app | ||||
| dist | ||||
| scripts | ||||
| llm/llama.cpp/ggml | ||||
| llm/llama.cpp/gguf | ||||
| .env | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							| @@ -6,4 +6,5 @@ | ||||
| [submodule "llm/llama.cpp/gguf"] | ||||
|     path = llm/llama.cpp/gguf | ||||
|     url = https://github.com/ggerganov/llama.cpp.git | ||||
|     ignore = dirty | ||||
|     shallow = true | ||||
|   | ||||
							
								
								
									
										28
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,21 +1,23 @@ | ||||
| FROM golang:alpine | ||||
| FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 | ||||
|  | ||||
| ARG TARGETARCH | ||||
| ARG GOFLAGS="'-ldflags=-w -s'" | ||||
|  | ||||
| WORKDIR /go/src/github.com/jmorganca/ollama | ||||
| RUN apk add --no-cache git build-base cmake | ||||
| RUN apt-get update && apt-get install -y git build-essential cmake | ||||
| ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz | ||||
| RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz | ||||
|  | ||||
| COPY . . | ||||
| RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' . | ||||
|  | ||||
| FROM alpine | ||||
| ENV OLLAMA_HOST 0.0.0.0 | ||||
| RUN apk add --no-cache libstdc++ | ||||
|  | ||||
| ARG USER=ollama | ||||
| ARG GROUP=ollama | ||||
| RUN addgroup $GROUP && adduser -D -G $GROUP $USER | ||||
| ENV GOARCH=$TARGETARCH | ||||
| ENV GOFLAGS=$GOFLAGS | ||||
| RUN /usr/local/go/bin/go generate ./... \ | ||||
|     && /usr/local/go/bin/go build . | ||||
|  | ||||
| FROM ubuntu:22.04 | ||||
| RUN apt-get update && apt-get install -y ca-certificates | ||||
| COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama | ||||
|  | ||||
| USER $USER:$GROUP | ||||
| EXPOSE 11434 | ||||
| ENV OLLAMA_HOST 0.0.0.0 | ||||
| ENTRYPOINT ["/bin/ollama"] | ||||
| CMD ["serve"] | ||||
|   | ||||
							
								
								
									
										32
									
								
								Dockerfile.build
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								Dockerfile.build
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
|  | ||||
| # centos7 amd64 dependencies | ||||
| FROM --platform=linux/amd64 nvidia/cuda:11.8.0-devel-centos7 AS base-amd64 | ||||
| RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl && \ | ||||
|     yum update -y && \ | ||||
|     yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236 wget | ||||
| RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6-linux-x86_64.sh" -O cmake-installer.sh && chmod +x cmake-installer.sh && ./cmake-installer.sh --skip-license --prefix=/usr/local | ||||
| ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH | ||||
|  | ||||
| # centos8 arm64 dependencies | ||||
| FROM --platform=linux/arm64 nvidia/cuda:11.4.3-devel-centos8 AS base-arm64 | ||||
| RUN sed -i -e 's/mirrorlist/#mirrorlist/g' -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* | ||||
| RUN yum install -y git cmake | ||||
|  | ||||
| FROM base-${TARGETARCH} | ||||
| ARG TARGETARCH | ||||
| ARG GOFLAGS="'-ldflags -w -s'" | ||||
|  | ||||
| # install go | ||||
| ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz | ||||
| RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz | ||||
|  | ||||
| # build the final binary | ||||
| WORKDIR /go/src/github.com/jmorganca/ollama | ||||
| COPY . . | ||||
|  | ||||
| ENV GOOS=linux | ||||
| ENV GOARCH=$TARGETARCH | ||||
| ENV GOFLAGS=$GOFLAGS | ||||
|  | ||||
| RUN /usr/local/go/bin/go generate ./... && \ | ||||
|     /usr/local/go/bin/go build . | ||||
| @@ -1,22 +0,0 @@ | ||||
| FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 | ||||
|  | ||||
| WORKDIR /go/src/github.com/jmorganca/ollama | ||||
| RUN apt-get update && apt-get install -y git build-essential cmake | ||||
| ADD https://dl.google.com/go/go1.21.1.linux-amd64.tar.gz /tmp/go1.21.1.tar.gz | ||||
| RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz | ||||
|  | ||||
| COPY . . | ||||
| RUN /usr/local/go/bin/go generate ./... && /usr/local/go/bin/go build -ldflags '-linkmode external -extldflags "-static"' . | ||||
|  | ||||
| FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 | ||||
| ENV OLLAMA_HOST 0.0.0.0 | ||||
|  | ||||
| ARG USER=ollama | ||||
| ARG GROUP=ollama | ||||
| RUN groupadd $GROUP && useradd -m -g $GROUP $USER | ||||
|  | ||||
| COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama | ||||
|  | ||||
| USER $USER:$GROUP | ||||
| ENTRYPOINT ["/bin/ollama"] | ||||
| CMD ["serve"] | ||||
							
								
								
									
										191
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										191
									
								
								README.md
									
									
									
									
									
								
							| @@ -9,19 +9,27 @@ | ||||
|  | ||||
| [](https://discord.gg/ollama) | ||||
|  | ||||
| Run, create, and share large language models (LLMs). | ||||
| Get up and running with large language models locally. | ||||
|  | ||||
| > Note: Ollama is in early preview. Please report any issues you find. | ||||
| ### macOS | ||||
|  | ||||
| ## Download | ||||
| [Download](https://ollama.ai/download/Ollama-darwin.zip) | ||||
|  | ||||
| - [Download](https://ollama.ai/download) for macOS | ||||
| - Download for Windows and Linux (coming soon) | ||||
| - Build [from source](#building) | ||||
| ### Linux & WSL2 | ||||
|  | ||||
| ``` | ||||
| curl https://ollama.ai/install.sh | sh | ||||
| ``` | ||||
|  | ||||
| [Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md) | ||||
|  | ||||
| ### Windows | ||||
|  | ||||
| coming soon | ||||
|  | ||||
| ## Quickstart | ||||
|  | ||||
| To run and chat with [Llama 2](https://ai.meta.com/llama), the new model by Meta: | ||||
| To run and chat with [Llama 2](https://ollama.ai/library/llama2): | ||||
|  | ||||
| ``` | ||||
| ollama run llama2 | ||||
| @@ -33,83 +41,46 @@ Ollama supports a list of open-source models available on [ollama.ai/library](ht | ||||
|  | ||||
| Here are some example open-source models that can be downloaded: | ||||
|  | ||||
| | Model                    | Parameters | Size  | Download                        | | ||||
| | ------------------------ | ---------- | ----- | ------------------------------- | | ||||
| | Llama2                   | 7B         | 3.8GB | `ollama pull llama2`            | | ||||
| | Llama2 13B               | 13B        | 7.3GB | `ollama pull llama2:13b`        | | ||||
| | Llama2 70B               | 70B        | 39GB  | `ollama pull llama2:70b`        | | ||||
| | Llama2 Uncensored        | 7B         | 3.8GB | `ollama pull llama2-uncensored` | | ||||
| | Code Llama               | 7B         | 3.8GB | `ollama pull codellama`         | | ||||
| | Orca Mini                | 3B         | 1.9GB | `ollama pull orca-mini`         | | ||||
| | Vicuna                   | 7B         | 3.8GB | `ollama pull vicuna`            | | ||||
| | Nous-Hermes              | 7B         | 3.8GB | `ollama pull nous-hermes`       | | ||||
| | Nous-Hermes 13B          | 13B        | 7.3GB | `ollama pull nous-hermes:13b`   | | ||||
| | Wizard Vicuna Uncensored | 13B        | 7.3GB | `ollama pull wizard-vicuna`     | | ||||
| | Model              | Parameters | Size  | Download                       | | ||||
| | ------------------ | ---------- | ----- | ------------------------------ | | ||||
| | Mistral            | 7B         | 4.1GB | `ollama run mistral`           | | ||||
| | Llama 2            | 7B         | 3.8GB | `ollama run llama2`            | | ||||
| | Code Llama         | 7B         | 3.8GB | `ollama run codellama`         | | ||||
| | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` | | ||||
| | Llama 2 13B        | 13B        | 7.3GB | `ollama run llama2:13b`        | | ||||
| | Llama 2 70B        | 70B        | 39GB  | `ollama run llama2:70b`        | | ||||
| | Orca Mini          | 3B         | 1.9GB | `ollama run orca-mini`         | | ||||
| | Vicuna             | 7B         | 3.8GB | `ollama run vicuna`            | | ||||
|  | ||||
| > Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models. | ||||
|  | ||||
| ## Examples | ||||
| ## Customize your own model | ||||
|  | ||||
| ### Pull a public model | ||||
| ### Import from GGUF or GGML | ||||
|  | ||||
| ``` | ||||
| ollama pull llama2 | ||||
| ``` | ||||
| Ollama supports importing GGUF and GGML file formats in the Modelfile. This means if you have a model that is not in the Ollama library, you can create it, iterate on it, and upload it to the Ollama library to share with others when you are ready. | ||||
|  | ||||
| > This command can also be used to update a local model. Only updated changes will be pulled. | ||||
| 1. Create a file named Modelfile, and add a `FROM` instruction with the local filepath to the model you want to import. | ||||
|  | ||||
| ### Run a model interactively | ||||
|    ``` | ||||
|    FROM ./vicuna-33b.Q4_0.gguf | ||||
|    ``` | ||||
|  | ||||
| ``` | ||||
| ollama run llama2 | ||||
| >>> hi | ||||
| Hello! How can I help you today? | ||||
| ``` | ||||
| 2. Create the model in Ollama | ||||
|  | ||||
| For multiline input, you can wrap text with `"""`: | ||||
|    ``` | ||||
|    ollama create name -f path_to_modelfile | ||||
|    ``` | ||||
|  | ||||
| ``` | ||||
| >>> """Hello, | ||||
| ... world! | ||||
| ... """ | ||||
| I'm a basic program that prints the famous "Hello, world!" message to the console. | ||||
| ``` | ||||
| 3. Run the model | ||||
|  | ||||
| ### Run a model non-interactively | ||||
|    ``` | ||||
|    ollama run name | ||||
|    ``` | ||||
|  | ||||
| ``` | ||||
| $ ollama run llama2 'tell me a joke' | ||||
|  Sure! Here's a quick one: | ||||
|  Why did the scarecrow win an award? Because he was outstanding in his field! | ||||
| ``` | ||||
| ### Customize a prompt | ||||
|  | ||||
| ``` | ||||
| $ cat <<EOF >prompts.txt | ||||
| tell me a joke about llamas | ||||
| tell me another one | ||||
| EOF | ||||
| $ ollama run llama2 <prompts.txt | ||||
| >>> tell me a joke about llamas | ||||
|  Why did the llama refuse to play hide-and-seek? | ||||
|  nobody likes to be hided! | ||||
|  | ||||
| >>> tell me another one | ||||
|  Sure, here's another one: | ||||
|  | ||||
| Why did the llama go to the bar? | ||||
| To have a hay-often good time! | ||||
| ``` | ||||
|  | ||||
| ### Run a model on contents of a text file | ||||
|  | ||||
| ``` | ||||
| $ ollama run llama2 "summarize this file:" "$(cat README.md)" | ||||
|  Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. | ||||
| ``` | ||||
|  | ||||
| ### Customize a model | ||||
|  | ||||
| Pull a base model: | ||||
| Models from the Ollama library can be customized with a prompt. The example | ||||
|  | ||||
| ``` | ||||
| ollama pull llama2 | ||||
| @@ -138,30 +109,61 @@ ollama run mario | ||||
| Hello! It's your friend Mario. | ||||
| ``` | ||||
|  | ||||
| For more examples, see the [examples](./examples) directory. For more information on creating a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation. | ||||
| For more examples, see the [examples](examples) directory. For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation. | ||||
|  | ||||
| ### Listing local models | ||||
| ## CLI Reference | ||||
|  | ||||
| ### Create a model | ||||
|  | ||||
| `ollama create` is used to create a model from a Modelfile. | ||||
|  | ||||
| ### Pull a model | ||||
|  | ||||
| ``` | ||||
| ollama list | ||||
| ollama pull llama2 | ||||
| ``` | ||||
|  | ||||
| ### Removing local models | ||||
| > This command can also be used to update a local model. Only the diff will be pulled. | ||||
|  | ||||
| ### Remove a model | ||||
|  | ||||
| ``` | ||||
| ollama rm llama2 | ||||
| ``` | ||||
|  | ||||
| ## Model packages | ||||
| ### Copy a model | ||||
|  | ||||
| ### Overview | ||||
| ``` | ||||
| ollama cp llama2 my-llama2 | ||||
| ``` | ||||
|  | ||||
| Ollama bundles model weights, configurations, and data into a single package, defined by a [Modelfile](./docs/modelfile.md). | ||||
| ### Multiline input | ||||
|  | ||||
| <picture> | ||||
|   <source media="(prefers-color-scheme: dark)" height="480" srcset="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70"> | ||||
|   <img alt="logo" height="480" src="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70"> | ||||
| </picture> | ||||
| For multiline input, you can wrap text with `"""`: | ||||
|  | ||||
| ``` | ||||
| >>> """Hello, | ||||
| ... world! | ||||
| ... """ | ||||
| I'm a basic program that prints the famous "Hello, world!" message to the console. | ||||
| ``` | ||||
|  | ||||
| ### Pass in prompt as arguments | ||||
|  | ||||
| ``` | ||||
| $ ollama run llama2 "summarize this file:" "$(cat README.md)" | ||||
|  Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. | ||||
| ``` | ||||
|  | ||||
| ### List models on your computer | ||||
|  | ||||
| ``` | ||||
| ollama list | ||||
| ``` | ||||
|  | ||||
| ### Start Ollama | ||||
|  | ||||
| `ollama serve` is used when you want to start ollama without running the desktop application. | ||||
|  | ||||
| ## Building | ||||
|  | ||||
| @@ -193,7 +195,7 @@ Finally, in a separate shell, run a model: | ||||
|  | ||||
| ## REST API | ||||
|  | ||||
| > See the [API documentation](./docs/api.md) for all endpoints. | ||||
| > See the [API documentation](docs/api.md) for all endpoints. | ||||
|  | ||||
| Ollama has an API for running and managing models. For example to generate text from a model: | ||||
|  | ||||
| @@ -204,12 +206,19 @@ curl -X POST http://localhost:11434/api/generate -d '{ | ||||
| }' | ||||
| ``` | ||||
|  | ||||
| ## Community Projects using Ollama | ||||
| ## Community Integrations | ||||
|  | ||||
| - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with a question-answering [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa). | ||||
| - [Continue](https://github.com/continuedev/continue) - embeds Ollama inside Visual Studio Code. The extension lets you highlight code to add to the prompt, ask questions in the sidebar, and generate code inline. | ||||
| - [LiteLLM](https://github.com/BerriAI/litellm) a lightweight python package to simplify LLM API calls | ||||
| - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) - interact with Ollama as a chatbot on Discord. | ||||
| - [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) - Raycast extension to use Ollama for local llama inference on Raycast. | ||||
| - [Simple HTML UI for Ollama](https://github.com/rtcfirefly/ollama-ui) | ||||
| - [Emacs client](https://github.com/zweifisch/ollama) for Ollama | ||||
| - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) | ||||
| - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html) | ||||
| - [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama) | ||||
| - [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel) | ||||
| - [Continue](https://github.com/continuedev/continue) | ||||
| - [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama) | ||||
| - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot) | ||||
| - [LiteLLM](https://github.com/BerriAI/litellm) | ||||
| - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) | ||||
| - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama) | ||||
| - [HTML UI](https://github.com/rtcfirefly/ollama-ui) | ||||
| - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file) | ||||
| - [Dumbar](https://github.com/JerrySievert/Dumbar) | ||||
| - [Emacs client](https://github.com/zweifisch/ollama) | ||||
|   | ||||
| @@ -7,6 +7,7 @@ import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net" | ||||
| 	"net/http" | ||||
| 	"net/url" | ||||
| 	"os" | ||||
| @@ -18,14 +19,11 @@ import ( | ||||
|  | ||||
| const DefaultHost = "127.0.0.1:11434" | ||||
|  | ||||
| var ( | ||||
| 	envHost = os.Getenv("OLLAMA_HOST") | ||||
| ) | ||||
| var envHost = os.Getenv("OLLAMA_HOST") | ||||
|  | ||||
| type Client struct { | ||||
| 	Base    url.URL | ||||
| 	HTTP    http.Client | ||||
| 	Headers http.Header | ||||
| 	base *url.URL | ||||
| 	http http.Client | ||||
| } | ||||
|  | ||||
| func checkError(resp *http.Response, body []byte) error { | ||||
| @@ -44,34 +42,44 @@ func checkError(resp *http.Response, body []byte) error { | ||||
| 	return apiError | ||||
| } | ||||
|  | ||||
| // Host returns the default host to use for the client. It is determined in the following order: | ||||
| // 1. The OLLAMA_HOST environment variable | ||||
| // 2. The default host (localhost:11434) | ||||
| func Host() string { | ||||
| 	if envHost != "" { | ||||
| 		return envHost | ||||
| 	} | ||||
| 	return DefaultHost | ||||
| } | ||||
|  | ||||
| // FromEnv creates a new client using Host() as the host. An error is returned | ||||
| // if the host is invalid. | ||||
| func FromEnv() (*Client, error) { | ||||
| 	h := Host() | ||||
| 	if !strings.HasPrefix(h, "http://") && !strings.HasPrefix(h, "https://") { | ||||
| 		h = "http://" + h | ||||
| func ClientFromEnvironment() (*Client, error) { | ||||
| 	scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://") | ||||
| 	if !ok { | ||||
| 		scheme, hostport = "http", os.Getenv("OLLAMA_HOST") | ||||
| 	} | ||||
|  | ||||
| 	u, err := url.Parse(h) | ||||
| 	host, port, err := net.SplitHostPort(hostport) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("could not parse host: %w", err) | ||||
| 		host, port = "127.0.0.1", "11434" | ||||
| 		if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil { | ||||
| 			host = ip.String() | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if u.Port() == "" { | ||||
| 		u.Host += ":11434" | ||||
| 	client := Client{ | ||||
| 		base: &url.URL{ | ||||
| 			Scheme: scheme, | ||||
| 			Host:   net.JoinHostPort(host, port), | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	return &Client{Base: *u, HTTP: http.Client{}}, nil | ||||
| 	mockRequest, err := http.NewRequest("HEAD", client.base.String(), nil) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	proxyURL, err := http.ProxyFromEnvironment(mockRequest) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	client.http = http.Client{ | ||||
| 		Transport: &http.Transport{ | ||||
| 			Proxy: http.ProxyURL(proxyURL), | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	return &client, nil | ||||
| } | ||||
|  | ||||
| func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error { | ||||
| @@ -86,7 +94,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData | ||||
| 		reqBody = bytes.NewReader(data) | ||||
| 	} | ||||
|  | ||||
| 	requestURL := c.Base.JoinPath(path) | ||||
| 	requestURL := c.base.JoinPath(path) | ||||
| 	request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), reqBody) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| @@ -96,11 +104,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData | ||||
| 	request.Header.Set("Accept", "application/json") | ||||
| 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version())) | ||||
|  | ||||
| 	for k, v := range c.Headers { | ||||
| 		request.Header[k] = v | ||||
| 	} | ||||
|  | ||||
| 	respObj, err := c.HTTP.Do(request) | ||||
| 	respObj, err := c.http.Do(request) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -123,6 +127,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| const maxBufferSize = 512 * 1000 // 512KB | ||||
|  | ||||
| func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error { | ||||
| 	var buf *bytes.Buffer | ||||
| 	if data != nil { | ||||
| @@ -134,23 +140,26 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f | ||||
| 		buf = bytes.NewBuffer(bts) | ||||
| 	} | ||||
|  | ||||
| 	requestURL := c.Base.JoinPath(path) | ||||
| 	requestURL := c.base.JoinPath(path) | ||||
| 	request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), buf) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	request.Header.Set("Content-Type", "application/json") | ||||
| 	request.Header.Set("Accept", "application/json") | ||||
| 	request.Header.Set("Accept", "application/x-ndjson") | ||||
| 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version())) | ||||
|  | ||||
| 	response, err := http.DefaultClient.Do(request) | ||||
| 	response, err := c.http.Do(request) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer response.Body.Close() | ||||
|  | ||||
| 	scanner := bufio.NewScanner(response.Body) | ||||
| 	// increase the buffer size to avoid running out of space | ||||
| 	scanBuf := make([]byte, 0, maxBufferSize) | ||||
| 	scanner.Buffer(scanBuf, maxBufferSize) | ||||
| 	for scanner.Scan() { | ||||
| 		var errorResponse struct { | ||||
| 			Error string `json:"error,omitempty"` | ||||
|   | ||||
							
								
								
									
										225
									
								
								api/client.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										225
									
								
								api/client.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,225 @@ | ||||
| import os | ||||
| import json | ||||
| import requests | ||||
|  | ||||
| BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434') | ||||
|  | ||||
| # Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. | ||||
| # The final response object will include statistics and additional data from the request. Use the callback function to override | ||||
| # the default handler. | ||||
| def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/generate" | ||||
|         payload = { | ||||
|             "model": model_name,  | ||||
|             "prompt": prompt,  | ||||
|             "system": system,  | ||||
|             "template": template,  | ||||
|             "context": context,  | ||||
|             "options": options | ||||
|         } | ||||
|          | ||||
|         # Remove keys with None values | ||||
|         payload = {k: v for k, v in payload.items() if v is not None} | ||||
|          | ||||
|         with requests.post(url, json=payload, stream=True) as response: | ||||
|             response.raise_for_status() | ||||
|              | ||||
|             # Creating a variable to hold the context history of the final chunk | ||||
|             final_context = None | ||||
|              | ||||
|             # Variable to hold concatenated response strings if no callback is provided | ||||
|             full_response = "" | ||||
|  | ||||
|             # Iterating over the response line by line and displaying the details | ||||
|             for line in response.iter_lines(): | ||||
|                 if line: | ||||
|                     # Parsing each line (JSON chunk) and extracting the details | ||||
|                     chunk = json.loads(line) | ||||
|                      | ||||
|                     # If a callback function is provided, call it with the chunk | ||||
|                     if callback: | ||||
|                         callback(chunk) | ||||
|                     else: | ||||
|                         # If this is not the last chunk, add the "response" field value to full_response and print it | ||||
|                         if not chunk.get("done"): | ||||
|                             response_piece = chunk.get("response", "") | ||||
|                             full_response += response_piece | ||||
|                             print(response_piece, end="", flush=True) | ||||
|                      | ||||
|                     # Check if it's the last chunk (done is true) | ||||
|                     if chunk.get("done"): | ||||
|                         final_context = chunk.get("context") | ||||
|              | ||||
|             # Return the full response and the final context | ||||
|             return full_response, final_context | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return None, None | ||||
|  | ||||
| # Create a model from a Modelfile. Use the callback function to override the default handler. | ||||
| def create(model_name, model_path, callback=None): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/create" | ||||
|         payload = {"name": model_name, "path": model_path} | ||||
|          | ||||
|         # Making a POST request with the stream parameter set to True to handle streaming responses | ||||
|         with requests.post(url, json=payload, stream=True) as response: | ||||
|             response.raise_for_status() | ||||
|  | ||||
|             # Iterating over the response line by line and displaying the status | ||||
|             for line in response.iter_lines(): | ||||
|                 if line: | ||||
|                     # Parsing each line (JSON chunk) and extracting the status | ||||
|                     chunk = json.loads(line) | ||||
|  | ||||
|                     if callback: | ||||
|                         callback(chunk) | ||||
|                     else: | ||||
|                         print(f"Status: {chunk.get('status')}") | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|  | ||||
| # Pull a model from the model registry. Cancelled pulls are resumed from where they left off, and multiple | ||||
| # calls will share the same download progress. Use the callback function to override the default handler. | ||||
| def pull(model_name, insecure=False, callback=None): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/pull" | ||||
|         payload = { | ||||
|             "name": model_name, | ||||
|             "insecure": insecure | ||||
|         } | ||||
|  | ||||
|         # Making a POST request with the stream parameter set to True to handle streaming responses | ||||
|         with requests.post(url, json=payload, stream=True) as response: | ||||
|             response.raise_for_status() | ||||
|  | ||||
|             # Iterating over the response line by line and displaying the details | ||||
|             for line in response.iter_lines(): | ||||
|                 if line: | ||||
|                     # Parsing each line (JSON chunk) and extracting the details | ||||
|                     chunk = json.loads(line) | ||||
|  | ||||
|                     # If a callback function is provided, call it with the chunk | ||||
|                     if callback: | ||||
|                         callback(chunk) | ||||
|                     else: | ||||
|                         # Print the status message directly to the console | ||||
|                         print(chunk.get('status', ''), end='', flush=True) | ||||
|                      | ||||
|                     # If there's layer data, you might also want to print that (adjust as necessary) | ||||
|                     if 'digest' in chunk: | ||||
|                         print(f" - Digest: {chunk['digest']}", end='', flush=True) | ||||
|                         print(f" - Total: {chunk['total']}", end='', flush=True) | ||||
|                         print(f" - Completed: {chunk['completed']}", end='\n', flush=True) | ||||
|                     else: | ||||
|                         print() | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|  | ||||
| # Push a model to the model registry. Use the callback function to override the default handler. | ||||
| def push(model_name, insecure=False, callback=None): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/push" | ||||
|         payload = { | ||||
|             "name": model_name, | ||||
|             "insecure": insecure | ||||
|         } | ||||
|  | ||||
|         # Making a POST request with the stream parameter set to True to handle streaming responses | ||||
|         with requests.post(url, json=payload, stream=True) as response: | ||||
|             response.raise_for_status() | ||||
|  | ||||
|             # Iterating over the response line by line and displaying the details | ||||
|             for line in response.iter_lines(): | ||||
|                 if line: | ||||
|                     # Parsing each line (JSON chunk) and extracting the details | ||||
|                     chunk = json.loads(line) | ||||
|  | ||||
|                     # If a callback function is provided, call it with the chunk | ||||
|                     if callback: | ||||
|                         callback(chunk) | ||||
|                     else: | ||||
|                         # Print the status message directly to the console | ||||
|                         print(chunk.get('status', ''), end='', flush=True) | ||||
|                      | ||||
|                     # If there's layer data, you might also want to print that (adjust as necessary) | ||||
|                     if 'digest' in chunk: | ||||
|                         print(f" - Digest: {chunk['digest']}", end='', flush=True) | ||||
|                         print(f" - Total: {chunk['total']}", end='', flush=True) | ||||
|                         print(f" - Completed: {chunk['completed']}", end='\n', flush=True) | ||||
|                     else: | ||||
|                         print() | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|  | ||||
| # List models that are available locally. | ||||
| def list(): | ||||
|     try: | ||||
|         response = requests.get(f"{BASE_URL}/api/tags") | ||||
|         response.raise_for_status() | ||||
|         data = response.json() | ||||
|         models = data.get('models', []) | ||||
|         return models | ||||
|  | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return None | ||||
|  | ||||
| # Copy a model. Creates a model with another name from an existing model. | ||||
| def copy(source, destination): | ||||
|     try: | ||||
|         # Create the JSON payload | ||||
|         payload = { | ||||
|             "source": source, | ||||
|             "destination": destination | ||||
|         } | ||||
|          | ||||
|         response = requests.post(f"{BASE_URL}/api/copy", json=payload) | ||||
|         response.raise_for_status() | ||||
|          | ||||
|         # If the request was successful, return a message indicating that the copy was successful | ||||
|         return "Copy successful" | ||||
|  | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return None | ||||
|  | ||||
| # Delete a model and its data. | ||||
| def delete(model_name): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/delete" | ||||
|         payload = {"name": model_name} | ||||
|         response = requests.delete(url, json=payload) | ||||
|         response.raise_for_status() | ||||
|         return "Delete successful" | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return None | ||||
|  | ||||
| # Show info about a model. | ||||
| def show(model_name): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/api/show" | ||||
|         payload = {"name": model_name} | ||||
|         response = requests.post(url, json=payload) | ||||
|         response.raise_for_status() | ||||
|          | ||||
|         # Parse the JSON response and return it | ||||
|         data = response.json() | ||||
|         return data | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return None | ||||
|  | ||||
| def heartbeat(): | ||||
|     try: | ||||
|         url = f"{BASE_URL}/" | ||||
|         response = requests.head(url) | ||||
|         response.raise_for_status() | ||||
|         return "Ollama is running" | ||||
|     except requests.exceptions.RequestException as e: | ||||
|         print(f"An error occurred: {e}") | ||||
|         return "Ollama is not running" | ||||
|  | ||||
|  | ||||
							
								
								
									
										81
									
								
								api/types.go
									
									
									
									
									
								
							
							
						
						
									
										81
									
								
								api/types.go
									
									
									
									
									
								
							| @@ -37,6 +37,7 @@ type GenerateRequest struct { | ||||
| 	System   string `json:"system"` | ||||
| 	Template string `json:"template"` | ||||
| 	Context  []int  `json:"context,omitempty"` | ||||
| 	Stream   *bool  `json:"stream,omitempty"` | ||||
|  | ||||
| 	Options map[string]interface{} `json:"options"` | ||||
| } | ||||
| @@ -53,8 +54,9 @@ type EmbeddingResponse struct { | ||||
| } | ||||
|  | ||||
| type CreateRequest struct { | ||||
| 	Name string `json:"name"` | ||||
| 	Path string `json:"path"` | ||||
| 	Name   string `json:"name"` | ||||
| 	Path   string `json:"path"` | ||||
| 	Stream *bool  `json:"stream,omitempty"` | ||||
| } | ||||
|  | ||||
| type DeleteRequest struct { | ||||
| @@ -83,13 +85,14 @@ type PullRequest struct { | ||||
| 	Insecure bool   `json:"insecure,omitempty"` | ||||
| 	Username string `json:"username"` | ||||
| 	Password string `json:"password"` | ||||
| 	Stream   *bool  `json:"stream,omitempty"` | ||||
| } | ||||
|  | ||||
| type ProgressResponse struct { | ||||
| 	Status    string `json:"status"` | ||||
| 	Digest    string `json:"digest,omitempty"` | ||||
| 	Total     int    `json:"total,omitempty"` | ||||
| 	Completed int    `json:"completed,omitempty"` | ||||
| 	Total     int64  `json:"total,omitempty"` | ||||
| 	Completed int64  `json:"completed,omitempty"` | ||||
| } | ||||
|  | ||||
| type PushRequest struct { | ||||
| @@ -97,6 +100,7 @@ type PushRequest struct { | ||||
| 	Insecure bool   `json:"insecure,omitempty"` | ||||
| 	Username string `json:"username"` | ||||
| 	Password string `json:"password"` | ||||
| 	Stream   *bool  `json:"stream,omitempty"` | ||||
| } | ||||
|  | ||||
| type ListResponse struct { | ||||
| @@ -106,7 +110,7 @@ type ListResponse struct { | ||||
| type ModelResponse struct { | ||||
| 	Name       string    `json:"name"` | ||||
| 	ModifiedAt time.Time `json:"modified_at"` | ||||
| 	Size       int       `json:"size"` | ||||
| 	Size       int64     `json:"size"` | ||||
| 	Digest     string    `json:"digest"` | ||||
| } | ||||
|  | ||||
| @@ -117,7 +121,7 @@ type TokenResponse struct { | ||||
| type GenerateResponse struct { | ||||
| 	Model     string    `json:"model"` | ||||
| 	CreatedAt time.Time `json:"created_at"` | ||||
| 	Response  string    `json:"response,omitempty"` | ||||
| 	Response  string    `json:"response"` | ||||
|  | ||||
| 	Done    bool  `json:"done"` | ||||
| 	Context []int `json:"context,omitempty"` | ||||
| @@ -201,6 +205,8 @@ type Options struct { | ||||
| 	NumThread int `json:"num_thread,omitempty"` | ||||
| } | ||||
|  | ||||
| var ErrInvalidOpts = fmt.Errorf("invalid options") | ||||
|  | ||||
| func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct | ||||
| 	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct | ||||
| @@ -214,6 +220,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	invalidOpts := []string{} | ||||
| 	for key, val := range m { | ||||
| 		if opt, ok := jsonOpts[key]; ok { | ||||
| 			field := valueOpts.FieldByName(opt.Name) | ||||
| @@ -231,12 +238,12 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 						// when JSON unmarshals numbers, it uses float64, not int | ||||
| 						field.SetInt(int64(t)) | ||||
| 					default: | ||||
| 						log.Printf("could not convert model parameter %v to int, skipped", key) | ||||
| 						log.Printf("could not convert model parameter %v of type %T to int, skipped", key, val) | ||||
| 					} | ||||
| 				case reflect.Bool: | ||||
| 					val, ok := val.(bool) | ||||
| 					if !ok { | ||||
| 						log.Printf("could not convert model parameter %v to bool, skipped", key) | ||||
| 						log.Printf("could not convert model parameter %v of type %T to bool, skipped", key, val) | ||||
| 						continue | ||||
| 					} | ||||
| 					field.SetBool(val) | ||||
| @@ -244,14 +251,14 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 					// JSON unmarshals to float64 | ||||
| 					val, ok := val.(float64) | ||||
| 					if !ok { | ||||
| 						log.Printf("could not convert model parameter %v to float32, skipped", key) | ||||
| 						log.Printf("could not convert model parameter %v of type %T to float32, skipped", key, val) | ||||
| 						continue | ||||
| 					} | ||||
| 					field.SetFloat(val) | ||||
| 				case reflect.String: | ||||
| 					val, ok := val.(string) | ||||
| 					if !ok { | ||||
| 						log.Printf("could not convert model parameter %v to string, skipped", key) | ||||
| 						log.Printf("could not convert model parameter %v of type %T to string, skipped", key, val) | ||||
| 						continue | ||||
| 					} | ||||
| 					field.SetString(val) | ||||
| @@ -259,7 +266,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 					// JSON unmarshals to []interface{}, not []string | ||||
| 					val, ok := val.([]interface{}) | ||||
| 					if !ok { | ||||
| 						log.Printf("could not convert model parameter %v to slice, skipped", key) | ||||
| 						log.Printf("could not convert model parameter %v of type %T to slice, skipped", key, val) | ||||
| 						continue | ||||
| 					} | ||||
| 					// convert []interface{} to []string | ||||
| @@ -267,7 +274,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 					for i, item := range val { | ||||
| 						str, ok := item.(string) | ||||
| 						if !ok { | ||||
| 							log.Printf("could not convert model parameter %v to slice of strings, skipped", key) | ||||
| 							log.Printf("could not convert model parameter %v of type %T to slice of strings, skipped", key, item) | ||||
| 							continue | ||||
| 						} | ||||
| 						slice[i] = str | ||||
| @@ -277,45 +284,51 @@ func (opts *Options) FromMap(m map[string]interface{}) error { | ||||
| 					return fmt.Errorf("unknown type loading config params: %v", field.Kind()) | ||||
| 				} | ||||
| 			} | ||||
| 		} else { | ||||
| 			invalidOpts = append(invalidOpts, key) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if len(invalidOpts) > 0 { | ||||
| 		return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", ")) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func DefaultOptions() Options { | ||||
| 	return Options{ | ||||
| 		Seed: -1, | ||||
|  | ||||
| 		UseNUMA: false, | ||||
|  | ||||
| 		NumCtx:             2048, | ||||
| 		NumKeep:            -1, | ||||
| 		NumBatch:           512, | ||||
| 		NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically | ||||
| 		NumGQA:             1, | ||||
| 		LowVRAM:            false, | ||||
| 		F16KV:              true, | ||||
| 		UseMMap:            true, | ||||
| 		UseMLock:           false, | ||||
| 		RopeFrequencyBase:  10000.0, | ||||
| 		RopeFrequencyScale: 1.0, | ||||
| 		EmbeddingOnly:      true, | ||||
|  | ||||
| 		RepeatLastN:      64, | ||||
| 		RepeatPenalty:    1.1, | ||||
| 		FrequencyPenalty: 0.0, | ||||
| 		PresencePenalty:  0.0, | ||||
| 		// options set on request to runner | ||||
| 		NumPredict:       -1, | ||||
| 		NumKeep:          -1, | ||||
| 		Temperature:      0.8, | ||||
| 		TopK:             40, | ||||
| 		TopP:             0.9, | ||||
| 		TFSZ:             1.0, | ||||
| 		TypicalP:         1.0, | ||||
| 		RepeatLastN:      64, | ||||
| 		RepeatPenalty:    1.1, | ||||
| 		PresencePenalty:  0.0, | ||||
| 		FrequencyPenalty: 0.0, | ||||
| 		Mirostat:         0, | ||||
| 		MirostatTau:      5.0, | ||||
| 		MirostatEta:      0.1, | ||||
| 		PenalizeNewline:  true, | ||||
| 		Seed:             -1, | ||||
|  | ||||
| 		NumThread: 0, // let the runtime decide | ||||
| 		// options set when the model is loaded | ||||
| 		NumCtx:             2048, | ||||
| 		RopeFrequencyBase:  10000.0, | ||||
| 		RopeFrequencyScale: 1.0, | ||||
| 		NumBatch:           512, | ||||
| 		NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically | ||||
| 		NumGQA:             1, | ||||
| 		NumThread:          0, // let the runtime decide | ||||
| 		LowVRAM:            false, | ||||
| 		F16KV:              true, | ||||
| 		UseMLock:           false, | ||||
| 		UseMMap:            true, | ||||
| 		UseNUMA:            false, | ||||
| 		EmbeddingOnly:      true, | ||||
| 	} | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import winston from 'winston' | ||||
| import 'winston-daily-rotate-file' | ||||
| import * as path from 'path' | ||||
|  | ||||
| import { analytics, id } from './telemetry' | ||||
| import { v4 as uuidv4 } from 'uuid' | ||||
| import { installed } from './install' | ||||
|  | ||||
| require('@electron/remote/main').initialize() | ||||
| @@ -164,11 +164,11 @@ app.on('before-quit', () => { | ||||
|  | ||||
| function init() { | ||||
|   if (app.isPackaged) { | ||||
|     heartbeat() | ||||
|     autoUpdater.checkForUpdates() | ||||
|     setInterval(() => { | ||||
|       heartbeat() | ||||
|       autoUpdater.checkForUpdates() | ||||
|       if (!updateAvailable) { | ||||
|         autoUpdater.checkForUpdates() | ||||
|       } | ||||
|     }, 60 * 60 * 1000) | ||||
|   } | ||||
|  | ||||
| @@ -234,28 +234,26 @@ app.on('window-all-closed', () => { | ||||
|   } | ||||
| }) | ||||
|  | ||||
| // In this file you can include the rest of your app's specific main process | ||||
| // code. You can also put them in separate files and import them here. | ||||
| let aid = '' | ||||
| try { | ||||
|   aid = id() | ||||
| } catch (e) {} | ||||
| function id(): string { | ||||
|   const id = store.get('id') as string | ||||
|  | ||||
| autoUpdater.setFeedURL({ | ||||
|   url: `https://ollama.ai/api/update?os=${process.platform}&arch=${process.arch}&version=${app.getVersion()}&id=${aid}`, | ||||
| }) | ||||
|   if (id) { | ||||
|     return id | ||||
|   } | ||||
|  | ||||
| async function heartbeat() { | ||||
|   analytics.track({ | ||||
|     anonymousId: aid, | ||||
|     event: 'heartbeat', | ||||
|     properties: { | ||||
|       version: app.getVersion(), | ||||
|     }, | ||||
|   }) | ||||
|   const uuid = uuidv4() | ||||
|   store.set('id', uuid) | ||||
|   return uuid | ||||
| } | ||||
|  | ||||
| autoUpdater.setFeedURL({ | ||||
|   url: `https://ollama.ai/api/update?os=${process.platform}&arch=${ | ||||
|     process.arch | ||||
|   }&version=${app.getVersion()}&id=${id()}`, | ||||
| }) | ||||
|  | ||||
| autoUpdater.on('error', e => { | ||||
|   logger.error(`update check failed - ${e.message}`) | ||||
|   console.error(`update check failed - ${e.message}`) | ||||
| }) | ||||
|  | ||||
|   | ||||
| @@ -1,19 +0,0 @@ | ||||
| import { Analytics } from '@segment/analytics-node' | ||||
| import { v4 as uuidv4 } from 'uuid' | ||||
| import Store from 'electron-store' | ||||
|  | ||||
| const store = new Store() | ||||
|  | ||||
| export const analytics = new Analytics({ writeKey: process.env.TELEMETRY_WRITE_KEY || '<empty>' }) | ||||
|  | ||||
| export function id(): string { | ||||
|   const id = store.get('id') as string | ||||
|  | ||||
|   if (id) { | ||||
|     return id | ||||
|   } | ||||
|  | ||||
|   const uuid = uuidv4() | ||||
|   store.set('id', uuid) | ||||
|   return uuid | ||||
| } | ||||
							
								
								
									
										448
									
								
								cmd/cmd.go
									
									
									
									
									
								
							
							
						
						
									
										448
									
								
								cmd/cmd.go
									
									
									
									
									
								
							| @@ -11,20 +11,21 @@ import ( | ||||
| 	"io" | ||||
| 	"log" | ||||
| 	"net" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"path" | ||||
| 	"os/signal" | ||||
| 	"path/filepath" | ||||
| 	"runtime" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/chzyer/readline" | ||||
| 	"github.com/dustin/go-humanize" | ||||
| 	"github.com/olekukonko/tablewriter" | ||||
| 	"github.com/pdevine/readline" | ||||
| 	"github.com/spf13/cobra" | ||||
| 	"golang.org/x/crypto/ssh" | ||||
| 	"golang.org/x/term" | ||||
|  | ||||
| 	"github.com/jmorganca/ollama/api" | ||||
| 	"github.com/jmorganca/ollama/format" | ||||
| @@ -33,6 +34,26 @@ import ( | ||||
| 	"github.com/jmorganca/ollama/version" | ||||
| ) | ||||
|  | ||||
| type Painter struct { | ||||
| 	IsMultiLine bool | ||||
| } | ||||
|  | ||||
| func (p Painter) Paint(line []rune, _ int) []rune { | ||||
| 	termType := os.Getenv("TERM") | ||||
| 	if termType == "xterm-256color" && len(line) == 0 { | ||||
| 		var prompt string | ||||
| 		if p.IsMultiLine { | ||||
| 			prompt = "Use \"\"\" to end multi-line input" | ||||
| 		} else { | ||||
| 			prompt = "Send a message (/? for help)" | ||||
| 		} | ||||
| 		return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt))) | ||||
| 	} | ||||
| 	// add a space and a backspace to prevent the cursor from walking up the screen | ||||
| 	line = append(line, []rune(" \b")...) | ||||
| 	return line | ||||
| } | ||||
|  | ||||
| func CreateHandler(cmd *cobra.Command, args []string) error { | ||||
| 	filename, _ := cmd.Flags().GetString("file") | ||||
| 	filename, err := filepath.Abs(filename) | ||||
| @@ -40,7 +61,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -59,18 +80,18 @@ func CreateHandler(cmd *cobra.Command, args []string) error { | ||||
| 			currentDigest = resp.Digest | ||||
| 			switch { | ||||
| 			case strings.Contains(resp.Status, "embeddings"): | ||||
| 				bar = progressbar.Default(int64(resp.Total), resp.Status) | ||||
| 				bar.Set(resp.Completed) | ||||
| 				bar = progressbar.Default(resp.Total, resp.Status) | ||||
| 				bar.Set64(resp.Completed) | ||||
| 			default: | ||||
| 				// pulling | ||||
| 				bar = progressbar.DefaultBytes( | ||||
| 					int64(resp.Total), | ||||
| 					resp.Total, | ||||
| 					resp.Status, | ||||
| 				) | ||||
| 				bar.Set(resp.Completed) | ||||
| 				bar.Set64(resp.Completed) | ||||
| 			} | ||||
| 		} else if resp.Digest == currentDigest && resp.Digest != "" { | ||||
| 			bar.Set(resp.Completed) | ||||
| 			bar.Set64(resp.Completed) | ||||
| 		} else { | ||||
| 			currentDigest = "" | ||||
| 			if spinner != nil { | ||||
| @@ -98,39 +119,24 @@ func CreateHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func RunHandler(cmd *cobra.Command, args []string) error { | ||||
| 	insecure, err := cmd.Flags().GetBool("insecure") | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	mp := server.ParseModelPath(args[0]) | ||||
| 	models, err := client.List(context.Background()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	if mp.ProtocolScheme == "http" && !insecure { | ||||
| 		return fmt.Errorf("insecure protocol http") | ||||
| 	} | ||||
|  | ||||
| 	fp, err := mp.GetManifestPath(false) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	_, err = os.Stat(fp) | ||||
| 	switch { | ||||
| 	case errors.Is(err, os.ErrNotExist): | ||||
| 		if err := pull(args[0], insecure); err != nil { | ||||
| 			var apiStatusError api.StatusError | ||||
| 			if !errors.As(err, &apiStatusError) { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			if apiStatusError.StatusCode != http.StatusBadGateway { | ||||
| 				return err | ||||
| 			} | ||||
| 	canonicalModelPath := server.ParseModelPath(args[0]) | ||||
| 	for _, model := range models.Models { | ||||
| 		if model.Name == canonicalModelPath.GetShortTagname() { | ||||
| 			return RunGenerate(cmd, args) | ||||
| 		} | ||||
| 	case err != nil: | ||||
| 	} | ||||
|  | ||||
| 	if err := PullHandler(cmd, args); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| @@ -138,7 +144,7 @@ func RunHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func PushHandler(cmd *cobra.Command, args []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -156,13 +162,13 @@ func PushHandler(cmd *cobra.Command, args []string) error { | ||||
| 		if resp.Digest != currentDigest && resp.Digest != "" { | ||||
| 			currentDigest = resp.Digest | ||||
| 			bar = progressbar.DefaultBytes( | ||||
| 				int64(resp.Total), | ||||
| 				resp.Total, | ||||
| 				fmt.Sprintf("pushing %s...", resp.Digest[7:19]), | ||||
| 			) | ||||
|  | ||||
| 			bar.Set(resp.Completed) | ||||
| 			bar.Set64(resp.Completed) | ||||
| 		} else if resp.Digest == currentDigest && resp.Digest != "" { | ||||
| 			bar.Set(resp.Completed) | ||||
| 			bar.Set64(resp.Completed) | ||||
| 		} else { | ||||
| 			currentDigest = "" | ||||
| 			fmt.Println(resp.Status) | ||||
| @@ -182,7 +188,7 @@ func PushHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func ListHandler(cmd *cobra.Command, args []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -215,7 +221,7 @@ func ListHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func DeleteHandler(cmd *cobra.Command, args []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -231,7 +237,7 @@ func DeleteHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func ShowHandler(cmd *cobra.Command, args []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -309,7 +315,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func CopyHandler(cmd *cobra.Command, args []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -332,7 +338,7 @@ func PullHandler(cmd *cobra.Command, args []string) error { | ||||
| } | ||||
|  | ||||
| func pull(model string, insecure bool) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -345,13 +351,13 @@ func pull(model string, insecure bool) error { | ||||
| 		if resp.Digest != currentDigest && resp.Digest != "" { | ||||
| 			currentDigest = resp.Digest | ||||
| 			bar = progressbar.DefaultBytes( | ||||
| 				int64(resp.Total), | ||||
| 				resp.Total, | ||||
| 				fmt.Sprintf("pulling %s...", resp.Digest[7:19]), | ||||
| 			) | ||||
|  | ||||
| 			bar.Set(resp.Completed) | ||||
| 			bar.Set64(resp.Completed) | ||||
| 		} else if resp.Digest == currentDigest && resp.Digest != "" { | ||||
| 			bar.Set(resp.Completed) | ||||
| 			bar.Set64(resp.Completed) | ||||
| 		} else { | ||||
| 			currentDigest = "" | ||||
| 			fmt.Println(resp.Status) | ||||
| @@ -374,7 +380,20 @@ func pull(model string, insecure bool) error { | ||||
| func RunGenerate(cmd *cobra.Command, args []string) error { | ||||
| 	if len(args) > 1 { | ||||
| 		// join all args into a single prompt | ||||
| 		return generate(cmd, args[0], strings.Join(args[1:], " ")) | ||||
| 		wordWrap := false | ||||
| 		if term.IsTerminal(int(os.Stdout.Fd())) { | ||||
| 			wordWrap = true | ||||
| 		} | ||||
|  | ||||
| 		nowrap, err := cmd.Flags().GetBool("nowordwrap") | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if nowrap { | ||||
| 			wordWrap = false | ||||
| 		} | ||||
|  | ||||
| 		return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap) | ||||
| 	} | ||||
|  | ||||
| 	if readline.IsTerminal(int(os.Stdin.Fd())) { | ||||
| @@ -386,71 +405,110 @@ func RunGenerate(cmd *cobra.Command, args []string) error { | ||||
|  | ||||
| type generateContextKey string | ||||
|  | ||||
| func generate(cmd *cobra.Command, model, prompt string) error { | ||||
| 	if len(strings.TrimSpace(prompt)) > 0 { | ||||
| 		client, err := api.FromEnv() | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error { | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	spinner := NewSpinner("") | ||||
| 	go spinner.Spin(60 * time.Millisecond) | ||||
|  | ||||
| 	var latest api.GenerateResponse | ||||
|  | ||||
| 	generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int) | ||||
| 	if !ok { | ||||
| 		generateContext = []int{} | ||||
| 	} | ||||
|  | ||||
| 	termWidth, _, err := term.GetSize(int(0)) | ||||
| 	if err != nil { | ||||
| 		wordWrap = false | ||||
| 	} | ||||
|  | ||||
| 	cancelCtx, cancel := context.WithCancel(context.Background()) | ||||
| 	defer cancel() | ||||
|  | ||||
| 	sigChan := make(chan os.Signal, 1) | ||||
| 	signal.Notify(sigChan, syscall.SIGINT) | ||||
| 	var abort bool | ||||
|  | ||||
| 	go func() { | ||||
| 		<-sigChan | ||||
| 		cancel() | ||||
| 		abort = true | ||||
| 	}() | ||||
|  | ||||
| 	var currentLineLength int | ||||
| 	var wordBuffer string | ||||
|  | ||||
| 	request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext} | ||||
| 	fn := func(response api.GenerateResponse) error { | ||||
| 		if !spinner.IsFinished() { | ||||
| 			spinner.Finish() | ||||
| 		} | ||||
|  | ||||
| 		spinner := NewSpinner("") | ||||
| 		go spinner.Spin(60 * time.Millisecond) | ||||
| 		latest = response | ||||
|  | ||||
| 		var latest api.GenerateResponse | ||||
| 		if wordWrap { | ||||
| 			for _, ch := range response.Response { | ||||
| 				if currentLineLength+1 > termWidth-5 { | ||||
| 					// backtrack the length of the last word and clear to the end of the line | ||||
| 					fmt.Printf("\x1b[%dD\x1b[K\n", len(wordBuffer)) | ||||
| 					fmt.Printf("%s%c", wordBuffer, ch) | ||||
| 					currentLineLength = len(wordBuffer) + 1 | ||||
| 				} else { | ||||
| 					fmt.Print(string(ch)) | ||||
| 					currentLineLength += 1 | ||||
|  | ||||
| 		generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int) | ||||
| 		if !ok { | ||||
| 			generateContext = []int{} | ||||
| 		} | ||||
|  | ||||
| 		request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext} | ||||
| 		fn := func(response api.GenerateResponse) error { | ||||
| 			if !spinner.IsFinished() { | ||||
| 				spinner.Finish() | ||||
| 					switch ch { | ||||
| 					case ' ': | ||||
| 						wordBuffer = "" | ||||
| 					case '\n': | ||||
| 						currentLineLength = 0 | ||||
| 					default: | ||||
| 						wordBuffer += string(ch) | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
|  | ||||
| 			latest = response | ||||
|  | ||||
| 		} else { | ||||
| 			fmt.Print(response.Response) | ||||
| 		} | ||||
|  | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if err := client.Generate(cancelCtx, &request, fn); err != nil { | ||||
| 		if strings.Contains(err.Error(), "context canceled") && abort { | ||||
| 			spinner.Finish() | ||||
| 			return nil | ||||
| 		} | ||||
|  | ||||
| 		if err := client.Generate(context.Background(), &request, fn); err != nil { | ||||
| 			if strings.Contains(err.Error(), "failed to load model") { | ||||
| 				// tell the user to check the server log, if it exists locally | ||||
| 				home, nestedErr := os.UserHomeDir() | ||||
| 				if nestedErr != nil { | ||||
| 					// return the original error | ||||
| 					return err | ||||
| 				} | ||||
| 				logPath := filepath.Join(home, ".ollama", "logs", "server.log") | ||||
| 				if _, nestedErr := os.Stat(logPath); nestedErr == nil { | ||||
| 					err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath) | ||||
| 				} | ||||
| 			} | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		fmt.Println() | ||||
| 		fmt.Println() | ||||
|  | ||||
| 		if !latest.Done { | ||||
| 			return errors.New("unexpected end of response") | ||||
| 		} | ||||
|  | ||||
| 		verbose, err := cmd.Flags().GetBool("verbose") | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		if verbose { | ||||
| 			latest.Summary() | ||||
| 		} | ||||
|  | ||||
| 		ctx := cmd.Context() | ||||
| 		ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context) | ||||
| 		cmd.SetContext(ctx) | ||||
| 		return err | ||||
| 	} | ||||
| 	if prompt != "" { | ||||
| 		fmt.Println() | ||||
| 		fmt.Println() | ||||
| 	} | ||||
|  | ||||
| 	if !latest.Done { | ||||
| 		if abort { | ||||
| 			return nil | ||||
| 		} | ||||
| 		return errors.New("unexpected end of response") | ||||
| 	} | ||||
|  | ||||
| 	verbose, err := cmd.Flags().GetBool("verbose") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	if verbose { | ||||
| 		latest.Summary() | ||||
| 	} | ||||
|  | ||||
| 	ctx := cmd.Context() | ||||
| 	ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context) | ||||
| 	cmd.SetContext(ctx) | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
| @@ -461,19 +519,21 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	// load the model | ||||
| 	if err := generate(cmd, model, "", false); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	completer := readline.NewPrefixCompleter( | ||||
| 		readline.PcItem("/help"), | ||||
| 		readline.PcItem("/list"), | ||||
| 		readline.PcItem("/set", | ||||
| 			readline.PcItem("history"), | ||||
| 			readline.PcItem("nohistory"), | ||||
| 			readline.PcItem("wordwrap"), | ||||
| 			readline.PcItem("nowordwrap"), | ||||
| 			readline.PcItem("verbose"), | ||||
| 			readline.PcItem("quiet"), | ||||
| 			readline.PcItem("mode", | ||||
| 				readline.PcItem("vim"), | ||||
| 				readline.PcItem("emacs"), | ||||
| 				readline.PcItem("default"), | ||||
| 			), | ||||
| 		), | ||||
| 		readline.PcItem("/show", | ||||
| 			readline.PcItem("license"), | ||||
| @@ -487,11 +547,41 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 	) | ||||
|  | ||||
| 	usage := func() { | ||||
| 		fmt.Fprintln(os.Stderr, "commands:") | ||||
| 		fmt.Fprintln(os.Stderr, completer.Tree("  ")) | ||||
| 		fmt.Fprintln(os.Stderr, "Available Commands:") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set         Set session variables") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show        Show model information") | ||||
| 		fmt.Fprintln(os.Stderr, "  /bye         Exit") | ||||
| 		fmt.Fprintln(os.Stderr, "  /?, /help    Help for a command") | ||||
| 		fmt.Fprintln(os.Stderr, "") | ||||
| 		fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.") | ||||
| 		fmt.Fprintln(os.Stderr, "") | ||||
| 	} | ||||
|  | ||||
| 	usageSet := func() { | ||||
| 		fmt.Fprintln(os.Stderr, "Available Commands:") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set history      Enable history") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set nohistory    Disable history") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set wordwrap     Enable wordwrap") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set nowordwrap   Disable wordwrap") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set verbose      Show LLM stats") | ||||
| 		fmt.Fprintln(os.Stderr, "  /set quiet        Disable LLM stats") | ||||
| 		fmt.Fprintln(os.Stderr, "") | ||||
| 	} | ||||
|  | ||||
| 	usageShow := func() { | ||||
| 		fmt.Fprintln(os.Stderr, "Available Commands:") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show license      Show model license") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show modelfile    Show Modelfile for this model") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show parameters   Show parameters for this model") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show system       Show system prompt") | ||||
| 		fmt.Fprintln(os.Stderr, "  /show template     Show prompt template") | ||||
| 		fmt.Fprintln(os.Stderr, "") | ||||
| 	} | ||||
|  | ||||
| 	var painter Painter | ||||
|  | ||||
| 	config := readline.Config{ | ||||
| 		Painter:      &painter, | ||||
| 		Prompt:       ">>> ", | ||||
| 		HistoryFile:  filepath.Join(home, ".ollama", "history"), | ||||
| 		AutoComplete: completer, | ||||
| @@ -503,6 +593,21 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 	} | ||||
| 	defer scanner.Close() | ||||
|  | ||||
| 	var wordWrap bool | ||||
| 	termType := os.Getenv("TERM") | ||||
| 	if termType == "xterm-256color" { | ||||
| 		wordWrap = true | ||||
| 	} | ||||
|  | ||||
| 	// override wrapping if the user turned it off | ||||
| 	nowrap, err := cmd.Flags().GetBool("nowordwrap") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if nowrap { | ||||
| 		wordWrap = false | ||||
| 	} | ||||
|  | ||||
| 	var multiLineBuffer string | ||||
| 	var isMultiLine bool | ||||
|  | ||||
| @@ -513,7 +618,7 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 			return nil | ||||
| 		case errors.Is(err, readline.ErrInterrupt): | ||||
| 			if line == "" { | ||||
| 				return nil | ||||
| 				fmt.Println("Use Ctrl-D or /bye to exit.") | ||||
| 			} | ||||
|  | ||||
| 			continue | ||||
| @@ -527,6 +632,7 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 		case isMultiLine: | ||||
| 			if strings.HasSuffix(line, `"""`) { | ||||
| 				isMultiLine = false | ||||
| 				painter.IsMultiLine = isMultiLine | ||||
| 				multiLineBuffer += strings.TrimSuffix(line, `"""`) | ||||
| 				line = multiLineBuffer | ||||
| 				multiLineBuffer = "" | ||||
| @@ -537,6 +643,7 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 			} | ||||
| 		case strings.HasPrefix(line, `"""`): | ||||
| 			isMultiLine = true | ||||
| 			painter.IsMultiLine = isMultiLine | ||||
| 			multiLineBuffer = strings.TrimPrefix(line, `"""`) + " " | ||||
| 			scanner.SetPrompt("... ") | ||||
| 			continue | ||||
| @@ -545,45 +652,44 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 			if err := ListHandler(cmd, args[1:]); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			continue | ||||
| 		case strings.HasPrefix(line, "/set"): | ||||
| 			args := strings.Fields(line) | ||||
| 			if len(args) > 1 { | ||||
| 				switch args[1] { | ||||
| 				case "history": | ||||
| 					scanner.HistoryEnable() | ||||
| 					continue | ||||
| 				case "nohistory": | ||||
| 					scanner.HistoryDisable() | ||||
| 					continue | ||||
| 				case "wordwrap": | ||||
| 					wordWrap = true | ||||
| 					fmt.Println("Set 'wordwrap' mode.") | ||||
| 				case "nowordwrap": | ||||
| 					wordWrap = false | ||||
| 					fmt.Println("Set 'nowordwrap' mode.") | ||||
| 				case "verbose": | ||||
| 					cmd.Flags().Set("verbose", "true") | ||||
| 					continue | ||||
| 					fmt.Println("Set 'verbose' mode.") | ||||
| 				case "quiet": | ||||
| 					cmd.Flags().Set("verbose", "false") | ||||
| 					continue | ||||
| 					fmt.Println("Set 'quiet' mode.") | ||||
| 				case "mode": | ||||
| 					if len(args) > 2 { | ||||
| 						switch args[2] { | ||||
| 						case "vim": | ||||
| 							scanner.SetVimMode(true) | ||||
| 							continue | ||||
| 						case "emacs", "default": | ||||
| 							scanner.SetVimMode(false) | ||||
| 							continue | ||||
| 						default: | ||||
| 							usage() | ||||
| 							continue | ||||
| 						} | ||||
| 					} else { | ||||
| 						usage() | ||||
| 						continue | ||||
| 					} | ||||
| 				default: | ||||
| 					fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1]) | ||||
| 				} | ||||
| 			} else { | ||||
| 				usage() | ||||
| 				continue | ||||
| 				usageSet() | ||||
| 			} | ||||
| 		case strings.HasPrefix(line, "/show"): | ||||
| 			args := strings.Fields(line) | ||||
| @@ -591,38 +697,65 @@ func generateInteractive(cmd *cobra.Command, model string) error { | ||||
| 				resp, err := server.GetModelInfo(model) | ||||
| 				if err != nil { | ||||
| 					fmt.Println("error: couldn't get model") | ||||
| 					continue | ||||
| 					return err | ||||
| 				} | ||||
|  | ||||
| 				switch args[1] { | ||||
| 				case "license": | ||||
| 					fmt.Println(resp.License) | ||||
| 					if resp.License == "" { | ||||
| 						fmt.Print("No license was specified for this model.\n\n") | ||||
| 					} else { | ||||
| 						fmt.Println(resp.License) | ||||
| 					} | ||||
| 				case "modelfile": | ||||
| 					fmt.Println(resp.Modelfile) | ||||
| 				case "parameters": | ||||
| 					fmt.Println(resp.Parameters) | ||||
| 					if resp.Parameters == "" { | ||||
| 						fmt.Print("No parameters were specified for this model.\n\n") | ||||
| 					} else { | ||||
| 						fmt.Println(resp.Parameters) | ||||
| 					} | ||||
| 				case "system": | ||||
| 					fmt.Println(resp.System) | ||||
| 					if resp.System == "" { | ||||
| 						fmt.Print("No system prompt was specified for this model.\n\n") | ||||
| 					} else { | ||||
| 						fmt.Println(resp.System) | ||||
| 					} | ||||
| 				case "template": | ||||
| 					fmt.Println(resp.Template) | ||||
| 					if resp.Template == "" { | ||||
| 						fmt.Print("No prompt template was specified for this model.\n\n") | ||||
| 					} else { | ||||
| 						fmt.Println(resp.Template) | ||||
| 					} | ||||
| 				default: | ||||
| 					fmt.Println("error: unknown command") | ||||
| 					fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1]) | ||||
| 				} | ||||
| 			} else { | ||||
| 				usageShow() | ||||
| 			} | ||||
| 		case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"): | ||||
| 			args := strings.Fields(line) | ||||
| 			if len(args) > 1 { | ||||
| 				switch args[1] { | ||||
| 				case "set", "/set": | ||||
| 					usageSet() | ||||
| 				case "show", "/show": | ||||
| 					usageShow() | ||||
| 				} | ||||
|  | ||||
| 				continue | ||||
| 			} else { | ||||
| 				usage() | ||||
| 				continue | ||||
| 			} | ||||
| 		case line == "/help", line == "/?": | ||||
| 			usage() | ||||
| 			continue | ||||
| 		case line == "/exit", line == "/bye": | ||||
| 			return nil | ||||
| 		case strings.HasPrefix(line, "/"): | ||||
| 			args := strings.Fields(line) | ||||
| 			fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0]) | ||||
| 		} | ||||
|  | ||||
| 		if err := generate(cmd, model, line); err != nil { | ||||
| 			return err | ||||
| 		if len(line) > 0 && line[0] != '/' { | ||||
| 			if err := generate(cmd, model, line, wordWrap); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| @@ -632,7 +765,7 @@ func generateBatch(cmd *cobra.Command, model string) error { | ||||
| 	for scanner.Scan() { | ||||
| 		prompt := scanner.Text() | ||||
| 		fmt.Printf(">>> %s\n", prompt) | ||||
| 		if err := generate(cmd, model, prompt); err != nil { | ||||
| 		if err := generate(cmd, model, prompt, false); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| @@ -641,28 +774,19 @@ func generateBatch(cmd *cobra.Command, model string) error { | ||||
| } | ||||
|  | ||||
| func RunServer(cmd *cobra.Command, _ []string) error { | ||||
| 	host, port := "127.0.0.1", "11434" | ||||
|  | ||||
| 	parts := strings.Split(os.Getenv("OLLAMA_HOST"), ":") | ||||
| 	if ip := net.ParseIP(parts[0]); ip != nil { | ||||
| 		host = ip.String() | ||||
| 	} | ||||
|  | ||||
| 	if len(parts) > 1 { | ||||
| 		port = parts[1] | ||||
| 	} | ||||
|  | ||||
| 	// deprecated: include port in OLLAMA_HOST | ||||
| 	if p := os.Getenv("OLLAMA_PORT"); p != "" { | ||||
| 		port = p | ||||
| 	} | ||||
|  | ||||
| 	err := initializeKeypair() | ||||
| 	host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST")) | ||||
| 	if err != nil { | ||||
| 		host, port = "127.0.0.1", "11434" | ||||
| 		if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil { | ||||
| 			host = ip.String() | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if err := initializeKeypair(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	ln, err := net.Listen("tcp", fmt.Sprintf("%s:%s", host, port)) | ||||
| 	ln, err := net.Listen("tcp", net.JoinHostPort(host, port)) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -676,6 +800,15 @@ func RunServer(cmd *cobra.Command, _ []string) error { | ||||
| 		if err := server.PruneLayers(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		manifestsPath, err := server.GetManifestPath() | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		if err := server.PruneDirectory(manifestsPath); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return server.Serve(ln, origins) | ||||
| @@ -703,7 +836,7 @@ func initializeKeypair() error { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		err = os.MkdirAll(path.Dir(privKeyPath), 0o700) | ||||
| 		err = os.MkdirAll(filepath.Dir(privKeyPath), 0o755) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("could not create directory %w", err) | ||||
| 		} | ||||
| @@ -762,7 +895,7 @@ func startMacApp(client *api.Client) error { | ||||
| } | ||||
|  | ||||
| func checkServerHeartbeat(_ *cobra.Command, _ []string) error { | ||||
| 	client, err := api.FromEnv() | ||||
| 	client, err := api.ClientFromEnvironment() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @@ -831,6 +964,7 @@ func NewCLI() *cobra.Command { | ||||
|  | ||||
| 	runCmd.Flags().Bool("verbose", false, "Show timings for response") | ||||
| 	runCmd.Flags().Bool("insecure", false, "Use an insecure registry") | ||||
| 	runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically") | ||||
|  | ||||
| 	serveCmd := &cobra.Command{ | ||||
| 		Use:     "serve", | ||||
|   | ||||
							
								
								
									
										148
									
								
								docs/api.md
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								docs/api.md
									
									
									
									
									
								
							| @@ -3,26 +3,32 @@ | ||||
| ## Endpoints | ||||
|  | ||||
| - [Generate a completion](#generate-a-completion) | ||||
| - [Create a model](#create-a-model) | ||||
| - [List local models](#list-local-models) | ||||
| - [Copy a model](#copy-a-model) | ||||
| - [Delete a model](#delete-a-model) | ||||
| - [Pull a model](#pull-a-model) | ||||
| - [Generate embeddings](#generate-embeddings) | ||||
| - [Create a Model](#create-a-model) | ||||
| - [List Local Models](#list-local-models) | ||||
| - [Show Model Information](#show-model-information) | ||||
| - [Copy a Model](#copy-a-model) | ||||
| - [Delete a Model](#delete-a-model) | ||||
| - [Pull a Model](#pull-a-model) | ||||
| - [Push a Model](#push-a-model) | ||||
| - [Generate Embeddings](#generate-embeddings) | ||||
|  | ||||
| ## Conventions | ||||
|  | ||||
| ### Model names | ||||
|  | ||||
| Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and if not provided will default to `latest`. The tag is used to identify a specific version. | ||||
| Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version. | ||||
|  | ||||
| ### Durations | ||||
|  | ||||
| All durations are returned in nanoseconds. | ||||
|  | ||||
| ### Streaming responses | ||||
|  | ||||
| Certain endpoints stream responses as JSON objects delimited by the newline (`\n`) character. | ||||
|  | ||||
| ## Generate a completion | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| POST /api/generate | ||||
| ``` | ||||
|  | ||||
| @@ -33,16 +39,17 @@ Generate a response for a given prompt with a provided model. This is a streamin | ||||
| - `model`: (required) the [model name](#model-names) | ||||
| - `prompt`: the prompt to generate a response for | ||||
|  | ||||
| Advanced parameters: | ||||
| Advanced parameters (optional): | ||||
|  | ||||
| - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature` | ||||
| - `system`: system prompt to use (overrides what is defined in the `Modelfile`) | ||||
| - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`) | ||||
| - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory | ||||
| - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl -X POST http://localhost:11434/api/generate -d '{ | ||||
|   "model": "llama2:7b", | ||||
|   "prompt": "Why is the sky blue?" | ||||
| @@ -73,6 +80,7 @@ The final response in the stream also includes additional data about the generat | ||||
| - `eval_count`: number of tokens in the response | ||||
| - `eval_duration`: time in nanoseconds spent generating the response | ||||
| - `context`: an encoding of the conversation used in this response, this can be sent in the next request to keep a conversational memory | ||||
| - `response`: empty if the response was streamed; if not streamed, this will contain the full response | ||||
|  | ||||
| To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by 10^9 (`eval_duration` is reported in nanoseconds). | ||||
|  | ||||
| @@ -80,6 +88,7 @@ To calculate how fast the response is generated in tokens per second (token/s), | ||||
| { | ||||
|   "model": "llama2:7b", | ||||
|   "created_at": "2023-08-04T19:22:45.499127Z", | ||||
|   "response": "", | ||||
|   "context": [1, 2, 3], | ||||
|   "done": true, | ||||
|   "total_duration": 5589157167, | ||||
| @@ -95,7 +104,7 @@ To calculate how fast the response is generated in tokens per second (token/s), | ||||
|  | ||||
| ## Create a Model | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| POST /api/create | ||||
| ``` | ||||
|  | ||||
| @@ -105,10 +114,11 @@ Create a model from a [`Modelfile`](./modelfile.md) | ||||
|  | ||||
| - `name`: name of the model to create | ||||
| - `path`: path to the Modelfile | ||||
| - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl -X POST http://localhost:11434/api/create -d '{ | ||||
|   "name": "mario", | ||||
|   "path": "~/Modelfile" | ||||
| @@ -117,7 +127,7 @@ curl -X POST http://localhost:11434/api/create -d '{ | ||||
|  | ||||
| ### Response | ||||
|  | ||||
| A stream of JSON objects. When finished, `status` is `success` | ||||
| A stream of JSON objects. When finished, `status` is `success`. | ||||
|  | ||||
| ```json | ||||
| { | ||||
| @@ -127,7 +137,7 @@ A stream of JSON objects. When finished, `status` is `success` | ||||
|  | ||||
| ## List Local Models | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| GET /api/tags | ||||
| ``` | ||||
|  | ||||
| @@ -135,7 +145,7 @@ List models that are available locally. | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl http://localhost:11434/api/tags | ||||
| ``` | ||||
|  | ||||
| @@ -158,9 +168,40 @@ curl http://localhost:11434/api/tags | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Show Model Information | ||||
|  | ||||
| ```shell | ||||
| POST /api/show | ||||
| ``` | ||||
|  | ||||
| Show details about a model including modelfile, template, parameters, license, and system prompt. | ||||
|  | ||||
| ### Parameters | ||||
|  | ||||
| - `name`: name of the model to show | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ```shell | ||||
| curl http://localhost:11434/api/show -d '{ | ||||
|   "name": "llama2:7b" | ||||
| }' | ||||
| ``` | ||||
|  | ||||
| ### Response | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "license": "<contents of license block>", | ||||
|   "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n", | ||||
|   "parameters": "stop                           [INST]\nstop                           [/INST]\nstop                           <<SYS>>\nstop                           <</SYS>>", | ||||
|   "template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] " | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Copy a Model | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| POST /api/copy | ||||
| ``` | ||||
|  | ||||
| @@ -168,7 +209,7 @@ Copy a model. Creates a model with another name from an existing model. | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl http://localhost:11434/api/copy -d '{ | ||||
|   "source": "llama2:7b", | ||||
|   "destination": "llama2-backup" | ||||
| @@ -177,7 +218,7 @@ curl http://localhost:11434/api/copy -d '{ | ||||
|  | ||||
| ## Delete a Model | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| DELETE /api/delete | ||||
| ``` | ||||
|  | ||||
| @@ -189,7 +230,7 @@ Delete a model and its data. | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl -X DELETE http://localhost:11434/api/delete -d '{ | ||||
|   "name": "llama2:13b" | ||||
| }' | ||||
| @@ -197,19 +238,21 @@ curl -X DELETE http://localhost:11434/api/delete -d '{ | ||||
|  | ||||
| ## Pull a Model | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| POST /api/pull | ||||
| ``` | ||||
|  | ||||
| Download a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple calls to will share the same download progress. | ||||
| Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress. | ||||
|  | ||||
| ### Parameters | ||||
|  | ||||
| - `name`: name of the model to pull | ||||
| - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development. | ||||
| - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl -X POST http://localhost:11434/api/pull -d '{ | ||||
|   "name": "llama2:7b" | ||||
| }' | ||||
| @@ -225,9 +268,66 @@ curl -X POST http://localhost:11434/api/pull -d '{ | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Push a Model | ||||
|  | ||||
| ```shell | ||||
| POST /api/push | ||||
| ``` | ||||
|  | ||||
| Upload a model to a model library. Requires registering for ollama.ai and adding a public key first. | ||||
|  | ||||
| ### Parameters | ||||
|  | ||||
| - `name`: name of the model to push in the form of `<namespace>/<model>:<tag>` | ||||
| - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development. | ||||
| - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ```shell | ||||
| curl -X POST http://localhost:11434/api/push -d '{ | ||||
|   "name": "mattw/pygmalion:latest" | ||||
| }' | ||||
| ``` | ||||
|  | ||||
| ### Response | ||||
|  | ||||
| Streaming response that starts with: | ||||
|  | ||||
| ```json | ||||
| { "status": "retrieving manifest" } | ||||
| ``` | ||||
|  | ||||
| and then: | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "status": "starting upload", | ||||
|   "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab", | ||||
|   "total": 1928429856 | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Then there is a series of uploading responses: | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "status": "starting upload", | ||||
|   "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab", | ||||
|   "total": 1928429856 | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Finally, when the upload is complete: | ||||
|  | ||||
| ```json | ||||
| {"status":"pushing manifest"} | ||||
| {"status":"success"} | ||||
| ``` | ||||
|  | ||||
| ## Generate Embeddings | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| POST /api/embeddings | ||||
| ``` | ||||
|  | ||||
| @@ -244,7 +344,7 @@ Advanced parameters: | ||||
|  | ||||
| ### Request | ||||
|  | ||||
| ``` | ||||
| ```shell | ||||
| curl -X POST http://localhost:11434/api/embeddings -d '{ | ||||
|   "model": "llama2:7b", | ||||
|   "prompt": "Here is an article about llamas..." | ||||
|   | ||||
| @@ -10,25 +10,25 @@ Install required tools: | ||||
| - go version 1.20 or higher | ||||
| - gcc version 11.4.0 or higher | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| brew install go cmake gcc | ||||
| ``` | ||||
|  | ||||
| Get the required libraries: | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| go generate ./... | ||||
| ``` | ||||
|  | ||||
| Then build ollama: | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| go build . | ||||
| ``` | ||||
|  | ||||
| Now you can run `ollama`: | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| ./ollama | ||||
| ``` | ||||
|  | ||||
|   | ||||
| @@ -2,16 +2,17 @@ | ||||
|  | ||||
| ## How can I expose the Ollama server? | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| OLLAMA_HOST=0.0.0.0:11435 ollama serve | ||||
| ``` | ||||
|  | ||||
| By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable: | ||||
|  | ||||
| ``` | ||||
| ```bash | ||||
| OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve | ||||
| ``` | ||||
|  | ||||
| ## Where are models stored? | ||||
|  | ||||
| Raw model data is stored under `~/.ollama/models`. | ||||
| * macOS: Raw model data is stored under `~/.ollama/models`. | ||||
| * Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`. | ||||
|   | ||||
							
								
								
									
										83
									
								
								docs/linux.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								docs/linux.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| # Installing Ollama on Linux | ||||
|  | ||||
| > Note: A one line installer for Ollama is available by running: | ||||
| > | ||||
| > ```bash | ||||
| > curl https://ollama.ai/install.sh | sh | ||||
| > ``` | ||||
|  | ||||
| ## Download the `ollama` binary | ||||
|  | ||||
| Ollama is distributed as a self-contained binary. Download it to a directory in your PATH: | ||||
|  | ||||
| ```bash | ||||
| sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama | ||||
| sudo chmod +x /usr/bin/ollama | ||||
| ``` | ||||
|  | ||||
| ## Start Ollama | ||||
|  | ||||
| Start Ollama by running `ollama serve`: | ||||
|  | ||||
| ```bash | ||||
| ollama serve | ||||
| ``` | ||||
|  | ||||
| Once Ollama is running, run a model in another terminal session: | ||||
|  | ||||
| ```bash | ||||
| ollama run llama2 | ||||
| ``` | ||||
|  | ||||
| ## Install CUDA drivers (optional – for Nvidia GPUs) | ||||
|  | ||||
| [Download and install](https://developer.nvidia.com/cuda-downloads) CUDA. | ||||
|  | ||||
| Verify that the drivers are installed by running the following command, which should print details about your GPU: | ||||
|  | ||||
| ```bash | ||||
| nvidia-smi | ||||
| ``` | ||||
|  | ||||
| ## Adding Ollama as a startup service (optional) | ||||
|  | ||||
| Create a user for Ollama: | ||||
|  | ||||
| ```bash | ||||
| sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama | ||||
| ``` | ||||
|  | ||||
| Create a service file in `/etc/systemd/system/ollama.service`: | ||||
|  | ||||
| ```ini | ||||
| [Unit] | ||||
| Description=Ollama Service | ||||
| After=network-online.target | ||||
|  | ||||
| [Service] | ||||
| ExecStart=/usr/bin/ollama serve | ||||
| User=ollama | ||||
| Group=ollama | ||||
| Restart=always | ||||
| RestartSec=3 | ||||
| Environment="HOME=/usr/share/ollama" | ||||
|  | ||||
| [Install] | ||||
| WantedBy=default.target | ||||
| ``` | ||||
|  | ||||
| Then start the service: | ||||
|  | ||||
| ```bash | ||||
| sudo systemctl daemon-reload | ||||
| sudo systemctl enable ollama | ||||
| ``` | ||||
|  | ||||
| ### Viewing logs | ||||
|  | ||||
| To view logs of Ollama running as a startup service, run: | ||||
|  | ||||
| ```bash | ||||
| journalctl -u ollama | ||||
| ``` | ||||
|  | ||||
| @@ -1,6 +1,6 @@ | ||||
| # Ollama Model File | ||||
|  | ||||
| > Note: this model file syntax is in development | ||||
| > Note: this `Modelfile` syntax is in development | ||||
|  | ||||
| A model file is the blueprint to create and share models with Ollama. | ||||
|  | ||||
| @@ -24,7 +24,7 @@ A model file is the blueprint to create and share models with Ollama. | ||||
|  | ||||
| ## Format | ||||
|  | ||||
| The format of the Modelfile: | ||||
| The format of the `Modelfile`: | ||||
|  | ||||
| ```modelfile | ||||
| # comment | ||||
| @@ -42,9 +42,9 @@ INSTRUCTION arguments | ||||
|  | ||||
| ## Examples | ||||
|  | ||||
| An example of a model file creating a mario blueprint: | ||||
| An example of a `Modelfile` creating a mario blueprint: | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| FROM llama2 | ||||
| # sets the temperature to 1 [higher is more creative, lower is more coherent] | ||||
| PARAMETER temperature 1 | ||||
| @@ -57,9 +57,9 @@ SYSTEM You are Mario from super mario bros, acting as an assistant. | ||||
|  | ||||
| To use this: | ||||
|  | ||||
| 1. Save it as a file (eg. `Modelfile`) | ||||
| 2. `ollama create NAME -f <location of the file eg. ./Modelfile>'` | ||||
| 3. `ollama run NAME` | ||||
| 1. Save it as a file (e.g. `Modelfile`) | ||||
| 2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>` | ||||
| 3. `ollama run choose-a-model-name` | ||||
| 4. Start using the model! | ||||
|  | ||||
| More examples are available in the [examples directory](../examples). | ||||
| @@ -68,33 +68,34 @@ More examples are available in the [examples directory](../examples). | ||||
|  | ||||
| ### FROM (Required) | ||||
|  | ||||
| The FROM instruction defines the base model to use when creating a model. | ||||
| The `FROM` instruction defines the base model to use when creating a model. | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| FROM <model name>:<tag> | ||||
| ``` | ||||
|  | ||||
| #### Build from llama2 | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| FROM llama2 | ||||
| ``` | ||||
|  | ||||
| A list of available base models: | ||||
| <https://github.com/jmorganca/ollama#model-library> | ||||
|  | ||||
| #### Build from a bin file | ||||
| #### Build from a `bin` file | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| FROM ./ollama-model.bin | ||||
| ``` | ||||
|  | ||||
| This bin file location should be specified as an absolute path or relative to the Modelfile location. | ||||
| This bin file location should be specified as an absolute path or relative to the `Modelfile` location. | ||||
|  | ||||
| ### EMBED | ||||
|  | ||||
| The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding. | ||||
| ``` | ||||
| The `EMBED` instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding. | ||||
|  | ||||
| ```modelfile | ||||
| FROM <model name>:<tag> | ||||
| EMBED <file path>.txt | ||||
| EMBED <different file path>.txt | ||||
| @@ -105,7 +106,7 @@ EMBED <path to directory>/*.txt | ||||
|  | ||||
| The `PARAMETER` instruction defines a parameter that can be set when the model is run. | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| PARAMETER <parameter> <parametervalue> | ||||
| ``` | ||||
|  | ||||
| @@ -118,19 +119,21 @@ PARAMETER <parameter> <parametervalue> | ||||
| | mirostat_tau   | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)                                                                                                         | float      | mirostat_tau 5.0     | | ||||
| | num_ctx        | Sets the size of the context window used to generate the next token. (Default: 2048)                                                                                                                                                                    | int        | num_ctx 4096         | | ||||
| | num_gqa        | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b                                                                                                                                         | int        | num_gqa 1            | | ||||
| | num_gpu        | The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable.                                                                                                                                                             | int        | num_gpu 1            | | ||||
| | num_gpu        | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable.                                                                                                                                            | int        | num_gpu 50           | | ||||
| | num_thread     | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int        | num_thread 8         | | ||||
| | repeat_last_n  | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)                                                                                                                                           | int        | repeat_last_n 64     | | ||||
| | repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)                                                                     | float      | repeat_penalty 1.1   | | ||||
| | temperature    | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)                                                                                                                                     | float      | temperature 0.7      | | ||||
| | seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. | int | seed 42 | | ||||
| | stop           | Sets the stop sequences to use.                                                                                                                                                                                                                         | string     | stop "AI assistant:" | | ||||
| tfs_z          | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (Default: 1)                                               | float      | tfs_z 1              | | ||||
| | num_predict    | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)                                                                                                                                   | int        | num_predict 42       | | ||||
| | top_k          | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)                                                                        | int        | top_k 40             | | ||||
| | top_p          | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)                                                                 | float      | top_p 0.9            | | ||||
|  | ||||
| ### TEMPLATE | ||||
|  | ||||
| `TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. | ||||
| The `TEMPLATE` instruction defines the full prompt template to be passed into the model. It may optionally include a system prompt and a user's prompt. This is used to create a full custom prompt, and the syntax may be model specific. You can usually find the template for a given model in the readme for that model. | ||||
|  | ||||
| #### Template Variables | ||||
|  | ||||
| @@ -140,7 +143,7 @@ PARAMETER <parameter> <parametervalue> | ||||
| | `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input.                 | | ||||
| | `{{ .First }}`  | A boolean value used to render specific template information for the first generation of a session.          | | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| TEMPLATE """ | ||||
| {{- if .First }} | ||||
| ### System: | ||||
| @@ -160,7 +163,7 @@ SYSTEM """<system message>""" | ||||
|  | ||||
| The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable. | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| SYSTEM """<system message>""" | ||||
| ``` | ||||
|  | ||||
| @@ -168,7 +171,7 @@ SYSTEM """<system message>""" | ||||
|  | ||||
| The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined. | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| ADAPTER ./ollama-lora.bin | ||||
| ``` | ||||
|  | ||||
| @@ -176,7 +179,7 @@ ADAPTER ./ollama-lora.bin | ||||
|  | ||||
| The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed. | ||||
|  | ||||
| ``` | ||||
| ```modelfile | ||||
| LICENSE """ | ||||
| <license text> | ||||
| """ | ||||
| @@ -184,5 +187,5 @@ LICENSE """ | ||||
|  | ||||
| ## Notes | ||||
|  | ||||
| - the **modelfile is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments. | ||||
| - The **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make them easier to distinguish from arguments. | ||||
| - Instructions can be in any order. In the examples, we start with the `FROM` instruction to keep it easily readable. | ||||
|   | ||||
							
								
								
									
										111
									
								
								docs/quantize.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								docs/quantize.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,111 @@ | ||||
| # How to Quantize a Model | ||||
|  | ||||
| Sometimes the model you want to work with is not available at [https://ollama.ai/library](https://ollama.ai/library). | ||||
|  | ||||
| ## Figure out if we can run the model | ||||
|  | ||||
| Not all models will work with Ollama. There are a number of factors that go into whether we are able to work with the next cool model. First it has to work with llama.cpp. Then we have to have implemented the features of llama.cpp that it requires. And then, sometimes, even with both of those, the model might not work... | ||||
|  | ||||
| 1. What is the model you want to convert and upload? | ||||
| 2. Visit the model's page on HuggingFace. | ||||
| 3. Switch to the **Files and versions** tab. | ||||
| 4. Click on the **config.json** file. If there is no config.json file, it may not work. | ||||
| 5. Take note of the **architectures** list in the JSON file. | ||||
| 6. Does any entry in the list match one of the following architectures? | ||||
|     1. LlamaForCausalLM | ||||
|     2. MistralForCausalLM | ||||
|     3. RWForCausalLM | ||||
|     4. FalconForCausalLM | ||||
|     5. GPTNeoXForCausalLM | ||||
|     6. GPTBigCodeForCausalLM | ||||
| 7. If the answer is yes, then there is a good chance the model will run after being converted and quantized. | ||||
| 8. An alternative to this process is to visit [https://caniquant.tvl.st](https://caniquant.tvl.st) and enter the org/modelname in the box and submit. | ||||
|  | ||||
| At this point there are two processes you can use. You can either use a Docker container to convert and quantize, OR you can manually run the scripts. The Docker container is the easiest way to do it, but it requires you to have Docker installed on your machine. If you don't have Docker installed, you can follow the manual process. | ||||
|  | ||||
| ## Convert and Quantize with Docker | ||||
|  | ||||
| Run `docker run --rm -v /path/to/model/repo:/repo ollama/quantize -q quantlevel /repo`. For instance, to quantize the latest Mistral 7B model, clone its repository to your machine, change into that directory, and run: | ||||
|  | ||||
| ```shell | ||||
| docker run --rm -v .:/repo ollama/quantize -q q4_0 /repo | ||||
| ``` | ||||
|  | ||||
| You can find the different quantization levels below under **Quantize the Model**. | ||||
|  | ||||
| This will output two files into the directory. The first is an `f16.bin` file, which is the model converted to GGUF. The second is a `q4_0.bin` file, which is the model quantized to 4 bits. You should rename it to something more descriptive. | ||||
|  | ||||
| You can find the repository for the Docker container here: [https://github.com/mxyng/quantize](https://github.com/mxyng/quantize) | ||||
|  | ||||
| For instance, if you wanted to convert the Mistral 7B model to a Q4 quantized model, then you could go through the following steps: | ||||
|  | ||||
| 1. First verify the model will potentially work. | ||||
| 2. Now clone Mistral 7B to your machine. You can find the command to run when you click the three vertical dots button on the model page, then click **Clone Repository**. | ||||
|    1. For this repo, the command is: | ||||
|  | ||||
|       ```shell | ||||
|       git lfs install | ||||
|       git clone https://huggingface.co/mistralai/Mistral-7B-v0.1 | ||||
|       ``` | ||||
|  | ||||
|    2. Navigate into the new directory and run `docker run --rm -v .:/repo ollama/quantize -q q4_0 /repo` | ||||
|    3. Now you can create a modelfile using the q4_0.bin file that was created. | ||||
|  | ||||
| ## Convert and Quantize Manually | ||||
|  | ||||
| ### Clone llama.cpp to your machine | ||||
|  | ||||
| If we know the model has a chance of working, then we need to convert and quantize. This is a matter of running two separate scripts in the llama.cpp project. | ||||
|  | ||||
| 1. Decide where you want the llama.cpp repository on your machine. | ||||
| 2. Navigate to that location and then run: | ||||
|  [`git clone https://github.com/ggerganov/llama.cpp.git`](https://github.com/ggerganov/llama.cpp.git) | ||||
|     1. If you don't have git installed, download this zip file and unzip it to that location: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.zip | ||||
| 3. Install the Python dependencies: `pip install torch transformers sentencepiece` | ||||
| 4. Run `make` to build the project and the quantize executable. | ||||
|  | ||||
| ### Convert the model to GGUF | ||||
|  | ||||
| 1. Decide on the right convert script to run. What was the model architecture you found in the first section? | ||||
|     1. LlamaForCausalLM or MistralForCausalLM: | ||||
|     run `python3 convert.py <modelfilename>` | ||||
|     No need to specify fp16 or fp32. | ||||
|     2. FalconForCausalLM or RWForCausalLM: | ||||
|     run `python3 convert-falcon-hf-to-gguf.py <modelfilename> <fpsize>`   | ||||
|     fpsize depends on the weight size. 1 for fp16, 0 for fp32 | ||||
|     3. GPTNeoXForCausalLM: | ||||
|     run `python3 convert-gptneox-hf-to-gguf.py <modelfilename> <fpsize>` | ||||
|     fpsize depends on the weight size. 1 for fp16, 0 for fp32 | ||||
|     4. GPTBigCodeForCausalLM: | ||||
|     run `python3 convert-starcoder-hf-to-gguf.py <modelfilename> <fpsize>` | ||||
|     fpsize depends on the weight size. 1 for fp16, 0 for fp32 | ||||
|  | ||||
| ### Quantize the model | ||||
|  | ||||
| If the model converted successfully, there is a good chance it will also quantize successfully. Now you need to decide on the quantization to use. We will always try to create all the quantizations and upload them to the library. You should decide which level is more important to you and quantize accordingly. | ||||
|  | ||||
| The quantization options are as follows. Note that some architectures such as Falcon do not support K quants. | ||||
|  | ||||
| - Q4_0 | ||||
| - Q4_1 | ||||
| - Q5_0 | ||||
| - Q5_1 | ||||
| - Q2_K | ||||
| - Q3_K | ||||
| - Q3_K_S | ||||
| - Q3_K_M | ||||
| - Q3_K_L | ||||
| - Q4_K | ||||
| - Q4_K_S | ||||
| - Q4_K_M | ||||
| - Q5_K | ||||
| - Q5_K_S | ||||
| - Q5_K_M | ||||
| - Q6_K | ||||
| - Q8_0 | ||||
|  | ||||
| Run the following command: `quantize <converted model from above> <output file> <quantization type>` | ||||
|  | ||||
| ## Now Create the Model | ||||
|  | ||||
| Now you can create the Ollama model. Refer to the [modelfile](./modelfile.md) doc for more information on doing that. | ||||
							
								
								
									
										171
									
								
								examples/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								examples/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,171 @@ | ||||
| node_modules | ||||
| # OSX | ||||
| .DS_Store | ||||
|  | ||||
| # Models | ||||
| models/ | ||||
|  | ||||
| # Local Chroma db | ||||
| .chroma/ | ||||
| db/ | ||||
|  | ||||
| # Byte-compiled / optimized / DLL files | ||||
| __pycache__/ | ||||
| *.py[cod] | ||||
| *$py.class | ||||
|  | ||||
| # C extensions | ||||
| *.so | ||||
|  | ||||
| # Distribution / packaging | ||||
| .Python | ||||
| build/ | ||||
| develop-eggs/ | ||||
| dist/ | ||||
| downloads/ | ||||
| eggs/ | ||||
| .eggs/ | ||||
| lib/ | ||||
| lib64/ | ||||
| parts/ | ||||
| sdist/ | ||||
| var/ | ||||
| wheels/ | ||||
| share/python-wheels/ | ||||
| *.egg-info/ | ||||
| .installed.cfg | ||||
| *.egg | ||||
| MANIFEST | ||||
|  | ||||
| # PyInstaller | ||||
| #  Usually these files are written by a python script from a template | ||||
| #  before PyInstaller builds the exe, so as to inject date/other infos into it. | ||||
| *.manifest | ||||
| *.spec | ||||
|  | ||||
| # Installer logs | ||||
| pip-log.txt | ||||
| pip-delete-this-directory.txt | ||||
|  | ||||
| # Unit test / coverage reports | ||||
| htmlcov/ | ||||
| .tox/ | ||||
| .nox/ | ||||
| .coverage | ||||
| .coverage.* | ||||
| .cache | ||||
| nosetests.xml | ||||
| coverage.xml | ||||
| *.cover | ||||
| *.py,cover | ||||
| .hypothesis/ | ||||
| .pytest_cache/ | ||||
| cover/ | ||||
|  | ||||
| # Translations | ||||
| *.mo | ||||
| *.pot | ||||
|  | ||||
| # Django stuff: | ||||
| *.log | ||||
| local_settings.py | ||||
| db.sqlite3 | ||||
| db.sqlite3-journal | ||||
|  | ||||
| # Flask stuff: | ||||
| instance/ | ||||
| .webassets-cache | ||||
|  | ||||
| # Scrapy stuff: | ||||
| .scrapy | ||||
|  | ||||
| # Sphinx documentation | ||||
| docs/_build/ | ||||
|  | ||||
| # PyBuilder | ||||
| .pybuilder/ | ||||
| target/ | ||||
|  | ||||
| # Jupyter Notebook | ||||
| .ipynb_checkpoints | ||||
|  | ||||
| # IPython | ||||
| profile_default/ | ||||
| ipython_config.py | ||||
|  | ||||
| # pyenv | ||||
| #   For a library or package, you might want to ignore these files since the code is | ||||
| #   intended to run in multiple environments; otherwise, check them in: | ||||
| # .python-version | ||||
|  | ||||
| # pipenv | ||||
| #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||||
| #   However, in case of collaboration, if having platform-specific dependencies or dependencies | ||||
| #   having no cross-platform support, pipenv may install dependencies that don't work, or not | ||||
| #   install all needed dependencies. | ||||
| #Pipfile.lock | ||||
|  | ||||
| # poetry | ||||
| #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||||
| #   This is especially recommended for binary packages to ensure reproducibility, and is more | ||||
| #   commonly ignored for libraries. | ||||
| #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||||
| #poetry.lock | ||||
|  | ||||
| # pdm | ||||
| #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||||
| #pdm.lock | ||||
| #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||||
| #   in version control. | ||||
| #   https://pdm.fming.dev/#use-with-ide | ||||
| .pdm.toml | ||||
|  | ||||
| # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||||
| __pypackages__/ | ||||
|  | ||||
| # Celery stuff | ||||
| celerybeat-schedule | ||||
| celerybeat.pid | ||||
|  | ||||
| # SageMath parsed files | ||||
| *.sage.py | ||||
|  | ||||
| # Environments | ||||
| .env | ||||
| .venv | ||||
| env/ | ||||
| venv/ | ||||
| ENV/ | ||||
| env.bak/ | ||||
| venv.bak/ | ||||
|  | ||||
| # Spyder project settings | ||||
| .spyderproject | ||||
| .spyproject | ||||
|  | ||||
| # Rope project settings | ||||
| .ropeproject | ||||
|  | ||||
| # mkdocs documentation | ||||
| /site | ||||
|  | ||||
| # mypy | ||||
| .mypy_cache/ | ||||
| .dmypy.json | ||||
| dmypy.json | ||||
|  | ||||
| # Pyre type checker | ||||
| .pyre/ | ||||
|  | ||||
| # pytype static type analyzer | ||||
| .pytype/ | ||||
|  | ||||
| # Cython debug symbols | ||||
| cython_debug/ | ||||
|  | ||||
| # PyCharm | ||||
| #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||||
| #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||||
| #  and can be added to the global gitignore or merged into this file.  For a more nuclear | ||||
| #  option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||||
| #.idea/ | ||||
| @@ -1,15 +1,3 @@ | ||||
| # Examples | ||||
|  | ||||
| This directory contains different examples of using Ollama | ||||
|  | ||||
| To create a model: | ||||
|  | ||||
| ``` | ||||
| ollama create example -f <example file> | ||||
| ``` | ||||
|  | ||||
| To run a model: | ||||
|  | ||||
| ``` | ||||
| ollama run example | ||||
| ``` | ||||
| This directory contains different examples of using Ollama. | ||||
|   | ||||
							
								
								
									
										0
									
								
								examples/golang-simplegenerate/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								examples/golang-simplegenerate/README.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										27
									
								
								examples/golang-simplegenerate/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								examples/golang-simplegenerate/main.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| package main | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"io" | ||||
| 	"log" | ||||
| ) | ||||
|  | ||||
| // main posts a generate request for the "mistral" model to the server at | ||||
| // localhost:11434 and prints the raw response body to stdout. | ||||
| func main() { | ||||
| 	body := []byte(`{"model":"mistral"}`) | ||||
| 	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body)) | ||||
| 	if err != nil { | ||||
| 		// Report the failure on stderr so stdout only ever carries the response. | ||||
| 		fmt.Fprintln(os.Stderr, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 	// The response body must always be closed once the request succeeded. | ||||
| 	defer resp.Body.Close() | ||||
|  | ||||
| 	responseData, err := io.ReadAll(resp.Body) | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 	fmt.Println(string(responseData)) | ||||
| } | ||||
							
								
								
									
										21
									
								
								examples/langchain-typescript-simple/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								examples/langchain-typescript-simple/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| # LangChain | ||||
|  | ||||
| This example is a basic "hello world" of using LangChain with Ollama using Node.js and TypeScript. | ||||
|  | ||||
| ## Setup | ||||
|  | ||||
| ```shell | ||||
| npm install | ||||
| ``` | ||||
|  | ||||
| ## Run | ||||
|  | ||||
| ```shell | ||||
| ts-node main.ts | ||||
| ``` | ||||
|  | ||||
| Running this example will print the response for "hello": | ||||
|  | ||||
| ```plaintext | ||||
| Hello! It's nice to meet you. hopefully you are having a great day! Is there something I can help you with or would you like to chat? | ||||
| ``` | ||||
							
								
								
									
										15
									
								
								examples/langchain-typescript-simple/main.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								examples/langchain-typescript-simple/main.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| import { Ollama} from 'langchain/llms/ollama'; | ||||
|  | ||||
| // Streams the model's reply to "Hello" and writes each chunk to stdout as it arrives. | ||||
| async function main() { | ||||
|   // other parameters can be found at https://js.langchain.com/docs/api/llms_ollama/classes/Ollama | ||||
|   const llm = new Ollama({ model: 'mistral' }); | ||||
|   const chunks = await llm.stream("Hello"); | ||||
|  | ||||
|   for await (const piece of chunks) { | ||||
|     process.stdout.write(piece); | ||||
|   } | ||||
| } | ||||
|  | ||||
| main(); | ||||
							
								
								
									
										997
									
								
								examples/langchain-typescript-simple/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										997
									
								
								examples/langchain-typescript-simple/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,997 @@ | ||||
| { | ||||
|   "name": "with-langchain-typescript-simplegenerate", | ||||
|   "lockfileVersion": 3, | ||||
|   "requires": true, | ||||
|   "packages": { | ||||
|     "": { | ||||
|       "dependencies": { | ||||
|         "langchain": "^0.0.165" | ||||
|       }, | ||||
|       "devDependencies": { | ||||
|         "typescript": "^5.2.2" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/@anthropic-ai/sdk": { | ||||
|       "version": "0.6.2", | ||||
|       "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.6.2.tgz", | ||||
|       "integrity": "sha512-fB9PUj9RFT+XjkL+E9Ol864ZIJi+1P8WnbHspN3N3/GK2uSzjd0cbVIKTGgf4v3N8MwaQu+UWnU7C4BG/fap/g==", | ||||
|       "dependencies": { | ||||
|         "@types/node": "^18.11.18", | ||||
|         "@types/node-fetch": "^2.6.4", | ||||
|         "abort-controller": "^3.0.0", | ||||
|         "agentkeepalive": "^4.2.1", | ||||
|         "digest-fetch": "^1.3.0", | ||||
|         "form-data-encoder": "1.7.2", | ||||
|         "formdata-node": "^4.3.2", | ||||
|         "node-fetch": "^2.6.7" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/@types/node": { | ||||
|       "version": "18.18.4", | ||||
|       "resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.4.tgz", | ||||
|       "integrity": "sha512-t3rNFBgJRugIhackit2mVcLfF6IRc0JE4oeizPQL8Zrm8n2WY/0wOdpOPhdtG0V9Q2TlW/axbF1MJ6z+Yj/kKQ==" | ||||
|     }, | ||||
|     "node_modules/@types/node-fetch": { | ||||
|       "version": "2.6.6", | ||||
|       "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.6.tgz", | ||||
|       "integrity": "sha512-95X8guJYhfqiuVVhRFxVQcf4hW/2bCuoPwDasMf/531STFoNoWTT7YDnWdXHEZKqAGUigmpG31r2FE70LwnzJw==", | ||||
|       "dependencies": { | ||||
|         "@types/node": "*", | ||||
|         "form-data": "^4.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/@types/retry": { | ||||
|       "version": "0.12.0", | ||||
|       "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", | ||||
|       "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" | ||||
|     }, | ||||
|     "node_modules/@types/uuid": { | ||||
|       "version": "9.0.5", | ||||
|       "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.5.tgz", | ||||
|       "integrity": "sha512-xfHdwa1FMJ082prjSJpoEI57GZITiQz10r3vEJCHa2khEFQjKy91aWKz6+zybzssCvXUwE1LQWgWVwZ4nYUvHQ==" | ||||
|     }, | ||||
|     "node_modules/abort-controller": { | ||||
|       "version": "3.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", | ||||
|       "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", | ||||
|       "dependencies": { | ||||
|         "event-target-shim": "^5.0.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=6.5" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/agentkeepalive": { | ||||
|       "version": "4.5.0", | ||||
|       "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", | ||||
|       "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", | ||||
|       "dependencies": { | ||||
|         "humanize-ms": "^1.2.1" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">= 8.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ansi-styles": { | ||||
|       "version": "5.2.0", | ||||
|       "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", | ||||
|       "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", | ||||
|       "engines": { | ||||
|         "node": ">=10" | ||||
|       }, | ||||
|       "funding": { | ||||
|         "url": "https://github.com/chalk/ansi-styles?sponsor=1" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/argparse": { | ||||
|       "version": "2.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", | ||||
|       "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" | ||||
|     }, | ||||
|     "node_modules/asynckit": { | ||||
|       "version": "0.4.0", | ||||
|       "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", | ||||
|       "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" | ||||
|     }, | ||||
|     "node_modules/base-64": { | ||||
|       "version": "0.1.0", | ||||
|       "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", | ||||
|       "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" | ||||
|     }, | ||||
|     "node_modules/base64-js": { | ||||
|       "version": "1.5.1", | ||||
|       "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", | ||||
|       "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", | ||||
|       "funding": [ | ||||
|         { | ||||
|           "type": "github", | ||||
|           "url": "https://github.com/sponsors/feross" | ||||
|         }, | ||||
|         { | ||||
|           "type": "patreon", | ||||
|           "url": "https://www.patreon.com/feross" | ||||
|         }, | ||||
|         { | ||||
|           "type": "consulting", | ||||
|           "url": "https://feross.org/support" | ||||
|         } | ||||
|       ] | ||||
|     }, | ||||
|     "node_modules/binary-extensions": { | ||||
|       "version": "2.2.0", | ||||
|       "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", | ||||
|       "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", | ||||
|       "engines": { | ||||
|         "node": ">=8" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/binary-search": { | ||||
|       "version": "1.3.6", | ||||
|       "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz", | ||||
|       "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==" | ||||
|     }, | ||||
|     "node_modules/camelcase": { | ||||
|       "version": "6.3.0", | ||||
|       "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", | ||||
|       "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", | ||||
|       "engines": { | ||||
|         "node": ">=10" | ||||
|       }, | ||||
|       "funding": { | ||||
|         "url": "https://github.com/sponsors/sindresorhus" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/charenc": { | ||||
|       "version": "0.0.2", | ||||
|       "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", | ||||
|       "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", | ||||
|       "engines": { | ||||
|         "node": "*" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/combined-stream": { | ||||
|       "version": "1.0.8", | ||||
|       "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", | ||||
|       "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", | ||||
|       "dependencies": { | ||||
|         "delayed-stream": "~1.0.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">= 0.8" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/commander": { | ||||
|       "version": "10.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", | ||||
|       "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", | ||||
|       "engines": { | ||||
|         "node": ">=14" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/crypt": { | ||||
|       "version": "0.0.2", | ||||
|       "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", | ||||
|       "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", | ||||
|       "engines": { | ||||
|         "node": "*" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/decamelize": { | ||||
|       "version": "1.2.0", | ||||
|       "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", | ||||
|       "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==", | ||||
|       "engines": { | ||||
|         "node": ">=0.10.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/delayed-stream": { | ||||
|       "version": "1.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", | ||||
|       "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", | ||||
|       "engines": { | ||||
|         "node": ">=0.4.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/digest-fetch": { | ||||
|       "version": "1.3.0", | ||||
|       "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", | ||||
|       "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", | ||||
|       "dependencies": { | ||||
|         "base-64": "^0.1.0", | ||||
|         "md5": "^2.3.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/event-target-shim": { | ||||
|       "version": "5.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", | ||||
|       "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", | ||||
|       "engines": { | ||||
|         "node": ">=6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/eventemitter3": { | ||||
|       "version": "4.0.7", | ||||
|       "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", | ||||
|       "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==" | ||||
|     }, | ||||
|     "node_modules/expr-eval": { | ||||
|       "version": "2.0.2", | ||||
|       "resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz", | ||||
|       "integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg==" | ||||
|     }, | ||||
|     "node_modules/flat": { | ||||
|       "version": "5.0.2", | ||||
|       "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", | ||||
|       "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", | ||||
|       "bin": { | ||||
|         "flat": "cli.js" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/form-data": { | ||||
|       "version": "4.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", | ||||
|       "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", | ||||
|       "dependencies": { | ||||
|         "asynckit": "^0.4.0", | ||||
|         "combined-stream": "^1.0.8", | ||||
|         "mime-types": "^2.1.12" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">= 6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/form-data-encoder": { | ||||
|       "version": "1.7.2", | ||||
|       "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", | ||||
|       "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" | ||||
|     }, | ||||
|     "node_modules/formdata-node": { | ||||
|       "version": "4.4.1", | ||||
|       "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", | ||||
|       "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", | ||||
|       "dependencies": { | ||||
|         "node-domexception": "1.0.0", | ||||
|         "web-streams-polyfill": "4.0.0-beta.3" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">= 12.20" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/humanize-ms": { | ||||
|       "version": "1.2.1", | ||||
|       "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", | ||||
|       "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", | ||||
|       "dependencies": { | ||||
|         "ms": "^2.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/is-any-array": { | ||||
|       "version": "2.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", | ||||
|       "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" | ||||
|     }, | ||||
|     "node_modules/is-buffer": { | ||||
|       "version": "1.1.6", | ||||
|       "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", | ||||
|       "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" | ||||
|     }, | ||||
|     "node_modules/js-tiktoken": { | ||||
|       "version": "1.0.7", | ||||
|       "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.7.tgz", | ||||
|       "integrity": "sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==", | ||||
|       "dependencies": { | ||||
|         "base64-js": "^1.5.1" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/js-yaml": { | ||||
|       "version": "4.1.0", | ||||
|       "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", | ||||
|       "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", | ||||
|       "dependencies": { | ||||
|         "argparse": "^2.0.1" | ||||
|       }, | ||||
|       "bin": { | ||||
|         "js-yaml": "bin/js-yaml.js" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/jsonpointer": { | ||||
|       "version": "5.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", | ||||
|       "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==", | ||||
|       "engines": { | ||||
|         "node": ">=0.10.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/langchain": { | ||||
|       "version": "0.0.165", | ||||
|       "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.165.tgz", | ||||
|       "integrity": "sha512-CpbNpjwaE+9lzjdw+pZz0VgnRrFivEgr7CVp9dDaAb5JpaJAA4V2v6uQ9ZPN+TSqupTQ79HFn2sfyZVEl2EG7Q==", | ||||
|       "dependencies": { | ||||
|         "@anthropic-ai/sdk": "^0.6.2", | ||||
|         "ansi-styles": "^5.0.0", | ||||
|         "binary-extensions": "^2.2.0", | ||||
|         "camelcase": "6", | ||||
|         "decamelize": "^1.2.0", | ||||
|         "expr-eval": "^2.0.2", | ||||
|         "flat": "^5.0.2", | ||||
|         "js-tiktoken": "^1.0.7", | ||||
|         "js-yaml": "^4.1.0", | ||||
|         "jsonpointer": "^5.0.1", | ||||
|         "langchainhub": "~0.0.6", | ||||
|         "langsmith": "~0.0.31", | ||||
|         "ml-distance": "^4.0.0", | ||||
|         "object-hash": "^3.0.0", | ||||
|         "openai": "~4.4.0", | ||||
|         "openapi-types": "^12.1.3", | ||||
|         "p-queue": "^6.6.2", | ||||
|         "p-retry": "4", | ||||
|         "uuid": "^9.0.0", | ||||
|         "yaml": "^2.2.1", | ||||
|         "zod": "^3.22.3", | ||||
|         "zod-to-json-schema": "^3.20.4" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=18" | ||||
|       }, | ||||
|       "peerDependencies": { | ||||
|         "@aws-crypto/sha256-js": "^5.0.0", | ||||
|         "@aws-sdk/client-bedrock-runtime": "^3.422.0", | ||||
|         "@aws-sdk/client-dynamodb": "^3.310.0", | ||||
|         "@aws-sdk/client-kendra": "^3.352.0", | ||||
|         "@aws-sdk/client-lambda": "^3.310.0", | ||||
|         "@aws-sdk/client-s3": "^3.310.0", | ||||
|         "@aws-sdk/client-sagemaker-runtime": "^3.310.0", | ||||
|         "@aws-sdk/client-sfn": "^3.310.0", | ||||
|         "@aws-sdk/credential-provider-node": "^3.388.0", | ||||
|         "@azure/storage-blob": "^12.15.0", | ||||
|         "@clickhouse/client": "^0.0.14", | ||||
|         "@cloudflare/ai": "^1.0.12", | ||||
|         "@elastic/elasticsearch": "^8.4.0", | ||||
|         "@getmetal/metal-sdk": "*", | ||||
|         "@getzep/zep-js": "^0.7.0", | ||||
|         "@gomomento/sdk": "^1.23.0", | ||||
|         "@google-ai/generativelanguage": "^0.2.1", | ||||
|         "@google-cloud/storage": "^6.10.1", | ||||
|         "@huggingface/inference": "^1.5.1", | ||||
|         "@mozilla/readability": "*", | ||||
|         "@notionhq/client": "^2.2.10", | ||||
|         "@opensearch-project/opensearch": "*", | ||||
|         "@pinecone-database/pinecone": "^1.1.0", | ||||
|         "@planetscale/database": "^1.8.0", | ||||
|         "@qdrant/js-client-rest": "^1.2.0", | ||||
|         "@raycast/api": "^1.55.2", | ||||
|         "@smithy/eventstream-codec": "^2.0.5", | ||||
|         "@smithy/protocol-http": "^3.0.6", | ||||
|         "@smithy/signature-v4": "^2.0.10", | ||||
|         "@smithy/util-utf8": "^2.0.0", | ||||
|         "@supabase/postgrest-js": "^1.1.1", | ||||
|         "@supabase/supabase-js": "^2.10.0", | ||||
|         "@tensorflow-models/universal-sentence-encoder": "*", | ||||
|         "@tensorflow/tfjs-converter": "*", | ||||
|         "@tensorflow/tfjs-core": "*", | ||||
|         "@upstash/redis": "^1.20.6", | ||||
|         "@vercel/postgres": "^0.5.0", | ||||
|         "@writerai/writer-sdk": "^0.40.2", | ||||
|         "@xata.io/client": "^0.25.1", | ||||
|         "@xenova/transformers": "^2.5.4", | ||||
|         "@zilliz/milvus2-sdk-node": ">=2.2.7", | ||||
|         "apify-client": "^2.7.1", | ||||
|         "axios": "*", | ||||
|         "cassandra-driver": "^4.6.4", | ||||
|         "cheerio": "^1.0.0-rc.12", | ||||
|         "chromadb": "*", | ||||
|         "cohere-ai": ">=6.0.0", | ||||
|         "d3-dsv": "^2.0.0", | ||||
|         "epub2": "^3.0.1", | ||||
|         "faiss-node": "^0.3.0", | ||||
|         "fast-xml-parser": "^4.2.7", | ||||
|         "firebase-admin": "^11.9.0", | ||||
|         "google-auth-library": "^8.9.0", | ||||
|         "googleapis": "^126.0.1", | ||||
|         "hnswlib-node": "^1.4.2", | ||||
|         "html-to-text": "^9.0.5", | ||||
|         "ignore": "^5.2.0", | ||||
|         "ioredis": "^5.3.2", | ||||
|         "jsdom": "*", | ||||
|         "llmonitor": "*", | ||||
|         "lodash": "^4.17.21", | ||||
|         "mammoth": "*", | ||||
|         "mongodb": "^5.2.0", | ||||
|         "mysql2": "^3.3.3", | ||||
|         "neo4j-driver": "*", | ||||
|         "node-llama-cpp": "*", | ||||
|         "notion-to-md": "^3.1.0", | ||||
|         "pdf-parse": "1.1.1", | ||||
|         "peggy": "^3.0.2", | ||||
|         "pg": "^8.11.0", | ||||
|         "pg-copy-streams": "^6.0.5", | ||||
|         "pickleparser": "^0.1.0", | ||||
|         "playwright": "^1.32.1", | ||||
|         "portkey-ai": "^0.1.11", | ||||
|         "puppeteer": "^19.7.2", | ||||
|         "redis": "^4.6.4", | ||||
|         "replicate": "^0.18.0", | ||||
|         "sonix-speech-recognition": "^2.1.1", | ||||
|         "srt-parser-2": "^1.2.2", | ||||
|         "typeorm": "^0.3.12", | ||||
|         "typesense": "^1.5.3", | ||||
|         "usearch": "^1.1.1", | ||||
|         "vectordb": "^0.1.4", | ||||
|         "voy-search": "0.6.2", | ||||
|         "weaviate-ts-client": "^1.4.0", | ||||
|         "web-auth-library": "^1.0.3", | ||||
|         "youtube-transcript": "^1.0.6", | ||||
|         "youtubei.js": "^5.8.0" | ||||
|       }, | ||||
|       "peerDependenciesMeta": { | ||||
|         "@aws-crypto/sha256-js": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-bedrock-runtime": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-dynamodb": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-kendra": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-lambda": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-s3": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-sagemaker-runtime": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/client-sfn": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@aws-sdk/credential-provider-node": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@azure/storage-blob": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@clickhouse/client": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@cloudflare/ai": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@elastic/elasticsearch": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@getmetal/metal-sdk": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@getzep/zep-js": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@gomomento/sdk": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@google-ai/generativelanguage": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@google-cloud/storage": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@huggingface/inference": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@mozilla/readability": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@notionhq/client": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@opensearch-project/opensearch": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@pinecone-database/pinecone": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@planetscale/database": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@qdrant/js-client-rest": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@raycast/api": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@smithy/eventstream-codec": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@smithy/protocol-http": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@smithy/signature-v4": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@smithy/util-utf8": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@supabase/postgrest-js": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@supabase/supabase-js": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@tensorflow-models/universal-sentence-encoder": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@tensorflow/tfjs-converter": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@tensorflow/tfjs-core": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@upstash/redis": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@vercel/postgres": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@writerai/writer-sdk": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@xata.io/client": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@xenova/transformers": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "@zilliz/milvus2-sdk-node": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "apify-client": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "axios": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "cassandra-driver": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "cheerio": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "chromadb": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "cohere-ai": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "d3-dsv": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "epub2": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "faiss-node": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "fast-xml-parser": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "firebase-admin": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "google-auth-library": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "googleapis": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "hnswlib-node": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "html-to-text": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "ignore": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "ioredis": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "jsdom": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "llmonitor": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "lodash": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "mammoth": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "mongodb": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "mysql2": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "neo4j-driver": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "node-llama-cpp": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "notion-to-md": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "pdf-parse": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "peggy": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "pg": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "pg-copy-streams": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "pickleparser": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "playwright": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "portkey-ai": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "puppeteer": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "redis": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "replicate": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "sonix-speech-recognition": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "srt-parser-2": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "typeorm": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "typesense": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "usearch": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "vectordb": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "voy-search": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "weaviate-ts-client": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "web-auth-library": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "youtube-transcript": { | ||||
|           "optional": true | ||||
|         }, | ||||
|         "youtubei.js": { | ||||
|           "optional": true | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/langchainhub": { | ||||
|       "version": "0.0.6", | ||||
|       "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.6.tgz", | ||||
|       "integrity": "sha512-SW6105T+YP1cTe0yMf//7kyshCgvCTyFBMTgH2H3s9rTAR4e+78DA/BBrUL/Mt4Q5eMWui7iGuAYb3pgGsdQ9w==" | ||||
|     }, | ||||
|     "node_modules/langsmith": { | ||||
|       "version": "0.0.42", | ||||
|       "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.0.42.tgz", | ||||
|       "integrity": "sha512-sFuN+e7E+pPBIRaRgFqZh/BRBWNHTZNAwi6uj4kydQawooCZYoJmM5snOkiQrhVSvAhgu6xFhLvmfvkPcKzD7w==", | ||||
|       "dependencies": { | ||||
|         "@types/uuid": "^9.0.1", | ||||
|         "commander": "^10.0.1", | ||||
|         "p-queue": "^6.6.2", | ||||
|         "p-retry": "4", | ||||
|         "uuid": "^9.0.0" | ||||
|       }, | ||||
|       "bin": { | ||||
|         "langsmith": "dist/cli/main.cjs" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/md5": { | ||||
|       "version": "2.3.0", | ||||
|       "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", | ||||
|       "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", | ||||
|       "dependencies": { | ||||
|         "charenc": "0.0.2", | ||||
|         "crypt": "0.0.2", | ||||
|         "is-buffer": "~1.1.6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/mime-db": { | ||||
|       "version": "1.52.0", | ||||
|       "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", | ||||
|       "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", | ||||
|       "engines": { | ||||
|         "node": ">= 0.6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/mime-types": { | ||||
|       "version": "2.1.35", | ||||
|       "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", | ||||
|       "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", | ||||
|       "dependencies": { | ||||
|         "mime-db": "1.52.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">= 0.6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ml-array-mean": { | ||||
|       "version": "1.1.6", | ||||
|       "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz", | ||||
|       "integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==", | ||||
|       "dependencies": { | ||||
|         "ml-array-sum": "^1.1.6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ml-array-sum": { | ||||
|       "version": "1.1.6", | ||||
|       "resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz", | ||||
|       "integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==", | ||||
|       "dependencies": { | ||||
|         "is-any-array": "^2.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ml-distance": { | ||||
|       "version": "4.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.1.tgz", | ||||
|       "integrity": "sha512-feZ5ziXs01zhyFUUUeZV5hwc0f5JW0Sh0ckU1koZe/wdVkJdGxcP06KNQuF0WBTj8FttQUzcvQcpcrOp/XrlEw==", | ||||
|       "dependencies": { | ||||
|         "ml-array-mean": "^1.1.6", | ||||
|         "ml-distance-euclidean": "^2.0.0", | ||||
|         "ml-tree-similarity": "^1.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ml-distance-euclidean": { | ||||
|       "version": "2.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz", | ||||
|       "integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q==" | ||||
|     }, | ||||
|     "node_modules/ml-tree-similarity": { | ||||
|       "version": "1.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz", | ||||
|       "integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==", | ||||
|       "dependencies": { | ||||
|         "binary-search": "^1.3.5", | ||||
|         "num-sort": "^2.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/ms": { | ||||
|       "version": "2.1.3", | ||||
|       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", | ||||
|       "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" | ||||
|     }, | ||||
|     "node_modules/node-domexception": { | ||||
|       "version": "1.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", | ||||
|       "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", | ||||
|       "funding": [ | ||||
|         { | ||||
|           "type": "github", | ||||
|           "url": "https://github.com/sponsors/jimmywarting" | ||||
|         }, | ||||
|         { | ||||
|           "type": "github", | ||||
|           "url": "https://paypal.me/jimmywarting" | ||||
|         } | ||||
|       ], | ||||
|       "engines": { | ||||
|         "node": ">=10.5.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/node-fetch": { | ||||
|       "version": "2.7.0", | ||||
|       "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", | ||||
|       "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", | ||||
|       "dependencies": { | ||||
|         "whatwg-url": "^5.0.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": "4.x || >=6.0.0" | ||||
|       }, | ||||
|       "peerDependencies": { | ||||
|         "encoding": "^0.1.0" | ||||
|       }, | ||||
|       "peerDependenciesMeta": { | ||||
|         "encoding": { | ||||
|           "optional": true | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/num-sort": { | ||||
|       "version": "2.1.0", | ||||
|       "resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz", | ||||
|       "integrity": "sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==", | ||||
|       "engines": { | ||||
|         "node": ">=8" | ||||
|       }, | ||||
|       "funding": { | ||||
|         "url": "https://github.com/sponsors/sindresorhus" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/object-hash": { | ||||
|       "version": "3.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", | ||||
|       "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", | ||||
|       "engines": { | ||||
|         "node": ">= 6" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/openai": { | ||||
|       "version": "4.4.0", | ||||
|       "resolved": "https://registry.npmjs.org/openai/-/openai-4.4.0.tgz", | ||||
|       "integrity": "sha512-JN0t628Kh95T0IrXl0HdBqnlJg+4Vq0Bnh55tio+dfCnyzHvMLiWyCM9m726MAJD2YkDU4/8RQB6rNbEq9ct2w==", | ||||
|       "dependencies": { | ||||
|         "@types/node": "^18.11.18", | ||||
|         "@types/node-fetch": "^2.6.4", | ||||
|         "abort-controller": "^3.0.0", | ||||
|         "agentkeepalive": "^4.2.1", | ||||
|         "digest-fetch": "^1.3.0", | ||||
|         "form-data-encoder": "1.7.2", | ||||
|         "formdata-node": "^4.3.2", | ||||
|         "node-fetch": "^2.6.7" | ||||
|       }, | ||||
|       "bin": { | ||||
|         "openai": "bin/cli" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/openapi-types": { | ||||
|       "version": "12.1.3", | ||||
|       "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz", | ||||
|       "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==" | ||||
|     }, | ||||
|     "node_modules/p-finally": { | ||||
|       "version": "1.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", | ||||
|       "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==", | ||||
|       "engines": { | ||||
|         "node": ">=4" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/p-queue": { | ||||
|       "version": "6.6.2", | ||||
|       "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz", | ||||
|       "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==", | ||||
|       "dependencies": { | ||||
|         "eventemitter3": "^4.0.4", | ||||
|         "p-timeout": "^3.2.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=8" | ||||
|       }, | ||||
|       "funding": { | ||||
|         "url": "https://github.com/sponsors/sindresorhus" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/p-retry": { | ||||
|       "version": "4.6.2", | ||||
|       "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz", | ||||
|       "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==", | ||||
|       "dependencies": { | ||||
|         "@types/retry": "0.12.0", | ||||
|         "retry": "^0.13.1" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=8" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/p-timeout": { | ||||
|       "version": "3.2.0", | ||||
|       "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", | ||||
|       "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", | ||||
|       "dependencies": { | ||||
|         "p-finally": "^1.0.0" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=8" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/retry": { | ||||
|       "version": "0.13.1", | ||||
|       "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", | ||||
|       "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", | ||||
|       "engines": { | ||||
|         "node": ">= 4" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/tr46": { | ||||
|       "version": "0.0.3", | ||||
|       "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", | ||||
|       "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" | ||||
|     }, | ||||
|     "node_modules/typescript": { | ||||
|       "version": "5.2.2", | ||||
|       "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz", | ||||
|       "integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==", | ||||
|       "dev": true, | ||||
|       "bin": { | ||||
|         "tsc": "bin/tsc", | ||||
|         "tsserver": "bin/tsserver" | ||||
|       }, | ||||
|       "engines": { | ||||
|         "node": ">=14.17" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/uuid": { | ||||
|       "version": "9.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", | ||||
|       "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", | ||||
|       "funding": [ | ||||
|         "https://github.com/sponsors/broofa", | ||||
|         "https://github.com/sponsors/ctavan" | ||||
|       ], | ||||
|       "bin": { | ||||
|         "uuid": "dist/bin/uuid" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/web-streams-polyfill": { | ||||
|       "version": "4.0.0-beta.3", | ||||
|       "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", | ||||
|       "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", | ||||
|       "engines": { | ||||
|         "node": ">= 14" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/webidl-conversions": { | ||||
|       "version": "3.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", | ||||
|       "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" | ||||
|     }, | ||||
|     "node_modules/whatwg-url": { | ||||
|       "version": "5.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", | ||||
|       "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", | ||||
|       "dependencies": { | ||||
|         "tr46": "~0.0.3", | ||||
|         "webidl-conversions": "^3.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/yaml": { | ||||
|       "version": "2.3.2", | ||||
|       "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.2.tgz", | ||||
|       "integrity": "sha512-N/lyzTPaJasoDmfV7YTrYCI0G/3ivm/9wdG0aHuheKowWQwGTsK0Eoiw6utmzAnI6pkJa0DUVygvp3spqqEKXg==", | ||||
|       "engines": { | ||||
|         "node": ">= 14" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/zod": { | ||||
|       "version": "3.22.4", | ||||
|       "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz", | ||||
|       "integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==", | ||||
|       "funding": { | ||||
|         "url": "https://github.com/sponsors/colinhacks" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/zod-to-json-schema": { | ||||
|       "version": "3.21.4", | ||||
|       "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.4.tgz", | ||||
|       "integrity": "sha512-fjUZh4nQ1s6HMccgIeE0VP4QG/YRGPmyjO9sAh890aQKPEk3nqbfUXhMFaC+Dr5KvYBm8BCyvfpZf2jY9aGSsw==", | ||||
|       "peerDependencies": { | ||||
|         "zod": "^3.21.4" | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
							
								
								
									
										8
									
								
								examples/langchain-typescript-simple/package.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								examples/langchain-typescript-simple/package.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
| { | ||||
|   "devDependencies": { | ||||
|     "typescript": "^5.2.2" | ||||
|   }, | ||||
|   "dependencies": { | ||||
|     "langchain": "^0.0.165" | ||||
|   } | ||||
| } | ||||
| @@ -1,8 +0,0 @@ | ||||
| # Modelfile for creating a Midjourney prompts from a topic | ||||
| # This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/ | ||||
| # Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic | ||||
|  | ||||
| FROM nous-hermes | ||||
| SYSTEM """ | ||||
| Embrace your role as an AI-powered creative assistant, employing Midjourney to manifest compelling AI-generated art. I will outline a specific image concept, and in response, you must produce an exhaustive, multifaceted prompt for Midjourney, ensuring every detail of the original concept is represented in your instructions. Midjourney doesn't do well with text, so after the prompt, give me instructions that I can use to create the titles in a image editor. | ||||
| """ | ||||
							
								
								
									
										23
									
								
								examples/modelfile-10tweets/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								examples/modelfile-10tweets/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| # Ten Tweets Modelfile | ||||
|  | ||||
| This is a simple modelfile that generates ten tweets based off any topic. | ||||
|  | ||||
| ```bash | ||||
| ollama create tentweets | ||||
|  | ||||
| ollama run tentweets | ||||
| >>> underwater basketweaving | ||||
|  Great! Here are ten creative tweets about underwater basketweaving: | ||||
|  | ||||
| 1. "Just discovered the ultimate stress-reliever: Underwater basketweaving! 🌊🧵 #UnderwaterBasketweaving #StressRelief" | ||||
| 2. "Who needs meditation when you can do underwater basketweaving? 😴👀 #PeacefulDistraction #UnderwaterBasketweaving" | ||||
| 3. "Just spent an hour in the pool and still managed to knot my basket. Goal: untangle it before next session. 💪🏽 #ChallengeAccepted #UnderwaterBasketweaving" | ||||
| 4. "When life gives you lemons, make underwater basketweaving! 🍋🧵 #LemonadeLife #UnderwaterBasketweaving" | ||||
| 5. "Just realized my underwater basketweaving skills could come in handy during a zombie apocalypse. 😂🧡 #SurvivalTips #UnderwaterBasketweaving" | ||||
| 6. "I'm not lazy, I'm just conserving energy for my next underwater basketweaving session. 😴💤 #LazyDay #UnderwaterBasketweaving" | ||||
| 7. "Just found my inner peace while doing underwater basketweaving. It's like meditation, but with knots! 🙏🧵 #Mindfulness #UnderwaterBasketweaving" | ||||
| 8. "Why study for exams when you can do underwater basketweaving and forget all your worries? 😜🧵 #ProcrastinationStation #UnderwaterBasketweaving" | ||||
| 9. "Just had to cut my underwater basketweaving session short due to a sudden urge to breathe. 🤯🌊 #AquaticAdventures #UnderwaterBasketweaving" | ||||
| 10. "I'm not sure what's more impressive: my underwater basketweaving skills or the fact that I didn't drown trying to make this tweet. 😅🧵 #Accomplishment  | ||||
| #UnderwaterBasketweaving" | ||||
| ``` | ||||
| @@ -1,7 +1,7 @@ | ||||
| # Modelfile for creating a devops engineer assistant | ||||
| # Run `ollama create devops-engineer -f ./Modelfile` and then `ollama run devops-engineer` and enter a topic | ||||
| 
 | ||||
| FROM llama2:13b | ||||
| FROM mistral | ||||
| PARAMETER temperature 1 | ||||
| SYSTEM """ | ||||
| You are a senior devops engineer, acting as an assistant. You offer help with cloud technologies like: Terraform, AWS, kubernetes, python. You answer with code examples when possible | ||||
							
								
								
									
										40
									
								
								examples/modelfile-devopsengineer/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								examples/modelfile-devopsengineer/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| # DevOps Engineer Modelfile | ||||
|  | ||||
| This is a simple modelfile that will respond to any question a senior DevOps engineer should know how to solve. | ||||
|  | ||||
| ````text | ||||
| ollama create devopsengineer | ||||
|  | ||||
| ollama run devopsengineer | ||||
| >>> how can i proxy all connections from inside the office through a single server that caches large files. Provide a config file for nginx to achieve this? | ||||
|  | ||||
| Here is an example configuration file for Nginx that can be used to proxy all connections from inside the office network to a centralized cache server: | ||||
| ```perl | ||||
| http { | ||||
|     upstream cache_server { | ||||
|         server <cache_server_ip>; | ||||
|         server <cache_server_ip>; | ||||
|         server <cache_server_ip>; | ||||
|     } | ||||
|      | ||||
|     server { | ||||
|         listen 80; | ||||
|         server_name localhost; | ||||
|          | ||||
|         location / { | ||||
|             proxy_pass http://cache_server; | ||||
|             proxy_set_header Host $host; | ||||
|             proxy_set_header X-Real-IP $remote_addr; | ||||
|             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| ``` | ||||
| In this configuration, we define an `upstream` block that lists the IP addresses of the cache servers. We then define a server block that listens on port 80 and routes all  | ||||
| traffic to the `/` location to the cache servers using the `proxy_pass` directive. The `proxy_set_header` directives are used to preserve the source IP address of the client | ||||
| request when forwarding it to the cache server. | ||||
|  | ||||
| To use this configuration, you would need to replace the placeholder `<cache_server_ip>` with the actual IP addresses of your cache servers. You would also need to make sure | ||||
| that the cache servers are configured to accept incoming connections from the Nginx server and handle requests for files. | ||||
|  | ||||
| ```` | ||||
| Before Width: | Height: | Size: 446 KiB After Width: | Height: | Size: 446 KiB | 
							
								
								
									
										11
									
								
								examples/modelfile-midjourney/Modelfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								examples/modelfile-midjourney/Modelfile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| # Modelfile for creating Midjourney prompts from a topic | ||||
| # This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/ | ||||
| # Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic | ||||
|  | ||||
| FROM zephyr | ||||
| PARAMETER temperature 0.8 | ||||
| PARAMETER top_k 500 | ||||
| PARAMETER top_p 0.9 | ||||
| SYSTEM """ | ||||
| Embrace your role as a creative illustrator. Based on a concept provided, you must produce a single paragraph with a multifaceted description of an image, ensuring significant details of the concept and more is represented in your instructions. You do not need to write complete sentences but rather short concepts with the following information: the level of detail that should be represented, an artistic style and maybe a specific name of a painter or illustrator, the ideal color pallete, lighting, mood, perspective, the setting, time of day, weather, the season, the time period, location, materials, the textures, patterns, lines, brushstrokes, techniques, the medium, the genre, the rendering style. Don't include everything and keep the description length under 250 words.  | ||||
| """ | ||||
							
								
								
									
										11
									
								
								examples/modelfile-midjourney/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								examples/modelfile-midjourney/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| # Midjourney Prompt Generator Modelfile | ||||
|  | ||||
| This simple modelfile will help create a prompt to feed to Midjourney. | ||||
|  | ||||
| ```text | ||||
| ollama create midjourney | ||||
|  | ||||
| ollama run midjourney | ||||
| >>> a sports car in the mountains.  | ||||
| A sleek, high-performance automobile cuts through a serpentine mountain landscape. The concept is a classic illustration of speed and power, depicted in the style of pop art by Andy Warhol. The color palette is dominated by bold, primary hues of red, blue, and yellow, with striking accent colors of white, black, and metallic shades. The lighting is bright and focused, casting sharp shadows on the rugged terrain. A sense of excitement and anticipation permeates throughout the scene, as the car navigates a treacherous course through the winding road. The perspective is low, allowing for a full view of the vehicle's sleek lines and intricate details. The setting takes place in the afternoon during a sunny day in autumn, as evidenced by the vibrant foliage on the mountainside. The time period is modern, with nods to classic car design. The materials are primarily digital, allowing for smooth curves and sharp contrasts. The textures are sleek and polished, with meticulously detailed lines and brushstrokes that accentuate the car's aerodynamic design. The patterns consist of geometric shapes and bold stripes, adding to the car's dynamic appeal. The genre is modern realism, with a focus on precision and detail. The rendering style is highly technical, capturing the nuances and subtleties of the vehicle and its surroundings in breathtaking detail. | ||||
| ``` | ||||
							
								
								
									
										20
									
								
								examples/modelfile-recipemaker/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								examples/modelfile-recipemaker/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| # Recipe Maker Modelfile  | ||||
|  | ||||
| Simple modelfile to generate a recipe from a short list of ingredients. | ||||
|  | ||||
| ``` | ||||
| ollama create recipemaker | ||||
|  | ||||
| ollama run recipemaker | ||||
| >>> chilli pepper, white chocolate, kale | ||||
|  Ingredients: | ||||
| - 1 small chili pepper | ||||
| - 4 squares of white chocolate | ||||
| - handful of kale leaves | ||||
|  | ||||
| Instructions: | ||||
| 1. In a blender or food processor, puree the chilies and white chocolate until smooth. | ||||
| 2. Add the chopped kale leaves to the blender and pulse until well combined. | ||||
| 3. Serve immediately as a dip for crackers or use it as an ingredient in your favorite recipe. The mixture of spicy chili pepper with sweet white chocolate and nutritious  | ||||
| kale will make your taste buds dance with delight! | ||||
| ``` | ||||
| @@ -1,4 +1,4 @@ | ||||
| FROM llama2 | ||||
| FROM mistral | ||||
| SYSTEM """ | ||||
| You are an experienced Devops engineer focused on docker. When given specifications for a particular need or application you know the best way to host that within a docker container. For instance if someone tells you they want an nginx server to host files located at /web you will answer as follows | ||||
| 
 | ||||
							
								
								
									
										2
									
								
								examples/typescript-mentors/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								examples/typescript-mentors/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| node_modules | ||||
| package-lock.json | ||||
							
								
								
									
										21
									
								
								examples/typescript-mentors/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								examples/typescript-mentors/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| # Ask the Mentors | ||||
|  | ||||
| This example demonstrates how one would create a set of 'mentors' you can have a conversation with. The mentors are generated using the `character-generator.ts` file. This will use **Stable Beluga 70b** to create a bio and list of verbal tics and common phrases used by each person. Then `mentors.ts` will take a question, and choose three of the 'mentors' and start a conversation with them. Occasionally, they will talk to each other, and other times they will just deliver a set of monologues. It's fun to see what they do and say. | ||||
|  | ||||
| ## Usage | ||||
|  | ||||
| ```bash | ||||
| ts-node ./character-generator.ts "Lorne Greene" | ||||
| ``` | ||||
|  | ||||
| This will create `lornegreene/Modelfile`. Now you can create a model with this command: | ||||
|  | ||||
| ```bash | ||||
| ollama create lornegreene -f lornegreene/Modelfile | ||||
| ``` | ||||
|  | ||||
| If you want to add your own mentors, you will have to update the code to look at your namespace instead of **mattw**. Also set the list of mentors to include yours. | ||||
|  | ||||
| ```bash | ||||
| ts-node ./mentors.ts "What is a Jackalope?" | ||||
| ``` | ||||
							
								
								
									
										26
									
								
								examples/typescript-mentors/character-generator.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								examples/typescript-mentors/character-generator.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| import { Ollama } from 'ollama-node' | ||||
| import fs from 'fs'; | ||||
| import path from 'path'; | ||||
|  | ||||
/**
 * Generates a Modelfile for the character named on the command line.
 *
 * Reads the character name from `process.argv[2]`, asks the
 * `stablebeluga2:70b-q4_K_M` model for a second-person bio plus a description
 * of the character's speaking style, and writes the flattened response as the
 * SYSTEM prompt of `<foldername>/Modelfile` next to this script, where
 * `<foldername>` is the name lowercased with all whitespace removed.
 *
 * If no (or a blank) name is supplied, prints a usage hint to stderr, sets a
 * non-zero exit code and returns without contacting the model.
 */
async function characterGenerator() {
  const character = process.argv[2];
  if (!character || !character.trim()) {
    // Without a name, `character.replace` below would throw a TypeError and
    // the prompt template would be meaningless, so stop early with guidance.
    console.error('Usage: ts-node ./character-generator.ts "<character name>"');
    process.exitCode = 1;
    return;
  }

  console.log(`You are creating a character for ${character}.`);
  const foldername = character.replace(/\s/g, '').toLowerCase();
  const directory = path.join(__dirname, foldername);
  if (!fs.existsSync(directory)) {
    fs.mkdirSync(directory, { recursive: true });
  }

  const ollama = new Ollama();
  ollama.setModel("stablebeluga2:70b-q4_K_M");
  const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);

  // Collapse the response onto one line so it sits cleanly inside the
  // triple-quoted SYSTEM block of the Modelfile.
  const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;

  // Await the write so a failure surfaces as a rejection of this async
  // function instead of an exception thrown from inside a callback.
  await fs.promises.writeFile(path.join(directory, 'Modelfile'), thecontents);
  console.log('The file has been saved!');
}
|  | ||||
| characterGenerator(); | ||||
							
								
								
									
										59
									
								
								examples/typescript-mentors/mentors.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								examples/typescript-mentors/mentors.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| import { Ollama } from 'ollama-node'; | ||||
|  | ||||
| const mentorCount = 3; | ||||
| const ollama = new Ollama(); | ||||
|  | ||||
/**
 * Picks `mentorCount` distinct mentor names at random from the built-in list.
 *
 * Each pick is removed from the candidate pool before the next draw, so no
 * name is returned twice.
 */
function getMentors(): string[] {
  const pool = ['Gary Vaynerchuk', 'Kanye West', 'Martha Stewart', 'Neil deGrasse Tyson', 'Owen Wilson', 'Ronald Reagan', 'Donald Trump', 'Barack Obama', 'Jeff Bezos'];
  const picked: string[] = [];
  let remaining = mentorCount;
  while (remaining > 0) {
    const slot = Math.floor(Math.random() * pool.length);
    // Names in the pool are unique, so removing by position removes exactly
    // the entry that was drawn.
    const [name] = pool.splice(slot, 1);
    picked.push(name);
    remaining -= 1;
  }
  return picked;
}
|  | ||||
/**
 * Maps a mentor's display name to the model name used for that mentor:
 * the name lowercased with all whitespace removed, under the `mattw/` namespace.
 */
function getMentorFileName(mentor: string): string {
  const lowered = mentor.toLowerCase();
  const compact = lowered.split(/\s/).join('');
  return 'mattw/' + compact;
}
|  | ||||
/**
 * Builds the system prompt for one mentor's turn in the conversation.
 *
 * Selects the mentor's model on the shared `ollama` client, reads that model's
 * own system prompt (empty string when it has none), then appends the
 * role-play instructions and a closing instruction that depends on whether
 * this mentor speaks last.
 *
 * @param mentor   Display name of the mentor.
 * @param isLast   True when this mentor is the final speaker.
 * @param question The user's original question, quoted back to non-final speakers.
 * @returns The assembled system prompt.
 */
async function getSystemPrompt(mentor: string, isLast: boolean, question: string): Promise<string> {
  ollama.setModel(getMentorFileName(mentor));
  const modelInfo = await ollama.showModelInfo();
  const basePrompt = modelInfo.system || '';

  const rolePlay = ` You should continue the conversation as if you were ${mentor} and acknowledge the people before you in the conversation. You should adopt their mannerisms and tone, but also not use language they wouldn't use. If they are not known to know about the concept in the question, don't offer an answer. Your answer should be no longer than 1 paragraph. And definitely try not to sound like anyone else. Don't repeat any slang or phrases already used. And if it is a question the original ${mentor} wouldn't have know the answer to, just say that you don't know, in the style of ${mentor}. And think about the time the person lived. Don't use terminology that they wouldn't have used.`;

  // The last speaker wraps things up; everyone else hands the conversation on.
  const closing = isLast
    ? ` End your answer with something like I hope our answers help you out`
    : ` Remember, this is a conversation, so you don't need a conclusion, but end your answer with a question related to the first question: "${question}".`;

  return basePrompt + rolePlay + closing;
}
|  | ||||
/**
 * Runs one "ask the mentors" conversation.
 *
 * Takes the question from `process.argv[2]`, picks a random set of mentors,
 * and lets each one answer in turn. Every mentor's reply is streamed to
 * stdout as it arrives and appended to the running transcript, so later
 * mentors see what earlier ones said.
 *
 * If no question is supplied, prints a usage hint to stderr, sets a non-zero
 * exit code and returns without contacting any model.
 */
async function main() {
  const question = process.argv[2];
  if (!question) {
    // Otherwise the literal text "undefined" would be sent to every model.
    console.error('Usage: ts-node ./mentors.ts "<question>"');
    process.exitCode = 1;
    return;
  }

  const mentors = getMentors();
  const lastMentor = mentors[mentors.length - 1];
  let theConversation = `Here is the conversation so far.\nYou: ${question}\n`;

  // `mentors` is a plain array, so an ordinary for...of is enough; the awaits
  // inside the body already keep the turns sequential.
  for (const mentor of mentors) {
    const SystemPrompt = await getSystemPrompt(mentor, mentor === lastMentor, question);
    ollama.setModel(getMentorFileName(mentor));
    ollama.setSystemPrompt(SystemPrompt);
    let output = '';
    process.stdout.write(`\n${mentor}: `);
    for await (const chunk of ollama.streamingGenerate(theConversation + `Continue the conversation as if you were ${mentor} on the question "${question}".`)) {
      if (chunk.response) {
        output += chunk.response;
        process.stdout.write(chunk.response);
      } else {
        // A chunk without response text ends the current line of output.
        process.stdout.write('\n');
      }
    }
    theConversation += `${mentor}: ${output}\n\n`;
  }
}
|  | ||||
| main(); | ||||
							
								
								
									
										7
									
								
								examples/typescript-mentors/package.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								examples/typescript-mentors/package.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| { | ||||
|   "dependencies": { | ||||
|     "fs": "^0.0.1-security", | ||||
|     "ollama-node": "^0.0.3", | ||||
|     "path": "^0.12.7" | ||||
|   } | ||||
| } | ||||
							
								
								
									
										16
									
								
								format/bytes.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								format/bytes.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| package format | ||||
|  | ||||
| import "fmt" | ||||
|  | ||||
// HumanBytes formats a byte count using decimal (SI) units, truncating to a
// whole number of the largest unit that fits: "B", "KB", "MB" or "GB".
//
// Unit boundaries are inclusive, so exactly 1000 bytes is reported as "1 KB"
// rather than "1000 B". Values below 1000, including negative ones, are
// printed as plain bytes.
func HumanBytes(b int64) string {
	const (
		kiloByte = 1000
		megaByte = 1000 * kiloByte
		gigaByte = 1000 * megaByte
	)

	switch {
	case b >= gigaByte:
		return fmt.Sprintf("%d GB", b/gigaByte)
	case b >= megaByte:
		return fmt.Sprintf("%d MB", b/megaByte)
	case b >= kiloByte:
		return fmt.Sprintf("%d KB", b/kiloByte)
	default:
		return fmt.Sprintf("%d B", b)
	}
}
| @@ -7,26 +7,14 @@ import ( | ||||
| 	"time" | ||||
| ) | ||||
|  | ||||
| // HumanDuration returns a human-readable approximation of a duration | ||||
| // (eg. "About a minute", "4 hours ago", etc.). | ||||
| // Modified version of github.com/docker/go-units.HumanDuration | ||||
| func HumanDuration(d time.Duration) string { | ||||
| 	return HumanDurationWithCase(d, true) | ||||
| } | ||||
|  | ||||
| // HumanDurationWithCase returns a human-readable approximation of a | ||||
| // duration (eg. "About a minute", "4 hours ago", etc.). but allows | ||||
| // you to specify whether the first word should be capitalized | ||||
| // (eg. "About" vs. "about") | ||||
| func HumanDurationWithCase(d time.Duration, useCaps bool) string { | ||||
| // humanDuration returns a human-readable approximation of a | ||||
| // duration (eg. "About a minute", "4 hours ago", etc.). | ||||
| func humanDuration(d time.Duration) string { | ||||
| 	seconds := int(d.Seconds()) | ||||
|  | ||||
| 	switch { | ||||
| 	case seconds < 1: | ||||
| 		if useCaps { | ||||
| 			return "Less than a second" | ||||
| 		} | ||||
| 		return "less than a second" | ||||
| 		return "Less than a second" | ||||
| 	case seconds == 1: | ||||
| 		return "1 second" | ||||
| 	case seconds < 60: | ||||
| @@ -36,10 +24,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string { | ||||
| 	minutes := int(d.Minutes()) | ||||
| 	switch { | ||||
| 	case minutes == 1: | ||||
| 		if useCaps { | ||||
| 			return "About a minute" | ||||
| 		} | ||||
| 		return "about a minute" | ||||
| 		return "About a minute" | ||||
| 	case minutes < 60: | ||||
| 		return fmt.Sprintf("%d minutes", minutes) | ||||
| 	} | ||||
| @@ -47,10 +32,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string { | ||||
| 	hours := int(math.Round(d.Hours())) | ||||
| 	switch { | ||||
| 	case hours == 1: | ||||
| 		if useCaps { | ||||
| 			return "About an hour" | ||||
| 		} | ||||
| 		return "about an hour" | ||||
| 		return "About an hour" | ||||
| 	case hours < 48: | ||||
| 		return fmt.Sprintf("%d hours", hours) | ||||
| 	case hours < 24*7*2: | ||||
| @@ -65,77 +47,22 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string { | ||||
| } | ||||
|  | ||||
| func HumanTime(t time.Time, zeroValue string) string { | ||||
| 	return humanTimeWithCase(t, zeroValue, true) | ||||
| 	return humanTime(t, zeroValue) | ||||
| } | ||||
|  | ||||
| func HumanTimeLower(t time.Time, zeroValue string) string { | ||||
| 	return humanTimeWithCase(t, zeroValue, false) | ||||
| 	return strings.ToLower(humanTime(t, zeroValue)) | ||||
| } | ||||
|  | ||||
| func humanTimeWithCase(t time.Time, zeroValue string, useCaps bool) string { | ||||
| func humanTime(t time.Time, zeroValue string) string { | ||||
| 	if t.IsZero() { | ||||
| 		return zeroValue | ||||
| 	} | ||||
|  | ||||
| 	delta := time.Since(t) | ||||
| 	if delta < 0 { | ||||
| 		return HumanDurationWithCase(-delta, useCaps) + " from now" | ||||
| 		return humanDuration(-delta) + " from now" | ||||
| 	} | ||||
| 	return HumanDurationWithCase(delta, useCaps) + " ago" | ||||
| } | ||||
|  | ||||
| // ExcatDuration returns a human readable hours/minutes/seconds or milliseconds format of a duration | ||||
| // the most precise level of duration is milliseconds | ||||
| func ExactDuration(d time.Duration) string { | ||||
| 	if d.Seconds() < 1 { | ||||
| 		if d.Milliseconds() == 1 { | ||||
| 			return fmt.Sprintf("%d millisecond", d.Milliseconds()) | ||||
| 		} | ||||
| 		return fmt.Sprintf("%d milliseconds", d.Milliseconds()) | ||||
| 	} | ||||
|  | ||||
| 	var readableDur strings.Builder | ||||
|  | ||||
| 	dur := d.String() | ||||
|  | ||||
| 	// split the default duration string format of 0h0m0s into something nicer to read | ||||
| 	h := strings.Split(dur, "h") | ||||
| 	if len(h) > 1 { | ||||
| 		hours := h[0] | ||||
| 		if hours == "1" { | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s hour ", hours)) | ||||
| 		} else { | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s hours ", hours)) | ||||
| 		} | ||||
| 		dur = h[1] | ||||
| 	} | ||||
|  | ||||
| 	m := strings.Split(dur, "m") | ||||
| 	if len(m) > 1 { | ||||
| 		mins := m[0] | ||||
| 		switch mins { | ||||
| 		case "0": | ||||
| 			// skip | ||||
| 		case "1": | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s minute ", mins)) | ||||
| 		default: | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s minutes ", mins)) | ||||
| 		} | ||||
| 		dur = m[1] | ||||
| 	} | ||||
|  | ||||
| 	s := strings.Split(dur, "s") | ||||
| 	if len(s) > 0 { | ||||
| 		sec := s[0] | ||||
| 		switch sec { | ||||
| 		case "0": | ||||
| 			// skip | ||||
| 		case "1": | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s second ", sec)) | ||||
| 		default: | ||||
| 			readableDur.WriteString(fmt.Sprintf("%s seconds ", sec)) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return strings.TrimSpace(readableDur.String()) | ||||
|  | ||||
| 	return humanDuration(delta) + " ago" | ||||
| } | ||||
|   | ||||
| @@ -11,92 +11,25 @@ func assertEqual(t *testing.T, a interface{}, b interface{}) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestHumanDuration(t *testing.T) { | ||||
| 	day := 24 * time.Hour | ||||
| 	week := 7 * day | ||||
| 	month := 30 * day | ||||
| 	year := 365 * day | ||||
|  | ||||
| 	assertEqual(t, "Less than a second", HumanDuration(450*time.Millisecond)) | ||||
| 	assertEqual(t, "Less than a second", HumanDurationWithCase(450*time.Millisecond, true)) | ||||
| 	assertEqual(t, "less than a second", HumanDurationWithCase(450*time.Millisecond, false)) | ||||
| 	assertEqual(t, "1 second", HumanDuration(1*time.Second)) | ||||
| 	assertEqual(t, "45 seconds", HumanDuration(45*time.Second)) | ||||
| 	assertEqual(t, "46 seconds", HumanDuration(46*time.Second)) | ||||
| 	assertEqual(t, "59 seconds", HumanDuration(59*time.Second)) | ||||
| 	assertEqual(t, "About a minute", HumanDuration(60*time.Second)) | ||||
| 	assertEqual(t, "About a minute", HumanDurationWithCase(1*time.Minute, true)) | ||||
| 	assertEqual(t, "about a minute", HumanDurationWithCase(1*time.Minute, false)) | ||||
| 	assertEqual(t, "3 minutes", HumanDuration(3*time.Minute)) | ||||
| 	assertEqual(t, "35 minutes", HumanDuration(35*time.Minute)) | ||||
| 	assertEqual(t, "35 minutes", HumanDuration(35*time.Minute+40*time.Second)) | ||||
| 	assertEqual(t, "45 minutes", HumanDuration(45*time.Minute)) | ||||
| 	assertEqual(t, "45 minutes", HumanDuration(45*time.Minute+40*time.Second)) | ||||
| 	assertEqual(t, "46 minutes", HumanDuration(46*time.Minute)) | ||||
| 	assertEqual(t, "59 minutes", HumanDuration(59*time.Minute)) | ||||
| 	assertEqual(t, "About an hour", HumanDuration(1*time.Hour)) | ||||
| 	assertEqual(t, "About an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, true)) | ||||
| 	assertEqual(t, "about an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, false)) | ||||
| 	assertEqual(t, "2 hours", HumanDuration(1*time.Hour+31*time.Minute)) | ||||
| 	assertEqual(t, "2 hours", HumanDuration(1*time.Hour+59*time.Minute)) | ||||
| 	assertEqual(t, "3 hours", HumanDuration(3*time.Hour)) | ||||
| 	assertEqual(t, "3 hours", HumanDuration(3*time.Hour+29*time.Minute)) | ||||
| 	assertEqual(t, "4 hours", HumanDuration(3*time.Hour+31*time.Minute)) | ||||
| 	assertEqual(t, "4 hours", HumanDuration(3*time.Hour+59*time.Minute)) | ||||
| 	assertEqual(t, "4 hours", HumanDuration(3*time.Hour+60*time.Minute)) | ||||
| 	assertEqual(t, "24 hours", HumanDuration(24*time.Hour)) | ||||
| 	assertEqual(t, "36 hours", HumanDuration(1*day+12*time.Hour)) | ||||
| 	assertEqual(t, "2 days", HumanDuration(2*day)) | ||||
| 	assertEqual(t, "7 days", HumanDuration(7*day)) | ||||
| 	assertEqual(t, "13 days", HumanDuration(13*day+5*time.Hour)) | ||||
| 	assertEqual(t, "2 weeks", HumanDuration(2*week)) | ||||
| 	assertEqual(t, "2 weeks", HumanDuration(2*week+4*day)) | ||||
| 	assertEqual(t, "3 weeks", HumanDuration(3*week)) | ||||
| 	assertEqual(t, "4 weeks", HumanDuration(4*week)) | ||||
| 	assertEqual(t, "4 weeks", HumanDuration(4*week+3*day)) | ||||
| 	assertEqual(t, "4 weeks", HumanDuration(1*month)) | ||||
| 	assertEqual(t, "6 weeks", HumanDuration(1*month+2*week)) | ||||
| 	assertEqual(t, "2 months", HumanDuration(2*month)) | ||||
| 	assertEqual(t, "2 months", HumanDuration(2*month+2*week)) | ||||
| 	assertEqual(t, "3 months", HumanDuration(3*month)) | ||||
| 	assertEqual(t, "3 months", HumanDuration(3*month+1*week)) | ||||
| 	assertEqual(t, "5 months", HumanDuration(5*month+2*week)) | ||||
| 	assertEqual(t, "13 months", HumanDuration(13*month)) | ||||
| 	assertEqual(t, "23 months", HumanDuration(23*month)) | ||||
| 	assertEqual(t, "24 months", HumanDuration(24*month)) | ||||
| 	assertEqual(t, "2 years", HumanDuration(24*month+2*week)) | ||||
| 	assertEqual(t, "3 years", HumanDuration(3*year+2*month)) | ||||
| } | ||||
|  | ||||
| func TestHumanTime(t *testing.T) { | ||||
| 	now := time.Now() | ||||
|  | ||||
| 	t.Run("zero value", func(t *testing.T) { | ||||
| 		assertEqual(t, HumanTime(time.Time{}, "never"), "never") | ||||
| 	}) | ||||
|  | ||||
| 	t.Run("time in the future", func(t *testing.T) { | ||||
| 		v := now.Add(48 * time.Hour) | ||||
| 		assertEqual(t, HumanTime(v, ""), "2 days from now") | ||||
| 	}) | ||||
|  | ||||
| 	t.Run("time in the past", func(t *testing.T) { | ||||
| 		v := now.Add(-48 * time.Hour) | ||||
| 		assertEqual(t, HumanTime(v, ""), "2 days ago") | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func TestExactDuration(t *testing.T) { | ||||
| 	assertEqual(t, "1 millisecond", ExactDuration(1*time.Millisecond)) | ||||
| 	assertEqual(t, "10 milliseconds", ExactDuration(10*time.Millisecond)) | ||||
| 	assertEqual(t, "1 second", ExactDuration(1*time.Second)) | ||||
| 	assertEqual(t, "10 seconds", ExactDuration(10*time.Second)) | ||||
| 	assertEqual(t, "1 minute", ExactDuration(1*time.Minute)) | ||||
| 	assertEqual(t, "10 minutes", ExactDuration(10*time.Minute)) | ||||
| 	assertEqual(t, "1 hour", ExactDuration(1*time.Hour)) | ||||
| 	assertEqual(t, "10 hours", ExactDuration(10*time.Hour)) | ||||
| 	assertEqual(t, "1 hour 1 second", ExactDuration(1*time.Hour+1*time.Second)) | ||||
| 	assertEqual(t, "1 hour 10 seconds", ExactDuration(1*time.Hour+10*time.Second)) | ||||
| 	assertEqual(t, "1 hour 1 minute", ExactDuration(1*time.Hour+1*time.Minute)) | ||||
| 	assertEqual(t, "1 hour 10 minutes", ExactDuration(1*time.Hour+10*time.Minute)) | ||||
| 	assertEqual(t, "1 hour 1 minute 1 second", ExactDuration(1*time.Hour+1*time.Minute+1*time.Second)) | ||||
| 	assertEqual(t, "10 hours 10 minutes 10 seconds", ExactDuration(10*time.Hour+10*time.Minute+10*time.Second)) | ||||
| 	t.Run("soon", func(t *testing.T) { | ||||
| 		v := now.Add(800*time.Millisecond) | ||||
| 		assertEqual(t, HumanTime(v, ""), "Less than a second from now") | ||||
| 	}) | ||||
| } | ||||
|   | ||||
							
								
								
									
										3
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								go.mod
									
									
									
									
									
								
							| @@ -8,7 +8,9 @@ require ( | ||||
| 	github.com/mattn/go-runewidth v0.0.14 | ||||
| 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db | ||||
| 	github.com/olekukonko/tablewriter v0.0.5 | ||||
| 	github.com/pdevine/readline v1.5.2 | ||||
| 	github.com/spf13/cobra v1.7.0 | ||||
| 	golang.org/x/sync v0.3.0 | ||||
| ) | ||||
|  | ||||
| require github.com/rivo/uniseg v0.2.0 // indirect | ||||
| @@ -16,7 +18,6 @@ require github.com/rivo/uniseg v0.2.0 // indirect | ||||
| require ( | ||||
| 	github.com/bytedance/sonic v1.9.1 // indirect | ||||
| 	github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect | ||||
| 	github.com/chzyer/readline v1.5.1 | ||||
| 	github.com/gabriel-vasile/mimetype v1.4.2 // indirect | ||||
| 	github.com/gin-contrib/cors v1.4.0 | ||||
| 	github.com/gin-contrib/sse v0.1.0 // indirect | ||||
|   | ||||
							
								
								
									
										7
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								go.sum
									
									
									
									
									
								
							| @@ -6,8 +6,6 @@ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhD | ||||
| github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= | ||||
| github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= | ||||
| github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= | ||||
| github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= | ||||
| github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= | ||||
| github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= | ||||
| github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= | ||||
| github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= | ||||
| @@ -80,6 +78,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N | ||||
| github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= | ||||
| github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= | ||||
| github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= | ||||
| github.com/pdevine/readline v1.5.2 h1:oz6Y5GdTmhPG+08hhxcAvtHitSANWuA2100Sppb38xI= | ||||
| github.com/pdevine/readline v1.5.2/go.mod h1:na/LbuE5PYwxI7GyopWdIs3U8HVe89lYlNTFTXH3wOw= | ||||
| github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= | ||||
| github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= | ||||
| github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= | ||||
| @@ -120,12 +120,13 @@ golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= | ||||
| golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= | ||||
| golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= | ||||
| golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= | ||||
| golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= | ||||
| golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= | ||||
| golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= | ||||
| golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= | ||||
| golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= | ||||
| golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= | ||||
| golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= | ||||
| golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= | ||||
| golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | ||||
| golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
| golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| package llm | ||||
|  | ||||
| const ModelFamilyFalcon = "falcon" | ||||
|  | ||||
| const ( | ||||
| 	falconModelType7B   = 32 | ||||
| 	falconModelType40B  = 60 | ||||
| @@ -17,6 +15,6 @@ func falconModelType(numLayer uint32) string { | ||||
| 	case 80: | ||||
| 		return "180B" | ||||
| 	default: | ||||
| 		return "Unknown" | ||||
| 		return "unknown" | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										22
									
								
								llm/ggml.go
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								llm/ggml.go
									
									
									
									
									
								
							| @@ -4,8 +4,6 @@ import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| 	"path" | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
| type GGML struct { | ||||
| @@ -71,7 +69,7 @@ func fileType(fileType uint32) string { | ||||
| 	case fileTypeQ6_K: | ||||
| 		return "Q6_K" | ||||
| 	default: | ||||
| 		return "Unknown" | ||||
| 		return "unknown" | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -79,6 +77,7 @@ type model interface { | ||||
| 	ModelFamily() string | ||||
| 	ModelType() string | ||||
| 	FileType() string | ||||
| 	NumLayers() int64 | ||||
| } | ||||
|  | ||||
| type container interface { | ||||
| @@ -166,23 +165,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) { | ||||
| 	return nil, nil | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin") | ||||
| 	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin") | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	ggmlInit       sync.Once | ||||
| 	ggmlRunnerPath string | ||||
| ) | ||||
|  | ||||
| func ggmlRunner() ModelRunner { | ||||
| 	ggmlInit.Do(func() { | ||||
| 		ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU) | ||||
| 	}) | ||||
| 	return ModelRunner{Path: ggmlRunnerPath} | ||||
| } | ||||
|  | ||||
| const ( | ||||
| 	// Magic constant for `ggml` files (unversioned). | ||||
| 	FILE_MAGIC_GGML = 0x67676d6c | ||||
|   | ||||
							
								
								
									
										38
									
								
								llm/gguf.go
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								llm/gguf.go
									
									
									
									
									
								
							| @@ -6,8 +6,6 @@ import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"path" | ||||
| 	"sync" | ||||
| ) | ||||
|  | ||||
| type containerGGUF struct { | ||||
| @@ -111,9 +109,13 @@ func (llm *ggufModel) ModelType() string { | ||||
| 		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok { | ||||
| 			return falconModelType(blocks) | ||||
| 		} | ||||
| 	case "starcoder": | ||||
| 		if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok { | ||||
| 			return starCoderModelType(blocks) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return "Unknown" | ||||
| 	return "unknown" | ||||
| } | ||||
|  | ||||
| func (llm *ggufModel) FileType() string { | ||||
| @@ -122,7 +124,7 @@ func (llm *ggufModel) FileType() string { | ||||
| 		return fileType(t) | ||||
| 	} | ||||
|  | ||||
| 	return "Unknown" | ||||
| 	return "unknown" | ||||
| } | ||||
|  | ||||
| func (llm *ggufModel) Decode(r io.Reader) error { | ||||
| @@ -197,6 +199,16 @@ func (llm *ggufModel) Decode(r io.Reader) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (llm *ggufModel) NumLayers() int64 { | ||||
| 	value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())] | ||||
| 	if !exists { | ||||
| 		return 0 | ||||
| 	} | ||||
|  | ||||
| 	v := value.(uint32) | ||||
| 	return int64(v) | ||||
| } | ||||
|  | ||||
| func (ggufModel) readU8(r io.Reader) uint8 { | ||||
| 	var u8 uint8 | ||||
| 	binary.Read(r, binary.LittleEndian, &u8) | ||||
| @@ -369,21 +381,3 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) { | ||||
|  | ||||
| 	return | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	ggufGPU = path.Join("llama.cpp", "gguf", "build", "gpu", "bin") | ||||
| 	ggufCPU = path.Join("llama.cpp", "gguf", "build", "cpu", "bin") | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	ggufInit       sync.Once | ||||
| 	ggufRunnerPath string | ||||
| ) | ||||
|  | ||||
| func ggufRunner() ModelRunner { | ||||
| 	ggufInit.Do(func() { | ||||
| 		ggufRunnerPath = chooseRunner(ggufGPU, ggufCPU) | ||||
| 	}) | ||||
|  | ||||
| 	return ModelRunner{Path: ggufRunnerPath} | ||||
| } | ||||
|   | ||||
| @@ -1,17 +0,0 @@ | ||||
| //go:build !darwin | ||||
| // +build !darwin | ||||
|  | ||||
| package llm | ||||
|  | ||||
| //go:generate git submodule init | ||||
|  | ||||
| //go:generate git submodule update --force ggml | ||||
| //go:generate -command git-apply git -C ggml apply | ||||
| //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build ggml/build/cpu --target server --config Release | ||||
|  | ||||
| //go:generate git submodule update --force gguf | ||||
| //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build gguf/build/cpu --target server --config Release | ||||
| @@ -3,14 +3,16 @@ package llm | ||||
| //go:generate git submodule init | ||||
|  | ||||
| //go:generate git submodule update --force ggml | ||||
| //go:generate -command git-apply git -C ggml apply | ||||
| //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch | ||||
| //go:generate git-apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch | ||||
| //go:generate git-apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch | ||||
| //go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch | ||||
| //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch | ||||
| //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build ggml/build/cpu --target server --config Release | ||||
| //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner | ||||
|  | ||||
| //go:generate git submodule update --force gguf | ||||
| //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch | ||||
| //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build gguf/build/cpu --target server --config Release | ||||
| //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner | ||||
|   | ||||
| @@ -3,14 +3,16 @@ package llm | ||||
| //go:generate git submodule init | ||||
|  | ||||
| //go:generate git submodule update --force ggml | ||||
| //go:generate -command git-apply git -C ggml apply | ||||
| //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch | ||||
| //go:generate git-apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch | ||||
| //go:generate git-apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build ggml/build/gpu --target server --config Release | ||||
| //go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch | ||||
| //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch | ||||
| //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build ggml/build/metal --target server --config Release | ||||
| //go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner | ||||
|  | ||||
| //go:generate git submodule update --force gguf | ||||
| //go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build gguf/build/gpu --target server --config Release | ||||
| //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch | ||||
| //go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 | ||||
| //go:generate cmake --build gguf/build/metal --target server --config Release | ||||
| //go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner | ||||
|   | ||||
| @@ -3,13 +3,24 @@ package llm | ||||
| //go:generate git submodule init | ||||
|  | ||||
| //go:generate git submodule update --force ggml | ||||
| //go:generate -command git-apply git -C ggml apply | ||||
| //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch | ||||
| //go:generate git-apply ../ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build ggml/build/gpu --target server --config Release | ||||
| //go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch | ||||
| //go:generate git -C ggml apply ../patches/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch | ||||
| //go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build ggml/build/cpu --target server --config Release | ||||
| //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner | ||||
|  | ||||
| //go:generate git submodule update --force gguf | ||||
| //go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build gguf/build/gpu --target server --config Release | ||||
| //go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch | ||||
| //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch | ||||
| //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build gguf/build/cpu --target server --config Release | ||||
| //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner | ||||
|  | ||||
| //go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build ggml/build/cuda --target server --config Release | ||||
| //go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner | ||||
| //go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build gguf/build/cuda --target server --config Release | ||||
| //go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner | ||||
|   | ||||
							
								
								
									
										16
									
								
								llm/llama.cpp/generate_windows.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								llm/llama.cpp/generate_windows.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| package llm | ||||
|  | ||||
| //go:generate git submodule init | ||||
|  | ||||
| //go:generate git submodule update --force ggml | ||||
| //go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch | ||||
| //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch | ||||
| //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build ggml/build/cpu --target server --config Release | ||||
| //go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe | ||||
|  | ||||
| //go:generate git submodule update --force gguf | ||||
| //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch | ||||
| //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on | ||||
| //go:generate cmake --build gguf/build/cpu --target server --config Release | ||||
| //go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe | ||||
| @@ -1,32 +0,0 @@ | ||||
| From 8c0ea847ac1460bca534d92266e3471cb31471be Mon Sep 17 00:00:00 2001 | ||||
| From: Bruce MacDonald <brucewmacdonald@gmail.com> | ||||
| Date: Tue, 5 Sep 2023 16:05:08 -0400 | ||||
| Subject: [PATCH] metal: add missing barriers for mul-mat #2699 | ||||
|  | ||||
| --- | ||||
|  ggml-metal.metal | 2 ++ | ||||
|  1 file changed, 2 insertions(+) | ||||
|  | ||||
| diff --git a/ggml-metal.metal b/ggml-metal.metal | ||||
| index 3f31252..ce3541f 100644 | ||||
| --- a/ggml-metal.metal | ||||
| +++ b/ggml-metal.metal | ||||
| @@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const  uchar * src0, | ||||
|          //load data and store to threadgroup memory | ||||
|          half4x4 temp_a; | ||||
|          dequantize_func(x, il, temp_a); | ||||
| +        threadgroup_barrier(mem_flags::mem_threadgroup); | ||||
|          #pragma unroll(16) | ||||
|          for (int i = 0; i < 16; i++) { | ||||
|              *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \ | ||||
| @@ -1895,6 +1896,7 @@ kernel void kernel_mul_mm(device const  uchar * src0, | ||||
|          } | ||||
|      } else { | ||||
|          // block is smaller than 64x32, we should avoid writing data outside of the matrix | ||||
| +        threadgroup_barrier(mem_flags::mem_threadgroup); | ||||
|          threadgroup float *temp_str = ((threadgroup float *)shared_memory) \ | ||||
|                                        + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M; | ||||
|          for (int i = 0; i < 8; i++) { | ||||
| --  | ||||
| 2.39.2 (Apple Git-143) | ||||
|  | ||||
 Submodule llm/llama.cpp/gguf updated: 53885d7256...bc9d3e3971
									
								
							
							
								
								
									
										27
									
								
								llm/llama.cpp/patches/0001-copy-cuda-runtime-libraries.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								llm/llama.cpp/patches/0001-copy-cuda-runtime-libraries.patch
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| From 5dd02993e8cc2ce309157736b95bb572f274a3fd Mon Sep 17 00:00:00 2001 | ||||
| From: Michael Yang <mxyng@pm.me> | ||||
| Date: Wed, 20 Sep 2023 14:19:52 -0700 | ||||
| Subject: [PATCH] copy cuda runtime libraries | ||||
|  | ||||
| --- | ||||
|  CMakeLists.txt | 4 ++++ | ||||
|  1 file changed, 4 insertions(+) | ||||
|  | ||||
| diff --git a/CMakeLists.txt b/CMakeLists.txt | ||||
| index 824d9f2..dd24137 100644 | ||||
| --- a/CMakeLists.txt | ||||
| +++ b/CMakeLists.txt | ||||
| @@ -274,6 +274,10 @@ if (LLAMA_CUBLAS) | ||||
|              set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) | ||||
|          endif() | ||||
|   | ||||
| +        configure_file(${CUDAToolkit_LIBRARY_DIR}/libcudart.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcudart.so.${CUDAToolkit_VERSION_MAJOR}.0 COPYONLY) | ||||
| +        configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublas.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublas.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY) | ||||
| +        configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublasLt.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublasLt.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY) | ||||
| + | ||||
|      if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) | ||||
|          # 52 == lowest CUDA 12 standard | ||||
|          # 60 == f16 CUDA intrinsics | ||||
| --  | ||||
| 2.42.0 | ||||
|  | ||||
							
								
								
									
										25
									
								
								llm/llama.cpp/patches/0001-remove-warm-up-logging.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								llm/llama.cpp/patches/0001-remove-warm-up-logging.patch
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| From 07993bdc35345b67b27aa649a7c099ad42d80c4c Mon Sep 17 00:00:00 2001 | ||||
| From: Michael Yang <mxyng@pm.me> | ||||
| Date: Thu, 21 Sep 2023 14:43:21 -0700 | ||||
| Subject: [PATCH] remove warm up logging | ||||
|  | ||||
| --- | ||||
|  common/common.cpp | 2 -- | ||||
|  1 file changed, 2 deletions(-) | ||||
|  | ||||
| diff --git a/common/common.cpp b/common/common.cpp | ||||
| index 2597ba0..b56549b 100644 | ||||
| --- a/common/common.cpp | ||||
| +++ b/common/common.cpp | ||||
| @@ -780,8 +780,6 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par | ||||
|      } | ||||
|   | ||||
|      { | ||||
| -        LOG("warming up the model with an empty run\n"); | ||||
| - | ||||
|          const std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), }; | ||||
|          llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads); | ||||
|          llama_reset_timings(lctx); | ||||
| --  | ||||
| 2.42.0 | ||||
|  | ||||
							
								
								
									
										360
									
								
								llm/llama.go
									
									
									
									
									
								
							
							
						
						
									
										360
									
								
								llm/llama.go
									
									
									
									
									
								
							| @@ -20,6 +20,7 @@ import ( | ||||
| 	"runtime" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/jmorganca/ollama/api" | ||||
| @@ -28,71 +29,96 @@ import ( | ||||
| //go:embed llama.cpp/*/build/*/bin/* | ||||
| var llamaCppEmbed embed.FS | ||||
|  | ||||
| func osPath(llamaPath string) string { | ||||
| 	if runtime.GOOS == "windows" { | ||||
| 		return path.Join(llamaPath, "Release") | ||||
| 	} | ||||
|  | ||||
| 	return llamaPath | ||||
| type ModelRunner struct { | ||||
| 	Path string // path to the model runner executable | ||||
| } | ||||
|  | ||||
| func chooseRunner(gpuPath, cpuPath string) string { | ||||
| 	tmpDir, err := os.MkdirTemp("", "llama-*") | ||||
| 	if err != nil { | ||||
| 		log.Fatalf("llama.cpp: failed to create temp dir: %v", err) | ||||
| 	} | ||||
| func chooseRunners(workDir, runnerType string) []ModelRunner { | ||||
| 	buildPath := path.Join("llama.cpp", runnerType, "build") | ||||
| 	var runners []string | ||||
|  | ||||
| 	llamaPath := osPath(gpuPath) | ||||
| 	if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil { | ||||
| 		llamaPath = osPath(cpuPath) | ||||
| 		if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil { | ||||
| 			log.Fatalf("llama.cpp executable not found") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	files := []string{"server"} | ||||
| 	// set the runners based on the OS | ||||
| 	// IMPORTANT: the order of the runners in the array is the priority order | ||||
| 	switch runtime.GOOS { | ||||
| 	case "windows": | ||||
| 		files = []string{"server.exe"} | ||||
| 	case "darwin": | ||||
| 		if llamaPath == osPath(gpuPath) { | ||||
| 			files = append(files, "ggml-metal.metal") | ||||
| 		runners = []string{ | ||||
| 			path.Join(buildPath, "metal", "bin", "ollama-runner"), | ||||
| 			path.Join(buildPath, "cpu", "bin", "ollama-runner"), | ||||
| 		} | ||||
| 	case "linux": | ||||
| 		// check if there is a GPU available | ||||
| 		if _, err := CheckVRAM(); errors.Is(err, errNoGPU) { | ||||
| 			// this error was logged on start-up, so we don't need to log it again | ||||
| 			llamaPath = osPath(cpuPath) | ||||
| 		runners = []string{ | ||||
| 			path.Join(buildPath, "cuda", "bin", "ollama-runner"), | ||||
| 			path.Join(buildPath, "cpu", "bin", "ollama-runner"), | ||||
| 		} | ||||
| 	case "windows": | ||||
| 		// TODO: select windows GPU runner here when available | ||||
| 		runners = []string{ | ||||
| 			path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"), | ||||
| 		} | ||||
| 	default: | ||||
| 		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS) | ||||
| 		runners = []string{ | ||||
| 			path.Join(buildPath, "cpu", "bin", "ollama-runner"), | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	for _, f := range files { | ||||
| 		srcPath := path.Join(llamaPath, f) | ||||
| 		destPath := filepath.Join(tmpDir, f) | ||||
|  | ||||
| 		srcFile, err := llamaCppEmbed.Open(srcPath) | ||||
| 	runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail | ||||
| 	for _, r := range runners { | ||||
| 		// find all the files in the runner's bin directory | ||||
| 		files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*")) | ||||
| 		if err != nil { | ||||
| 			log.Fatalf("read llama.cpp %s: %v", f, err) | ||||
| 			// this is expected, ollama may be compiled without all runners packed in | ||||
| 			log.Printf("%s runner not found: %v", r, err) | ||||
| 			continue | ||||
| 		} | ||||
| 		defer srcFile.Close() | ||||
|  | ||||
| 		destFile, err := os.OpenFile(destPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) | ||||
| 		if err != nil { | ||||
| 			log.Fatalf("write llama.cpp %s: %v", f, err) | ||||
| 		} | ||||
| 		defer destFile.Close() | ||||
| 		for _, f := range files { | ||||
| 			runnerAvailable = true | ||||
|  | ||||
| 		if _, err := io.Copy(destFile, srcFile); err != nil { | ||||
| 			log.Fatalf("copy llama.cpp %s: %v", f, err) | ||||
| 			srcFile, err := llamaCppEmbed.Open(f) | ||||
| 			if err != nil { | ||||
| 				log.Fatalf("read llama runner %s: %v", f, err) | ||||
| 			} | ||||
| 			defer srcFile.Close() | ||||
|  | ||||
| 			// create the directory in case it does not exist, filepath.Dir() converts the file path to the OS's format | ||||
| 			destPath := filepath.Join(workDir, filepath.Dir(f)) | ||||
| 			if err := os.MkdirAll(destPath, 0o755); err != nil { | ||||
| 				log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err) | ||||
| 			} | ||||
|  | ||||
| 			// create the path to the destination file, filepath.Base() converts the file path to the OS's format | ||||
| 			destFile := filepath.Join(destPath, filepath.Base(f)) | ||||
|  | ||||
| 			_, err = os.Stat(destFile) | ||||
| 			switch { | ||||
| 			case errors.Is(err, os.ErrNotExist): | ||||
| 				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) | ||||
| 				if err != nil { | ||||
| 					log.Fatalf("write llama runner %s: %v", f, err) | ||||
| 				} | ||||
| 				defer destFile.Close() | ||||
|  | ||||
| 				if _, err := io.Copy(destFile, srcFile); err != nil { | ||||
| 					log.Fatalf("copy llama runner %s: %v", f, err) | ||||
| 				} | ||||
| 			case err != nil: | ||||
| 				log.Fatalf("stat llama runner %s: %v", f, err) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	runPath := filepath.Join(tmpDir, "server") | ||||
| 	if runtime.GOOS == "windows" { | ||||
| 		runPath = filepath.Join(tmpDir, "server.exe") | ||||
| 	if !runnerAvailable { | ||||
| 		log.Fatalf("%s runner not found", runnerType) | ||||
| 	} | ||||
|  | ||||
| 	return runPath | ||||
| 	// return the runners to try in priority order | ||||
| 	localRunnersByPriority := []ModelRunner{} | ||||
| 	for _, r := range runners { | ||||
| 		// clean the ModelRunner paths so that they match the OS we are running on | ||||
| 		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))}) | ||||
| 	} | ||||
|  | ||||
| 	return localRunnersByPriority | ||||
| } | ||||
|  | ||||
| type llamaModel struct { | ||||
| @@ -118,7 +144,7 @@ func llamaModelType(numLayer uint32) string { | ||||
| 	case 80: | ||||
| 		return "65B" | ||||
| 	default: | ||||
| 		return "Unknown" | ||||
| 		return "unknown" | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -130,6 +156,10 @@ func (llm *llamaModel) FileType() string { | ||||
| 	return fileType(llm.hyperparameters.FileType) | ||||
| } | ||||
|  | ||||
| func (llm *llamaModel) NumLayers() int64 { | ||||
| 	return int64(llm.hyperparameters.NumLayer) | ||||
| } | ||||
|  | ||||
| type llamaHyperparameters struct { | ||||
| 	// NumVocab is the size of the model's vocabulary. | ||||
| 	NumVocab uint32 | ||||
| @@ -148,13 +178,12 @@ type llamaHyperparameters struct { | ||||
| } | ||||
|  | ||||
| type Running struct { | ||||
| 	Port   int | ||||
| 	Cmd    *exec.Cmd | ||||
| 	Cancel context.CancelFunc | ||||
| } | ||||
|  | ||||
| type ModelRunner struct { | ||||
| 	Path string // path to the model runner executable | ||||
| 	Port     int | ||||
| 	Cmd      *exec.Cmd | ||||
| 	Cancel   context.CancelFunc | ||||
| 	exitOnce sync.Once | ||||
| 	exitCh   chan error // channel to receive the exit status of the subprocess | ||||
| 	exitErr  error      // error returned by the subprocess | ||||
| } | ||||
|  | ||||
| type llama struct { | ||||
| @@ -165,8 +194,8 @@ type llama struct { | ||||
| var errNoGPU = errors.New("nvidia-smi command failed") | ||||
|  | ||||
| // CheckVRAM returns the available VRAM in MiB on Linux machines with NVIDIA GPUs | ||||
| func CheckVRAM() (int, error) { | ||||
| 	cmd := exec.Command("nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits") | ||||
| func CheckVRAM() (int64, error) { | ||||
| 	cmd := exec.Command("nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits") | ||||
| 	var stdout bytes.Buffer | ||||
| 	cmd.Stdout = &stdout | ||||
| 	err := cmd.Run() | ||||
| @@ -174,28 +203,27 @@ func CheckVRAM() (int, error) { | ||||
| 		return 0, errNoGPU | ||||
| 	} | ||||
|  | ||||
| 	var total int | ||||
| 	var free int64 | ||||
| 	scanner := bufio.NewScanner(&stdout) | ||||
| 	for scanner.Scan() { | ||||
| 		line := scanner.Text() | ||||
| 		vram, err := strconv.Atoi(line) | ||||
| 		vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64) | ||||
| 		if err != nil { | ||||
| 			return 0, fmt.Errorf("failed to parse available VRAM: %v", err) | ||||
| 		} | ||||
|  | ||||
| 		total += vram | ||||
| 		free += vram | ||||
| 	} | ||||
|  | ||||
| 	return total, nil | ||||
| 	return free, nil | ||||
| } | ||||
|  | ||||
| func NumGPU(opts api.Options) int { | ||||
| func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int { | ||||
| 	if opts.NumGPU != -1 { | ||||
| 		return opts.NumGPU | ||||
| 	} | ||||
| 	n := 1 // default to enable metal on macOS | ||||
| 	if runtime.GOOS == "linux" { | ||||
| 		vram, err := CheckVRAM() | ||||
| 		vramMib, err := CheckVRAM() | ||||
| 		if err != nil { | ||||
| 			if err.Error() != "nvidia-smi command failed" { | ||||
| 				log.Print(err.Error()) | ||||
| @@ -203,37 +231,45 @@ func NumGPU(opts api.Options) int { | ||||
| 			// nvidia driver not installed or no nvidia GPU found | ||||
| 			return 0 | ||||
| 		} | ||||
| 		// TODO: this is a very rough heuristic, better would be to calculate this based on number of layers and context size | ||||
| 		switch { | ||||
| 		case vram < 500: | ||||
| 			log.Printf("WARNING: Low VRAM detected, disabling GPU") | ||||
| 			n = 0 | ||||
| 		case vram < 1000: | ||||
| 			n = 4 | ||||
| 		case vram < 2000: | ||||
| 			n = 8 | ||||
| 		case vram < 4000: | ||||
| 			n = 12 | ||||
| 		case vram < 8000: | ||||
| 			n = 16 | ||||
| 		case vram < 12000: | ||||
| 			n = 24 | ||||
| 		case vram < 16000: | ||||
| 			n = 32 | ||||
| 		default: | ||||
| 			n = 48 | ||||
| 		} | ||||
| 		log.Printf("%d MB VRAM available, loading %d GPU layers", vram, n) | ||||
|  | ||||
| 		freeVramBytes := int64(vramMib) * 1024 * 1024 // 1 MiB = 1024^2 bytes | ||||
|  | ||||
| 		// Calculate bytes per layer | ||||
| 		// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size | ||||
| 		bytesPerLayer := fileSizeBytes / numLayer | ||||
|  | ||||
| 		// max number of layers we can fit in VRAM, subtract 5% to prevent consuming all available VRAM and running out of memory | ||||
| 		layers := int(freeVramBytes/bytesPerLayer) * 95 / 100 | ||||
| 		log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers) | ||||
|  | ||||
| 		return layers | ||||
| 	} | ||||
| 	return n | ||||
| 	// default to enable metal on macOS | ||||
| 	return 1 | ||||
| } | ||||
|  | ||||
| func newLlama(model string, adapters []string, runner ModelRunner, opts api.Options) (*llama, error) { | ||||
| 	if _, err := os.Stat(model); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| // StatusWriter is a writer that captures error messages from the llama runner process | ||||
| type StatusWriter struct { | ||||
| 	ErrCh chan error | ||||
| } | ||||
|  | ||||
| 	if _, err := os.Stat(runner.Path); err != nil { | ||||
| func NewStatusWriter() *StatusWriter { | ||||
| 	return &StatusWriter{ | ||||
| 		ErrCh: make(chan error, 1), | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (w *StatusWriter) Write(b []byte) (int, error) { | ||||
| 	if _, after, ok := bytes.Cut(b, []byte("error:")); ok { | ||||
| 		err := fmt.Errorf("llama runner: %s", after) | ||||
| 		w.ErrCh <- err | ||||
| 	} | ||||
| 	return os.Stderr.Write(b) | ||||
| } | ||||
|  | ||||
| func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) { | ||||
| 	fileInfo, err := os.Stat(model) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| @@ -247,7 +283,7 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti | ||||
| 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase), | ||||
| 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale), | ||||
| 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch), | ||||
| 		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(opts)), | ||||
| 		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)), | ||||
| 		"--embedding", | ||||
| 	} | ||||
|  | ||||
| @@ -277,8 +313,15 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti | ||||
| 		params = append(params, "--numa") | ||||
| 	} | ||||
|  | ||||
| 	var runnerErr error | ||||
|  | ||||
| 	// start the llama.cpp server with a retry in case the port is already in use | ||||
| 	for try := 0; try < 3; try++ { | ||||
| 	for _, runner := range runners { | ||||
| 		if _, err := os.Stat(runner.Path); err != nil { | ||||
| 			log.Printf("llama runner not found: %v", err) | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range | ||||
| 		ctx, cancel := context.WithCancel(context.Background()) | ||||
| 		cmd := exec.CommandContext( | ||||
| @@ -286,21 +329,43 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti | ||||
| 			runner.Path, | ||||
| 			append(params, "--port", strconv.Itoa(port))..., | ||||
| 		) | ||||
|  | ||||
| 		cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path))) | ||||
| 		cmd.Stdout = os.Stderr | ||||
| 		cmd.Stderr = os.Stderr | ||||
| 		statusWriter := NewStatusWriter() | ||||
| 		cmd.Stderr = statusWriter | ||||
|  | ||||
| 		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}} | ||||
| 		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}} | ||||
|  | ||||
| 		log.Print("starting llama.cpp server") | ||||
| 		log.Print("starting llama runner") | ||||
| 		if err := llm.Cmd.Start(); err != nil { | ||||
| 			log.Printf("error starting the external llama.cpp server: %v", err) | ||||
| 			log.Printf("error starting the external llama runner: %v", err) | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// monitor the llama runner process and signal when it exits | ||||
| 		go func() { | ||||
| 			err := llm.Cmd.Wait() | ||||
| 			llm.exitErr = err | ||||
| 			// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited | ||||
| 			llm.exitOnce.Do(func() { | ||||
| 				close(llm.exitCh) | ||||
| 			}) | ||||
| 		}() | ||||
|  | ||||
| 		if err := waitForServer(llm); err != nil { | ||||
| 			log.Printf("error starting llama.cpp server: %v", err) | ||||
| 			log.Printf("error starting llama runner: %v", err) | ||||
| 			llm.Close() | ||||
|  | ||||
| 			// default the runnerErr to the error returned by the most recent llama runner process | ||||
| 			runnerErr = err | ||||
|  | ||||
| 			// capture the error directly from the runner process, if any | ||||
| 			select { | ||||
| 			case runnerErr = <-statusWriter.ErrCh: | ||||
| 			default: | ||||
| 				// the runner process probably timed out | ||||
| 			} | ||||
|  | ||||
| 			// try again | ||||
| 			continue | ||||
| 		} | ||||
| @@ -309,34 +374,53 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti | ||||
| 		return llm, nil | ||||
| 	} | ||||
|  | ||||
| 	return nil, fmt.Errorf("max retry exceeded starting llama.cpp") | ||||
| 	if runnerErr != nil { | ||||
| 		// this is the error returned from the llama runner process that failed most recently | ||||
| 		return nil, runnerErr | ||||
| 	} | ||||
|  | ||||
| 	return nil, fmt.Errorf("failed to start a llama runner") | ||||
| } | ||||
|  | ||||
| func waitForServer(llm *llama) error { | ||||
| 	// wait for the server to start responding | ||||
| 	start := time.Now() | ||||
| 	expiresAt := time.Now().Add(45 * time.Second) | ||||
| 	expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load | ||||
| 	ticker := time.NewTicker(200 * time.Millisecond) | ||||
| 	defer ticker.Stop() | ||||
|  | ||||
| 	log.Print("waiting for llama.cpp server to start responding") | ||||
| 	for range ticker.C { | ||||
| 		if time.Now().After(expiresAt) { | ||||
| 			return fmt.Errorf("llama.cpp server did not start within alloted time, retrying") | ||||
| 		} | ||||
| 	log.Print("waiting for llama runner to start responding") | ||||
| 	for { | ||||
| 		select { | ||||
| 		case <-llm.exitCh: | ||||
| 			// failed to start subprocess | ||||
| 			return fmt.Errorf("llama runner process has terminated") | ||||
| 		case <-ticker.C: | ||||
| 			if time.Now().After(expiresAt) { | ||||
| 				// timeout | ||||
| 				return fmt.Errorf("timed out waiting for llama runner to start") | ||||
| 			} | ||||
|  | ||||
| 		if err := llm.Ping(context.Background()); err == nil { | ||||
| 			break | ||||
| 			if err := llm.Ping(context.Background()); err == nil { | ||||
| 				// success | ||||
| 				log.Printf("llama runner started in %f seconds", time.Since(start).Seconds()) | ||||
| 				return nil | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	log.Printf("llama.cpp server started in %f seconds", time.Since(start).Seconds()) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (llm *llama) Close() { | ||||
| 	// signal the sub-process to terminate | ||||
| 	llm.Cancel() | ||||
| 	if err := llm.Cmd.Wait(); err != nil { | ||||
| 		log.Printf("llama.cpp server exited with error: %v", err) | ||||
|  | ||||
| 	// wait for the command to exit to prevent race conditions with the next run | ||||
| 	<-llm.exitCh | ||||
| 	err := llm.exitErr | ||||
|  | ||||
| 	if err != nil { | ||||
| 		log.Printf("llama runner stopped with error: %v", err) | ||||
| 	} else { | ||||
| 		log.Print("llama runner stopped successfully") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -387,30 +471,29 @@ type Prediction struct { | ||||
| } | ||||
|  | ||||
| type PredictRequest struct { | ||||
| 	Stream           bool            `json:"stream"` | ||||
| 	NPredict         int             `json:"n_predict,omitempty"` | ||||
| 	TopK             int             `json:"top_k,omitempty"` | ||||
| 	TopP             float32         `json:"top_p,omitempty"` | ||||
| 	TfsZ             float32         `json:"tfs_z,omitempty"` | ||||
| 	TypicalP         float32         `json:"typical_p,omitempty"` | ||||
| 	RepeatLastN      int             `json:"repeat_last_n,omitempty"` | ||||
| 	Temperature      float32         `json:"temperature,omitempty"` | ||||
| 	RepeatPenalty    float32         `json:"repeat_penalty,omitempty"` | ||||
| 	PresencePenalty  float32         `json:"presence_penalty,omitempty"` | ||||
| 	FrequencyPenalty float32         `json:"frequency_penalty,omitempty"` | ||||
| 	Mirostat         int             `json:"mirostat,omitempty"` | ||||
| 	MirostatTau      float32         `json:"mirostat_tau,omitempty"` | ||||
| 	MirostatEta      float32         `json:"mirostat_eta,omitempty"` | ||||
| 	PenalizeNl       bool            `json:"penalize_nl,omitempty"` | ||||
| 	NKeep            int             `json:"n_keep,omitempty"` | ||||
| 	Seed             int             `json:"seed,omitempty"` | ||||
| 	Prompt           string          `json:"prompt,omitempty"` | ||||
| 	NProbs           int             `json:"n_probs,omitempty"` | ||||
| 	LogitBias        map[int]float32 `json:"logit_bias,omitempty"` | ||||
| 	IgnoreEos        bool            `json:"ignore_eos,omitempty"` | ||||
| 	Stop             []string        `json:"stop,omitempty"` | ||||
| 	Prompt           string   `json:"prompt"` | ||||
| 	Stream           bool     `json:"stream"` | ||||
| 	NPredict         int      `json:"n_predict"` | ||||
| 	NKeep            int      `json:"n_keep"` | ||||
| 	Temperature      float32  `json:"temperature"` | ||||
| 	TopK             int      `json:"top_k"` | ||||
| 	TopP             float32  `json:"top_p"` | ||||
| 	TfsZ             float32  `json:"tfs_z"` | ||||
| 	TypicalP         float32  `json:"typical_p"` | ||||
| 	RepeatLastN      int      `json:"repeat_last_n"` | ||||
| 	RepeatPenalty    float32  `json:"repeat_penalty"` | ||||
| 	PresencePenalty  float32  `json:"presence_penalty"` | ||||
| 	FrequencyPenalty float32  `json:"frequency_penalty"` | ||||
| 	Mirostat         int      `json:"mirostat"` | ||||
| 	MirostatTau      float32  `json:"mirostat_tau"` | ||||
| 	MirostatEta      float32  `json:"mirostat_eta"` | ||||
| 	PenalizeNl       bool     `json:"penalize_nl"` | ||||
| 	Seed             int      `json:"seed"` | ||||
| 	Stop             []string `json:"stop,omitempty"` | ||||
| } | ||||
|  | ||||
| const maxBufferSize = 512 * 1000 // 512KB | ||||
|  | ||||
| func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error { | ||||
| 	prevConvo, err := llm.Decode(ctx, prevContext) | ||||
| 	if err != nil { | ||||
| @@ -440,8 +523,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, | ||||
| 		MirostatTau:      llm.MirostatTau, | ||||
| 		MirostatEta:      llm.MirostatEta, | ||||
| 		PenalizeNl:       llm.PenalizeNewline, | ||||
| 		Seed:             llm.Seed, | ||||
| 		Stop:             llm.Stop, | ||||
| 	} | ||||
|  | ||||
| 	data, err := json.Marshal(predReq) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("error marshaling data: %v", err) | ||||
| @@ -469,6 +554,9 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, | ||||
| 	} | ||||
|  | ||||
| 	scanner := bufio.NewScanner(resp.Body) | ||||
| 	// increase the buffer size to avoid running out of space | ||||
| 	buf := make([]byte, 0, maxBufferSize) | ||||
| 	scanner.Buffer(buf, maxBufferSize) | ||||
| 	for scanner.Scan() { | ||||
| 		select { | ||||
| 		case <-ctx.Done(): | ||||
|   | ||||
							
								
								
									
										73
									
								
								llm/llm.go
									
									
									
									
									
								
							
							
						
						
									
										73
									
								
								llm/llm.go
									
									
									
									
									
								
							| @@ -5,6 +5,7 @@ import ( | ||||
| 	"fmt" | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"runtime" | ||||
|  | ||||
| 	"github.com/pbnjay/memory" | ||||
|  | ||||
| @@ -21,7 +22,7 @@ type LLM interface { | ||||
| 	Ping(context.Context) error | ||||
| } | ||||
|  | ||||
| func New(model string, adapters []string, opts api.Options) (LLM, error) { | ||||
| func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) { | ||||
| 	if _, err := os.Stat(model); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| @@ -37,53 +38,55 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	switch ggml.FileType() { | ||||
| 	case "Q8_0": | ||||
| 		if ggml.Name() != "gguf" && opts.NumGPU != 0 { | ||||
| 			// GGML Q8_0 do not support Metal API and will | ||||
| 			// cause the runner to segmentation fault so disable GPU | ||||
| 			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") | ||||
| 			opts.NumGPU = 0 | ||||
| 		} | ||||
| 	case "F32", "Q5_0", "Q5_1": | ||||
| 		if opts.NumGPU != 0 { | ||||
| 			// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will | ||||
| 			// cause the runner to segmentation fault so disable GPU | ||||
| 			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") | ||||
| 			opts.NumGPU = 0 | ||||
| 	if runtime.GOOS == "darwin" { | ||||
| 		switch ggml.FileType() { | ||||
| 		case "Q8_0": | ||||
| 			if ggml.Name() != "gguf" && opts.NumGPU != 0 { | ||||
| 				// GGML Q8_0 do not support Metal API and will | ||||
| 				// cause the runner to segmentation fault so disable GPU | ||||
| 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") | ||||
| 				opts.NumGPU = 0 | ||||
| 			} | ||||
| 		case "F32", "Q5_0", "Q5_1": | ||||
| 			if opts.NumGPU != 0 { | ||||
| 				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will | ||||
| 				// cause the runner to segmentation fault so disable GPU | ||||
| 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") | ||||
| 				opts.NumGPU = 0 | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	totalResidentMemory := memory.TotalMemory() | ||||
| 	switch ggml.ModelType() { | ||||
| 	case "3B", "7B": | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 16GB of memory") | ||||
| 		} else if totalResidentMemory < 8*1024*1024 { | ||||
| 			return nil, fmt.Errorf("model requires at least 8GB of memory") | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 16*1000*1000 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 16 GB of memory") | ||||
| 		} else if totalResidentMemory < 8*1000*1000 { | ||||
| 			return nil, fmt.Errorf("model requires at least 8 GB of memory") | ||||
| 		} | ||||
| 	case "13B": | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 32GB of memory") | ||||
| 		} else if totalResidentMemory < 16*1024*1024 { | ||||
| 			return nil, fmt.Errorf("model requires at least 16GB of memory") | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 32*1000*1000 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 32 GB of memory") | ||||
| 		} else if totalResidentMemory < 16*1000*1000 { | ||||
| 			return nil, fmt.Errorf("model requires at least 16 GB of memory") | ||||
| 		} | ||||
| 	case "30B", "34B", "40B": | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 64GB of memory") | ||||
| 		} else if totalResidentMemory < 32*1024*1024 { | ||||
| 			return nil, fmt.Errorf("model requires at least 32GB of memory") | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 64*1000*1000 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 64 GB of memory") | ||||
| 		} else if totalResidentMemory < 32*1000*1000 { | ||||
| 			return nil, fmt.Errorf("model requires at least 32 GB of memory") | ||||
| 		} | ||||
| 	case "65B", "70B": | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 128GB of memory") | ||||
| 		} else if totalResidentMemory < 64*1024*1024 { | ||||
| 			return nil, fmt.Errorf("model requires at least 64GB of memory") | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 128*1000*1000 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 128 GB of memory") | ||||
| 		} else if totalResidentMemory < 64*1000*1000 { | ||||
| 			return nil, fmt.Errorf("model requires at least 64 GB of memory") | ||||
| 		} | ||||
| 	case "180B": | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 { | ||||
| 		if ggml.FileType() == "F16" && totalResidentMemory < 512*1000*1000 { | ||||
| 			return nil, fmt.Errorf("F16 model requires at least 512GB of memory") | ||||
| 		} else if totalResidentMemory < 128*1024*1024 { | ||||
| 		} else if totalResidentMemory < 128*1000*1000 { | ||||
| 			return nil, fmt.Errorf("model requires at least 128GB of memory") | ||||
| 		} | ||||
| 	} | ||||
| @@ -91,9 +94,9 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) { | ||||
| 	switch ggml.Name() { | ||||
| 	case "gguf": | ||||
| 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions | ||||
| 		return newLlama(model, adapters, ggufRunner(), opts) | ||||
| 		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts) | ||||
| 	case "ggml", "ggmf", "ggjt", "ggla": | ||||
| 		return newLlama(model, adapters, ggmlRunner(), opts) | ||||
| 		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts) | ||||
| 	default: | ||||
| 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily()) | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										23
									
								
								llm/starcoder.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								llm/starcoder.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| package llm | ||||
|  | ||||
// Layer counts used to recognise each StarCoder model size.
const (
	starCoderModelType1B  = 24
	starCoderModelType3B  = 36
	starCoderModelType7B  = 42
	starCoderModelType15B = 40
)

// starCoderModelType maps a StarCoder model's layer count to its size label
// ("1B", "3B", "7B" or "15B"). Any layer count that matches none of the
// constants above yields "unknown".
func starCoderModelType(numLayer uint32) string {
	// switch on the named constants so the table above is the single source
	// of truth for the layer counts
	switch numLayer {
	case starCoderModelType1B:
		return "1B"
	case starCoderModelType3B:
		return "3B"
	case starCoderModelType7B:
		return "7B"
	case starCoderModelType15B:
		return "15B"
	default:
		return "unknown"
	}
}
							
								
								
									
										21
									
								
								scripts/build.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								scripts/build.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
#!/bin/sh
# Runs the darwin, linux and docker build scripts for a single version.
# usage: build.sh VERSION

set -eu

# Print the usage line and exit with a failure status.
usage() {
    echo "usage: $(basename "$0") VERSION"
    exit 1
}

[ "$#" -eq 1 ] || usage

# exported so the build scripts started below can read it
export VERSION="$1"

# directory holding this script and its sibling build scripts; quoted so a
# checkout path containing spaces is not split into several words
SCRIPT_DIR=$(dirname "$0")

# build universal macOS binary
sh "$SCRIPT_DIR/build_darwin.sh"

# build arm64 and amd64 Linux binaries
sh "$SCRIPT_DIR/build_linux.sh"

# build arm64 and amd64 Docker images
sh "$SCRIPT_DIR/build_docker.sh"
| @@ -1,29 +1,30 @@ | ||||
| #!/bin/bash | ||||
| #!/bin/sh | ||||
|  | ||||
| set -eu | ||||
|  | ||||
| export VERSION=${VERSION:-0.0.0} | ||||
| export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" | ||||
|  | ||||
| mkdir -p dist | ||||
|  | ||||
| GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION" | ||||
| GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release" | ||||
| for TARGETARCH in arm64 amd64; do | ||||
|     GOOS=darwin GOARCH=$TARGETARCH go generate ./... | ||||
|     GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH | ||||
| done | ||||
|  | ||||
| # build universal binary | ||||
| GOARCH=arm64 go generate ./... | ||||
| GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64 | ||||
| rm -rf llm/llama.cpp/*/build/*/bin | ||||
| GOARCH=amd64 go generate ./... | ||||
| GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64 | ||||
| lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64 | ||||
| rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64 | ||||
| lipo -create -output dist/ollama dist/ollama-darwin-* | ||||
| rm -f dist/ollama-darwin-* | ||||
| codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama | ||||
| chmod +x dist/ollama | ||||
|  | ||||
| # build and sign the mac app | ||||
| npm install --prefix app | ||||
| npm run --prefix app make:sign | ||||
| cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-${VERSION:-0.0.0}.zip dist/Ollama-darwin.zip | ||||
| cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip | ||||
|  | ||||
| # sign the binary and rename it | ||||
| codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama | ||||
| ditto -c -k --keepParent dist/ollama dist/temp.zip | ||||
| xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID | ||||
| mv dist/ollama dist/ollama-darwin | ||||
| rm dist/temp.zip | ||||
| rm -f dist/temp.zip | ||||
|   | ||||
							
								
								
									
										15
									
								
								scripts/build_docker.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										15
									
								
								scripts/build_docker.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| #!/bin/sh | ||||
|  | ||||
| set -eu | ||||
|  | ||||
| export VERSION=${VERSION:-0.0.0} | ||||
| export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" | ||||
|  | ||||
| docker buildx build \ | ||||
|     --load \ | ||||
|     --platform=linux/arm64,linux/amd64 \ | ||||
|     --build-arg=VERSION \ | ||||
|     --build-arg=GOFLAGS \ | ||||
|     -f Dockerfile \ | ||||
|     -t ollama \ | ||||
|     . | ||||
							
								
								
									
										15
									
								
								scripts/build_linux.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										15
									
								
								scripts/build_linux.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| #!/bin/sh | ||||
|  | ||||
| set -eu | ||||
|  | ||||
| export VERSION=${VERSION:-0.0.0} | ||||
| export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" | ||||
|  | ||||
| mkdir -p dist | ||||
|  | ||||
| for TARGETARCH in arm64 amd64; do | ||||
|     docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH . | ||||
|     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH | ||||
|     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH | ||||
|     docker rm builder-$TARGETARCH | ||||
| done | ||||
							
								
								
									
										243
									
								
								scripts/install.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								scripts/install.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,243 @@ | ||||
| #!/bin/sh | ||||
| # This script installs Ollama on Linux. | ||||
| # It detects the current operating system architecture and installs the appropriate version of Ollama. | ||||
|  | ||||
| set -eu | ||||
|  | ||||
| status() { echo ">>> $*" >&2; } | ||||
| error() { echo "ERROR $*"; exit 1; } | ||||
| warning() { echo "WARNING: $*"; } | ||||
|  | ||||
| TEMP_DIR=$(mktemp -d) | ||||
| cleanup() { rm -rf $TEMP_DIR; } | ||||
| trap cleanup EXIT | ||||
|  | ||||
| available() { command -v $1 >/dev/null; } | ||||
| # require prints, on one line, the subset of its arguments for which | ||||
| # `available` fails. Only a newline is printed when every tool is present. | ||||
| require() { | ||||
|     local ABSENT='' | ||||
|     for TOOL in $*; do | ||||
|         available $TOOL || ABSENT="$ABSENT $TOOL" | ||||
|     done | ||||
|  | ||||
|     # Deliberately unquoted: word splitting drops the leading space. | ||||
|     echo $ABSENT | ||||
| } | ||||
|  | ||||
| # Refuse to run on anything other than Linux. | ||||
| [ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.' | ||||
|  | ||||
| # Map the machine name reported by uname onto the suffix used in the download URL. | ||||
| # The fallback reports the raw machine name: ARCH is still unset in that branch, | ||||
| # and expanding it would abort under `set -u` before the message is printed. | ||||
| case "$(uname -m)" in | ||||
|     x86_64) ARCH="amd64" ;; | ||||
|     aarch64|arm64) ARCH="arm64" ;; | ||||
|     *) error "Unsupported architecture: $(uname -m)" ;; | ||||
| esac | ||||
|  | ||||
| # SUDO stays empty when the script already runs as root; otherwise it is set | ||||
| # to "sudo" and used as a prefix for commands that need elevated privileges. | ||||
| SUDO= | ||||
| if [ "$(id -u)" -ne 0 ]; then | ||||
|     # Not running as root: sudo must be available to elevate privileges. | ||||
|     if ! available sudo; then | ||||
|         error "This script requires superuser permissions. Please re-run as root." | ||||
|     fi | ||||
|  | ||||
|     SUDO="sudo" | ||||
| fi | ||||
|  | ||||
| # Abort early, listing every missing tool, if any command the installer relies on is absent. | ||||
| NEEDS=$(require curl awk grep sed tee xargs) | ||||
| if [ -n "$NEEDS" ]; then | ||||
|     status "ERROR: The following tools are required but missing:" | ||||
|     for NEED in $NEEDS; do | ||||
|         echo "  - $NEED" | ||||
|     done | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # Download the binary matching the detected architecture into the scratch directory. | ||||
| status "Downloading ollama..." | ||||
| curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH" | ||||
|  | ||||
| # Pick the first candidate directory whose path occurs in $PATH (substring match). | ||||
| # If none matches, the loop finishes with BINDIR set to the last candidate, /bin. | ||||
| for BINDIR in /usr/local/bin /usr/bin /bin; do | ||||
|     echo $PATH | grep -q $BINDIR && break || continue | ||||
| done | ||||
|  | ||||
| status "Installing ollama to $BINDIR..." | ||||
| # First make sure the directory exists, then copy the binary with owner/group 0 and mode 755. | ||||
| $SUDO install -o0 -g0 -m755 -d $BINDIR | ||||
| $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama | ||||
|  | ||||
| # install_success prints the completion message shown when the script exits. | ||||
| install_success() { status 'Install complete. Run "ollama" from the command line.'; } | ||||
| # A new EXIT trap replaces the previously registered cleanup handler, so run | ||||
| # cleanup here as well; otherwise the temporary download directory is leaked. | ||||
| trap 'install_success; cleanup' EXIT | ||||
|  | ||||
| # Everything from this point onwards is optional. | ||||
|  | ||||
| # configure_systemd creates the ollama system user if it does not exist, | ||||
| # writes /etc/systemd/system/ollama.service and, when systemd reports the | ||||
| # system as running or degraded, enables the service and schedules a restart | ||||
| # of it for when the script exits. | ||||
| configure_systemd() { | ||||
|     if ! id ollama >/dev/null 2>&1; then | ||||
|         status "Creating ollama user..." | ||||
|         $SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama | ||||
|     fi | ||||
|  | ||||
|     status "Creating ollama systemd service..." | ||||
|     # The heredoc delimiter is unquoted, so $BINDIR and $PATH are expanded now | ||||
|     # and their current values are written into the unit file. | ||||
|     cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null | ||||
| [Unit] | ||||
| Description=Ollama Service | ||||
| After=network-online.target | ||||
|  | ||||
| [Service] | ||||
| ExecStart=$BINDIR/ollama serve | ||||
| User=ollama | ||||
| Group=ollama | ||||
| Restart=always | ||||
| RestartSec=3 | ||||
| Environment="HOME=/usr/share/ollama" | ||||
| Environment="PATH=$PATH" | ||||
|  | ||||
| [Install] | ||||
| WantedBy=default.target | ||||
| EOF | ||||
|     # `|| true` keeps `set -e` from aborting when systemctl reports a non-running state. | ||||
|     SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)" | ||||
|     case $SYSTEMCTL_RUNNING in | ||||
|         running|degraded) | ||||
|             status "Enabling and starting ollama service..." | ||||
|             $SUDO systemctl daemon-reload | ||||
|             $SUDO systemctl enable ollama | ||||
|  | ||||
|             start_service() { $SUDO systemctl restart ollama; } | ||||
|             # Setting a new EXIT trap discards the one already installed, so chain | ||||
|             # the handlers it would otherwise drop: remove the temporary directory, | ||||
|             # restart the service, then print the completion message. | ||||
|             trap 'cleanup; start_service; install_success' EXIT | ||||
|             ;; | ||||
|     esac | ||||
| } | ||||
|  | ||||
| # Only set up the service when systemctl is present. | ||||
| if available systemctl; then | ||||
|     configure_systemd | ||||
| fi | ||||
|  | ||||
| # GPU detection below needs lspci or lshw; without either, stop here successfully. | ||||
| if ! available lspci && ! available lshw; then | ||||
|     warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers." | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # check_gpu reports through its exit status whether the probe named by $1 succeeds: | ||||
| #   lspci      - lspci is installed and lists a vendor 10de device whose line contains NVIDIA | ||||
| #   lshw       - lshw is installed and reports a display device with vendor id [10DE] | ||||
| #   nvidia-smi - the nvidia-smi command is installed | ||||
| # Any other probe name matches no branch and the function succeeds. | ||||
| check_gpu() { | ||||
|     case $1 in | ||||
|         lspci) | ||||
|             available lspci || return 1 | ||||
|             lspci -d '10de:' | grep -q 'NVIDIA' || return 1 | ||||
|             ;; | ||||
|         lshw) | ||||
|             available lshw || return 1 | ||||
|             $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 | ||||
|             ;; | ||||
|         nvidia-smi) | ||||
|             available nvidia-smi || return 1 | ||||
|             ;; | ||||
|     esac | ||||
| } | ||||
|  | ||||
| # nvidia-smi already being installed is taken to mean the driver is set up; nothing more to do. | ||||
| if check_gpu nvidia-smi; then | ||||
|     status "NVIDIA GPU installed." | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # Neither probe found NVIDIA hardware: skip the driver installation. | ||||
| if ! check_gpu lspci && ! check_gpu lshw; then | ||||
|     warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode." | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7 | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-8-rocky-8 | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9 | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora | ||||
| # install_cuda_driver_yum adds the NVIDIA CUDA repository and installs the | ||||
| # cuda-drivers package using the global $PACKAGE_MANAGER (yum or dnf). | ||||
| #   $1 - distribution name used in the repository URL (e.g. rhel, fedora) | ||||
| #   $2 - distribution version appended to $1 in the repository URL | ||||
| install_cuda_driver_yum() { | ||||
|     status 'Installing NVIDIA repository...' | ||||
|     case $PACKAGE_MANAGER in | ||||
|         yum) | ||||
|             # yum-utils is installed first, then yum-config-manager is invoked. | ||||
|             $SUDO $PACKAGE_MANAGER -y install yum-utils | ||||
|             $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo | ||||
|             ;; | ||||
|         dnf) | ||||
|             $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo | ||||
|             ;; | ||||
|     esac | ||||
|  | ||||
|     case $1 in | ||||
|         rhel) | ||||
|             status 'Installing EPEL repository...' | ||||
|             # EPEL is required for third-party dependencies such as dkms and libvdpau | ||||
|             # A failure here is ignored (`|| true`) so the installation continues. | ||||
|             $SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true | ||||
|             ;; | ||||
|     esac | ||||
|  | ||||
|     status 'Installing CUDA driver...' | ||||
|  | ||||
|     # For centos and rhel7 the dkms driver package is installed before cuda-drivers. | ||||
|     if [ "$1" = 'centos' ] || [ "$1$2" = 'rhel7' ]; then | ||||
|         $SUDO $PACKAGE_MANAGER -y install nvidia-driver-latest-dkms | ||||
|     fi | ||||
|  | ||||
|     $SUDO $PACKAGE_MANAGER -y install cuda-drivers | ||||
| } | ||||
|  | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu | ||||
| # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian | ||||
| # install_cuda_driver_apt installs the NVIDIA CUDA keyring package and then the | ||||
| # cuda-drivers package through apt-get. | ||||
| #   $1 - distribution name used in the repository URL (debian or ubuntu) | ||||
| #   $2 - distribution version appended to $1 in the repository URL | ||||
| install_cuda_driver_apt() { | ||||
|     status 'Installing NVIDIA repository...' | ||||
|     curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb | ||||
|  | ||||
|     case $1 in | ||||
|         debian) | ||||
|             status 'Enabling contrib sources...' | ||||
|             # Use $SUDO (empty when already root) rather than a literal sudo, so this | ||||
|             # also works as root on systems where sudo is not installed. | ||||
|             $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null | ||||
|             ;; | ||||
|     esac | ||||
|  | ||||
|     status 'Installing CUDA driver...' | ||||
|     $SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb | ||||
|     $SUDO apt-get update | ||||
|  | ||||
|     # When sudo is in use, pass -E so DEBIAN_FRONTEND is preserved in apt-get's environment. | ||||
|     [ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E= | ||||
|     DEBIAN_FRONTEND=noninteractive $SUDO_E apt-get -y install cuda-drivers -q | ||||
| } | ||||
|  | ||||
| # /etc/os-release supplies ID and VERSION_ID, which select the CUDA repository below. | ||||
| if [ ! -f "/etc/os-release" ]; then | ||||
|     error "Unknown distribution. Skipping CUDA installation." | ||||
| fi | ||||
|  | ||||
| . /etc/os-release | ||||
|  | ||||
| OS_NAME=$ID | ||||
| OS_VERSION=$VERSION_ID | ||||
|  | ||||
| # Select the first package manager that is actually installed. A separate loop | ||||
| # variable is used so PACKAGE_MANAGER stays empty when none is found; looping | ||||
| # over PACKAGE_MANAGER itself would always leave it set to the last candidate | ||||
| # and the check below could never fail. | ||||
| PACKAGE_MANAGER= | ||||
| for CANDIDATE in dnf yum apt-get; do | ||||
|     if available $CANDIDATE; then | ||||
|         PACKAGE_MANAGER=$CANDIDATE | ||||
|         break | ||||
|     fi | ||||
| done | ||||
|  | ||||
| if [ -z "$PACKAGE_MANAGER" ]; then | ||||
|     error "Unknown package manager. Skipping CUDA installation." | ||||
| fi | ||||
|  | ||||
| # Install the driver when nvidia-smi is missing or its output contains no "CUDA Version: X.Y". | ||||
| if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then | ||||
|     case $OS_NAME in | ||||
|         centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;; | ||||
|         # Only the first character of the version is passed for rocky. | ||||
|         rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;; | ||||
|         fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;; | ||||
|         # amzn is pointed at the fedora 35 repository. | ||||
|         amzn) install_cuda_driver_yum 'fedora' '35' ;; | ||||
|         debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;; | ||||
|         # The first dot is removed from the ubuntu version before it is passed on. | ||||
|         ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;; | ||||
|         # Any other distribution: stop without installing a driver. | ||||
|         *) exit ;; | ||||
|     esac | ||||
| fi | ||||
|  | ||||
| # When lsmod lists no nvidia module: install kernel headers for the running | ||||
| # kernel, run dkms install for entries dkms reports as "added", and load the module. | ||||
| if ! lsmod | grep -q nvidia; then | ||||
|     KERNEL_RELEASE="$(uname -r)" | ||||
|     case $OS_NAME in | ||||
|         centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;; | ||||
|         fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;; | ||||
|         debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;; | ||||
|         *) exit ;; | ||||
|     esac | ||||
|  | ||||
|     # Takes the text before the first ':' on each `dkms status` line containing "added" | ||||
|     # (presumably the module/version identifier - confirm against dkms output). | ||||
|     NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }') | ||||
|     if [ -n "$NVIDIA_CUDA_VERSION" ]; then | ||||
|         $SUDO dkms install $NVIDIA_CUDA_VERSION | ||||
|     fi | ||||
|  | ||||
|     # If nouveau is loaded, ask for a reboot instead of loading the nvidia module now. | ||||
|     if lsmod | grep -q nouveau; then | ||||
|         status 'Reboot to complete NVIDIA CUDA driver install.' | ||||
|         exit 0 | ||||
|     fi | ||||
|  | ||||
|     $SUDO modprobe nvidia | ||||
| fi | ||||
|  | ||||
|  | ||||
| status "NVIDIA CUDA drivers installed." | ||||
| @@ -14,7 +14,7 @@ import ( | ||||
| 	"net/http" | ||||
| 	"net/url" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| @@ -71,7 +71,7 @@ func (r AuthRedirect) URL() (*url.URL, error) { | ||||
| 	return redirectURL, nil | ||||
| } | ||||
|  | ||||
| func getAuthToken(ctx context.Context, redirData AuthRedirect, regOpts *RegistryOptions) (string, error) { | ||||
| func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) { | ||||
| 	redirectURL, err := redirData.URL() | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| @@ -82,7 +82,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect, regOpts *Registry | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	keyPath := path.Join(home, ".ollama", "id_ed25519") | ||||
| 	keyPath := filepath.Join(home, ".ollama", "id_ed25519") | ||||
|  | ||||
| 	rawKey, err := os.ReadFile(keyPath) | ||||
| 	if err != nil { | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user