Compare commits

...

364 Commits

Author SHA1 Message Date
Patrick Devine
ad83c87454 add back in the windows terminal file 2023-11-14 16:52:34 -08:00
Patrick Devine
8627f6c66c initial commit of the readline editor replacement 2023-11-14 15:59:35 -08:00
Jeffrey Morgan
423862042a treat ollama run model < file as entire prompt, not prompt-per-line (#1126)
Previously, `ollama run` treated a non-terminal stdin (such as `ollama run model < file`) as containing one prompt per line. To run inference on a multi-line prompt, the only non-API workaround was to run `ollama run` interactively and wrap the prompt in `"""..."""`.

Now, `ollama run` treats a non-terminal stdin as containing a single prompt. For example, if `myprompt.txt` is a multi-line file, then `ollama run model < myprompt.txt` would treat `myprompt.txt`'s entire contents as the prompt.

Co-authored-by: Quinn Slack <quinn@slack.org>
2023-11-14 16:42:21 -05:00
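A minimal Go sketch of that behavior (not the actual CLI code), assuming the golang.org/x/term package for the terminal check: when stdin is not a terminal, the whole stream is read as one prompt.

```go
package main

import (
	"fmt"
	"io"
	"os"

	"golang.org/x/term"
)

func main() {
	// If stdin is not a terminal (e.g. `ollama run model < file`),
	// read everything and treat it as a single prompt.
	if !term.IsTerminal(int(os.Stdin.Fd())) {
		data, err := io.ReadAll(os.Stdin)
		if err != nil {
			fmt.Fprintln(os.Stderr, "error:", err)
			os.Exit(1)
		}
		prompt := string(data)
		fmt.Println("running one generation for prompt:", prompt)
		return
	}
	fmt.Println("interactive mode")
}
```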
Bruce MacDonald
df18486c35 Move /generate format to optional parameters (#1127)
This field is optional and should be under the `Advanced parameters` header
2023-11-14 16:12:30 -05:00
Jeffrey Morgan
4e612a2e92 use stdout fd for terminal size (#1125) 2023-11-14 16:09:09 -05:00
Jeffrey Morgan
6e0f686afa --format json should work in interactive mode 2023-11-14 10:22:03 -05:00
Jeffrey Morgan
c1844bbee2 add json mode to cli (#1095) 2023-11-13 21:54:02 -05:00
Huy Le
cb745965ce adding ollama.nvim for visibility (#1115) 2023-11-13 17:00:17 -05:00
Enrico Ros
8d29b6a2b6 New big-AGI integration (#1078)
* New big-AGI integration

Ollama works great in big-AGI, and this document explains how to link the two projects.

* Update README.md
2023-11-13 16:59:00 -05:00
Ilya Breitburg
724aa64bee Add Dart library to README.md (#1106) 2023-11-13 14:50:42 -05:00
Michael Yang
d91c103e74 Merge pull request #1055 from dansreis/946-fix-incorrect-base-model-name
Fixed incorrect base model name
2023-11-13 08:42:55 -08:00
Kevin Hermawan
98ec7d81e3 Add OllamaKit to the community integrations (#1085) 2023-11-11 14:41:42 -08:00
Daniel Reis
7c438f2c53 Replaced method 2023-11-10 20:22:03 +00:00
Daniel Reis
6e46338d44 Reverting previous changes 2023-11-10 20:21:35 +00:00
Jeffrey Morgan
cdddd3df65 add format to example python client 2023-11-10 10:22:21 -08:00
Daniel Hiltgen
afa61bdf45 Merge pull request #1075 from jmorganca/dhiltgen/unexpected-eof
Resume chunk download on UnexpectedEOF errors
2023-11-10 08:48:27 -08:00
Daniel Hiltgen
cc54a416c6 Resume chunk download on UnexpectedEOF errors
If the chunk download is interrupted, resume from where we left off
2023-11-10 08:29:42 -08:00
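A rough Go sketch of the resume idea, not the real downloader: it assumes the registry honors HTTP Range requests and uses an illustrative destination file name.

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
)

// downloadWithResume appends to dst, restarting the request from the current
// file size whenever the copy ends with an unexpected EOF.
func downloadWithResume(url, dst string) error {
	for {
		f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
		if err != nil {
			return err
		}
		offset, _ := f.Seek(0, io.SeekEnd)

		req, _ := http.NewRequest(http.MethodGet, url, nil)
		req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			f.Close()
			return err
		}

		_, err = io.Copy(f, resp.Body)
		resp.Body.Close()
		f.Close()
		if errors.Is(err, io.ErrUnexpectedEOF) {
			continue // resume from where we left off
		}
		return err
	}
}

func main() {
	if err := downloadWithResume("http://example.com/blob", "blob.part"); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```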
Matt Williams
c819d7f68a Merge pull request #955 from jmorganca/mattw/example-bash-compare
docs: add examples using bash to compare models
2023-11-10 08:59:32 -06:00
Jeffrey Morgan
5cba29b9d6 JSON mode: add `"format"` as an api parameter (#1051)
* add `"format": "json"` as an API parameter
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-09 16:44:02 -08:00
Daniel Reis
d17730356a Removed inline parse model path 2023-11-09 22:44:26 +00:00
Daniel Reis
32d79a6eea Using 'GetShortTagname' method instead 2023-11-09 22:40:37 +00:00
Bruce MacDonald
5b39503bcd document specifying multiple stop params (#1061) 2023-11-09 13:16:26 -08:00
Bruce MacDonald
1ae84bc2a2 skip gpu if less than 2GB VRAM are available (#1059) 2023-11-09 13:16:16 -08:00
Bruce MacDonald
db8bf336fc Update README.md 2023-11-09 12:53:24 -08:00
Nick Anderson
d77e094a90 Added gptel to list of integrations (#1062) 2023-11-09 12:52:36 -08:00
Matt Williams
dd3dc47ddb Merge pull request #992 from aashish2057/aashish2057/langchainjs_doc_update 2023-11-09 05:08:31 -08:00
Michael Yang
c5e1bbabda instead of static number of parameters for each model family, get the real number from the tensors (#1022)
* parse tensor info

* refactor decoder

* return actual parameter count

* explicit rounding

* s/Human/HumanNumber/
2023-11-08 17:55:46 -08:00
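A hedged Go sketch of counting parameters from tensor shapes; the Tensor type and humanNumber helper are hypothetical stand-ins for the decoder's real types.

```go
package main

import "fmt"

// Tensor is a hypothetical stand-in for decoded GGUF tensor metadata.
type Tensor struct {
	Name  string
	Shape []uint64
}

// parameterCount sums the element counts of all tensors instead of relying on
// a static number per model family.
func parameterCount(tensors []Tensor) uint64 {
	var total uint64
	for _, t := range tensors {
		n := uint64(1)
		for _, dim := range t.Shape {
			n *= dim
		}
		total += n
	}
	return total
}

// humanNumber renders a rounded, human-readable count (e.g. "7B").
func humanNumber(n uint64) string {
	switch {
	case n >= 1e9:
		return fmt.Sprintf("%.0fB", float64(n)/1e9)
	case n >= 1e6:
		return fmt.Sprintf("%.0fM", float64(n)/1e6)
	default:
		return fmt.Sprintf("%d", n)
	}
}

func main() {
	tensors := []Tensor{{Name: "tok_embeddings.weight", Shape: []uint64{32000, 4096}}}
	fmt.Println(humanNumber(parameterCount(tensors)))
}
```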
Bruce MacDonald
a49d6acc1e add a complete /generate options example (#1035) 2023-11-08 16:44:36 -08:00
Moritz Poldrack
6e9bcdb9b3 progressbar: make start and end seamless (#1042) 2023-11-08 16:42:40 -08:00
Matt Williams
13086363bd Update as per bmacd
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-08 18:09:05 -06:00
Bruce MacDonald
ec2a31e9b3 support raw generation requests (#952)
- add the optional `raw` generate request parameter to bypass prompt formatting and response context
- add raw request to docs
2023-11-08 14:05:02 -08:00
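For illustration, a small Go program that sends a raw generate request to a local server; the model name and the prompt template are assumptions, only the `raw` and `stream` fields come from the commits in this range.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// "raw": true bypasses prompt formatting; "stream": false asks for a
	// single JSON response instead of a stream of chunks.
	body, _ := json.Marshal(map[string]any{
		"model":  "llama2",
		"prompt": "[INST] why is the sky blue? [/INST]",
		"raw":    true,
		"stream": false,
	})

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```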
Amith Koujalgi
ec84c02d54 Add Ollama4j Java library to the list of community libraries (#1044) 2023-11-08 11:04:32 -08:00
Kevin Hermawan
2a88b66bc9 Add Ollamac to community integrations (#1043) 2023-11-08 11:01:09 -08:00
Jeffrey Morgan
2d0faea96c clean up README.md 2023-11-08 00:03:29 -08:00
Jeffrey Morgan
637142181a clean up README.md 2023-11-07 23:52:31 -08:00
Matt Williams
bcbff421c9 Merge pull request #1023 from jmorganca/mattw/wherearemodelsfaq 2023-11-07 17:59:54 -08:00
thealhu
1359d6cf3b Fix sudo variable in install.sh (#1034)
One occurrence of sudo was not replaced with the sudo variable.
2023-11-07 09:59:57 -08:00
Omar Magdy
6e2d0224d9 Added logseq ollama plugin (#1029) 2023-11-07 09:58:13 -08:00
Ikko Eltociear Ashimine
921406f721 Update client.py (#1026)
recieve -> receive
2023-11-07 09:55:47 -08:00
Michael Yang
c7047d7353 Merge pull request #959 from jmorganca/mxyng/example-k8s 2023-11-07 10:43:21 -06:00
Matt Williams
1d155caba3 docs: clarify where the models are stored in the faq
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-06 14:38:49 -08:00
Michael Yang
866324b9a5 Merge pull request #943 from tjbck/patch-1
doc: categorised community integrations + added ollama-webui
2023-11-06 11:35:39 -08:00
Michael Yang
145e060855 Apply suggestions from code review
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-06 11:32:23 -08:00
Michael Yang
146072113d Merge pull request #993 from jmorganca/mxyng/cleanup
cleanup upload and download errors
2023-11-06 11:32:12 -08:00
Timothy Jaeryang Baek
33d31d1b56 Merge branch 'main' into patch-1 2023-11-06 14:27:02 -05:00
Dr. David A. Kunz
274c6cbf4c Added gen.nvim to community integrations (#996) 2023-11-06 10:51:41 -08:00
Elton Renda
7ebbd89bbf add hass-ollama-conversation (#999) 2023-11-06 10:50:35 -08:00
Lars Grammel
9079b1bb6d Add ModelFusion community integration (#1020) 2023-11-06 10:46:16 -08:00
Timothy Jaeryang Baek
6febde7200 Merge branch 'main' into patch-1 2023-11-04 19:12:18 -05:00
pepperoni21
325cfcd9ff Added ollama-rs to community integrations (#995)
Co-authored-by: pepperoni21 <pepperoni2100@gmail.com>
2023-11-04 14:51:29 -07:00
Jeffrey Morgan
639d0fd070 Update README.md 2023-11-04 12:24:24 -07:00
Jeffrey Morgan
e21579a0f1 Restore system prompt on requests 2023-11-03 17:26:45 -07:00
Jeffrey Morgan
c44b619428 remove unused fmt.Println 2023-11-03 17:24:58 -07:00
Michael Yang
434a6f9d46 return last error 2023-11-03 16:49:51 -07:00
aashish2057
b13586cc72 update langchainjs doc 2023-11-03 18:45:19 -05:00
Jeffrey Morgan
17678b7225 Restore system prompt on requests and default num_keep to 0 2023-11-03 13:25:25 -07:00
Michael Yang
84725ec7e3 refactor part reset 2023-11-03 09:20:32 -07:00
Bruce MacDonald
6109bebba6 reformat api docs for more examples (#972) 2023-11-03 10:57:00 -04:00
Noah Gitsham
8ae8c9fa8c Remove duplicate "install" in GPU support warning (#984) 2023-11-03 00:45:14 -07:00
Noah Gitsham
f39daff461 Add missing "be" to GPU support warning message (#983) 2023-11-02 18:37:12 -07:00
Jeffrey Morgan
c50b01bc21 check request.Context for initial system prompt 2023-11-02 18:17:00 -07:00
Bruce MacDonald
b9dc875401 remove modelfile context deprecated in v0.0.7 (#974) 2023-11-02 20:52:56 -04:00
Jeffrey Morgan
06589a3b30 Set NumKeep to 4 by default (#982) 2023-11-02 17:26:11 -07:00
Michael Yang
1fd511e661 Merge pull request #975 from jmorganca/mxyng/downloads
update downloads to use retry wrapper
2023-11-02 16:12:48 -07:00
Michael Yang
c01bbe94fd Merge pull request #979 from jmorganca/mxyng/num-keep
update default NumKeep
2023-11-02 15:48:44 -07:00
Jeffrey Morgan
1beb5645a9 only use system prompt if context is not provided (#978) 2023-11-02 15:48:02 -07:00
Michael Yang
6db3691b8f update default NumKeep 2023-11-02 15:47:35 -07:00
Michael Yang
fe5a872444 fix upload 2023-11-02 13:25:58 -07:00
Michael Yang
d39709260f download with retry 2023-11-02 13:16:11 -07:00
Michael Yang
60bb3c03a1 use http.Method 2023-11-02 13:12:45 -07:00
Jeffrey Morgan
2e53704685 default rope params to 0 for new models (#968) 2023-11-02 08:41:30 -07:00
Michael Yang
527f9a7975 Merge pull request #966 from jmorganca/mxyng/fix-log 2023-11-01 17:49:10 -07:00
Michael Yang
c4cc738cbf fix log 2023-11-01 17:18:11 -07:00
Michael Yang
2c6189f4fe Merge pull request #750 from jmorganca/mxyng/concurrent-uploads
concurrent uploads
2023-11-01 15:00:01 -07:00
Michael Yang
dccac8c8fa k8s example 2023-11-01 14:52:58 -07:00
Michael Yang
c05ab9a86e Merge pull request #965 from jmorganca/mxyng/go-mod-tidy
go mod tidy
2023-11-01 11:55:43 -07:00
Michael Yang
f42f3d9b27 go fmt 2023-11-01 11:55:08 -07:00
Michael Yang
341fb7e35f go mod tidy 2023-11-01 11:54:25 -07:00
Michael
f31961637f Update README.md 2023-11-01 12:20:55 -04:00
Michael Yang
ec3614812a Merge pull request #960 from jmorganca/mxyng/fix-tautology 2023-11-01 08:30:49 -07:00
Michael Yang
f14969314a Merge pull request #958 from jmorganca/mxyng/append-ld-library-path 2023-11-01 08:30:38 -07:00
Bruce MacDonald
1fb9288661 notify that the ollama api is available after linux install (#954) 2023-11-01 11:28:26 -04:00
Matt Williams
01a03caa20 Merge pull request #956 from jmorganca/mattw/apidocupdate 2023-10-31 21:43:11 -07:00
Michael Yang
bf6786bb39 fix tautology 2023-10-31 20:49:48 -07:00
Michael Yang
642128b75a append LD_LIBRARY_PATH 2023-10-31 15:54:49 -07:00
Matt Williams
f21bd6210d docs: clarify and clean up API docs
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 13:11:33 -07:00
Matt Williams
80362fedce better readme
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 12:40:46 -07:00
Matt Williams
5757925060 add a gif
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 11:52:01 -07:00
Michael
4512301756 Update README.md 2023-10-31 13:25:36 -04:00
Matt Williams
2236a93efc docs: add examples using bash to compare models
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 09:12:39 -07:00
Matt Williams
ad88799411 Merge pull request #949 from jmorganca/matt/fixPrivateGPT
fix: private gpt example was broken due to changes in chroma
2023-10-30 17:17:00 -07:00
Bruce MacDonald
0818b5e318 readline windows terminal support (#950)
- update the readline package to have basic support on windows; this is not full feature parity with the unix cli yet
2023-10-30 16:18:12 -04:00
Matt Williams
1df6100c77 Update examples/langchain-python-rag-privategpt/privateGPT.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:48:17 -07:00
Matt Williams
5c48fe1fb0 Update examples/langchain-python-rag-privategpt/constants.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:47:56 -07:00
Dirk Loss
874bb31986 Fix conversion command for gptneox (#948) 2023-10-30 14:34:29 -04:00
Matt Williams
f7856a57eb fix: private gpt example was broken due to changes in chroma
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-30 10:56:25 -07:00
Bruce MacDonald
f9a4281124 clean up: remove server functions from client (#937) 2023-10-30 11:10:18 -04:00
Timothy Jaeryang Baek
96da0792e6 doc: OllamaSharp for .NET moved to libraries 2023-10-28 16:18:38 -05:00
Timothy Jaeryang Baek
95d24262fc doc: categorised community integrations + added web-ui 2023-10-28 16:02:13 -05:00
Jeffrey Morgan
8d03bd7b54 remove +build directive in term.go 2023-10-28 09:56:03 -07:00
Jeffrey Morgan
9ec16f0f03 fix formatting when exiting ollama run 2023-10-27 21:26:23 -07:00
Jeffrey Morgan
57a58db1b0 history: update pos after compact 2023-10-27 20:38:03 -07:00
Jeffrey Morgan
2d75a4537c close input channel when receiving io.EOF 2023-10-27 20:26:04 -07:00
Jeffrey Morgan
4748609611 Don't quit ioloop on NUL character (#940)
* dont quit ioloop on 0 rune

* check for closed channel

* remove unused error on `Close()`
2023-10-27 20:01:48 -07:00
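A minimal Go sketch of the loop behavior described above: ignore a NUL rune instead of quitting, and stop only when the input channel is closed. The channel-based loop is illustrative, not the real readline code.

```go
package main

import "fmt"

// readLoop drains an input channel of runes. A NUL rune is ignored rather than
// treated as end of input; the loop only stops once the channel is closed.
func readLoop(in <-chan rune) string {
	var out []rune
	for r := range in {
		if r == 0 {
			continue // don't quit the loop on a NUL character
		}
		out = append(out, r)
	}
	return string(out)
}

func main() {
	in := make(chan rune, 4)
	in <- 'h'
	in <- 0
	in <- 'i'
	close(in)
	fmt.Println(readLoop(in)) // "hi"
}
```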
Jeffrey Morgan
c0dcea1398 Update faq.md 2023-10-27 18:29:00 -07:00
Michael Yang
115fc56eb7 calculate and verify md5 checksum 2023-10-27 17:07:33 -07:00
Michael Yang
186f685224 retry PUT 2023-10-27 17:07:33 -07:00
Michael Yang
12efcbb057 comments 2023-10-27 17:07:33 -07:00
Michael Yang
4e09aab8b9 concurrent uploads 2023-10-27 17:07:33 -07:00
Jeffrey Morgan
3a1ed9ff70 restore building runner with AVX on by default (#900) 2023-10-27 12:13:44 -07:00
Bruce MacDonald
6d283882b1 catch insufficient permissions nvidia err (#934) 2023-10-27 12:42:40 -04:00
Bruce MacDonald
5c3491f425 allow for a configurable ollama model storage directory (#897)
* allow for a configurable ollama models directory

- set OLLAMA_MODELS in the environment that ollama is running in to change where model files are stored
- update docs

Co-Authored-By: Jeffrey Morgan <jmorganca@gmail.com>
Co-Authored-By: Jay Nakrani <dhananjaynakrani@gmail.com>
Co-Authored-By: Akhil Acharya <akhilcacharya@gmail.com>
Co-Authored-By: Sasha Devol <sasha.devol@protonmail.com>
2023-10-27 10:19:59 -04:00
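A minimal Go sketch of resolving the storage directory; only the OLLAMA_MODELS override comes from the commit above, the fallback path under the home directory is an assumption.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// modelsDir returns the configured model storage directory. The fallback
// path here is illustrative; the real default is whatever ollama ships with.
func modelsDir() (string, error) {
	if dir := os.Getenv("OLLAMA_MODELS"); dir != "" {
		return dir, nil
	}
	home, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(home, ".ollama", "models"), nil
}

func main() {
	dir, err := modelsDir()
	if err != nil {
		panic(err)
	}
	fmt.Println("storing models under:", dir)
}
```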
James Braza
e5d1ce4dde Tweaks to README.md (#906)
* Mentioned Docker Hub in docs
* Consolidated brew installs to one line
2023-10-27 00:10:23 -07:00
Bruce MacDonald
2665f3c28e offload 75% of available vram to improve stability (#921) 2023-10-26 20:49:55 -04:00
Patrick Devine
a79f030e75 add bracketed paste mode (#922) 2023-10-26 15:57:00 -07:00
Michael Yang
9bc5864a03 Merge pull request #918 from jmorganca/mxyng/fix-out-of-space
fix(download): no retry when out of space
2023-10-26 12:24:20 -07:00
Michael Yang
b88cc0fac9 Merge pull request #916 from jmorganca/mxyng/fix-client-host
fix(client): trim trailing slash
2023-10-26 12:24:12 -07:00
Patrick Devine
5b2cf16397 fix docker build annotations (#917) 2023-10-26 12:00:33 -07:00
Michael Yang
910816a532 fix(download): no retry when out of space 2023-10-26 11:34:07 -07:00
Michael Yang
28c3f288e2 client: fix trailing slash 2023-10-26 11:09:38 -07:00
Patrick Devine
deeac961bb new readline library (#847) 2023-10-25 16:41:18 -07:00
Jeffrey Morgan
49443e7da5 fix typo in README.md 2023-10-25 16:19:27 -07:00
Ajay Kemparaj
bb8464c0d2 update golang.org/x/net fixes CVE-2023-3978,CVE-2023-39325,CVE-2023-44487 (#855) 2023-10-25 16:17:24 -07:00
Michael Yang
daa5bb4473 Merge pull request #907 from jmorganca/mxyng/linux
update linux.md
2023-10-25 15:03:34 -07:00
Michael Yang
92119de9d8 update linux.md 2023-10-25 14:57:50 -07:00
Michael Yang
53b0ba8d43 Merge pull request #893 from jmorganca/mxyng/update-faq
update faq
2023-10-24 16:02:35 -07:00
Michael Yang
db342691f9 Update docs/faq.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-24 13:59:33 -07:00
Bruce MacDonald
cecf83141e Linux uninstall instructions (#894) 2023-10-24 14:07:05 -04:00
Michael Yang
a5a2adf1ec update faq 2023-10-24 10:54:16 -07:00
Jeffrey Morgan
b0c9cd0f3b fix metal assertion errors 2023-10-24 00:32:36 -07:00
Jeffrey Morgan
77f61c6301 update submodule commit 2023-10-24 00:30:27 -07:00
Jeffrey Morgan
f3604534e5 update submodule commit 2023-10-23 23:59:12 -07:00
Jeffrey Morgan
914428351a Update import.md 2023-10-23 17:44:53 -07:00
Jeffrey Morgan
9afea9e3b9 Update import.md
Separate GGUF and PyTorch guides
2023-10-23 17:42:17 -07:00
Bruce MacDonald
c039432b5c add current user to ollama group on install (#772) 2023-10-23 17:06:31 -04:00
Michael Yang
c345b4ca7c Merge pull request #884 from jmorganca/mxyng/update-submodules
bump submodules
2023-10-23 11:27:38 -07:00
Michael Yang
0c7a00a264 bump submodules
pin to 9e70cc03229df19ca2d28ce23cc817198f897278 for now since
438c2ca83045a00ef244093d27e9ed41a8cb4ea9 is breaking
2023-10-23 11:17:59 -07:00
Michael Yang
36c160f1c3 Merge pull request #881 from jmorganca/mxyng/ggufv3
ggufv3
2023-10-23 10:50:45 -07:00
Michael Yang
b66bcaa582 Merge pull request #883 from jmorganca/mxyng/logs
update default log target
2023-10-23 10:50:29 -07:00
Michael Yang
c9167494cb update default log target 2023-10-23 10:44:50 -07:00
Michael Yang
125d0a013a ggufv3
ggufv3 adds support for big endianness, mainly for s390x architecture.
while that's not currently supported for ollama, the change is simple.

loosen version check to be more forward compatible. unless specified,
gguf versions other than v1 will be decoded into v2.
2023-10-23 09:35:49 -07:00
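A tiny Go sketch of the loosened version check described above; the containerGGUF type is a hypothetical stand-in for the real decoder.

```go
package main

import "fmt"

// containerGGUF is a hypothetical stand-in for the GGUF container decoder.
type containerGGUF struct {
	Version uint32
}

// newDecoder picks a decoder based on the GGUF version. Only v1 gets its own
// path; anything else (v2, v3, future versions) is decoded with the v2 logic,
// which keeps the check forward compatible.
func newDecoder(c containerGGUF) string {
	switch c.Version {
	case 1:
		return "gguf v1 decoder"
	default:
		return "gguf v2 decoder"
	}
}

func main() {
	for _, v := range []uint32{1, 2, 3, 4} {
		fmt.Printf("version %d -> %s\n", v, newDecoder(containerGGUF{Version: v}))
	}
}
```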
Richard Awoyemi
ba2da6ceaa Added a minimalist React UI for Ollama models to the community contributions.md (#870) 2023-10-23 10:44:39 -04:00
Jeffrey Morgan
ccff9ca09c Update README.md 2023-10-21 11:58:10 -04:00
Jeffrey Morgan
436a5be49c Update README.md 2023-10-21 11:57:32 -04:00
Matt Williams
cc0bf96398 Merge pull request #829 from jmorganca/mattw/example-summarize-news
added python rag news summary
2023-10-20 21:03:16 -07:00
Michael Yang
386169205c update runtime options (#864) 2023-10-20 21:17:14 -04:00
Michael Yang
0d6342a882 Merge pull request #863 from jmorganca/mxyng/nil-pointer
fix: nil pointer dereference
2023-10-20 17:23:37 -07:00
Michael Yang
75bee074b6 fix: nil pointer dereference 2023-10-20 16:55:24 -07:00
Michael Yang
533d76368c Merge pull request #859 from jmorganca/mxyng/fix-hostname
fix: ollama host for hostname
2023-10-20 11:40:56 -07:00
Michael Yang
459f4a7889 fix: ollama host for hostname 2023-10-20 11:32:41 -07:00
Matt Williams
25c63c91d8 Update README.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-19 13:52:40 -07:00
Jeffrey Morgan
cbfff4f868 update dependencies in app/ 2023-10-19 15:52:41 -04:00
Jeffrey Morgan
7ed5a39bc7 simpler check for model loading compatibility errors 2023-10-19 14:50:49 -04:00
Michael Yang
cc1d03f4ec Merge pull request #841 from jmorganca/mxyng/cleanup-cmd-args 2023-10-19 11:22:40 -07:00
Michael Yang
846f593dbf Merge pull request #828 from jmorganca/mxyng/template-parameters
image: show parameters
2023-10-19 09:31:31 -07:00
Michael Yang
0a53da03fd Merge pull request #843 from jmorganca/mxyng/request-validation
basic request validation
2023-10-19 09:30:45 -07:00
Michael Yang
2ce1793a1d go fmt 2023-10-19 09:21:51 -07:00
Michael Yang
e1c5be24e7 check json eof 2023-10-19 09:21:51 -07:00
Michael Yang
2ad8a074ac generate: set created_at
move the empty response so it's more visible
2023-10-19 09:21:51 -07:00
Michael Yang
7e547c6833 s/message/error/ 2023-10-19 09:21:04 -07:00
Michael Yang
689842b9ff request: bad request when model missing fields 2023-10-19 09:21:04 -07:00
Michael Yang
a19d47642e models: rm workDir from CreateModel
unused after removing EMBED
2023-10-19 09:21:04 -07:00
Jeffrey Morgan
a7dad24d92 add error for falcon and starcoder vocab compatibility (#844)
add error for falcon and starcoder vocab compatibility
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-19 12:18:31 -04:00
Jeffrey Morgan
6b213216d5 Update import.md 2023-10-19 12:17:36 -04:00
Bruce MacDonald
fe6f3b48f7 do not reload the running llm when runtime params change (#840)
- only reload the running llm if the model has changed, or the options for loading the running model have changed
- rename loaded llm to runner to differentiate from loaded model image
- remove logic which keeps the first system prompt in the generation context
2023-10-19 10:39:58 -04:00
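A hedged Go sketch of the reload check: the runner is swapped out only when the model or its load-time options change. The LoadOptions fields are illustrative, not the actual option set.

```go
package main

import (
	"fmt"
	"reflect"
)

// LoadOptions is a hypothetical subset of options that affect how a model is
// loaded (as opposed to per-request sampling options).
type LoadOptions struct {
	NumGPU int
	NumCtx int
}

type runner struct {
	model string
	opts  LoadOptions
}

// needsReload reports whether the running model must be swapped out: only when
// the model name or the load-time options differ, not for per-request params.
func (r *runner) needsReload(model string, opts LoadOptions) bool {
	return r == nil || r.model != model || !reflect.DeepEqual(r.opts, opts)
}

func main() {
	var active *runner
	fmt.Println(active.needsReload("llama2", LoadOptions{NumGPU: 1, NumCtx: 2048})) // true: nothing loaded

	active = &runner{model: "llama2", opts: LoadOptions{NumGPU: 1, NumCtx: 2048}}
	fmt.Println(active.needsReload("llama2", LoadOptions{NumGPU: 1, NumCtx: 2048})) // false: reuse runner
}
```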
Michael Yang
36c88cb9db cmd: set ExactArgs 2023-10-18 14:40:48 -07:00
Michael Yang
235e43d7f6 Merge pull request #833 from discovertomorrow/leadingspace
Fix Issue with Leading Whitespaces in Decoded Context
2023-10-18 13:52:48 -07:00
Arne Müller
730996e530 use TrimPrefix instead of TrimLeft 2023-10-18 22:51:30 +02:00
Arne Müller
ce6197a8e0 removed redundant strings.CutPrefix from Decode 2023-10-18 22:47:20 +02:00
Arne Müller
46b9953f32 use strings.TrimLeft to remove spaces 2023-10-18 22:41:19 +02:00
Michael Yang
4dcceeffb7 let the template do the work 2023-10-18 13:12:00 -07:00
Michael Yang
019e4a4558 image: show parameters 2023-10-18 13:12:00 -07:00
Michael Yang
627d04d927 Merge pull request #827 from jmorganca/mxyng/template-adapters
model: native gotemplate adapter template
2023-10-18 13:11:25 -07:00
Michael Yang
940e8ebec3 Merge pull request #826 from jmorganca/mxyng/template-system
show: no template system if empty
2023-10-18 13:11:09 -07:00
Bruce MacDonald
565648f3f7 relay CUDA errors to the client (#825) 2023-10-18 15:36:56 -04:00
Arne Müller
90c49bed57 moved removal of leading space into Predict 2023-10-18 20:08:26 +02:00
Michael Yang
3a2477174f Merge pull request #822 from ggozad/fix-tags-api
Fix /api/tags for no models.
2023-10-18 09:34:00 -07:00
Yiorgis Gozadinos
8c6c2cbc8c When the .ollama folder is broken or there are no models return an empty list on /api/tags 2023-10-18 08:23:20 +02:00
Arne Müller
5dc0cff459 fix whitespace removal 2023-10-18 08:15:27 +02:00
Matt Williams
c5c8b4b16a added python rag news summary
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-17 16:41:28 -07:00
Michael Yang
8299bf76ed model: native gotemplate adapter template 2023-10-17 15:28:38 -07:00
Michael Yang
ee4979e510 show: no template system if empty 2023-10-17 15:25:43 -07:00
Michael Yang
08b0e04f40 Merge pull request #813 from jmorganca/mxyng/llama
refactor llm/llama.go
2023-10-17 14:05:58 -07:00
Michael Yang
b36b0b71f8 use cut prefix 2023-10-17 14:01:39 -07:00
Michael Yang
094df37563 remove unused struct 2023-10-17 14:01:38 -07:00
Bruce MacDonald
f3648fd206 Update llama.cpp gguf to latest (#710) 2023-10-17 16:55:16 -04:00
Bruce MacDonald
bd93a94abd fix MB VRAM log output (#824) 2023-10-17 15:35:16 -04:00
Michael Yang
f55bdb6f10 Merge pull request #799 from deichbewohner/jsonmarshaling
Fix JSON Marshal Escaping for Special Characters
2023-10-17 08:46:02 -07:00
Michael Yang
2870a9bfc8 Merge pull request #812 from jmorganca/mxyng/fix-format-string
fix: wrong format string type
2023-10-17 08:40:49 -07:00
Michael Yang
c031c211d1 Merge pull request #809 from jmorganca/mxyng/fix-gpu
fix: regression unsupported metal types
2023-10-17 08:40:40 -07:00
Andreas Wäscher
68391b0055 Add OllamaSharp for .NET (#811) 2023-10-17 11:31:48 -04:00
Alexander F. Rødseth
b7e137323a Fix a typo (#818) 2023-10-17 09:00:15 -04:00
Arne Müller
8fa3f366ad Removed newline trimming and used buffer directly in POST request. 2023-10-17 08:17:35 +02:00
Michael Yang
fddb303f23 fix: format string wrong type 2023-10-16 16:14:28 -07:00
Michael Yang
ad5ee20c7b Merge pull request #794 from ggozad/add_oterm
Add oterm to community integrations
2023-10-16 15:51:55 -07:00
Michael Yang
785b4eb5bf Merge branch 'main' into add_oterm 2023-10-16 15:51:44 -07:00
Michael Yang
16ede1b30b Merge pull request #801 from s-kostyaev/add-ellama-community-integration
Add ellama community integration
2023-10-16 15:51:25 -07:00
Michael Yang
17d6bbbb2a Merge pull request #810 from vieux/patch-1
Update install.sh
2023-10-16 15:50:57 -07:00
Victor Vieux
6481b7f34c Update install.sh, avoid ARCH: unbound variable 2023-10-16 14:40:24 -07:00
Michael Yang
cb4a80b693 fix: regression unsupported metal types
omitting `--n-gpu-layers` means metal is used on macos, which isn't correct
since ollama uses `num_gpu=0` to explicitly disable gpu for file types
that are not implemented in metal
2023-10-16 14:37:20 -07:00
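A minimal sketch, assuming the runner is launched with command-line flags, of always passing `--n-gpu-layers` so that `num_gpu=0` really disables the GPU instead of falling back to Metal's default.

```go
package main

import "fmt"

// runnerArgs builds llama.cpp server arguments. Passing --n-gpu-layers even
// when it is zero matters on macOS: omitting the flag would let Metal kick in
// for quantization types the Metal backend does not implement.
func runnerArgs(modelPath string, numGPU int) []string {
	return []string{
		"--model", modelPath,
		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
	}
}

func main() {
	fmt.Println(runnerArgs("/models/f32.bin", 0)) // explicitly CPU-only
}
```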
Bruce MacDonald
68d7255bd3 show request to server rather than local check (#778) 2023-10-16 17:27:25 -04:00
Michael Yang
9ef2fce33a Merge pull request #768 from jmorganca/mxyng/bytes
fix memory check
2023-10-16 12:42:41 -07:00
Michael Yang
43eaba3d60 Merge pull request #787 from jmorganca/mxyng/server-version2
server: print version on start
2023-10-16 09:59:30 -07:00
Michael Yang
1af493c5a0 server: print version on start 2023-10-16 09:59:14 -07:00
Bruce MacDonald
a0c3e989de deprecate modelfile embed command (#759) 2023-10-16 11:07:37 -04:00
Sergey Kostyaev
7af0fdce48 add ellama community integration 2023-10-16 16:39:10 +07:00
Arne Müller
ee94693b1a handling unescaped json marshaling 2023-10-16 11:15:55 +02:00
Yiorgis Gozadinos
731dbdc1a5 Add oterm to community integrations 2023-10-15 23:21:17 +02:00
Jeffrey Morgan
06bcfbd629 cleanup docker section in readme 2023-10-15 02:33:25 -04:00
Jeffrey Morgan
7d7c2510f8 add docker exec command to readme 2023-10-15 02:31:15 -04:00
Jeffrey Morgan
f9b2f999ac update readme with docker setup and link to import.md 2023-10-15 02:23:03 -04:00
Jeffrey Morgan
c416087339 import.md: formatting and spelling 2023-10-15 01:39:46 -04:00
Jeffrey Morgan
6002cebd2c import.md: convert and quantize docs 2023-10-15 00:11:51 -04:00
Jeffrey Morgan
212bdc541c import.md: model architectures spelling 2023-10-15 00:07:58 -04:00
Jeffrey Morgan
dca6686273 add steps for creating a Modelfile and more example commands to import.md 2023-10-15 00:05:50 -04:00
Jeffrey Morgan
598621afab add push script for docker images 2023-10-14 14:24:39 -04:00
Matt Williams
6479f49c09 Merge pull request #773 from jmorganca/mattw/howtoquant
add how to quantize doc
2023-10-14 08:29:39 -07:00
Matt Williams
b2974a7095 applied mikes comments
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-14 08:29:24 -07:00
Jeffrey Morgan
832b4db9d4 Use correct url for auto updates 2023-10-13 19:04:42 -04:00
Bruce MacDonald
c43873f33b check update response (#785) 2023-10-13 18:05:46 -04:00
Michael Yang
11d82d7b9b update checkvram 2023-10-13 14:47:29 -07:00
Michael Yang
36fe2deebf only check system memory on macos 2023-10-13 14:47:29 -07:00
Michael Yang
4a8931f634 check total (system + video) memory 2023-10-13 14:47:29 -07:00
Michael Yang
bd6e38fb1a refactor memory check 2023-10-13 14:47:29 -07:00
Michael Yang
92189a5855 fix memory check 2023-10-13 14:47:29 -07:00
Michael Yang
d790bf9916 Merge pull request #783 from jmorganca/mxyng/fix-gpu-offloading
fix: offloading on low end GPUs
2023-10-13 14:36:44 -07:00
Michael Yang
35afac099a do not use gpu binary when num_gpu == 0 2023-10-13 14:32:12 -07:00
Michael Yang
811c3d1900 no gpu if vram < 2GB 2023-10-13 14:32:12 -07:00
Bruce MacDonald
3553d10769 check for newer updates (#784)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-13 17:29:46 -04:00
Bruce MacDonald
6fe178134d improve api error handling (#781)
- remove new lines from llama.cpp error messages relayed to client
- check api option types and return error on wrong type
- change num layers from 95% VRAM to 92% VRAM
2023-10-13 16:57:10 -04:00
Jeffrey Morgan
d890890f66 use lower glibc versions in Dockerfile.build 2023-10-13 01:06:19 -04:00
Jeffrey Morgan
89ba19feca use Go 1.21.3 in Dockerfile 2023-10-12 23:23:12 -04:00
Jeffrey Morgan
6f58c77671 update Dockerfile.build for linux binary builds 2023-10-12 22:14:20 -04:00
Matt Williams
3c975f898f update doc to refer to docker image
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:57:50 -07:00
Matt Williams
9245c8a1df add how to quantize doc
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:34:57 -07:00
Michael Yang
7a537cdca9 Merge pull request #770 from jmorganca/mxyng/fix-download
fix download
2023-10-12 12:56:43 -07:00
Michael Yang
257ffeb997 fix download 2023-10-12 12:52:43 -07:00
Matt Williams
9b513bb6b1 Merge pull request #753 from jmorganca/mattw/examplereorg
rename the examples to be more descriptive
2023-10-12 11:24:12 -07:00
Matt Williams
042100f797 final rename
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 11:23:41 -07:00
Bruce MacDonald
7804b8fab9 validate api options fields from map (#711) 2023-10-12 11:18:11 -04:00
Bruce MacDonald
56497663c8 relay model runner error message to client (#720)
* give direction to user when runner fails
* also relay errors from timeout
* increase timeout to 3 minutes
2023-10-12 11:16:37 -04:00
Matt Williams
e1afcb8af2 simple gen to simple
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:29:07 -07:00
Matt Williams
385eeea357 remove with
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:26:11 -07:00
Matt Williams
8a41b244e8 add golang gen
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:20:50 -07:00
Jeffrey Morgan
92578798bb fix relative links in README.md 2023-10-11 19:24:06 -04:00
Michael Yang
788637918a Merge pull request #760 from jmorganca/mxyng/more-downloads
Mxyng/more downloads
2023-10-11 14:33:10 -07:00
Michael Yang
c413a55093 download: handle inner errors 2023-10-11 14:15:30 -07:00
Michael Yang
630bb75d2a dynamically size download parts based on file size 2023-10-11 14:10:25 -07:00
Michael Yang
a2055a1e93 update download 2023-10-11 14:10:25 -07:00
Michael Yang
b599946b74 add format bytes 2023-10-11 14:08:23 -07:00
Michael Yang
aca2d65b82 Merge pull request #757 from jmorganca/mxyng/format-time
cleanup format time
2023-10-11 11:12:29 -07:00
Michael Yang
b5e08e3373 cleanup format time 2023-10-11 11:09:27 -07:00
Bruce MacDonald
274d5a5fdf optional parameter to not stream response (#639)
* update streaming request accept header
* add optional stream param to request bodies
2023-10-11 12:54:27 -04:00
Matt Williams
fc6b49be32 add ts alternate to python langchain simplegen
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 09:50:15 -07:00
Bruce MacDonald
77295f716e prevent waiting on exited command (#752)
* prevent waiting on exited command
* close llama runner once
2023-10-11 12:32:13 -04:00
Matt Williams
615f7d1dea cleanup readme.
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 06:13:29 -07:00
Matt Williams
cdf5e106ae rename dirs
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 06:10:24 -07:00
Matt Williams
a85329f59a rename the models to be more descriptive
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-10 17:40:02 -07:00
Bruce MacDonald
f2ba1311aa improve vram safety with 5% vram memory buffer (#724)
* check free memory not total
* wait for subprocess to exit
2023-10-10 16:16:09 -04:00
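A small Go sketch of the arithmetic: work from free VRAM, keep a 5% buffer, and skip the GPU entirely below roughly 2 GB (the floor mentioned in a nearby commit). The constants are illustrative.

```go
package main

import "fmt"

const (
	minVRAM    = 2 << 30 // skip the GPU entirely below ~2 GiB of free VRAM
	vramBuffer = 0.05    // keep a 5% safety buffer when offloading layers
)

// usableVRAM works from free (not total) VRAM and reserves a small buffer so
// the runner does not allocate right up to the limit.
func usableVRAM(freeBytes uint64) uint64 {
	if freeBytes < minVRAM {
		return 0 // fall back to CPU-only
	}
	return uint64(float64(freeBytes) * (1 - vramBuffer))
}

func main() {
	fmt.Println(usableVRAM(8 << 30)) // roughly 95% of 8 GiB
	fmt.Println(usableVRAM(1 << 30)) // 0: below the 2 GiB floor
}
```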
Jeffrey Morgan
65dcd0ce35 always cleanup blob download (#747) 2023-10-10 13:12:29 -04:00
Michael Yang
0040f543a2 Merge pull request #743 from jmorganca/mxyng/http-proxy
handle upstream proxies
2023-10-10 09:59:06 -07:00
Matt Williams
767f9bdbbb Merge pull request #585 from jmorganca/matt/examplementors
add the example for ask the mentors
2023-10-09 13:58:14 -07:00
Costa Alexoglou
f7f5169c94 Update api.md (#741)
Avoid triple backticks being rendered in the visual editor and copied to the clipboard.
2023-10-09 16:01:46 -04:00
Michael Yang
2cfffea02e handle client proxy 2023-10-09 12:33:47 -07:00
Michael Yang
f6e98334e4 handle upstream proxies 2023-10-09 11:42:36 -07:00
Jeffrey Morgan
ab0668293c llm: fix build on amd64 2023-10-06 14:39:54 -07:00
Bruce MacDonald
af4cf55884 not found error before pulling model (#718) 2023-10-06 16:06:20 -04:00
Bruce MacDonald
d6786f2945 add feedback for reading model metadata (#722) 2023-10-06 16:05:32 -04:00
Michael Yang
38dc2f79bc Merge pull request #626 from jmorganca/mxyng/concurrent-downloads
parallel chunked downloads
2023-10-06 13:01:29 -07:00
Michael Yang
cb961c87ca Merge pull request #679 from jamesbraza/modelfile-docs
`Modelfile` syntax highlighting
2023-10-06 12:59:45 -07:00
Michael Yang
0560b28a8d names 2023-10-06 12:56:56 -07:00
Michael Yang
10199c5987 replace done channel with file check 2023-10-06 12:56:56 -07:00
Michael Yang
288814d3e4 fix ref counts 2023-10-06 12:56:43 -07:00
Michael Yang
04733438da check head request response 2023-10-06 12:56:43 -07:00
Michael Yang
711e891f0f fix resumable downloads
glob returns files in lexical order which is not appropriate when
rebuilding the parts list
2023-10-06 12:56:43 -07:00
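A short Go example of why lexical ordering breaks part reassembly, using a hypothetical part-file naming scheme.

```go
package main

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)

// partIndex extracts the numeric suffix from a hypothetical part file name
// such as "blob-partial-10".
func partIndex(name string) int {
	i := strings.LastIndex(name, "-")
	n, _ := strconv.Atoi(name[i+1:])
	return n
}

func main() {
	// Lexical order (what a glob returns) puts part 10 before part 2.
	parts := []string{"blob-partial-1", "blob-partial-10", "blob-partial-2"}
	fmt.Println("lexical:", parts)

	// Rebuilding the parts list needs numeric order instead.
	sort.Slice(parts, func(a, b int) bool {
		return partIndex(parts[a]) < partIndex(parts[b])
	})
	fmt.Println("numeric:", parts)
}
```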
Michael Yang
090d08422b handle unexpected eofs 2023-10-06 12:56:43 -07:00
Michael Yang
5b84404c64 handle concurrent requests for the same blobs 2023-10-06 12:56:43 -07:00
Michael Yang
8544edca21 parallel chunked downloads 2023-10-06 12:56:43 -07:00
Bruce MacDonald
5d22319a2c rename server subprocess (#700)
- this makes it easier to see that the subprocess is associated with ollama
2023-10-06 10:15:42 -04:00
Bruce MacDonald
2130c0708b output type parsed from modelfile (#678) 2023-10-05 14:58:04 -04:00
Patrick Devine
61ff1946e6 revise help text (#706) 2023-10-05 11:36:07 -07:00
Bruce MacDonald
d06bc0cb6e enable q8, q5, 5_1, and f32 for linux gpu (#699) 2023-10-05 12:53:47 -04:00
Alexander F. Rødseth
d104b7e997 Fix go test./... issue: fmt.Println arg list ends with redundant newline (#705) 2023-10-05 11:11:04 -04:00
Bruce MacDonald
9e2de1bd2c increase streaming buffer size (#692) 2023-10-04 14:09:00 -04:00
Jeffrey Morgan
dc87e9c9ae update Dockerfile to pass GOFLAGS 2023-10-03 07:05:15 -07:00
Michael Yang
367cb68dc1 Merge pull request #686 from jmorganca/mxyng/starcoder
decode starcoder
2023-10-02 22:47:19 -07:00
Michael Yang
c02c0cd483 starcoder 2023-10-02 19:56:51 -07:00
Patrick Devine
1852755154 show a default message when license/parameters/system prompt/template aren't specified (#681) 2023-10-02 14:34:52 -07:00
James Braza
6f2ce74231 Got rid of all caps to show it can be lower case 2023-10-02 13:54:27 -07:00
James Braza
6edcc5c79f Using code highlighting syntax around Modelfile 2023-10-02 13:46:05 -07:00
Bruce MacDonald
b1f7123301 clean up num_gpu calculation code (#673) 2023-10-02 14:53:42 -04:00
Bruce MacDonald
1fbf3585d6 Relay default values to llama runner (#672)
* include seed in params for llama.cpp server and remove empty filter for temp

* relay default predict options to llama.cpp

- reorganize options to match predict request for readability

* omit empty stop

---------

Co-authored-by: hallh <hallh@users.noreply.github.com>
2023-10-02 14:53:16 -04:00
Patrick Devine
99d5161e8a don't wordwrap when stdout is redirected or piped (#662) 2023-10-02 11:50:55 -07:00
Michael
ea8380be45 add community project: Chatbot Ollama
add community project: Chatbot Ollama by @ivanfioravanti
2023-10-02 09:04:31 -07:00
Jeffrey Morgan
4f25092dc1 fix build_docker.sh permissions 2023-10-01 16:42:32 -07:00
Jiayu Liu
4fc10acce9 add some missing code directives in docs (#664) 2023-10-01 11:51:01 -07:00
Michael Yang
0a4f21c0a7 fix docker build (#659) 2023-09-30 13:34:01 -07:00
Jeffrey Morgan
9abb66254a docker: fix volume permission errors 2023-09-30 12:32:15 -07:00
Jay Nakrani
1d0ebe67e8 Document response stream chunk delimiter. (#632)
Document response stream chunk delimiter.
2023-09-29 21:45:52 -07:00
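A hedged Go example of consuming the generate stream, assuming each chunk is a JSON object terminated by a newline and carries `response` and `done` fields; the model name and prompt are placeholders.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type chunk struct {
	Response string `json:"response"`
	Done     bool   `json:"done"`
}

func main() {
	body, _ := json.Marshal(map[string]any{"model": "llama2", "prompt": "why is the sky blue?"})
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Each stream chunk is a JSON object followed by a newline, so a line
	// scanner is enough to split the response into parseable pieces. Very
	// long chunks may need a larger scanner buffer (see the maxBufferSize
	// change in the client diff below).
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var c chunk
		if err := json.Unmarshal(scanner.Bytes(), &c); err != nil {
			panic(err)
		}
		fmt.Print(c.Response)
		if c.Done {
			fmt.Println()
		}
	}
}
```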
Bruce MacDonald
a1b2d95f96 remove unused push/pull params (#650) 2023-09-29 17:27:19 -04:00
Michael Yang
c0b1bf7537 Merge pull request #606 from jmorganca/mxyng/install.sh-2
ordered list of install locations
2023-09-29 11:30:46 -07:00
Michael Yang
cdfeb165ca Merge pull request #608 from jmorganca/mxyng/build
update build scripts
2023-09-29 11:30:25 -07:00
Michael Yang
92d454ec5f update build_darwin.sh 2023-09-29 11:29:23 -07:00
Michael Yang
9333b0cc82 Merge pull request #612 from jmorganca/mxyng/prune-empty-directories
prune empty directories
2023-09-29 11:23:39 -07:00
Bruce MacDonald
9771b1ec51 windows runner fixes (#637) 2023-09-29 11:47:55 -04:00
Patrick Devine
76db4a49cf allow the user to cancel generating with ctrl-C (#641) 2023-09-28 17:13:01 -07:00
Luc Stepniewski
4aa0976a2e Added missing return preventing SIGSEGV because of missing resp (#621)
Co-authored-by: Luc Stepniewski <luc@eclipse-fr.com>
2023-09-28 14:25:22 -07:00
Patrick Devine
92c20fdae6 fix error messages for unknown commands in the repl (#611) 2023-09-28 14:19:45 -07:00
Michael Yang
c951da7096 Merge pull request #634 from jmorganca/mxyng/int64
use int64 consistently
2023-09-28 14:17:47 -07:00
Bruce MacDonald
24d82a23a2 do not download updates multiple times (#633) 2023-09-28 15:29:17 -04:00
Michael Yang
f40b3de758 use int64 consistently 2023-09-28 11:07:24 -07:00
Michael
5f4008c296 Update README.md
adding in instruction to run mistral
2023-09-28 09:06:03 -07:00
Aaron Coffey
6ae33d8141 Update modelfile.md to reflect the usage of num_gpu. (#629) 2023-09-28 10:21:21 -04:00
Jeffrey Morgan
c5664c1fef Update faq.md 2023-09-27 13:49:43 -07:00
Bruce MacDonald
958a5a8184 revert fedora cuda version check 2023-09-27 15:12:29 -04:00
Michael Yang
8608eb4760 prune empty directories 2023-09-27 10:58:09 -07:00
Bruce MacDonald
a2b210130f fedora install fixes (#609) 2023-09-27 11:43:47 -04:00
Bruce MacDonald
ed20837f9a Update modelfile.md 2023-09-27 10:38:10 -04:00
James Braza
1db2a61dd0 Added num_predict to the options table (#614) 2023-09-27 10:26:08 -04:00
Jeffrey Morgan
2ded8ab206 use 11.8.0 nvidia dockerfile base image for now 2023-09-26 21:48:41 -07:00
Michael Yang
e6b3648bbf Merge pull request #616 from jmorganca/mxyng/fix-model-name 2023-09-26 20:54:18 -07:00
Michael Yang
0625e805f0 fix model name not matching 2023-09-26 19:50:04 -07:00
Michael Yang
c38ec5befb Merge pull request #598 from jmorganca/mxyng/help-exit
add painter message for exit
2023-09-26 15:17:40 -07:00
Michael Yang
c577721a43 Merge pull request #605 from jmorganca/mxyng/install.sh
do not unload nouveau driver
2023-09-26 09:53:05 -07:00
Michael Yang
29c056ea39 ordered list of install locations 2023-09-26 09:38:11 -07:00
Michael Yang
9fc3bba9cf do not unload nouveau driver 2023-09-26 09:36:54 -07:00
Michael Chiang
7774ed4ae6 Update README.md for linux + cleanup (#601)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-09-25 23:44:53 -07:00
Michael Yang
11f920f209 Merge pull request #599 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 18:24:13 -07:00
Michael Yang
6e6b655956 update install.sh 2023-09-25 18:09:44 -07:00
Michael Yang
110ae89a6c Merge pull request #596 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 17:59:13 -07:00
Michael Yang
5e388f931e check cuda installed before installing 2023-09-25 17:56:43 -07:00
Michael Yang
d5ad41dd7b fix path for wsl user 2023-09-25 17:56:25 -07:00
Michael Yang
d294a11bc9 start service on exit instead of immediately 2023-09-25 17:54:02 -07:00
Michael Yang
93d887e4bc add painter message for exit 2023-09-25 16:30:22 -07:00
Jeffrey Morgan
5306b0269d Update linux.md 2023-09-25 16:10:32 -07:00
Michael Yang
7de0c8345d Merge pull request #595 from jmorganca/mxyng/install.sh
ignore systemctl is-system-running exit code
2023-09-25 15:49:47 -07:00
Michael Yang
1b9dcab3ab ignore systemctl is-system-running exit code 2023-09-25 15:47:45 -07:00
Bruce MacDonald
86279f4ae3 unbound max num gpu layers (#591)
---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-25 18:36:46 -04:00
Michael Yang
b934bf23e6 exit on unknown distro (#594) 2023-09-25 15:30:58 -07:00
Michael Yang
2b8ef455ad Merge pull request #593 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 14:09:40 -07:00
Michael Yang
0c5f47177c update install.sh 2023-09-25 14:01:44 -07:00
Michael Yang
1210db2924 Merge pull request #592 from jmorganca/mxyng/install.sh
fix dkms on debian
2023-09-25 12:59:01 -07:00
Michael Yang
d0854bf1e6 fix dkms on debian 2023-09-25 12:57:25 -07:00
Michael Yang
8396463255 Merge pull request #590 from jmorganca/mxyng/install.sh
fix dkms install
2023-09-25 12:17:31 -07:00
Michael Yang
a027bbf4d7 fix dkms install 2023-09-25 12:16:41 -07:00
Michael Yang
ed94a3dd02 Merge pull request #589 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 11:08:25 -07:00
Michael Yang
f14f62ab3b update install.sh 2023-09-25 11:05:38 -07:00
Jeffrey Morgan
0fb5268496 Update linux.md 2023-09-25 10:06:23 -07:00
Bruce MacDonald
c65edb1506 fix linux installer warning logs (#588) 2023-09-25 11:22:56 -04:00
Twan L
1605af32ec Added a new community project (#574) 2023-09-25 10:40:59 -04:00
Jeffrey Morgan
ee3032ad89 improvements to docs/linux.md 2023-09-24 21:50:07 -07:00
Jeffrey Morgan
5b7a27281d improvements to docs/linux.md 2023-09-24 21:38:23 -07:00
Jeffrey Morgan
d2a784e33e add docs/linux.md 2023-09-24 21:34:44 -07:00
Jeffrey Morgan
413a2e4f91 set DEBIAN_FRONTEND=noninteractive correctly 2023-09-24 20:35:42 -07:00
Matt Williams
a92fdff620 add the example for ask the mentors
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-09-24 15:58:32 -07:00
Patrick Devine
b5614f3ebc fix end-of-line issue with the new prompt (#582) 2023-09-23 17:20:30 -07:00
Jeffrey Morgan
8b2ba9cab8 minor improvements to install.sh 2023-09-23 11:20:39 -04:00
Jeffrey Morgan
e29662ab5c fix minor install script issues on debian 2023-09-23 10:25:47 -04:00
Bruce MacDonald
cbc40aa996 debian installer support (#579)
* debian installer support

- normalize os name to lowercase
- check needed commands are available
- dont check sudo when root user
- share common install commands
- support debian cuda install
- skip aarm cuda install
- system user shared home dir

* refactor and add other platforms (#580)

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-23 09:46:47 -04:00
Jeffrey Morgan
5cb82540c9 install.sh: update install url 2023-09-23 09:35:14 -04:00
Jeffrey Morgan
d7849a1dc9 add .env to .dockerignore 2023-09-23 00:53:48 -04:00
Jeffrey Morgan
01c44d687e add multi line strings to final prompt 2023-09-23 00:27:24 -04:00
Jeffrey Morgan
9b12a511ca check other request fields before load short circuit in /api/generate 2023-09-22 23:50:55 -04:00
Jeffrey Morgan
e20362e0d5 fix multi line input in ollama run 2023-09-22 23:49:35 -04:00
128 changed files with 6777 additions and 4510 deletions

.dockerignore

@@ -5,3 +5,4 @@ dist
scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env

Dockerfile

@@ -1,31 +1,23 @@
ARG CUDA_VERSION=12.2.0
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG TARGETARCH
ARG VERSION=0.0.0
ARG GOFLAGS="'-ldflags=-w -s'"
WORKDIR /go/src/github.com/jmorganca/ollama
RUN apt-get update && apt-get install -y git build-essential cmake
ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz
COPY . .
ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS
RUN /usr/local/go/bin/go generate ./... \
&& /usr/local/go/bin/go build -ldflags "-linkmode=external -extldflags='-static' -X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
&& /usr/local/go/bin/go build .
FROM ubuntu:22.04
ENV OLLAMA_HOST 0.0.0.0
RUN apt-get update && apt-get install -y ca-certificates
ARG USER=ollama
ARG GROUP=ollama
RUN groupadd $GROUP && useradd -m -g $GROUP $USER
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
USER $USER:$GROUP
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

Dockerfile.build

@@ -1,7 +1,5 @@
ARG VERSION=0.0.0
# centos7 amd64 dependencies
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-devel-centos7 AS base-amd64
FROM --platform=linux/amd64 nvidia/cuda:11.3.1-devel-centos7 AS base-amd64
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl && \
yum update -y && \
yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236 wget
@@ -9,21 +7,25 @@ RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
# centos8 arm64 dependencies
FROM --platform=linux/arm64 nvidia/cuda:11.4.3-devel-centos8 AS base-arm64
FROM --platform=linux/arm64 nvidia/cuda-arm64:11.3.1-devel-centos8 AS base-arm64
RUN sed -i -e 's/mirrorlist/#mirrorlist/g' -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
RUN yum install -y git cmake
FROM base-${TARGETARCH}
ARG TARGETARCH
ARG GOFLAGS="'-ldflags -w -s'"
# install go
ADD https://dl.google.com/go/go1.21.1.linux-$TARGETARCH.tar.gz /tmp/go1.21.1.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz
# build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
ENV GOOS=linux
ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build -ldflags "-X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
/usr/local/go/bin/go build .

README.md (234 changed lines)

@@ -9,19 +9,31 @@
[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
Run, create, and share large language models (LLMs).
Get up and running with large language models locally.
> Note: Ollama is in early preview. Please report any issues you find.
### macOS
## Download
[Download](https://ollama.ai/download/Ollama-darwin.zip)
- [Download](https://ollama.ai/download) for macOS
- Download for Windows and Linux (coming soon)
- Build [from source](#building)
### Windows
Coming soon!
### Linux & WSL2
```
curl https://ollama.ai/install.sh | sh
```
[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)
### Docker
The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
## Quickstart
To run and chat with [Llama 2](https://ai.meta.com/llama), the new model by Meta:
To run and chat with [Llama 2](https://ollama.ai/library/llama2):
```
ollama run llama2
@@ -33,83 +45,50 @@ Ollama supports a list of open-source models available on [ollama.ai/library](ht
Here are some example open-source models that can be downloaded:
| Model | Parameters | Size | Download |
| ------------------------ | ---------- | ----- | ------------------------------- |
| Llama2 | 7B | 3.8GB | `ollama pull llama2` |
| Llama2 13B | 13B | 7.3GB | `ollama pull llama2:13b` |
| Llama2 70B | 70B | 39GB | `ollama pull llama2:70b` |
| Llama2 Uncensored | 7B | 3.8GB | `ollama pull llama2-uncensored` |
| Code Llama | 7B | 3.8GB | `ollama pull codellama` |
| Orca Mini | 3B | 1.9GB | `ollama pull orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama pull vicuna` |
| Nous-Hermes | 7B | 3.8GB | `ollama pull nous-hermes` |
| Nous-Hermes 13B | 13B | 7.3GB | `ollama pull nous-hermes:13b` |
| Wizard Vicuna Uncensored | 13B | 7.3GB | `ollama pull wizard-vicuna` |
| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Llama 2 | 7B | 3.8GB | `ollama run llama2` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
## Examples
## Customize your own model
### Pull a public model
### Import from GGUF
```
ollama pull llama2
```
Ollama supports importing GGUF models in the Modelfile:
> This command can also be used to update a local model. Only updated changes will be pulled.
1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
### Run a model interactively
```
FROM ./vicuna-33b.Q4_0.gguf
```
```
ollama run llama2
>>> hi
Hello! How can I help you today?
```
2. Create the model in Ollama
For multiline input, you can wrap text with `"""`:
```
ollama create example -f Modelfile
```
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
3. Run the model
### Run a model non-interactively
```
ollama run example
```
```
$ ollama run llama2 'tell me a joke'
Sure! Here's a quick one:
Why did the scarecrow win an award? Because he was outstanding in his field!
```
### Import from PyTorch or Safetensors
```
$ cat <<EOF >prompts.txt
tell me a joke about llamas
tell me another one
EOF
$ ollama run llama2 <prompts.txt
>>> tell me a joke about llamas
Why did the llama refuse to play hide-and-seek?
nobody likes to be hided!
See the [guide](docs/import.md) on importing models for more information.
>>> tell me another one
Sure, here's another one:
### Customize a prompt
Why did the llama go to the bar?
To have a hay-often good time!
```
### Run a model on contents of a text file
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### Customize a model
Pull a base model:
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama2` model:
```
ollama pull llama2
@@ -138,38 +117,68 @@ ollama run mario
Hello! It's your friend Mario.
```
For more examples, see the [examples](./examples) directory. For more information on creating a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
For more examples, see the [examples](examples) directory. For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation.
### Listing local models
## CLI Reference
### Create a model
`ollama create` is used to create a model from a Modelfile.
### Pull a model
```
ollama list
ollama pull llama2
```
### Removing local models
> This command can also be used to update a local model. Only the diff will be pulled.
### Remove a model
```
ollama rm llama2
```
## Model packages
### Copy a model
### Overview
```
ollama cp llama2 my-llama2
```
Ollama bundles model weights, configurations, and data into a single package, defined by a [Modelfile](./docs/modelfile.md).
### Multiline input
<picture>
<source media="(prefers-color-scheme: dark)" height="480" srcset="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
<img alt="logo" height="480" src="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
</picture>
For multiline input, you can wrap text with `"""`:
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
### Pass in prompt as arguments
```
$ ollama run llama2 "Summarize this file: $(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### List models on your computer
```
ollama list
```
### Start Ollama
`ollama serve` is used when you want to start ollama without running the desktop application.
## Building
Install `cmake` and `go`:
```
brew install cmake
brew install go
brew install cmake go
```
Then generate dependencies and build:
@@ -193,9 +202,8 @@ Finally, in a separate shell, run a model:
## REST API
> See the [API documentation](./docs/api.md) for all endpoints.
Ollama has an API for running and managing models. For example to generate text from a model:
Ollama has a REST API for running and managing models.
For example, to generate text from a model:
```
curl -X POST http://localhost:11434/api/generate -d '{
@@ -204,18 +212,48 @@ curl -X POST http://localhost:11434/api/generate -d '{
}'
```
## Community Projects using Ollama
See the [API documentation](./docs/api.md) for all endpoints.
| Project | Description |
| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [LangChain][1] and [LangChain.js][2] | Also, there is a question-answering [example][3]. |
| [Continue](https://github.com/continuedev/continue) | Embeds Ollama inside Visual Studio Code. The extension lets you highlight code to add to the prompt, ask questions in the sidebar, and generate code inline. |
| [LiteLLM](https://github.com/BerriAI/litellm) | Lightweight Python package to simplify LLM API calls. |
| [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) | Interact with Ollama as a chatbot on Discord. |
| [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) | Raycast extension to use Ollama for local llama inference on Raycast. |
| [Simple HTML UI](https://github.com/rtcfirefly/ollama-ui) | Also, there is a Chrome extension. |
| [Emacs client](https://github.com/zweifisch/ollama) | |
## Community Integrations
[1]: https://python.langchain.com/docs/integrations/llms/ollama
[2]: https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama
[3]: https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa
### Web & Desktop
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
- [Web UI](https://github.com/ollama-webui/ollama-webui)
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
### Terminal
- [oterm](https://github.com/ggozad/oterm)
- [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
- [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
### Extensions & Plugins
- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
- [Continue](https://github.com/continuedev/continue)
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)

api/client.go

@@ -7,25 +7,20 @@ import (
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"runtime"
"strings"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/version"
)
const DefaultHost = "127.0.0.1:11434"
var (
envHost = os.Getenv("OLLAMA_HOST")
)
type Client struct {
Base url.URL
HTTP http.Client
Headers http.Header
base *url.URL
http http.Client
}
func checkError(resp *http.Response, body []byte) error {
@@ -44,34 +39,56 @@ func checkError(resp *http.Response, body []byte) error {
return apiError
}
// Host returns the default host to use for the client. It is determined in the following order:
// 1. The OLLAMA_HOST environment variable
// 2. The default host (localhost:11434)
func Host() string {
if envHost != "" {
return envHost
}
return DefaultHost
}
func ClientFromEnvironment() (*Client, error) {
defaultPort := "11434"
// FromEnv creates a new client using Host() as the host. An error is returned
// if the host is invalid.
func FromEnv() (*Client, error) {
h := Host()
if !strings.HasPrefix(h, "http://") && !strings.HasPrefix(h, "https://") {
h = "http://" + h
scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
switch {
case !ok:
scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
case scheme == "http":
defaultPort = "80"
case scheme == "https":
defaultPort = "443"
}
u, err := url.Parse(h)
// trim trailing slashes
hostport = strings.TrimRight(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
return nil, fmt.Errorf("could not parse host: %w", err)
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
if u.Port() == "" {
u.Host += ":11434"
client := Client{
base: &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
},
}
return &Client{Base: *u, HTTP: http.Client{}}, nil
mockRequest, err := http.NewRequest(http.MethodHead, client.base.String(), nil)
if err != nil {
return nil, err
}
proxyURL, err := http.ProxyFromEnvironment(mockRequest)
if err != nil {
return nil, err
}
client.http = http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(proxyURL),
},
}
return &client, nil
}
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
@@ -86,7 +103,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
reqBody = bytes.NewReader(data)
}
requestURL := c.Base.JoinPath(path)
requestURL := c.base.JoinPath(path)
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), reqBody)
if err != nil {
return err
@@ -96,11 +113,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
request.Header.Set("Accept", "application/json")
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
for k, v := range c.Headers {
request.Header[k] = v
}
respObj, err := c.HTTP.Do(request)
respObj, err := c.http.Do(request)
if err != nil {
return err
}
@@ -123,6 +136,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return nil
}
const maxBufferSize = 512 * format.KiloByte
func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
var buf *bytes.Buffer
if data != nil {
@@ -134,23 +149,26 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
buf = bytes.NewBuffer(bts)
}
requestURL := c.Base.JoinPath(path)
requestURL := c.base.JoinPath(path)
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), buf)
if err != nil {
return err
}
request.Header.Set("Content-Type", "application/json")
request.Header.Set("Accept", "application/json")
request.Header.Set("Accept", "application/x-ndjson")
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
response, err := http.DefaultClient.Do(request)
response, err := c.http.Do(request)
if err != nil {
return err
}
defer response.Body.Close()
scanner := bufio.NewScanner(response.Body)
// increase the buffer size to avoid running out of space
scanBuf := make([]byte, 0, maxBufferSize)
scanner.Buffer(scanBuf, maxBufferSize)
for scanner.Scan() {
var errorResponse struct {
Error string `json:"error,omitempty"`

View File

@@ -7,7 +7,7 @@ BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses.
# The final response object will include statistics and additional data from the request. Use the callback function to override
# the default handler.
def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None):
def generate(model_name, prompt, system=None, template=None, format="", context=None, options=None, callback=None):
try:
url = f"{BASE_URL}/api/generate"
payload = {
@@ -16,7 +16,8 @@ def generate(model_name, prompt, system=None, template=None, context=None, optio
"system": system,
"template": template,
"context": context,
"options": options
"options": options,
"format": format,
}
# Remove keys with None values

api/client_test.go (new file, 43 lines)
View File

@@ -0,0 +1,43 @@
package api
import "testing"
func TestClientFromEnvironment(t *testing.T) {
type testCase struct {
value string
expect string
err error
}
testCases := map[string]*testCase{
"empty": {value: "", expect: "http://127.0.0.1:11434"},
"only address": {value: "1.2.3.4", expect: "http://1.2.3.4:11434"},
"only port": {value: ":1234", expect: "http://:1234"},
"address and port": {value: "1.2.3.4:1234", expect: "http://1.2.3.4:1234"},
"scheme http and address": {value: "http://1.2.3.4", expect: "http://1.2.3.4:80"},
"scheme https and address": {value: "https://1.2.3.4", expect: "https://1.2.3.4:443"},
"scheme, address, and port": {value: "https://1.2.3.4:1234", expect: "https://1.2.3.4:1234"},
"hostname": {value: "example.com", expect: "http://example.com:11434"},
"hostname and port": {value: "example.com:1234", expect: "http://example.com:1234"},
"scheme http and hostname": {value: "http://example.com", expect: "http://example.com:80"},
"scheme https and hostname": {value: "https://example.com", expect: "https://example.com:443"},
"scheme, hostname, and port": {value: "https://example.com:1234", expect: "https://example.com:1234"},
"trailing slash": {value: "example.com/", expect: "http://example.com:11434"},
"trailing slash port": {value: "example.com:1234/", expect: "http://example.com:1234"},
}
for k, v := range testCases {
t.Run(k, func(t *testing.T) {
t.Setenv("OLLAMA_HOST", v.value)
client, err := ClientFromEnvironment()
if err != v.err {
t.Fatalf("expected %s, got %s", v.err, err)
}
if client.base.String() != v.expect {
t.Fatalf("expected %s, got %s", v.expect, client.base.String())
}
})
}
}

View File

@@ -3,7 +3,6 @@ package api
import (
"encoding/json"
"fmt"
"log"
"math"
"os"
"reflect"
@@ -37,10 +36,57 @@ type GenerateRequest struct {
System string `json:"system"`
Template string `json:"template"`
Context []int `json:"context,omitempty"`
Stream *bool `json:"stream,omitempty"`
Raw bool `json:"raw,omitempty"`
Format string `json:"format"`
Options map[string]interface{} `json:"options"`
}
// Options specified in GenerateRequest. If you add a new option here, add it to the API docs also.
type Options struct {
Runner
// Predict options used at runtime
NumKeep int `json:"num_keep,omitempty"`
Seed int `json:"seed,omitempty"`
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TFSZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNewline bool `json:"penalize_newline,omitempty"`
Stop []string `json:"stop,omitempty"`
}
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
EmbeddingOnly bool `json:"embedding_only,omitempty"`
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
type EmbeddingRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -53,8 +99,9 @@ type EmbeddingResponse struct {
}
type CreateRequest struct {
Name string `json:"name"`
Path string `json:"path"`
Name string `json:"name"`
Path string `json:"path"`
Stream *bool `json:"stream,omitempty"`
}
type DeleteRequest struct {
@@ -83,13 +130,14 @@ type PullRequest struct {
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
}
type ProgressResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int `json:"total,omitempty"`
Completed int `json:"completed,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}
type PushRequest struct {
@@ -97,6 +145,7 @@ type PushRequest struct {
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
}
type ListResponse struct {
@@ -106,7 +155,7 @@ type ListResponse struct {
type ModelResponse struct {
Name string `json:"name"`
ModifiedAt time.Time `json:"modified_at"`
Size int `json:"size"`
Size int64 `json:"size"`
Digest string `json:"digest"`
}
@@ -117,7 +166,7 @@ type TokenResponse struct {
type GenerateResponse struct {
Model string `json:"model"`
CreatedAt time.Time `json:"created_at"`
Response string `json:"response,omitempty"`
Response string `json:"response"`
Done bool `json:"done"`
Context []int `json:"context,omitempty"`
@@ -158,48 +207,7 @@ func (r *GenerateResponse) Summary() {
}
}
type Options struct {
Seed int `json:"seed,omitempty"`
// Backend options
UseNUMA bool `json:"numa,omitempty"`
// Model options
NumCtx int `json:"num_ctx,omitempty"`
NumKeep int `json:"num_keep,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
EmbeddingOnly bool `json:"embedding_only,omitempty"`
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
// Predict options
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TFSZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNewline bool `json:"penalize_newline,omitempty"`
Stop []string `json:"stop,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
var ErrInvalidOpts = fmt.Errorf("invalid options")
func (opts *Options) FromMap(m map[string]interface{}) error {
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
@@ -214,6 +222,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
}
}
invalidOpts := []string{}
for key, val := range m {
if opt, ok := jsonOpts[key]; ok {
field := valueOpts.FieldByName(opt.Name)
@@ -231,44 +240,39 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
// when JSON unmarshals numbers, it uses float64, not int
field.SetInt(int64(t))
default:
log.Printf("could not convert model parameter %v to int, skipped", key)
return fmt.Errorf("option %q must be of type integer", key)
}
case reflect.Bool:
val, ok := val.(bool)
if !ok {
log.Printf("could not convert model parameter %v to bool, skipped", key)
continue
return fmt.Errorf("option %q must be of type boolean", key)
}
field.SetBool(val)
case reflect.Float32:
// JSON unmarshals to float64
val, ok := val.(float64)
if !ok {
log.Printf("could not convert model parameter %v to float32, skipped", key)
continue
return fmt.Errorf("option %q must be of type float32", key)
}
field.SetFloat(val)
case reflect.String:
val, ok := val.(string)
if !ok {
log.Printf("could not convert model parameter %v to string, skipped", key)
continue
return fmt.Errorf("option %q must be of type string", key)
}
field.SetString(val)
case reflect.Slice:
// JSON unmarshals to []interface{}, not []string
val, ok := val.([]interface{})
if !ok {
log.Printf("could not convert model parameter %v to slice, skipped", key)
continue
return fmt.Errorf("option %q must be of type array", key)
}
// convert []interface{} to []string
slice := make([]string, len(val))
for i, item := range val {
str, ok := item.(string)
if !ok {
log.Printf("could not convert model parameter %v to slice of strings, skipped", key)
continue
return fmt.Errorf("option %q must be of an array of strings", key)
}
slice[i] = str
}
@@ -277,45 +281,53 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
}
}
} else {
invalidOpts = append(invalidOpts, key)
}
}
if len(invalidOpts) > 0 {
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
}
return nil
}
func DefaultOptions() Options {
return Options{
Seed: -1,
UseNUMA: false,
NumCtx: 2048,
NumKeep: -1,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
LowVRAM: false,
F16KV: true,
UseMMap: true,
UseMLock: false,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
EmbeddingOnly: true,
RepeatLastN: 64,
RepeatPenalty: 1.1,
FrequencyPenalty: 0.0,
PresencePenalty: 0.0,
// options set on request to runner
NumPredict: -1,
NumKeep: 0,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,
TFSZ: 1.0,
TypicalP: 1.0,
RepeatLastN: 64,
RepeatPenalty: 1.1,
PresencePenalty: 0.0,
FrequencyPenalty: 0.0,
Mirostat: 0,
MirostatTau: 5.0,
MirostatEta: 0.1,
PenalizeNewline: true,
Seed: -1,
NumThread: 0, // let the runtime decide
Runner: Runner{
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
},
}
}

View File

@@ -47,16 +47,6 @@ const config: ForgeConfig = {
},
rebuildConfig: {},
makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
publishers: [
new PublisherGithub({
repository: {
name: 'ollama',
owner: 'jmorganca',
},
draft: false,
prerelease: true,
}),
],
hooks: {
readPackageJson: async (_, packageJson) => {
return { ...packageJson, version: process.env.VERSION || packageJson.version }

app/package-lock.json (generated, 992 changed lines)

File diff suppressed because it is too large

View File

@@ -46,7 +46,7 @@
"chmodr": "^1.2.0",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"electron": "25.2.0",
"electron": "25.9.2",
"eslint": "^8.43.0",
"eslint-plugin-import": "^2.27.5",
"fork-ts-checker-webpack-plugin": "^7.3.0",

View File

@@ -5,7 +5,7 @@ import winston from 'winston'
import 'winston-daily-rotate-file'
import * as path from 'path'
import { analytics, id } from './telemetry'
import { v4 as uuidv4 } from 'uuid'
import { installed } from './install'
require('@electron/remote/main').initialize()
@@ -162,13 +162,56 @@ app.on('before-quit', () => {
}
})
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`
let latest = ''
async function isNewReleaseAvailable() {
try {
const response = await fetch(updateURL)
if (!response.ok) {
return false
}
if (response.status === 204) {
return false
}
const data = await response.json()
const url = data?.url
if (!url) {
return false
}
if (latest === url) {
return false
}
latest = url
return true
} catch (error) {
logger.error(`update check failed - ${error}`)
return false
}
}
async function checkUpdate() {
const available = await isNewReleaseAvailable()
if (available) {
logger.info('checking for update')
autoUpdater.checkForUpdates()
}
}
function init() {
if (app.isPackaged) {
heartbeat()
autoUpdater.checkForUpdates()
checkUpdate()
setInterval(() => {
heartbeat()
autoUpdater.checkForUpdates()
checkUpdate()
}, 60 * 60 * 1000)
}
@@ -234,28 +277,22 @@ app.on('window-all-closed', () => {
}
})
// In this file you can include the rest of your app's specific main process
// code. You can also put them in separate files and import them here.
let aid = ''
try {
aid = id()
} catch (e) {}
function id(): string {
const id = store.get('id') as string
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${process.arch}&version=${app.getVersion()}&id=${aid}`,
})
if (id) {
return id
}
async function heartbeat() {
analytics.track({
anonymousId: aid,
event: 'heartbeat',
properties: {
version: app.getVersion(),
},
})
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}
autoUpdater.setFeedURL({ url: updateURL })
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)
console.error(`update check failed - ${e.message}`)
})

View File

@@ -1,19 +0,0 @@
import { Analytics } from '@segment/analytics-node'
import { v4 as uuidv4 } from 'uuid'
import Store from 'electron-store'
const store = new Store()
export const analytics = new Analytics({ writeKey: process.env.TELEMETRY_WRITE_KEY || '<empty>' })
export function id(): string {
const id = store.get('id') as string
if (id) {
return id
}
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}

View File

@@ -1,7 +1,6 @@
package cmd
import (
"bufio"
"context"
"crypto/ed25519"
"crypto/rand"
@@ -11,38 +10,30 @@ import (
"io"
"log"
"net"
"net/http"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter"
"github.com/pdevine/readline"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/term"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/editor"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/progressbar"
"github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version"
)
type Painter struct{}
func (p Painter) Paint(line []rune, l int) []rune {
termType := os.Getenv("TERM")
if termType == "xterm-256color" && len(line) == 0 {
prompt := "Send a message (/? for help)"
return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt)))
}
return line
}
func CreateHandler(cmd *cobra.Command, args []string) error {
filename, _ := cmd.Flags().GetString("file")
filename, err := filepath.Abs(filename)
@@ -50,7 +41,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -67,20 +58,14 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
spinner.Stop()
}
currentDigest = resp.Digest
switch {
case strings.Contains(resp.Status, "embeddings"):
bar = progressbar.Default(int64(resp.Total), resp.Status)
bar.Set(resp.Completed)
default:
// pulling
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Status,
)
bar.Set(resp.Completed)
}
// pulling
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
@@ -108,28 +93,21 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
func RunHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
models, err := client.List(context.Background())
if err != nil {
return err
}
modelName, modelTag, ok := strings.Cut(args[0], ":")
if !ok {
modelTag = "latest"
}
for _, model := range models.Models {
if model.Name == strings.Join([]string{modelName, modelTag}, ":") {
return RunGenerate(cmd, args)
name := args[0]
// check if the model exists on the server
_, err = client.Show(context.Background(), &api.ShowRequest{Name: name})
var statusError api.StatusError
switch {
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
if err := PullHandler(cmd, args); err != nil {
return err
}
}
if err := PullHandler(cmd, args); err != nil {
case err != nil:
return err
}
@@ -137,7 +115,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
}
func PushHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -155,13 +133,13 @@ func PushHandler(cmd *cobra.Command, args []string) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -181,7 +159,7 @@ func PushHandler(cmd *cobra.Command, args []string) error {
}
func ListHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -214,7 +192,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
}
func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -230,7 +208,7 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
}
func ShowHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -308,7 +286,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
}
func CopyHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -331,7 +309,7 @@ func PullHandler(cmd *cobra.Command, args []string) error {
}
func pull(model string, insecure bool) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -344,13 +322,13 @@ func pull(model string, insecure bool) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -371,22 +349,50 @@ func pull(model string, insecure bool) error {
}
func RunGenerate(cmd *cobra.Command, args []string) error {
if len(args) > 1 {
// join all args into a single prompt
return generate(cmd, args[0], strings.Join(args[1:], " "))
format, err := cmd.Flags().GetString("format")
if err != nil {
return err
}
if readline.IsTerminal(int(os.Stdin.Fd())) {
return generateInteractive(cmd, args[0])
prompts := args[1:]
// prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) {
in, err := io.ReadAll(os.Stdin)
if err != nil {
return err
}
prompts = append([]string{string(in)}, prompts...)
}
return generateBatch(cmd, args[0])
// output is being piped
if !term.IsTerminal(int(os.Stdout.Fd())) {
return generate(cmd, args[0], strings.Join(prompts, " "), false, format)
}
wordWrap := os.Getenv("TERM") == "xterm-256color"
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
// prompts are provided via stdin or args so don't enter interactive mode
if len(prompts) > 0 {
return generate(cmd, args[0], strings.Join(prompts, " "), wordWrap, format)
}
return generateInteractive(cmd, args[0], wordWrap, format)
}
type generateContextKey string
func generate(cmd *cobra.Command, model, prompt string) error {
client, err := api.FromEnv()
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -401,30 +407,28 @@ func generate(cmd *cobra.Command, model, prompt string) error {
generateContext = []int{}
}
var wrapTerm bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wrapTerm = true
termWidth, _, err := term.GetSize(int(os.Stdout.Fd()))
if err != nil {
wordWrap = false
}
termWidth, _, err := term.GetSize(int(0))
if err != nil {
wrapTerm = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
defer cancel()
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wrapTerm = false
}
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
var abort bool
go func() {
<-sigChan
cancel()
abort = true
}()
var currentLineLength int
var wordBuffer string
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext}
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
@@ -432,7 +436,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
latest = response
if wrapTerm {
if wordWrap {
for _, ch := range response.Response {
if currentLineLength+1 > termWidth-5 {
// backtrack the length of the last word and clear to the end of the line
@@ -460,18 +464,10 @@ func generate(cmd *cobra.Command, model, prompt string) error {
return nil
}
if err := client.Generate(context.Background(), &request, fn); err != nil {
if strings.Contains(err.Error(), "failed to load model") {
// tell the user to check the server log, if it exists locally
home, nestedErr := os.UserHomeDir()
if nestedErr != nil {
// return the original error
return err
}
logPath := filepath.Join(home, ".ollama", "logs", "server.log")
if _, nestedErr := os.Stat(logPath); nestedErr == nil {
err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
}
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err
}
@@ -481,6 +477,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
}
if !latest.Done {
if abort {
return nil
}
return errors.New("unexpected end of response")
}
@@ -500,68 +499,66 @@ func generate(cmd *cobra.Command, model, prompt string) error {
return nil
}
func generateInteractive(cmd *cobra.Command, model string) error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format string) error {
// load the model
if err := generate(cmd, model, ""); err != nil {
if err := generate(cmd, model, "", false, ""); err != nil {
return err
}
completer := readline.NewPrefixCompleter(
readline.PcItem("/help"),
readline.PcItem("/list"),
readline.PcItem("/set",
readline.PcItem("history"),
readline.PcItem("nohistory"),
readline.PcItem("wordwrap"),
readline.PcItem("nowordwrap"),
readline.PcItem("verbose"),
readline.PcItem("quiet"),
),
readline.PcItem("/show",
readline.PcItem("license"),
readline.PcItem("modelfile"),
readline.PcItem("parameters"),
readline.PcItem("system"),
readline.PcItem("template"),
),
readline.PcItem("/exit"),
readline.PcItem("/bye"),
)
usage := func() {
fmt.Fprintln(os.Stderr, "commands:")
fmt.Fprintln(os.Stderr, completer.Tree(" "))
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables")
fmt.Fprintln(os.Stderr, " /show Show model information")
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
fmt.Fprintln(os.Stderr, "")
}
config := readline.Config{
Painter: Painter{},
Prompt: ">>> ",
HistoryFile: filepath.Join(home, ".ollama", "history"),
AutoComplete: completer,
usageSet := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set history Enable history")
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
fmt.Fprintln(os.Stderr, "")
}
scanner, err := readline.NewEx(&config)
usageShow := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /show license Show model license")
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
fmt.Fprintln(os.Stderr, " /show system Show system prompt")
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
fmt.Fprintln(os.Stderr, "")
}
prompt := editor.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
}
ed, err := editor.New(prompt)
if err != nil {
return err
}
defer scanner.Close()
var multiLineBuffer string
var isMultiLine bool
for {
line, err := scanner.Readline()
line, err := ed.HandleInput()
switch {
case errors.Is(err, io.EOF):
fmt.Println()
return nil
case errors.Is(err, readline.ErrInterrupt):
case errors.Is(err, editor.ErrInterrupt):
if line == "" {
return nil
fmt.Println("\nUse Ctrl-D or /bye to exit.")
}
continue
@@ -572,23 +569,6 @@ func generateInteractive(cmd *cobra.Command, model string) error {
line = strings.TrimSpace(line)
switch {
case isMultiLine:
if strings.HasSuffix(line, `"""`) {
isMultiLine = false
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
scanner.SetPrompt(">>> ")
continue
} else {
multiLineBuffer += line + " "
continue
}
case strings.HasPrefix(line, `"""`):
isMultiLine = true
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
scanner.SetPrompt("... ")
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
if err := ListHandler(cmd, args[1:]); err != nil {
@@ -599,14 +579,14 @@ func generateInteractive(cmd *cobra.Command, model string) error {
if len(args) > 1 {
switch args[1] {
case "history":
scanner.HistoryEnable()
//scanner.HistoryEnable()
case "nohistory":
scanner.HistoryDisable()
//scanner.HistoryDisable()
case "wordwrap":
cmd.Flags().Set("nowordwrap", "false")
wordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
cmd.Flags().Set("nowordwrap", "true")
wordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
cmd.Flags().Set("verbose", "true")
@@ -614,50 +594,81 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "quiet":
cmd.Flags().Set("verbose", "false")
fmt.Println("Set 'quiet' mode.")
case "mode":
if len(args) > 2 {
switch args[2] {
case "vim":
scanner.SetVimMode(true)
case "emacs", "default":
scanner.SetVimMode(false)
default:
usage()
}
case "format":
if len(args) < 3 || args[2] != "json" {
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
} else {
usage()
format = args[2]
fmt.Printf("Set format to '%s' mode.\n", args[2])
}
case "noformat":
format = ""
fmt.Println("Disabled format.")
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usage()
usageSet()
}
case strings.HasPrefix(line, "/show"):
args := strings.Fields(line)
if len(args) > 1 {
resp, err := server.GetModelInfo(model)
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: model})
if err != nil {
fmt.Println("error: couldn't get model")
return err
}
switch args[1] {
case "license":
fmt.Println(resp.License)
if resp.License == "" {
fmt.Print("No license was specified for this model.\n\n")
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
if resp.Parameters == "" {
fmt.Print("No parameters were specified for this model.\n\n")
} else {
fmt.Println(resp.Parameters)
}
case "system":
fmt.Println(resp.System)
if resp.System == "" {
fmt.Print("No system prompt was specified for this model.\n\n")
} else {
fmt.Println(resp.System)
}
case "template":
fmt.Println(resp.Template)
if resp.Template == "" {
fmt.Print("No prompt template was specified for this model.\n\n")
} else {
fmt.Println(resp.Template)
}
default:
fmt.Println("error: unknown command")
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usageShow()
}
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "set", "/set":
usageSet()
case "show", "/show":
usageShow()
}
} else {
usage()
}
case line == "/help", line == "/?":
usage()
case line == "/exit", line == "/bye":
return nil
case strings.HasPrefix(line, "/"):
@@ -666,26 +677,13 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line); err != nil {
if err := generate(cmd, model, line, wordWrap, format); err != nil {
return err
}
}
}
}
func generateBatch(cmd *cobra.Command, model string) error {
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
prompt := scanner.Text()
fmt.Printf(">>> %s\n", prompt)
if err := generate(cmd, model, prompt); err != nil {
return err
}
}
return nil
}
func RunServer(cmd *cobra.Command, _ []string) error {
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
if err != nil {
@@ -709,12 +707,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
origins = strings.Split(o, ",")
}
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
if err := server.PruneLayers(); err != nil {
return err
}
}
return server.Serve(ln, origins)
}
@@ -799,7 +791,7 @@ func startMacApp(client *api.Client) error {
}
func checkServerHeartbeat(_ *cobra.Command, _ []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -837,7 +829,7 @@ func NewCLI() *cobra.Command {
createCmd := &cobra.Command{
Use: "create MODEL",
Short: "Create a model from a Modelfile",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: CreateHandler,
}
@@ -847,7 +839,7 @@ func NewCLI() *cobra.Command {
showCmd := &cobra.Command{
Use: "show MODEL",
Short: "Show information for a model",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: ShowHandler,
}
@@ -869,18 +861,20 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
runCmd.Flags().String("format", "", "Response format (e.g. json)")
serveCmd := &cobra.Command{
Use: "serve",
Aliases: []string{"start"},
Short: "Start ollama",
Args: cobra.ExactArgs(0),
RunE: RunServer,
}
pullCmd := &cobra.Command{
Use: "pull MODEL",
Short: "Pull a model from a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PullHandler,
}
@@ -890,7 +884,7 @@ func NewCLI() *cobra.Command {
pushCmd := &cobra.Command{
Use: "push MODEL",
Short: "Push a model to a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PushHandler,
}
@@ -906,15 +900,15 @@ func NewCLI() *cobra.Command {
}
copyCmd := &cobra.Command{
Use: "cp",
Use: "cp SOURCE TARGET",
Short: "Copy a model",
Args: cobra.MinimumNArgs(2),
Args: cobra.ExactArgs(2),
PreRunE: checkServerHeartbeat,
RunE: CopyHandler,
}
deleteCmd := &cobra.Command{
Use: "rm",
Use: "rm MODEL [MODEL...]",
Short: "Remove a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,

View File

@@ -12,7 +12,6 @@
- [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings)
## Conventions
### Model names
@@ -23,6 +22,10 @@ Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` a
All durations are returned in nanoseconds.
### Streaming responses
Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
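Since each line in the stream is a complete JSON object, responses can be consumed line by line. A minimal sketch from the shell, assuming `jq` is installed and using the completion endpoint documented in the next section:
```shell
# Print each streamed chunk's "response" field as it arrives.
curl -s --no-buffer http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "Why is the sky blue?"
}' | while read -r line; do
  echo "$line" | jq -rj '.response // empty'
done
echo
```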
## Generate a completion
```shell
@@ -36,29 +39,38 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
Advanced parameters:
Advanced parameters (optional):
- `format`: the format to return a response in. Currently the only accepted value is `json`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: system prompt to use (overrides what is defined in the `Modelfile`)
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
### Request
### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and instructing the model in the `prompt` to respond in JSON. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
"model": "llama2",
"prompt": "Why is the sky blue?"
}'
```
### Response
#### Response
A stream of JSON objects:
A stream of JSON objects is returned:
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"response": "The",
"done": false
@@ -76,13 +88,15 @@ The final response in the stream also includes additional data about the generat
- `eval_count`: number of tokens in the response
- `eval_duration`: time in nanoseconds spent generating the response
- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
- `response`: empty if the response was streamed, if not streamed, this will contain the full response
To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
@@ -96,6 +110,182 @@ To calculate how fast the response is generated in tokens per second (token/s),
}
```
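For instance, with an `eval_count` of 13 and an `eval_duration` of 1325948000 (the values used in the examples in this document), a quick sketch of the calculation, assuming `jq` is installed:
```shell
# eval_duration is in nanoseconds, so scale by 1e9 to get tokens per second.
echo '{ "eval_count": 13, "eval_duration": 1325948000 }' \
  | jq '.eval_count / .eval_duration * 1e9'
# => approximately 9.8 tokens/s
```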
#### Request (No streaming)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
"prompt": "Why is the sky blue?",
"stream": false
}'
```
#### Response
If `stream` is set to `false`, the response will be a single JSON object:
```json
{
"model": "llama2:7b",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 13,
"eval_duration": 1325948000
}
```
#### Request (Raw mode)
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]",
"raw": true,
"stream": false
}'
```
#### Response
```json
{
"model": "mistral",
"created_at": "2023-11-03T15:36:02.583064Z",
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
"done": true,
"total_duration": 14648695333,
"load_duration": 3302671417,
"prompt_eval_count": 14,
"prompt_eval_duration": 286243000,
"eval_count": 129,
"eval_duration": 10931424000
}
```
#### Request (JSON mode)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
"stream": false
}'
```
#### Response
```json
{
"model": "llama2",
"created_at": "2023-11-09T21:07:55.186497Z",
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
"done": true,
"total_duration": 4661289125,
"load_duration": 1714434500,
"prompt_eval_count": 36,
"prompt_eval_duration": 264132000,
"eval_count": 75,
"eval_duration": 2112149000
}
```
The value of `response` will be a string containing JSON similar to:
```json
{
"morning": {
"color": "blue"
},
"noon": {
"color": "blue-gray"
},
"afternoon": {
"color": "warm gray"
},
"evening": {
"color": "orange"
}
}
```
#### Request (With options)
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
"prompt": "Why is the sky blue?",
"stream": false,
"options": {
"num_keep": 5,
"seed": 42,
"num_predict": 100,
"top_k": 20,
"top_p": 0.9,
"tfs_z": 0.5,
"typical_p": 0.7,
"repeat_last_n": 33,
"temperature": 0.8,
"repeat_penalty": 1.2,
"presence_penalty": 1.5,
"frequency_penalty": 1.0,
"mirostat": 1,
"mirostat_tau": 0.8,
"mirostat_eta": 0.6,
"penalize_newline": true,
"stop": ["\n", "user:"],
"numa": false,
"num_ctx": 4,
"num_batch": 2,
"num_gqa": 1,
"num_gpu": 1,
"main_gpu": 0,
"low_vram": false,
"f16_kv": true,
"logits_all": false,
"vocab_only": false,
"use_mmap": true,
"use_mlock": false,
"embedding_only": false,
"rope_frequency_base": 1.1,
"rope_frequency_scale": 0.8,
"num_thread": 8
}
}'
```
#### Response
```json
{
"model": "llama2:7b",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 13,
"eval_duration": 1325948000
}
```
## Create a Model
```shell
@@ -108,8 +298,11 @@ Create a model from a [`Modelfile`](./modelfile.md)
- `name`: name of the model to create
- `path`: path to the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/create -d '{
@@ -118,7 +311,7 @@ curl -X POST http://localhost:11434/api/create -d '{
}'
```
### Response
#### Response
A stream of JSON objects. When finished, `status` is `success`.
@@ -136,13 +329,17 @@ GET /api/tags
List models that are available locally.
### Request
### Examples
#### Request
```shell
curl http://localhost:11434/api/tags
```
### Response
#### Response
A single JSON object will be returned.
```json
{
@@ -173,22 +370,24 @@ Show details about a model including modelfile, template, parameters, license, a
- `name`: name of the model to show
### Request
### Examples
```shell
#### Request
```shell
curl http://localhost:11434/api/show -d '{
"name": "llama2:7b"
}'
```
### Response
#### Response
```json
{
"license": "<contents of license block>",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
"license": "<contents of license block>",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
}
```
@@ -200,7 +399,9 @@ POST /api/copy
Copy a model. Creates a model with another name from an existing model.
### Request
### Examples
#### Request
```shell
curl http://localhost:11434/api/copy -d '{
@@ -209,6 +410,10 @@ curl http://localhost:11434/api/copy -d '{
}'
```
#### Response
The only response is a 200 OK if successful.
## Delete a Model
```shell
@@ -219,9 +424,11 @@ Delete a model and its data.
### Parameters
- `model`: model name to delete
- `name`: model name to delete
### Request
### Examples
#### Request
```shell
curl -X DELETE http://localhost:11434/api/delete -d '{
@@ -229,6 +436,10 @@ curl -X DELETE http://localhost:11434/api/delete -d '{
}'
```
#### Response
If successful, the only response is a 200 OK.
## Pull a Model
```shell
@@ -241,8 +452,11 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
- `name`: name of the model to pull
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/pull -d '{
@@ -250,13 +464,51 @@ curl -X POST http://localhost:11434/api/pull -d '{
}'
```
### Response
#### Response
If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
The first object is the manifest:
```json
{
"status": "pulling manifest"
}
```
Then there is a series of downloading responses. Until a download is completed, the `completed` key may not be included. The number of files to be downloaded depends on the number of layers specified in the manifest.
```json
{
"status": "downloading digestname",
"digest": "digestname",
"total": 2142590208
"total": 2142590208,
"completed": 241970
}
```
After all the files are downloaded, the final responses are:
```json
{
"status": "verifying sha256 digest"
}
{
"status": "writing manifest"
}
{
"status": "removing any unused layers"
}
{
"status": "success"
}
```
If `stream` is set to `false`, then the response is a single JSON object:
```json
{
"status": "success"
}
```
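To consume the streaming form from the shell, a minimal sketch (assuming `jq` is installed; the model name is just an example):
```shell
# Print each status object; include byte counts when they are present.
curl -s --no-buffer http://localhost:11434/api/pull -d '{ "name": "llama2" }' \
  | jq -r 'if .completed != null then "\(.status): \(.completed)/\(.total) bytes" else .status end'
```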
@@ -271,9 +523,12 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
### Parameters
- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/push -d '{
@@ -281,20 +536,21 @@ curl -X POST http://localhost:11434/api/push -d '{
}'
```
### Response
#### Response
Streaming response that starts with:
If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
```json
{"status":"retrieving manifest"}
{ "status": "retrieving manifest" }
```
and then:
```json
{
"status":"starting upload","digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total":1928429856
"status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total": 1928429856
}
```
@@ -302,9 +558,10 @@ Then there is a series of uploading responses:
```json
{
"status":"starting upload",
"digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total":1928429856}
"status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total": 1928429856
}
```
Finally, when the upload is complete:
@@ -314,6 +571,12 @@ Finally, when the upload is complete:
{"status":"success"}
```
If `stream` is set to `false`, then the response is a single JSON object:
```json
{ "status": "success" }
```
## Generate Embeddings
```shell
@@ -331,7 +594,9 @@ Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/embeddings -d '{
@@ -340,12 +605,13 @@ curl -X POST http://localhost:11434/api/embeddings -d '{
}'
```
### Response
#### Response
```json
{
"embeddings": [
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]
}```
}
```

View File

@@ -10,25 +10,25 @@ Install required tools:
- go version 1.20 or higher
- gcc version 11.4.0 or higher
```
```bash
brew install go cmake gcc
```
Get the required libraries:
```
```bash
go generate ./...
```
Then build ollama:
```
```bash
go build .
```
Now you can run `ollama`:
```
```bash
./ollama
```

View File

@@ -1,17 +1,98 @@
# FAQ
## How can I expose the Ollama server?
## How can I view the logs?
On macOS:
```
cat ~/.ollama/logs/server.log
```
On Linux:
```
journalctl -u ollama
```
If you're running `ollama serve` directly, the logs will be printed to the console.
## How can I expose Ollama on my network?
Ollama binds to 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable.
On macOS:
```bash
OLLAMA_HOST=0.0.0.0:11435 ollama serve
```
By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
On Linux:
Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST`
```bash
mkdir -p /etc/systemd/system/ollama.service.d
echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf
```
```bash
echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
## How can I allow additional web origins to access Ollama?
Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0` by default. Add additional origins with the `OLLAMA_ORIGINS` environment variable:
On macOS:
```bash
OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
```
On Linux:
```bash
echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
## Where are models stored?
Raw model data is stored under `~/.ollama/models`.
- macOS: Raw model data is stored under `~/.ollama/models`.
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
Below the models directory you will find a structure similar to the following:
```shell
.
├── blobs
└── manifests
└── registry.ollama.ai
├── f0rodo
├── library
├── mattw
└── saikatkumardey
```
There is a `manifests/registry.ollama.ai/namespace` path. In the example above, the user has downloaded models from the official `library` namespace as well as the `f0rodo`, `mattw`, and `saikatkumardey` namespaces. Within each of those directories, you will find a directory for each downloaded model, and inside it a file named for each tag. Each tag file is the manifest for the model.
The manifest lists all the layers used in this model. You will see a `media type` for each layer, along with a digest. That digest corresponds with a file in the `models/blobs` directory.
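For example, you can pretty-print one of these manifests directly (a sketch; the namespace, model name, and tag in the path are placeholders, and `jq` is assumed to be installed):
```shell
# macOS path shown; on Linux the models directory is under /usr/share/ollama/.ollama/models
cat ~/.ollama/models/manifests/registry.ollama.ai/library/llama2/latest | jq .
```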
### How can I change where Ollama stores models?
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
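For example, on Linux, a sketch that follows the same drop-in pattern shown earlier (the models path is a placeholder):
```bash
echo "Environment=OLLAMA_MODELS=/path/to/models" >>/etc/systemd/system/ollama.service.d/environment.conf
systemctl daemon-reload
systemctl restart ollama
```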

docs/import.md (new file, 198 lines)
View File

@@ -0,0 +1,198 @@
# Import a model
This guide walks through importing a GGUF, PyTorch or Safetensors model.
## Importing (GGUF)
### Step 1: Write a `Modelfile`
Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
```
FROM ./mistral-7b-v0.1.Q4_0.gguf
```
(Optional) Many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
```
FROM ./q4_0.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 2: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
### Step 3: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Importing (PyTorch & Safetensors)
### Supported models
Ollama supports a set of model architectures, with support for more coming soon:
- Llama & Mistral
- Falcon & RW
- GPT-NeoX
- BigCode
To view a model's architecture, check the `config.json` file in its HuggingFace repo. You should see an entry under `architectures` (e.g. `LlamaForCausalLM`).
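For example, to check this without cloning the repository (a sketch; assumes `curl` and `jq` are available and that the repository serves `config.json` at the usual raw path):
```shell
curl -s https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/raw/main/config.json | jq .architectures
# => [ "MistralForCausalLM" ]
```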
### Step 1: Clone the HuggingFace repository (optional)
If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
```
git lfs install
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
cd Mistral-7B-Instruct-v0.1
```
### Step 2: Convert and quantize to a `.bin` file (optional, for PyTorch and Safetensors)
If the model is in PyTorch or Safetensors format, a [Docker image](https://hub.docker.com/r/ollama/quantize) with the tooling required to convert and quantize models is available.
First, install [Docker](https://www.docker.com/get-started/).
Next, to convert and quantize your model, run:
```
docker run --rm -v .:/model ollama/quantize -q q4_0 /model
```
This will output two files into the directory:
- `f16.bin`: the model converted to GGUF
- `q4_0.bin`: the model quantized to 4-bit precision (we will use this file to create the Ollama model)
### Step 3: Write a `Modelfile`
Next, create a `Modelfile` for your model:
```
FROM ./q4_0.bin
```
(Optional) Many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
```
FROM ./q4_0.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 4: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
### Step 5: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Publishing your model (optional early alpha)
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
1. Create [an account](https://ollama.ai/signup)
2. Run `cat ~/.ollama/id_ed25519.pub` to view your Ollama public key. Copy this to the clipboard.
3. Add your public key to your [Ollama account](https://ollama.ai/settings/keys)
Next, copy your model to your username's namespace:
```
ollama cp example <your username>/example
```
Then push the model:
```
ollama push <your username>/example
```
After publishing, your model will be available at `https://ollama.ai/<your username>/example`.
## Quantization reference
The quantization options are as follows, from the highest to the lowest level of quantization (see the example after this list). Note: some architectures, such as Falcon, do not support K quants.
- `q2_K`
- `q3_K`
- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_0` (recommended)
- `q4_1`
- `q4_K`
- `q4_K_S`
- `q4_K_M`
- `q5_0`
- `q5_1`
- `q5_K`
- `q5_K_S`
- `q5_K_M`
- `q6_K`
- `q8_0`
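For example, to try a higher-quality `q5_K_M` quantization instead of the `q4_0` shown earlier, the same Docker command can be re-run with a different `-q` value (assuming the model's architecture supports K quants):

```shell
docker run --rm -v .:/model ollama/quantize -q q5_K_M /model
```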
## Manually converting & quantizing models
### Prerequisites
Start by cloning the `llama.cpp` repo to your machine in another directory:
```
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
```
Next, install the Python dependencies:
```
pip install -r requirements.txt
```
Finally, build the `quantize` tool:
```
make quantize
```
### Convert the model
Run the correct conversion script for your model architecture:
```shell
# LlamaForCausalLM or MistralForCausalLM
python convert.py <path to model directory>
# FalconForCausalLM
python convert-falcon-hf-to-gguf.py <path to model directory>
# GPTNeoXForCausalLM
python convert-gptneox-hf-to-gguf.py <path to model directory>
# GPTBigCodeForCausalLM
python convert-starcoder-hf-to-gguf.py <path to model directory>
```
### Quantize the model
```
quantize <path to model dir>/ggml-model-f32.bin <path to model dir>/q4_0.bin q4_0
```
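The quantized file can then be imported just like in the GGUF steps above; for example (assuming the output was written to `./q4_0.bin`):

```shell
cat > Modelfile <<'EOF'
FROM ./q4_0.bin
EOF
ollama create example -f Modelfile
ollama run example "What is your favourite condiment?"
```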

116
docs/linux.md Normal file
View File

@@ -0,0 +1,116 @@
# Ollama on Linux
## Install
Install Ollama by running this one-liner:
```bash
curl https://ollama.ai/install.sh | sh
```
## Manual install
### Download the `ollama` binary
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
### Adding Ollama as a startup service (recommended)
Create a user for Ollama:
```bash
sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
```
Create a service file in `/etc/systemd/system/ollama.service`:
```ini
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
[Install]
WantedBy=default.target
```
Then start the service:
```bash
sudo systemctl daemon-reload
sudo systemctl enable ollama
```
### Install CUDA drivers (optional for Nvidia GPUs)
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```bash
nvidia-smi
```
### Start Ollama
Start Ollama using `systemd`:
```bash
sudo systemctl start ollama
```
## Update
Update ollama by running the install script again:
```bash
curl https://ollama.ai/install.sh | sh
```
Or by downloading the ollama binary:
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
## Viewing logs
To view logs of Ollama running as a startup service, run:
```bash
journalctl -u ollama
```
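To follow the log output live, pass `-f`:

```bash
journalctl -u ollama -f
```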
## Uninstall
Remove the ollama service:
```bash
sudo systemctl stop ollama
sudo systemctl disable ollama
sudo rm /etc/systemd/system/ollama.service
```
Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`):
```bash
sudo rm $(which ollama)
```
Remove the downloaded models and Ollama service user:
```bash
sudo rm -r /usr/share/ollama
sudo userdel ollama
```

View File

@@ -1,6 +1,6 @@
# Ollama Model File
> Note: this model file syntax is in development
> Note: this `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama.
@@ -12,7 +12,6 @@ A model file is the blueprint to create and share models with Ollama.
- [FROM (Required)](#from-required)
- [Build from llama2](#build-from-llama2)
- [Build from a bin file](#build-from-a-bin-file)
- [EMBED](#embed)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template)
@@ -24,7 +23,7 @@ A model file is the blueprint to create and share models with Ollama.
## Format
The format of the Modelfile:
The format of the `Modelfile`:
```modelfile
# comment
@@ -42,9 +41,9 @@ INSTRUCTION arguments
## Examples
An example of a model file creating a mario blueprint:
An example of a `Modelfile` creating a mario blueprint:
```
```modelfile
FROM llama2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
@@ -57,9 +56,9 @@ SYSTEM You are Mario from super mario bros, acting as an assistant.
To use this:
1. Save it as a file (eg. `Modelfile`)
2. `ollama create NAME -f <location of the file eg. ./Modelfile>'`
3. `ollama run NAME`
1. Save it as a file (e.g. `Modelfile`)
2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>`
3. `ollama run choose-a-model-name`
4. Start using the model!
More examples are available in the [examples directory](../examples).
@@ -68,44 +67,34 @@ More examples are available in the [examples directory](../examples).
### FROM (Required)
The FROM instruction defines the base model to use when creating a model.
The `FROM` instruction defines the base model to use when creating a model.
```
```modelfile
FROM <model name>:<tag>
```
#### Build from llama2
```
```modelfile
FROM llama2
```
A list of available base models:
<https://github.com/jmorganca/ollama#model-library>
#### Build from a bin file
#### Build from a `bin` file
```
```modelfile
FROM ./ollama-model.bin
```
This bin file location should be specified as an absolute path or relative to the Modelfile location.
### EMBED
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
EMBED <path to directory>/*.txt
```
This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
### PARAMETER
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
```
```modelfile
PARAMETER <parameter> <parametervalue>
```
@@ -118,19 +107,21 @@ PARAMETER <parameter> <parametervalue>
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 |
| num_gpu | The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 1 |
| num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 |
| num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| stop | Sets the stop sequences to use. | string | stop "AI assistant:" |
| seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
| stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
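These same parameters can also be set per request; as a rough sketch (assuming the optional `options` field of the `/api/generate` endpoint), the settings in the table above map onto an API call like this:

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "Why is the sky blue?",
  "options": {
    "num_ctx": 4096,
    "stop": ["AI assistant:"]
  }
}'
```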
### TEMPLATE
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific.
`TEMPLATE` is the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and the syntax may be model specific. You can usually find the template for a given model in the readme for that model.
#### Template Variables
@@ -140,7 +131,7 @@ PARAMETER <parameter> <parametervalue>
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```
```modelfile
TEMPLATE """
{{- if .First }}
### System:
@@ -160,7 +151,7 @@ SYSTEM """<system message>"""
The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
```
```modelfile
SYSTEM """<system message>"""
```
@@ -168,7 +159,7 @@ SYSTEM """<system message>"""
The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
```
```modelfile
ADAPTER ./ollama-lora.bin
```
@@ -176,7 +167,7 @@ ADAPTER ./ollama-lora.bin
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
```
```modelfile
LICENSE """
<license text>
"""
@@ -184,5 +175,5 @@ LICENSE """
## Notes
- the **modelfile is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make them easier to distinguish from arguments.
- Instructions can be in any order. In the examples, we start with the FROM instruction to keep it easily readable.

View File

@@ -23,13 +23,17 @@ const answer = await ollama.call(`why is the sky blue?`);
console.log(answer);
```
That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's build that part of the app.
That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
```bash
npm install cheerio
```
```javascript
import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio";
const loader = new CheerioWebBaseLoader("https://en.wikipedia.org/wiki/2023_Hawaii_wildfires");
const data = loader.load();
const data = await loader.load();
```
That will load the document. Although this page is smaller than the Odyssey, it is certainly bigger than the context size for most LLMs. So we are going to need to split it into smaller pieces, and then select just the pieces relevant to our question. This is a great use for a vector datastore. In this example, we will use the **MemoryVectorStore** that is part of **LangChain**. But there is one more thing we need to get the content into the datastore. We have to run an embeddings process that converts the tokens in the text into a series of vectors. And for that, we are going to use **Tensorflow**. There is a lot going on in this one. First, install the **Tensorflow** components that we need.
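The exact packages depend on the embeddings integration used; as an assumption, the TensorFlow-based embeddings in LangChain typically pull in the tfjs packages, roughly:

```bash
npm install @tensorflow/tfjs-core @tensorflow/tfjs-converter @tensorflow/tfjs-backend-cpu @tensorflow-models/universal-sentence-encoder
```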

488
editor/buffer.go Normal file
View File

@@ -0,0 +1,488 @@
package editor
import (
"fmt"
"strings"
"github.com/emirpasic/gods/lists/arraylist"
"golang.org/x/term"
)
type Buffer struct {
PosX int
PosY int
Buf []*arraylist.List
Prompt *Prompt
WordWrap int
ScreenWidth int
ScreenHeight int
}
func NewBuffer(prompt *Prompt) (*Buffer, error) {
width, height, err := term.GetSize(0)
if err != nil {
fmt.Println("Error getting size:", err)
return nil, err
}
b := &Buffer{
PosX: 0,
PosY: 0,
Buf: []*arraylist.List{arraylist.New()},
Prompt: prompt,
ScreenWidth: width,
ScreenHeight: height,
}
return b, nil
}
func (b *Buffer) LineWidth() int {
return b.ScreenWidth - len(b.Prompt.Prompt)
}
func (b *Buffer) findWordAtPos(line string, pos int) string {
return ""
}
func (b *Buffer) addLine(row int) {
if row+1 == len(b.Buf) {
b.Buf = append(b.Buf, arraylist.New())
} else {
b.Buf = append(b.Buf, nil)
copy(b.Buf[row+2:], b.Buf[row+1:])
b.Buf[row+1] = arraylist.New()
}
}
func (b *Buffer) Add(r rune) {
switch r {
case CharCtrlJ, CharEnter:
b.addLine(b.PosY)
// handle Ctrl-J in the middle of a line
var remainingText string
if b.PosX < b.Buf[b.PosY].Size() {
fmt.Print(ClearToEOL)
remainingText = b.StringLine(b.PosX, b.PosY)
for cnt := 0; cnt < len(remainingText); cnt++ {
b.Buf[b.PosY].Remove(b.Buf[b.PosY].Size() - 1)
b.Buf[b.PosY+1].Add(rune(remainingText[cnt]))
}
}
b.PosY++
b.PosX = 0
fmt.Printf("\n... " + ClearToEOL)
b.drawRemaining()
default:
if b.PosX == b.Buf[b.PosY].Size() {
fmt.Printf("%c", r)
b.PosX++
b.Buf[b.PosY].Add(r)
wrap, prefix, offset := b.splitLineInsert(b.PosY, b.PosX)
if wrap {
fmt.Print(CursorHide + cursorLeftN(len(prefix)+1) + ClearToEOL)
fmt.Printf("\n%s... %s%c", ClearToEOL, prefix, r)
b.PosY++
b.PosX = offset
b.ResetCursor()
b.drawRemaining()
fmt.Print(CursorShow)
}
} else {
fmt.Printf("%c", r)
b.Buf[b.PosY].Insert(b.PosX, r)
b.PosX++
_, prefix, offset := b.splitLineInsert(b.PosY, b.PosX)
fmt.Print(CursorHide)
if b.PosX > b.Buf[b.PosY].Size() {
if offset > 0 {
fmt.Print(cursorLeftN(offset))
}
fmt.Print(ClearToEOL + CursorDown + CursorBOL + ClearToEOL)
fmt.Printf("... %s", prefix[:offset])
b.PosY++
b.PosX = offset
b.ResetCursor()
}
b.drawRemaining()
fmt.Print(CursorShow)
}
}
}
func (b *Buffer) ResetCursor() {
fmt.Print(CursorHide + CursorBOL)
fmt.Print(cursorRightN(b.PosX + len(b.Prompt.Prompt)))
fmt.Print(CursorShow)
}
func (b *Buffer) splitLineInsert(posY, posX int) (bool, string, int) {
line := b.StringLine(0, posY)
screenEdge := b.LineWidth() - 5
// if the current line doesn't need to be reflowed, none of the other
// lines will either
if len(line) <= screenEdge {
return false, "", 0
}
// we know we're going to have to insert onto the next line, so
// add another line if there isn't one already
if posY == len(b.Buf)-1 {
b.Buf = append(b.Buf, arraylist.New())
}
// make a truncated version of the current line
currLine := line[:screenEdge]
// figure out where the last space in the line is
idx := strings.LastIndex(currLine, " ")
// deal with strings that don't have spaces in them
if idx == -1 {
idx = len(currLine) - 1
}
// if the next line already has text on it, we need
// to add a space to insert our new word
if b.Buf[posY+1].Size() > 0 {
b.Buf[posY+1].Insert(0, ' ')
}
// calculate the number of characters we need to remove
// from the current line to add to the next one
totalChars := len(line) - idx - 1
for cnt := 0; cnt < totalChars; cnt++ {
b.Buf[posY].Remove(b.Buf[posY].Size() - 1)
b.Buf[posY+1].Insert(0, rune(line[len(line)-1-cnt]))
}
// remove the trailing space
b.Buf[posY].Remove(b.Buf[posY].Size() - 1)
// wrap any further lines
if b.Buf[posY+1].Size() > b.LineWidth()-5 {
b.splitLineInsert(posY+1, 0)
}
return true, currLine[idx+1:], posX - idx - 1
}
func (b *Buffer) drawRemaining() {
remainingText := b.StringFromRow(b.PosY)
remainingText = remainingText[b.PosX:]
fmt.Print(CursorHide + ClearToEOL)
var rowCount int
for _, c := range remainingText {
fmt.Print(string(c))
if c == '\n' {
fmt.Print("... " + ClearToEOL)
rowCount++
}
}
if rowCount > 0 {
fmt.Print(cursorUpN(rowCount))
}
b.ResetCursor()
}
func (b *Buffer) findWordBeginning(posX int) int {
for {
if posX < 0 {
return -1
}
r, ok := b.Buf[b.PosY].Get(posX)
if !ok {
return -1
} else if r.(rune) == ' ' {
return posX
}
posX--
}
}
func (b *Buffer) Delete() {
if b.PosX < b.Buf[b.PosY].Size()-1 {
b.Buf[b.PosY].Remove(b.PosX)
b.drawRemaining()
} else {
b.joinLines()
}
}
func (b *Buffer) joinLines() {
lineLen := b.Buf[b.PosY].Size()
for cnt := 0; cnt < lineLen; cnt++ {
r, _ := b.Buf[b.PosY].Get(0)
b.Buf[b.PosY].Remove(0)
b.Buf[b.PosY-1].Add(r)
}
}
func (b *Buffer) Remove() {
if b.PosX > 0 {
fmt.Print(CursorLeft + " " + CursorLeft)
b.PosX--
b.Buf[b.PosY].Remove(b.PosX)
if b.PosX < b.Buf[b.PosY].Size() {
fmt.Print(ClearToEOL)
b.drawRemaining()
}
} else if b.PosX == 0 && b.PosY > 0 {
b.joinLines()
lastPos := b.Buf[b.PosY-1].Size()
var cnt int
b.PosX = lastPos
b.PosY--
fmt.Print(CursorHide)
for {
if b.PosX+cnt > b.LineWidth()-5 {
// the concatenated line won't fit, so find the beginning of the word
// and copy the rest of the string from there
idx := b.findWordBeginning(b.PosX)
lineLen := b.Buf[b.PosY].Size()
for offset := idx + 1; offset < lineLen; offset++ {
r, _ := b.Buf[b.PosY].Get(idx + 1)
b.Buf[b.PosY].Remove(idx + 1)
b.Buf[b.PosY+1].Add(r)
}
// remove the trailing space
b.Buf[b.PosY].Remove(idx)
fmt.Print(CursorUp + ClearToEOL)
b.PosX = 0
b.drawRemaining()
fmt.Print(CursorDown)
if idx > 0 {
if lastPos-idx-1 > 0 {
b.PosX = lastPos - idx - 1
b.ResetCursor()
}
}
b.PosY++
break
}
r, ok := b.Buf[b.PosY].Get(b.PosX + cnt)
if !ok {
// found the end of the string
fmt.Print(CursorUp + cursorRightN(b.PosX) + ClearToEOL)
b.drawRemaining()
break
}
if r == ' ' {
// found the end of the word
lineLen := b.Buf[b.PosY].Size()
for offset := b.PosX + cnt + 1; offset < lineLen; offset++ {
r, _ := b.Buf[b.PosY].Get(b.PosX + cnt + 1)
b.Buf[b.PosY].Remove(b.PosX + cnt + 1)
b.Buf[b.PosY+1].Add(r)
}
fmt.Print(CursorUp + cursorRightN(b.PosX) + ClearToEOL)
b.drawRemaining()
break
}
cnt++
}
fmt.Print(CursorShow)
}
}
func (b *Buffer) RemoveBefore() {
for {
if b.PosX == 0 && b.PosY == 0 {
break
}
b.Remove()
}
}
func (b *Buffer) RemoveWordBefore() {
if b.PosX > 0 || b.PosY > 0 {
var foundNonspace bool
for {
xPos := b.PosX
yPos := b.PosY
v, _ := b.Buf[yPos].Get(xPos - 1)
if v == ' ' {
if !foundNonspace {
b.Remove()
} else {
break
}
} else {
foundNonspace = true
b.Remove()
}
if xPos == 0 && yPos == 0 {
break
}
}
}
}
func (b *Buffer) StringLine(x, y int) string {
if y >= len(b.Buf) {
return ""
}
var output string
for cnt := x; cnt < b.Buf[y].Size(); cnt++ {
r, _ := b.Buf[y].Get(cnt)
output += string(r.(rune))
}
return output
}
func (b *Buffer) String() string {
return b.StringFromRow(0)
}
func (b *Buffer) StringFromRow(n int) string {
var output []string
for _, row := range b.Buf[n:] {
var currLine string
for cnt := 0; cnt < row.Size(); cnt++ {
r, _ := row.Get(cnt)
currLine += string(r.(rune))
}
currLine = strings.TrimRight(currLine, " ")
output = append(output, currLine)
}
return strings.Join(output, "\n")
}
func (b *Buffer) cursorUp() {
fmt.Print(CursorUp)
b.ResetCursor()
}
func (b *Buffer) cursorDown() {
fmt.Print(CursorDown)
b.ResetCursor()
}
func (b *Buffer) MoveUp() {
if b.PosY > 0 {
b.PosY--
if b.Buf[b.PosY].Size() < b.PosX {
b.PosX = b.Buf[b.PosY].Size()
}
b.cursorUp()
} else {
fmt.Print("\a")
}
}
func (b *Buffer) MoveDown() {
if b.PosY < len(b.Buf)-1 {
b.PosY++
if b.Buf[b.PosY].Size() < b.PosX {
b.PosX = b.Buf[b.PosY].Size()
}
b.cursorDown()
} else {
fmt.Print("\a")
}
}
func (b *Buffer) MoveLeft() {
if b.PosX > 0 {
b.PosX--
fmt.Print(CursorLeft)
} else if b.PosY > 0 {
b.PosX = b.Buf[b.PosY-1].Size()
b.PosY--
b.cursorUp()
} else if b.PosX == 0 && b.PosY == 0 {
fmt.Print("\a")
}
}
func (b *Buffer) MoveRight() {
if b.PosX < b.Buf[b.PosY].Size() {
b.PosX++
fmt.Print(CursorRight)
} else if b.PosY < len(b.Buf)-1 {
b.PosY++
b.PosX = 0
b.cursorDown()
} else {
fmt.Print("\a")
}
}
func (b *Buffer) MoveToBOL() {
if b.PosX > 0 {
b.PosX = 0
b.ResetCursor()
}
}
func (b *Buffer) MoveToEOL() {
if b.PosX < b.Buf[b.PosY].Size() {
b.PosX = b.Buf[b.PosY].Size()
b.ResetCursor()
}
}
func (b *Buffer) MoveToEnd() {
fmt.Print(CursorHide)
yDiff := len(b.Buf)-1 - b.PosY
if yDiff > 0 {
fmt.Print(cursorDownN(yDiff))
}
b.PosY = len(b.Buf)-1
b.MoveToEOL()
fmt.Print(CursorShow)
}
func cursorLeftN(n int) string {
return fmt.Sprintf(CursorLeftN, n)
}
func cursorRightN(n int) string {
return fmt.Sprintf(CursorRightN, n)
}
func cursorUpN(n int) string {
return fmt.Sprintf(CursorUpN, n)
}
func cursorDownN(n int) string {
return fmt.Sprintf(CursorDownN, n)
}
func (b *Buffer) ClearScreen() {
fmt.Printf(CursorHide + ClearScreen + CursorReset + b.Prompt.Prompt)
if b.IsEmpty() {
ph := b.Prompt.Placeholder
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
} else {
currPosX := b.PosX
currPosY := b.PosY
b.PosX = 0
b.PosY = 0
b.drawRemaining()
b.PosX = currPosX
b.PosY = currPosY
fmt.Print(CursorReset + cursorRightN(len(b.Prompt.Prompt)))
if b.PosY > 0 {
fmt.Print(cursorDownN(b.PosY))
}
if b.PosX > 0 {
fmt.Print(cursorRightN(b.PosX))
}
}
fmt.Print(CursorShow)
}
func (b *Buffer) IsEmpty() bool {
return len(b.Buf) == 1 && b.Buf[0].Empty()
}

205
editor/editor.go Normal file
View File

@@ -0,0 +1,205 @@
package editor
import (
"bufio"
"fmt"
"io"
"os"
"syscall"
)
type Prompt struct {
Prompt string
AltPrompt string
Placeholder string
AltPlaceholder string
UseAlt bool
}
type Terminal struct {
outchan chan rune
}
type Instance struct {
Prompt *Prompt
Terminal *Terminal
}
func New(prompt Prompt) (*Instance, error) {
term, err := NewTerminal()
if err != nil {
return nil, err
}
return &Instance{
Prompt: &prompt,
Terminal: term,
}, nil
}
func (i *Instance) HandleInput() (string, error) {
prompt := i.Prompt.Prompt
if i.Prompt.UseAlt {
prompt = i.Prompt.AltPrompt
}
fmt.Print(prompt)
termios, err := SetRawMode(syscall.Stdin)
if err != nil {
return "", err
}
defer UnsetRawMode(syscall.Stdin, termios)
buf, _ := NewBuffer(i.Prompt)
var esc bool
var escex bool
var pasteMode PasteMode
fmt.Print(StartBracketedPaste)
defer fmt.Printf(EndBracketedPaste)
for {
if buf.IsEmpty() {
ph := i.Prompt.Placeholder
if i.Prompt.UseAlt {
ph = i.Prompt.AltPlaceholder
}
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
}
r, err := i.Terminal.Read()
if err != nil {
return "", io.EOF
}
if buf.IsEmpty() {
fmt.Print(ClearToEOL)
}
if escex {
escex = false
switch r {
case KeyUp:
buf.MoveUp()
case KeyDown:
buf.MoveDown()
case KeyLeft:
buf.MoveLeft()
case KeyRight:
buf.MoveRight()
case CharBracketedPaste:
var code string
for cnt := 0; cnt < 3; cnt++ {
r, err = i.Terminal.Read()
if err != nil {
return "", io.EOF
}
code += string(r)
}
if code == CharBracketedPasteStart {
pasteMode = PasteModeStart
} else if code == CharBracketedPasteEnd {
pasteMode = PasteModeEnd
}
case MetaStart:
buf.MoveToBOL()
case MetaEnd:
buf.MoveToEOL()
}
continue
} else if esc {
esc = false
switch r {
case CharEscapeEx:
escex = true
}
continue
}
switch r {
case CharNull:
continue
case CharEsc:
esc = true
case CharInterrupt:
return "", ErrInterrupt
case CharLineStart:
buf.MoveToBOL()
case CharLineEnd:
buf.MoveToEOL()
case CharBackward:
buf.MoveLeft()
case CharForward:
buf.MoveRight()
case CharBackspace, CharCtrlH:
buf.Remove()
case CharTab:
for cnt := 0; cnt < 8; cnt++ {
buf.Add(' ')
}
case CharDelete:
if len(buf.Buf) > 0 && buf.Buf[0].Size() > 0 {
buf.Delete()
} else {
return "", io.EOF
}
case CharCtrlU:
buf.RemoveBefore()
case CharCtrlL:
buf.ClearScreen()
case CharCtrlW:
buf.RemoveWordBefore()
case CharCtrlJ:
buf.Add(r)
case CharEnter:
if pasteMode == PasteModeStart {
buf.Add(r)
continue
}
buf.MoveToEnd()
fmt.Println()
return buf.String(), nil
default:
if r >= CharSpace || r == CharEnter {
buf.Add(r)
}
}
}
}
func NewTerminal() (*Terminal, error) {
t := &Terminal{
outchan: make(chan rune),
}
go t.ioloop()
return t, nil
}
func (t *Terminal) ioloop() {
buf := bufio.NewReader(os.Stdin)
for {
r, _, err := buf.ReadRune()
if err != nil {
close(t.outchan)
break
}
t.outchan <- r
}
}
func (t *Terminal) Read() (rune, error) {
r, ok := <-t.outchan
if !ok {
return 0, io.EOF
}
return r, nil
}

17
editor/errors.go Normal file
View File

@@ -0,0 +1,17 @@
package editor
import (
"errors"
)
var (
ErrInterrupt = errors.New("Interrupt")
)
type InterruptError struct {
Line []rune
}
func (*InterruptError) Error() string {
return "Interrupted"
}

36
editor/term.go Normal file
View File

@@ -0,0 +1,36 @@
//go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris
package editor
import (
"syscall"
)
type Termios syscall.Termios
func SetRawMode(fd int) (*Termios, error) {
termios, err := getTermios(fd)
if err != nil {
return nil, err
}
newTermios := *termios
newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB
newTermios.Cflag |= syscall.CS8
newTermios.Cc[syscall.VMIN] = 1
newTermios.Cc[syscall.VTIME] = 0
return termios, setTermios(fd, &newTermios)
}
func UnsetRawMode(fd int, termios *Termios) error {
return setTermios(fd, termios)
}
// IsTerminal returns true if the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
_, err := getTermios(fd)
return err == nil
}

24
editor/term_bsd.go Normal file
View File

@@ -0,0 +1,24 @@
//go:build darwin || freebsd || netbsd || openbsd
package editor
import (
"syscall"
"unsafe"
)
func getTermios(fd int) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}
return nil
}

27
editor/term_linux.go Normal file
View File

@@ -0,0 +1,27 @@
//go:build linux || solaris
package editor
import (
"syscall"
"unsafe"
)
const tcgets = 0x5401
const tcsets = 0x5402
func getTermios(fd int) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}
return nil
}

62
editor/term_windows.go Normal file
View File

@@ -0,0 +1,62 @@
package editor
import (
"syscall"
"unsafe"
)
const (
enableLineInput = 2
enableWindowInput = 8
enableMouseInput = 16
enableInsertMode = 32
enableQuickEditMode = 64
enableExtendedFlags = 128
enableProcessedOutput = 1
enableWrapAtEolOutput = 2
enableAutoPosition = 256 // Cursor position is not affected by writing data to the console.
enableEchoInput = 4 // Characters are written to the console as they're read.
enableProcessedInput = 1 // Enables input processing (like recognizing Ctrl+C).
)
var kernel32 = syscall.NewLazyDLL("kernel32.dll")
var (
procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
procSetConsoleMode = kernel32.NewProc("SetConsoleMode")
)
type State struct {
mode uint32
}
// IsTerminal checks if the given file descriptor is associated with a terminal
func IsTerminal(fd int) bool {
var st uint32
r, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
// if the call succeeds and doesn't produce an error, it's a terminal
return r != 0 && e == 0
}
func SetRawMode(fd int) (*State, error) {
var st uint32
// retrieve the current mode of the terminal
_, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
if e != 0 {
return nil, error(e)
}
// modify the mode to set it to raw
raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput)
// apply the new mode to the terminal
_, _, e = syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(raw), 0)
if e != 0 {
return nil, error(e)
}
// return the original state so that it can be restored later
return &State{st}, nil
}
func UnsetRawMode(fd int, state *State) error {
_, _, err := syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(state.mode), 0)
return err
}

86
editor/types.go Normal file
View File

@@ -0,0 +1,86 @@
package editor
const (
CharNull = 0
CharLineStart = 1
CharBackward = 2
CharInterrupt = 3
CharDelete = 4
CharLineEnd = 5
CharForward = 6
CharBell = 7
CharCtrlH = 8
CharTab = 9
CharCtrlJ = 10
CharKill = 11
CharCtrlL = 12
CharEnter = 13
CharNext = 14
CharPrev = 16
CharBckSearch = 18
CharFwdSearch = 19
CharTranspose = 20
CharCtrlU = 21
CharCtrlW = 23
CharCtrlY = 25
CharCtrlZ = 26
CharEsc = 27
CharSpace = 32
CharEscapeEx = 91
CharBackspace = 127
)
const (
KeyDel = 51
KeyUp = 65
KeyDown = 66
KeyRight = 67
KeyLeft = 68
MetaEnd = 70
MetaStart = 72
)
const (
CursorUp = "\033[1A"
CursorDown = "\033[1B"
CursorRight = "\033[1C"
CursorLeft = "\033[1D"
CursorSave = "\033[s"
CursorRestore = "\033[u"
CursorUpN = "\033[%dA"
CursorDownN = "\033[%dB"
CursorRightN = "\033[%dC"
CursorLeftN = "\033[%dD"
CursorEOL = "\033[E"
CursorBOL = "\033[1G"
CursorHide = "\033[?25l"
CursorShow = "\033[?25h"
ClearToEOL = "\033[K"
ClearLine = "\033[2K"
ClearScreen = "\033[2J"
CursorReset = "\033[0;0f"
ColorGrey = "\033[38;5;245m"
ColorDefault = "\033[0m"
StartBracketedPaste = "\033[?2004h"
EndBracketedPaste = "\033[?2004l"
)
const (
CharBracketedPaste = 50
CharBracketedPasteStart = "00~"
CharBracketedPasteEnd = "01~"
)
type PasteMode int
const (
PasteModeOff = iota
PasteModeStart
PasteModeEnd
)

171
examples/.gitignore vendored Normal file
View File

@@ -0,0 +1,171 @@
node_modules
# OSX
.DS_STORE
# Models
models/
# Local Chroma db
.chroma/
db/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View File

@@ -1,15 +1,3 @@
# Examples
This directory contains different examples of using Ollama
To create a model:
```
ollama create example -f <example file>
```
To run a model:
```
ollama run example
```
This directory contains different examples of using Ollama.

View File

@@ -0,0 +1,10 @@
# Bash Shell examples
When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:
`ollama run llama2 < sourcequestions.txt`
This concept is used in the following example.
## Compare Models
`comparemodels.sh` is a script that runs all the questions in `sourcequestions.txt` using any 4 models of your choice that you have already pulled from the Ollama library or created locally.

View File

@@ -0,0 +1,64 @@
#! /usr/bin/env bash
# Compare multiple models by running them with the same questions
NUMBEROFCHOICES=4
SELECTIONS=()
declare -a SUMS=()
# Get the list of models
CHOICES=$(ollama list | awk '{print $1}')
# Select which models to run as a comparison
echo "Select $NUMBEROFCHOICES models to compare:"
select ITEM in $CHOICES; do
if [[ -n $ITEM ]]; then
echo "You have selected $ITEM"
SELECTIONS+=("$ITEM")
((COUNT++))
if [[ $COUNT -eq $NUMBEROFCHOICES ]]; then
break
fi
else
echo "Invalid selection"
fi
done
# Loop through each of the selected models
for ITEM in "${SELECTIONS[@]}"; do
echo "--------------------------------------------------------------"
echo "Loading the model $ITEM into memory"
ollama run "$ITEM" ""
echo "--------------------------------------------------------------"
echo "Running the questions through the model $ITEM"
COMMAND_OUTPUT=$(ollama run "$ITEM" --verbose < sourcequestions.txt 2>&1| tee /dev/stderr)
# eval duration is sometimes listed in seconds and sometimes in milliseconds.
# Add up the values for each model
SUM=$(echo "$COMMAND_OUTPUT" | awk '
/eval duration:/ {
value = $3
if (index(value, "ms") > 0) {
gsub("ms", "", value)
value /= 1000
} else {
gsub("s", "", value)
}
sum += value
}
END { print sum }')
SUMS+=("All questions for $ITEM completed in $SUM seconds")
done
echo ""
echo "--------------------------------------------------------------"
echo -e "Sums of eval durations for each run:"
for val in "${SUMS[@]}"; do
echo "$val"
done
echo "--------------------------------------------------------------"
echo "Comparison complete. Now you can decide"
echo "which model is best."
echo "--------------------------------------------------------------"

View File

@@ -0,0 +1,7 @@
Why is the sky blue
What is a black hole
Explain the big bang theory like I am 5?
What is the quickest way to win a game of Monopoly with 3 others?
Why does a vacuum bottle keep my coffee hot and my milkshake cold?
What is the difference between a meteor, a meteorite, and a meteoroid?
Create an array with 5 items and print to the console. Do this in Python, C#, Typescript, and Rust.

View File

View File

@@ -0,0 +1,27 @@
package main
import (
"bytes"
"fmt"
"io"
"log"
"net/http"
"os"
)
func main() {
body := []byte(`{"model":"mistral"}`)
resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body))
if err != nil {
fmt.Print(err.Error())
os.Exit(1)
}
responseData, err := io.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
}
fmt.Println(string(responseData))
}
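Assuming this file is saved as `main.go` and a local Ollama server is running on the default port, it can be run directly:

```bash
go run main.go
```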

View File

@@ -0,0 +1,36 @@
# Deploy Ollama to Kubernetes
## Prerequisites
- Ollama: https://ollama.ai/download
- Kubernetes cluster. This example will use Google Kubernetes Engine.
## Steps
1. Create the Ollama namespace, daemon set, and service
```bash
kubectl apply -f cpu.yaml
```
1. Port forward the Ollama service to connect and use it locally
```bash
kubectl -n ollama port-forward service/ollama 11434:80
```
1. Pull and run a model, for example `orca-mini:3b`
```bash
ollama run orca-mini:3b
```
## (Optional) Hardware Acceleration
Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
Once configured, create a GPU enabled Ollama deployment.
```bash
kubectl apply -f gpu.yaml
```
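To confirm the GPU-enabled pod scheduled onto a GPU node, the usual kubectl checks apply, for example:

```bash
kubectl -n ollama get pods
kubectl -n ollama describe pods -l name=ollama
```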

View File

@@ -0,0 +1,42 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: ollama
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
namespace: ollama
spec:
selector:
matchLabels:
name: ollama
template:
metadata:
labels:
name: ollama
spec:
containers:
- name: ollama
image: ollama/ollama:latest
ports:
- name: http
containerPort: 11434
protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
name: ollama
namespace: ollama
spec:
type: ClusterIP
selector:
name: ollama
ports:
- port: 80
name: http
targetPort: http
protocol: TCP

View File

@@ -0,0 +1,56 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: ollama
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
namespace: ollama
spec:
strategy:
type: Recreate
selector:
matchLabels:
name: ollama
template:
metadata:
labels:
name: ollama
spec:
containers:
- name: ollama
image: ollama/ollama:latest
env:
- name: PATH
value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
- name: LD_LIBRARY_PATH
value: /usr/local/nvidia/lib64
ports:
- name: http
containerPort: 11434
protocol: TCP
resources:
limits:
nvidia.com/gpu: 1
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
---
apiVersion: v1
kind: Service
metadata:
name: ollama
namespace: ollama
spec:
type: ClusterIP
selector:
name: ollama
ports:
- port: 80
name: http
targetPort: http
protocol: TCP

View File

@@ -6,7 +6,6 @@ PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')
# Define the Chroma settings
CHROMA_SETTINGS = Settings(
chroma_db_impl='duckdb+parquet',
persist_directory=PERSIST_DIRECTORY,
anonymized_telemetry=False
)

View File

@@ -150,7 +150,7 @@ def main():
print("Creating new vectorstore")
texts = process_documents()
print(f"Creating embeddings. May take some minutes...")
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
db.persist()
db = None

View File

@@ -4,6 +4,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.llms import Ollama
import chromadb
import os
import argparse
import time
@@ -22,7 +23,9 @@ def main():
# Parse the command line arguments
args = parse_arguments()
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
# activate/deactivate the streaming StdOut callback for LLMs
callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]

View File

@@ -0,0 +1,14 @@
langchain==0.0.274
gpt4all==1.0.8
chromadb==0.4.7
llama-cpp-python==0.1.81
urllib3==2.0.4
PyMuPDF==1.23.5
python-dotenv==1.0.0
unstructured==0.10.8
extract-msg==0.45.0
tabulate==0.9.0
pandoc==2.3
pypandoc==1.11
tqdm==4.66.1
sentence_transformers==2.2.2

View File

@@ -0,0 +1,21 @@
# LangChain
This example is a basic "hello world" for using LangChain with Ollama from Node.js and TypeScript.
## Setup
```shell
npm install
```
## Run
```shell
ts-node main.ts
```
Running this example will print the response for "hello":
```plaintext
Hello! It's nice to meet you. hopefully you are having a great day! Is there something I can help you with or would you like to chat?
```

View File

@@ -0,0 +1,15 @@
import { Ollama } from 'langchain/llms/ollama';
async function main() {
const ollama = new Ollama({
model: 'mistral'
// other parameters can be found at https://js.langchain.com/docs/api/llms_ollama/classes/Ollama
})
const stream = await ollama.stream("Hello");
for await (const chunk of stream) {
process.stdout.write(chunk);
}
}
main();

View File

@@ -0,0 +1,997 @@
{
"name": "with-langchain-typescript-simplegenerate",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"langchain": "^0.0.165"
},
"devDependencies": {
"typescript": "^5.2.2"
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.6.2.tgz",
"integrity": "sha512-fB9PUj9RFT+XjkL+E9Ol864ZIJi+1P8WnbHspN3N3/GK2uSzjd0cbVIKTGgf4v3N8MwaQu+UWnU7C4BG/fap/g==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"digest-fetch": "^1.3.0",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
}
},
"node_modules/@types/node": {
"version": "18.18.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.4.tgz",
"integrity": "sha512-t3rNFBgJRugIhackit2mVcLfF6IRc0JE4oeizPQL8Zrm8n2WY/0wOdpOPhdtG0V9Q2TlW/axbF1MJ6z+Yj/kKQ=="
},
"node_modules/@types/node-fetch": {
"version": "2.6.6",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-95X8guJYhfqiuVVhRFxVQcf4hW/2bCuoPwDasMf/531STFoNoWTT7YDnWdXHEZKqAGUigmpG31r2FE70LwnzJw==",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.0"
}
},
"node_modules/@types/retry": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA=="
},
"node_modules/@types/uuid": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.5.tgz",
"integrity": "sha512-xfHdwa1FMJ082prjSJpoEI57GZITiQz10r3vEJCHa2khEFQjKy91aWKz6+zybzssCvXUwE1LQWgWVwZ4nYUvHQ=="
},
"node_modules/abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"dependencies": {
"event-target-shim": "^5.0.0"
},
"engines": {
"node": ">=6.5"
}
},
"node_modules/agentkeepalive": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz",
"integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==",
"dependencies": {
"humanize-ms": "^1.2.1"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/ansi-styles": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
"integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/argparse": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/base-64": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz",
"integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA=="
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/binary-extensions": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
"integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
"engines": {
"node": ">=8"
}
},
"node_modules/binary-search": {
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
"integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
},
"node_modules/camelcase": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz",
"integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/charenc": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz",
"integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==",
"engines": {
"node": "*"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/commander": {
"version": "10.0.1",
"resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz",
"integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==",
"engines": {
"node": ">=14"
}
},
"node_modules/crypt": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz",
"integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==",
"engines": {
"node": "*"
}
},
"node_modules/decamelize": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
"integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/digest-fetch": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz",
"integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==",
"dependencies": {
"base-64": "^0.1.0",
"md5": "^2.3.0"
}
},
"node_modules/event-target-shim": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
"engines": {
"node": ">=6"
}
},
"node_modules/eventemitter3": {
"version": "4.0.7",
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw=="
},
"node_modules/expr-eval": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz",
"integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg=="
},
"node_modules/flat": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
"integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==",
"bin": {
"flat": "cli.js"
}
},
"node_modules/form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/form-data-encoder": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="
},
"node_modules/formdata-node": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
"dependencies": {
"node-domexception": "1.0.0",
"web-streams-polyfill": "4.0.0-beta.3"
},
"engines": {
"node": ">= 12.20"
}
},
"node_modules/humanize-ms": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
"dependencies": {
"ms": "^2.0.0"
}
},
"node_modules/is-any-array": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ=="
},
"node_modules/is-buffer": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
},
"node_modules/js-tiktoken": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.7.tgz",
"integrity": "sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==",
"dependencies": {
"base64-js": "^1.5.1"
}
},
"node_modules/js-yaml": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
"dependencies": {
"argparse": "^2.0.1"
},
"bin": {
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/jsonpointer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz",
"integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/langchain": {
"version": "0.0.165",
"resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.165.tgz",
"integrity": "sha512-CpbNpjwaE+9lzjdw+pZz0VgnRrFivEgr7CVp9dDaAb5JpaJAA4V2v6uQ9ZPN+TSqupTQ79HFn2sfyZVEl2EG7Q==",
"dependencies": {
"@anthropic-ai/sdk": "^0.6.2",
"ansi-styles": "^5.0.0",
"binary-extensions": "^2.2.0",
"camelcase": "6",
"decamelize": "^1.2.0",
"expr-eval": "^2.0.2",
"flat": "^5.0.2",
"js-tiktoken": "^1.0.7",
"js-yaml": "^4.1.0",
"jsonpointer": "^5.0.1",
"langchainhub": "~0.0.6",
"langsmith": "~0.0.31",
"ml-distance": "^4.0.0",
"object-hash": "^3.0.0",
"openai": "~4.4.0",
"openapi-types": "^12.1.3",
"p-queue": "^6.6.2",
"p-retry": "4",
"uuid": "^9.0.0",
"yaml": "^2.2.1",
"zod": "^3.22.3",
"zod-to-json-schema": "^3.20.4"
},
"engines": {
"node": ">=18"
},
"peerDependencies": {
"@aws-crypto/sha256-js": "^5.0.0",
"@aws-sdk/client-bedrock-runtime": "^3.422.0",
"@aws-sdk/client-dynamodb": "^3.310.0",
"@aws-sdk/client-kendra": "^3.352.0",
"@aws-sdk/client-lambda": "^3.310.0",
"@aws-sdk/client-s3": "^3.310.0",
"@aws-sdk/client-sagemaker-runtime": "^3.310.0",
"@aws-sdk/client-sfn": "^3.310.0",
"@aws-sdk/credential-provider-node": "^3.388.0",
"@azure/storage-blob": "^12.15.0",
"@clickhouse/client": "^0.0.14",
"@cloudflare/ai": "^1.0.12",
"@elastic/elasticsearch": "^8.4.0",
"@getmetal/metal-sdk": "*",
"@getzep/zep-js": "^0.7.0",
"@gomomento/sdk": "^1.23.0",
"@google-ai/generativelanguage": "^0.2.1",
"@google-cloud/storage": "^6.10.1",
"@huggingface/inference": "^1.5.1",
"@mozilla/readability": "*",
"@notionhq/client": "^2.2.10",
"@opensearch-project/opensearch": "*",
"@pinecone-database/pinecone": "^1.1.0",
"@planetscale/database": "^1.8.0",
"@qdrant/js-client-rest": "^1.2.0",
"@raycast/api": "^1.55.2",
"@smithy/eventstream-codec": "^2.0.5",
"@smithy/protocol-http": "^3.0.6",
"@smithy/signature-v4": "^2.0.10",
"@smithy/util-utf8": "^2.0.0",
"@supabase/postgrest-js": "^1.1.1",
"@supabase/supabase-js": "^2.10.0",
"@tensorflow-models/universal-sentence-encoder": "*",
"@tensorflow/tfjs-converter": "*",
"@tensorflow/tfjs-core": "*",
"@upstash/redis": "^1.20.6",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.25.1",
"@xenova/transformers": "^2.5.4",
"@zilliz/milvus2-sdk-node": ">=2.2.7",
"apify-client": "^2.7.1",
"axios": "*",
"cassandra-driver": "^4.6.4",
"cheerio": "^1.0.0-rc.12",
"chromadb": "*",
"cohere-ai": ">=6.0.0",
"d3-dsv": "^2.0.0",
"epub2": "^3.0.1",
"faiss-node": "^0.3.0",
"fast-xml-parser": "^4.2.7",
"firebase-admin": "^11.9.0",
"google-auth-library": "^8.9.0",
"googleapis": "^126.0.1",
"hnswlib-node": "^1.4.2",
"html-to-text": "^9.0.5",
"ignore": "^5.2.0",
"ioredis": "^5.3.2",
"jsdom": "*",
"llmonitor": "*",
"lodash": "^4.17.21",
"mammoth": "*",
"mongodb": "^5.2.0",
"mysql2": "^3.3.3",
"neo4j-driver": "*",
"node-llama-cpp": "*",
"notion-to-md": "^3.1.0",
"pdf-parse": "1.1.1",
"peggy": "^3.0.2",
"pg": "^8.11.0",
"pg-copy-streams": "^6.0.5",
"pickleparser": "^0.1.0",
"playwright": "^1.32.1",
"portkey-ai": "^0.1.11",
"puppeteer": "^19.7.2",
"redis": "^4.6.4",
"replicate": "^0.18.0",
"sonix-speech-recognition": "^2.1.1",
"srt-parser-2": "^1.2.2",
"typeorm": "^0.3.12",
"typesense": "^1.5.3",
"usearch": "^1.1.1",
"vectordb": "^0.1.4",
"voy-search": "0.6.2",
"weaviate-ts-client": "^1.4.0",
"web-auth-library": "^1.0.3",
"youtube-transcript": "^1.0.6",
"youtubei.js": "^5.8.0"
},
"peerDependenciesMeta": {
"@aws-crypto/sha256-js": {
"optional": true
},
"@aws-sdk/client-bedrock-runtime": {
"optional": true
},
"@aws-sdk/client-dynamodb": {
"optional": true
},
"@aws-sdk/client-kendra": {
"optional": true
},
"@aws-sdk/client-lambda": {
"optional": true
},
"@aws-sdk/client-s3": {
"optional": true
},
"@aws-sdk/client-sagemaker-runtime": {
"optional": true
},
"@aws-sdk/client-sfn": {
"optional": true
},
"@aws-sdk/credential-provider-node": {
"optional": true
},
"@azure/storage-blob": {
"optional": true
},
"@clickhouse/client": {
"optional": true
},
"@cloudflare/ai": {
"optional": true
},
"@elastic/elasticsearch": {
"optional": true
},
"@getmetal/metal-sdk": {
"optional": true
},
"@getzep/zep-js": {
"optional": true
},
"@gomomento/sdk": {
"optional": true
},
"@google-ai/generativelanguage": {
"optional": true
},
"@google-cloud/storage": {
"optional": true
},
"@huggingface/inference": {
"optional": true
},
"@mozilla/readability": {
"optional": true
},
"@notionhq/client": {
"optional": true
},
"@opensearch-project/opensearch": {
"optional": true
},
"@pinecone-database/pinecone": {
"optional": true
},
"@planetscale/database": {
"optional": true
},
"@qdrant/js-client-rest": {
"optional": true
},
"@raycast/api": {
"optional": true
},
"@smithy/eventstream-codec": {
"optional": true
},
"@smithy/protocol-http": {
"optional": true
},
"@smithy/signature-v4": {
"optional": true
},
"@smithy/util-utf8": {
"optional": true
},
"@supabase/postgrest-js": {
"optional": true
},
"@supabase/supabase-js": {
"optional": true
},
"@tensorflow-models/universal-sentence-encoder": {
"optional": true
},
"@tensorflow/tfjs-converter": {
"optional": true
},
"@tensorflow/tfjs-core": {
"optional": true
},
"@upstash/redis": {
"optional": true
},
"@vercel/postgres": {
"optional": true
},
"@writerai/writer-sdk": {
"optional": true
},
"@xata.io/client": {
"optional": true
},
"@xenova/transformers": {
"optional": true
},
"@zilliz/milvus2-sdk-node": {
"optional": true
},
"apify-client": {
"optional": true
},
"axios": {
"optional": true
},
"cassandra-driver": {
"optional": true
},
"cheerio": {
"optional": true
},
"chromadb": {
"optional": true
},
"cohere-ai": {
"optional": true
},
"d3-dsv": {
"optional": true
},
"epub2": {
"optional": true
},
"faiss-node": {
"optional": true
},
"fast-xml-parser": {
"optional": true
},
"firebase-admin": {
"optional": true
},
"google-auth-library": {
"optional": true
},
"googleapis": {
"optional": true
},
"hnswlib-node": {
"optional": true
},
"html-to-text": {
"optional": true
},
"ignore": {
"optional": true
},
"ioredis": {
"optional": true
},
"jsdom": {
"optional": true
},
"llmonitor": {
"optional": true
},
"lodash": {
"optional": true
},
"mammoth": {
"optional": true
},
"mongodb": {
"optional": true
},
"mysql2": {
"optional": true
},
"neo4j-driver": {
"optional": true
},
"node-llama-cpp": {
"optional": true
},
"notion-to-md": {
"optional": true
},
"pdf-parse": {
"optional": true
},
"peggy": {
"optional": true
},
"pg": {
"optional": true
},
"pg-copy-streams": {
"optional": true
},
"pickleparser": {
"optional": true
},
"playwright": {
"optional": true
},
"portkey-ai": {
"optional": true
},
"puppeteer": {
"optional": true
},
"redis": {
"optional": true
},
"replicate": {
"optional": true
},
"sonix-speech-recognition": {
"optional": true
},
"srt-parser-2": {
"optional": true
},
"typeorm": {
"optional": true
},
"typesense": {
"optional": true
},
"usearch": {
"optional": true
},
"vectordb": {
"optional": true
},
"voy-search": {
"optional": true
},
"weaviate-ts-client": {
"optional": true
},
"web-auth-library": {
"optional": true
},
"youtube-transcript": {
"optional": true
},
"youtubei.js": {
"optional": true
}
}
},
"node_modules/langchainhub": {
"version": "0.0.6",
"resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.6.tgz",
"integrity": "sha512-SW6105T+YP1cTe0yMf//7kyshCgvCTyFBMTgH2H3s9rTAR4e+78DA/BBrUL/Mt4Q5eMWui7iGuAYb3pgGsdQ9w=="
},
"node_modules/langsmith": {
"version": "0.0.42",
"resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.0.42.tgz",
"integrity": "sha512-sFuN+e7E+pPBIRaRgFqZh/BRBWNHTZNAwi6uj4kydQawooCZYoJmM5snOkiQrhVSvAhgu6xFhLvmfvkPcKzD7w==",
"dependencies": {
"@types/uuid": "^9.0.1",
"commander": "^10.0.1",
"p-queue": "^6.6.2",
"p-retry": "4",
"uuid": "^9.0.0"
},
"bin": {
"langsmith": "dist/cli/main.cjs"
}
},
"node_modules/md5": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz",
"integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==",
"dependencies": {
"charenc": "0.0.2",
"crypt": "0.0.2",
"is-buffer": "~1.1.6"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/ml-array-mean": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
"integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==",
"dependencies": {
"ml-array-sum": "^1.1.6"
}
},
"node_modules/ml-array-sum": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz",
"integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==",
"dependencies": {
"is-any-array": "^2.0.0"
}
},
"node_modules/ml-distance": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.1.tgz",
"integrity": "sha512-feZ5ziXs01zhyFUUUeZV5hwc0f5JW0Sh0ckU1koZe/wdVkJdGxcP06KNQuF0WBTj8FttQUzcvQcpcrOp/XrlEw==",
"dependencies": {
"ml-array-mean": "^1.1.6",
"ml-distance-euclidean": "^2.0.0",
"ml-tree-similarity": "^1.0.0"
}
},
"node_modules/ml-distance-euclidean": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz",
"integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q=="
},
"node_modules/ml-tree-similarity": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz",
"integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==",
"dependencies": {
"binary-search": "^1.3.5",
"num-sort": "^2.0.0"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/num-sort": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz",
"integrity": "sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==",
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/object-hash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
"integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
"engines": {
"node": ">= 6"
}
},
"node_modules/openai": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.4.0.tgz",
"integrity": "sha512-JN0t628Kh95T0IrXl0HdBqnlJg+4Vq0Bnh55tio+dfCnyzHvMLiWyCM9m726MAJD2YkDU4/8RQB6rNbEq9ct2w==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"digest-fetch": "^1.3.0",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
}
},
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw=="
},
"node_modules/p-finally": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz",
"integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==",
"engines": {
"node": ">=4"
}
},
"node_modules/p-queue": {
"version": "6.6.2",
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz",
"integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==",
"dependencies": {
"eventemitter3": "^4.0.4",
"p-timeout": "^3.2.0"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-retry": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
"dependencies": {
"@types/retry": "0.12.0",
"retry": "^0.13.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/p-timeout": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz",
"integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==",
"dependencies": {
"p-finally": "^1.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/retry": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
"integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
"engines": {
"node": ">= 4"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"node_modules/typescript": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz",
"integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/web-streams-polyfill": {
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
"engines": {
"node": ">= 14"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/yaml": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.2.tgz",
"integrity": "sha512-N/lyzTPaJasoDmfV7YTrYCI0G/3ivm/9wdG0aHuheKowWQwGTsK0Eoiw6utmzAnI6pkJa0DUVygvp3spqqEKXg==",
"engines": {
"node": ">= 14"
}
},
"node_modules/zod": {
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz",
"integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
"node_modules/zod-to-json-schema": {
"version": "3.21.4",
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.4.tgz",
"integrity": "sha512-fjUZh4nQ1s6HMccgIeE0VP4QG/YRGPmyjO9sAh890aQKPEk3nqbfUXhMFaC+Dr5KvYBm8BCyvfpZf2jY9aGSsw==",
"peerDependencies": {
"zod": "^3.21.4"
}
}
}
}

View File

@@ -0,0 +1,8 @@
{
"devDependencies": {
"typescript": "^5.2.2"
},
"dependencies": {
"langchain": "^0.0.165"
}
}

View File

@@ -1,8 +0,0 @@
# Modelfile for creating Midjourney prompts from a topic
# This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/
# Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic
FROM nous-hermes
SYSTEM """
Embrace your role as an AI-powered creative assistant, employing Midjourney to manifest compelling AI-generated art. I will outline a specific image concept, and in response, you must produce an exhaustive, multifaceted prompt for Midjourney, ensuring every detail of the original concept is represented in your instructions. Midjourney doesn't do well with text, so after the prompt, give me instructions that I can use to create the titles in an image editor.
"""

View File

@@ -0,0 +1,23 @@
# Ten Tweets Modelfile
This is a simple modelfile that generates ten tweets based on any topic.
```bash
ollama create tentweets
ollama run tentweets
>>> underwater basketweaving
Great! Here are ten creative tweets about underwater basketweaving:
1. "Just discovered the ultimate stress-reliever: Underwater basketweaving! 🌊🧵 #UnderwaterBasketweaving #StressRelief"
2. "Who needs meditation when you can do underwater basketweaving? 😴👀 #PeacefulDistraction #UnderwaterBasketweaving"
3. "Just spent an hour in the pool and still managed to knot my basket. Goal: untangle it before next session. 💪🏽 #ChallengeAccepted #UnderwaterBasketweaving"
4. "When life gives you lemons, make underwater basketweaving! 🍋🧵 #LemonadeLife #UnderwaterBasketweaving"
5. "Just realized my underwater basketweaving skills could come in handy during a zombie apocalypse. 😂🧡 #SurvivalTips #UnderwaterBasketweaving"
6. "I'm not lazy, I'm just conserving energy for my next underwater basketweaving session. 😴💤 #LazyDay #UnderwaterBasketweaving"
7. "Just found my inner peace while doing underwater basketweaving. It's like meditation, but with knots! 🙏🧵 #Mindfulness #UnderwaterBasketweaving"
8. "Why study for exams when you can do underwater basketweaving and forget all your worries? 😜🧵 #ProcrastinationStation #UnderwaterBasketweaving"
9. "Just had to cut my underwater basketweaving session short due to a sudden urge to breathe. 🤯🌊 #AquaticAdventures #UnderwaterBasketweaving"
10. "I'm not sure what's more impressive: my underwater basketweaving skills or the fact that I didn't drown trying to make this tweet. 😅🧵 #Accomplishment
#UnderwaterBasketweaving"
```

View File

@@ -1,7 +1,7 @@
# Modelfile for creating a devops engineer assistant
# Run `ollama create devops-engineer -f ./Modelfile` and then `ollama run devops-engineer` and enter a topic
FROM llama2:13b
FROM mistral
PARAMETER temperature 1
SYSTEM """
You are a senior devops engineer, acting as an assistant. You offer help with cloud technologies like: Terraform, AWS, kubernetes, python. You answer with code examples when possible

View File

@@ -0,0 +1,40 @@
# DevOps Engineer Modelfile
This is a simple modelfile that will respond to any question a senior DevOps engineer should know how to solve.
````text
ollama create devopsengineer
ollama run devopsengineer
>>> how can i proxy all connections from inside the office through a single server that caches large files. Provide a config file for nginx to achieve this?
Here is an example configuration file for Nginx that can be used to proxy all connections from inside the office network to a centralized cache server:
```perl
http {
upstream cache_server {
server <cache_server_ip>;
server <cache_server_ip>;
server <cache_server_ip>;
}
server {
listen 80;
server_name localhost;
location / {
proxy_pass http://cache_server;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
}
```
In this configuration, we define an `upstream` block that lists the IP addresses of the cache servers. We then define a server block that listens on port 80 and routes all
traffic to the `/` location to the cache servers using the `proxy_pass` directive. The `proxy_set_header` directives are used to preserve the source IP address of the client
request when forwarding it to the cache server.
To use this configuration, you would need to replace the placeholder `<cache_server_ip>` with the actual IP addresses of your cache servers. You would also need to make sure
that the cache servers are configured to accept incoming connections from the Nginx server and handle requests for files.
````

View File

(Binary image file: 446 KiB before and after.)

View File

@@ -0,0 +1,11 @@
# Modelfile for creating Midjourney prompts from a topic
# This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/
# Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic
FROM zephyr
PARAMETER temperature 0.8
PARAMETER top_k 500
PARAMETER top_p 0.9
SYSTEM """
Embrace your role as a creative illustrator. Based on a concept provided, you must produce a single paragraph with a multifaceted description of an image, ensuring significant details of the concept and more are represented in your instructions. You do not need to write complete sentences but rather short concepts with the following information: the level of detail that should be represented, an artistic style and maybe a specific name of a painter or illustrator, the ideal color palette, lighting, mood, perspective, the setting, time of day, weather, the season, the time period, location, materials, the textures, patterns, lines, brushstrokes, techniques, the medium, the genre, the rendering style. Don't include everything and keep the description length under 250 words.
"""

View File

@@ -0,0 +1,11 @@
# Midjourney Prompt Generator Modelfile
This simple modelfile will help create a prompt to feed to Midjourney.
```text
ollama create midjourney
ollama run midjourney
>>> a sports car in the mountains.
A sleek, high-performance automobile cuts through a serpentine mountain landscape. The concept is a classic illustration of speed and power, depicted in the style of pop art by Andy Warhol. The color palette is dominated by bold, primary hues of red, blue, and yellow, with striking accent colors of white, black, and metallic shades. The lighting is bright and focused, casting sharp shadows on the rugged terrain. A sense of excitement and anticipation permeates throughout the scene, as the car navigates a treacherous course through the winding road. The perspective is low, allowing for a full view of the vehicle's sleek lines and intricate details. The setting takes place in the afternoon during a sunny day in autumn, as evidenced by the vibrant foliage on the mountainside. The time period is modern, with nods to classic car design. The materials are primarily digital, allowing for smooth curves and sharp contrasts. The textures are sleek and polished, with meticulously detailed lines and brushstrokes that accentuate the car's aerodynamic design. The patterns consist of geometric shapes and bold stripes, adding to the car's dynamic appeal. The genre is modern realism, with a focus on precision and detail. The rendering style is highly technical, capturing the nuances and subtleties of the vehicle and its surroundings in breathtaking detail.
```

View File

@@ -0,0 +1,20 @@
# Recipe Maker Modelfile
Simple modelfile to generate a recipe from a short list of ingredients.
```
ollama create recipemaker
ollama run recipemaker
>>> chilli pepper, white chocolate, kale
Ingredients:
- 1 small chili pepper
- 4 squares of white chocolate
- handful of kale leaves
Instructions:
1. In a blender or food processor, puree the chilies and white chocolate until smooth.
2. Add the chopped kale leaves to the blender and pulse until well combined.
3. Serve immediately as a dip for crackers or use it as an ingredient in your favorite recipe. The mixture of spicy chili pepper with sweet white chocolate and nutritious
kale will make your taste buds dance with delight!
```

File diff suppressed because it is too large.

View File

@@ -1,4 +1,4 @@
FROM llama2
FROM mistral
SYSTEM """
You are an experienced Devops engineer focused on docker. When given specifications for a particular need or application you know the best way to host that within a docker container. For instance if someone tells you they want an nginx server to host files located at /web you will answer as follows

View File

@@ -0,0 +1,22 @@
# News Summarizer
This example goes through a series of steps:
1. You choose a topic area (e.g., "news", "NVidia", "music", etc.).
2. Gets the most recent articles on that topic from various sources.
3. Uses Ollama to summarize each article.
4. Creates chunks of sentences from each article.
5. Uses Sentence Transformers to generate embeddings for each of those chunks.
6. You enter a question regarding the summaries shown.
7. Uses Sentence Transformers to generate an embedding for that question.
8. Uses the embedded question to find the most similar chunks.
9. Feeds all that to Ollama to generate a good answer to your question based on these news articles.
This example lets you pick from a few different topic areas, then summarizes the most recent articles for that topic. It then creates chunks of sentences from each article and generates embeddings for each of those chunks.
You can run the example like this:
```bash
pip install -r requirements.txt
python summ.py
```

View File

@@ -0,0 +1,9 @@
beautifulsoup4==4.12.2
feedparser==6.0.10
mattsollamatools==0.0.8
newspaper3k==0.2.8
nltk==3.8.1
numpy==1.24.3
Requests==2.31.0
scikit_learn==1.3.0
sentence_transformers==2.2.2

View File

@@ -0,0 +1,86 @@
import curses
import json
from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search
import requests
from sentence_transformers import SentenceTransformer
from mattsollamatools import chunker
if __name__ == "__main__":
chosen_topic = curses.wrapper(menu)
print("Here is your news summary:\n")
urls = getUrls(chosen_topic, n=5)
model = SentenceTransformer('all-MiniLM-L6-v2')
allEmbeddings = []
for url in urls:
article={}
article['embeddings'] = []
article['url'] = url
text = getArticleText(url)
summary = get_summary(text)
chunks = chunker(text) # Use the chunk_text function from web_utils
embeddings = model.encode(chunks)
for (chunk, embedding) in zip(chunks, embeddings):
item = {}
item['source'] = chunk
item['embedding'] = embedding.tolist() # Convert NumPy array to list
item['sourcelength'] = len(chunk)
article['embeddings'].append(item)
allEmbeddings.append(article)
print(f"{summary}\n")
while True:
context = []
# Input a question from the user
question = input("Enter your question about the news, or type quit: ")
if question.lower() == 'quit':
break
# Embed the user's question
question_embedding = model.encode([question])
# Perform KNN search to find the best matches (indices and source text)
best_matches = knn_search(question_embedding, allEmbeddings, k=10)
sourcetext=""
for i, (index, source_text) in enumerate(best_matches, start=1):
sourcetext += f"{i}. Index: {index}, Source Text: {source_text}"
systemPrompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": question,
"system": systemPrompt,
"stream": False,
"context": context
}
# Convert the payload to a JSON string
payload_json = json.dumps(payload)
# Set the headers to specify JSON content
headers = {
"Content-Type": "application/json"
}
# Send the POST request
response = requests.post(url, data=payload_json, headers=headers)
# Check the response
if response.status_code == 200:
output = json.loads(response.text)
context = output['context']
print(output['response']+ "\n")
else:
print(f"Request failed with status code {response.status_code}")

View File

@@ -0,0 +1,108 @@
import curses
import feedparser
import requests
import unicodedata
import json
from newspaper import Article
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
from sklearn.neighbors import NearestNeighbors
from mattsollamatools import chunker
# Create a dictionary to store topics and their URLs
topic_urls = {
"Mac": "https://9to5mac.com/guides/mac/feed",
"News": "http://www.npr.org/rss/rss.php?id=1001",
"Nvidia": "https://nvidianews.nvidia.com/releases.xml",
"Raspberry Pi": "https://www.raspberrypi.com/news/feed/",
"Music": "https://www.billboard.com/c/music/music-news/feed/"
}
# Use curses to create a menu of topics
def menu(stdscr):
chosen_topic = get_url_for_topic(stdscr)
url = topic_urls[chosen_topic] if chosen_topic in topic_urls else "Topic not found"
stdscr.addstr(len(topic_urls) + 3, 0, f"Selected URL for {chosen_topic}: {url}")
stdscr.refresh()
return chosen_topic
# You have chosen a topic. Now return the url for that topic
def get_url_for_topic(stdscr):
curses.curs_set(0) # Hide the cursor
stdscr.clear()
stdscr.addstr(0, 0, "Choose a topic using the arrow keys (Press Enter to select):")
# Create a list of topics
topics = list(topic_urls.keys())
current_topic = 0
while True:
for i, topic in enumerate(topics):
if i == current_topic:
stdscr.addstr(i + 2, 2, f"> {topic}")
else:
stdscr.addstr(i + 2, 2, f" {topic}")
stdscr.refresh()
key = stdscr.getch()
if key == curses.KEY_DOWN and current_topic < len(topics) - 1:
current_topic += 1
elif key == curses.KEY_UP and current_topic > 0:
current_topic -= 1
elif key == 10: # Enter key
return topic_urls[topics[current_topic]]
# Get the last N URLs from an RSS feed
def getUrls(feed_url, n=20):
feed = feedparser.parse(feed_url)
entries = feed.entries[-n:]
urls = [entry.link for entry in entries]
return urls
# Often there are a bunch of ads and menus on pages for a news article. This uses newspaper3k to get just the text of just the article.
def getArticleText(url):
article = Article(url)
article.download()
article.parse()
return article.text
def get_summary(text):
systemPrompt = "Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text given."
prompt = text
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": prompt,
"system": systemPrompt,
"stream": False
}
payload_json = json.dumps(payload)
headers = {"Content-Type": "application/json"}
response = requests.post(url, data=payload_json, headers=headers)
return json.loads(response.text)["response"]
# Perform K-nearest neighbors (KNN) search
def knn_search(question_embedding, embeddings, k=5):
X = np.array([item['embedding'] for article in embeddings for item in article['embeddings']])
source_texts = [item['source'] for article in embeddings for item in article['embeddings']]
# Fit a KNN model on the embeddings
knn = NearestNeighbors(n_neighbors=k, metric='cosine')
knn.fit(X)
# Find the indices and distances of the k-nearest neighbors
distances, indices = knn.kneighbors(question_embedding, n_neighbors=k)
# Get the indices and source texts of the best matches
best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]
return best_matches

View File

@@ -17,7 +17,7 @@ def generate(prompt, context):
for line in r.iter_lines():
body = json.loads(line)
response_part = body.get('response', '')
# the response streams one token at a time, print that as we recieve it
# the response streams one token at a time, print that as we receive it
print(response_part, end='', flush=True)
if 'error' in body:
@@ -35,4 +35,4 @@ def main():
print()
if __name__ == "__main__":
main()
main()

View File

@@ -0,0 +1,2 @@
node_modules
package-lock.json

View File

@@ -0,0 +1,21 @@
# Ask the Mentors
This example demonstrates how to create a set of 'mentors' you can have a conversation with. The mentors are generated using the `character-generator.ts` file, which uses **Stable Beluga 70b** to create a bio and a list of verbal tics and common phrases for each person. Then `mentors.ts` takes a question, chooses three of the 'mentors', and starts a conversation with them. Occasionally they will talk to each other, and other times they will just deliver a set of monologues. It's fun to see what they do and say.
## Usage
```bash
ts-node ./character-generator.ts "Lorne Greene"
```
This will create `lornegreene/Modelfile`. Now you can create a model with this command:
```bash
ollama create lornegreene -f lornegreene/Modelfile
```
If you want to add your own mentors, you will have to update the code to look at your namespace instead of **mattw**. Also set the list of mentors to include yours.
```bash
ts-node ./mentors.ts "What is a Jackalope?"
```

View File

@@ -0,0 +1,26 @@
import { Ollama } from 'ollama-node'
import fs from 'fs';
import path from 'path';
async function characterGenerator() {
const character = process.argv[2];
console.log(`You are creating a character for ${character}.`);
const foldername = character.replace(/\s/g, '').toLowerCase();
const directory = path.join(__dirname, foldername);
if (!fs.existsSync(directory)) {
fs.mkdirSync(directory, { recursive: true });
}
const ollama = new Ollama();
ollama.setModel("stablebeluga2:70b-q4_K_M");
const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);
const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
if (err) throw err;
console.log('The file has been saved!');
});
}
characterGenerator();

View File

@@ -0,0 +1,59 @@
import { Ollama } from 'ollama-node';
const mentorCount = 3;
const ollama = new Ollama();
function getMentors(): string[] {
const mentors = ['Gary Vaynerchuk', 'Kanye West', 'Martha Stewart', 'Neil deGrasse Tyson', 'Owen Wilson', 'Ronald Reagan', 'Donald Trump', 'Barack Obama', 'Jeff Bezos'];
const chosenMentors: string[] = [];
for (let i = 0; i < mentorCount; i++) {
const mentor = mentors[Math.floor(Math.random() * mentors.length)];
chosenMentors.push(mentor);
mentors.splice(mentors.indexOf(mentor), 1);
}
return chosenMentors;
}
function getMentorFileName(mentor: string): string {
const model = mentor.toLowerCase().replace(/\s/g, '');
return `mattw/${model}`;
}
async function getSystemPrompt(mentor: string, isLast: boolean, question: string): Promise<string> {
ollama.setModel(getMentorFileName(mentor));
const info = await ollama.showModelInfo()
let SystemPrompt = info.system || '';
SystemPrompt += ` You should continue the conversation as if you were ${mentor} and acknowledge the people before you in the conversation. You should adopt their mannerisms and tone, but also not use language they wouldn't use. If they are not known to know about the concept in the question, don't offer an answer. Your answer should be no longer than 1 paragraph. And definitely try not to sound like anyone else. Don't repeat any slang or phrases already used. And if it is a question the original ${mentor} wouldn't have known the answer to, just say that you don't know, in the style of ${mentor}. And think about the time the person lived. Don't use terminology that they wouldn't have used.`
if (isLast) {
SystemPrompt += ` End your answer with something like I hope our answers help you out`;
} else {
SystemPrompt += ` Remember, this is a conversation, so you don't need a conclusion, but end your answer with a question related to the first question: "${question}".`;
}
return SystemPrompt;
}
async function main() {
const mentors = getMentors();
const question = process.argv[2];
let theConversation = `Here is the conversation so far.\nYou: ${question}\n`
for await (const mentor of mentors) {
const SystemPrompt = await getSystemPrompt(mentor, mentor === mentors[mentorCount - 1], question);
ollama.setModel(getMentorFileName(mentor));
ollama.setSystemPrompt(SystemPrompt);
let output = '';
process.stdout.write(`\n${mentor}: `);
for await (const chunk of ollama.streamingGenerate(theConversation + `Continue the conversation as if you were ${mentor} on the question "${question}".`)) {
if (chunk.response) {
output += chunk.response;
process.stdout.write(chunk.response);
} else {
process.stdout.write('\n');
}
}
theConversation += `${mentor}: ${output}\n\n`
}
}
main();

View File

@@ -0,0 +1,7 @@
{
"dependencies": {
"fs": "^0.0.1-security",
"ollama-node": "^0.0.3",
"path": "^0.12.7"
}
}

format/bytes.go (new file, 23 lines)
View File

@@ -0,0 +1,23 @@
package format
import "fmt"
const (
Byte = 1
KiloByte = Byte * 1000
MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000
)
func HumanBytes(b int64) string {
switch {
case b > GigaByte:
return fmt.Sprintf("%d GB", b/GigaByte)
case b > MegaByte:
return fmt.Sprintf("%d MB", b/MegaByte)
case b > KiloByte:
return fmt.Sprintf("%d KB", b/KiloByte)
default:
return fmt.Sprintf("%d B", b)
}
}
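
A quick aside, not part of the commit: assuming the `HumanBytes` implementation above and the module path `github.com/jmorganca/ollama/format`, a minimal sketch of the strings it produces (the units are decimal, 1000-based):

```go
package main

import (
	"fmt"

	"github.com/jmorganca/ollama/format"
)

func main() {
	fmt.Println(format.HumanBytes(532))            // "532 B"
	fmt.Println(format.HumanBytes(7_400_000))      // "7 MB"  (integer division, no decimals)
	fmt.Println(format.HumanBytes(13_000_000_000)) // "13 GB"
}
```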

format/format.go (new file, 25 lines)
View File

@@ -0,0 +1,25 @@
package format
import (
"fmt"
"math"
)
const (
Thousand = 1000
Million = Thousand * 1000
Billion = Million * 1000
)
func HumanNumber(b uint64) string {
switch {
case b > Billion:
return fmt.Sprintf("%.0fB", math.Round(float64(b)/Billion))
case b > Million:
return fmt.Sprintf("%.0fM", math.Round(float64(b)/Million))
case b > Thousand:
return fmt.Sprintf("%.0fK", math.Round(float64(b)/Thousand))
default:
return fmt.Sprintf("%d", b)
}
}
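
`HumanNumber` is the counterpart for plain counts rather than byte sizes; later in this diff it is used to render GGUF parameter counts (for example "7B"). A small table-driven test sketch, assuming the implementation above; illustrative only, not part of the commit:

```go
package format

import "testing"

// Sketch of expected HumanNumber output for a few representative counts.
func TestHumanNumberSketch(t *testing.T) {
	cases := map[uint64]string{
		999:           "999",
		125_000:       "125K",
		3_300_000:     "3M",
		7_241_000_000: "7B", // e.g. a 7-billion-parameter model
	}
	for in, want := range cases {
		if got := HumanNumber(in); got != want {
			t.Errorf("HumanNumber(%d) = %q, want %q", in, got, want)
		}
	}
}
```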

View File

@@ -7,26 +7,14 @@ import (
"time"
)
// HumanDuration returns a human-readable approximation of a duration
// (eg. "About a minute", "4 hours ago", etc.).
// Modified version of github.com/docker/go-units.HumanDuration
func HumanDuration(d time.Duration) string {
return HumanDurationWithCase(d, true)
}
// HumanDurationWithCase returns a human-readable approximation of a
// duration (eg. "About a minute", "4 hours ago", etc.). but allows
// you to specify whether the first word should be capitalized
// (eg. "About" vs. "about")
func HumanDurationWithCase(d time.Duration, useCaps bool) string {
// humanDuration returns a human-readable approximation of a
// duration (eg. "About a minute", "4 hours ago", etc.).
func humanDuration(d time.Duration) string {
seconds := int(d.Seconds())
switch {
case seconds < 1:
if useCaps {
return "Less than a second"
}
return "less than a second"
return "Less than a second"
case seconds == 1:
return "1 second"
case seconds < 60:
@@ -36,10 +24,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
minutes := int(d.Minutes())
switch {
case minutes == 1:
if useCaps {
return "About a minute"
}
return "about a minute"
return "About a minute"
case minutes < 60:
return fmt.Sprintf("%d minutes", minutes)
}
@@ -47,10 +32,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
hours := int(math.Round(d.Hours()))
switch {
case hours == 1:
if useCaps {
return "About an hour"
}
return "about an hour"
return "About an hour"
case hours < 48:
return fmt.Sprintf("%d hours", hours)
case hours < 24*7*2:
@@ -65,77 +47,22 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
}
func HumanTime(t time.Time, zeroValue string) string {
return humanTimeWithCase(t, zeroValue, true)
return humanTime(t, zeroValue)
}
func HumanTimeLower(t time.Time, zeroValue string) string {
return humanTimeWithCase(t, zeroValue, false)
return strings.ToLower(humanTime(t, zeroValue))
}
func humanTimeWithCase(t time.Time, zeroValue string, useCaps bool) string {
func humanTime(t time.Time, zeroValue string) string {
if t.IsZero() {
return zeroValue
}
delta := time.Since(t)
if delta < 0 {
return HumanDurationWithCase(-delta, useCaps) + " from now"
return humanDuration(-delta) + " from now"
}
return HumanDurationWithCase(delta, useCaps) + " ago"
}
// ExcatDuration returns a human readable hours/minutes/seconds or milliseconds format of a duration
// the most precise level of duration is milliseconds
func ExactDuration(d time.Duration) string {
if d.Seconds() < 1 {
if d.Milliseconds() == 1 {
return fmt.Sprintf("%d millisecond", d.Milliseconds())
}
return fmt.Sprintf("%d milliseconds", d.Milliseconds())
}
var readableDur strings.Builder
dur := d.String()
// split the default duration string format of 0h0m0s into something nicer to read
h := strings.Split(dur, "h")
if len(h) > 1 {
hours := h[0]
if hours == "1" {
readableDur.WriteString(fmt.Sprintf("%s hour ", hours))
} else {
readableDur.WriteString(fmt.Sprintf("%s hours ", hours))
}
dur = h[1]
}
m := strings.Split(dur, "m")
if len(m) > 1 {
mins := m[0]
switch mins {
case "0":
// skip
case "1":
readableDur.WriteString(fmt.Sprintf("%s minute ", mins))
default:
readableDur.WriteString(fmt.Sprintf("%s minutes ", mins))
}
dur = m[1]
}
s := strings.Split(dur, "s")
if len(s) > 0 {
sec := s[0]
switch sec {
case "0":
// skip
case "1":
readableDur.WriteString(fmt.Sprintf("%s second ", sec))
default:
readableDur.WriteString(fmt.Sprintf("%s seconds ", sec))
}
}
return strings.TrimSpace(readableDur.String())
return humanDuration(delta) + " ago"
}
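
After this refactor the exported surface is just `HumanTime` and `HumanTimeLower`, with casing handled by `strings.ToLower` instead of the removed `useCaps` flag. A minimal usage sketch (not part of the diff), assuming the module path `github.com/jmorganca/ollama/format`:

```go
package main

import (
	"fmt"
	"time"

	"github.com/jmorganca/ollama/format"
)

func main() {
	modified := time.Now().Add(-90 * time.Second)

	// Capitalized variant, suitable at the start of a sentence or in a table cell.
	fmt.Println(format.HumanTime(modified, "Never")) // "About a minute ago"

	// Lower-cased variant for use mid-sentence.
	fmt.Println(format.HumanTimeLower(modified, "never")) // "about a minute ago"

	// The zero time falls back to the supplied placeholder.
	fmt.Println(format.HumanTime(time.Time{}, "Never")) // "Never"
}
```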

View File

@@ -11,92 +11,25 @@ func assertEqual(t *testing.T, a interface{}, b interface{}) {
}
}
func TestHumanDuration(t *testing.T) {
day := 24 * time.Hour
week := 7 * day
month := 30 * day
year := 365 * day
assertEqual(t, "Less than a second", HumanDuration(450*time.Millisecond))
assertEqual(t, "Less than a second", HumanDurationWithCase(450*time.Millisecond, true))
assertEqual(t, "less than a second", HumanDurationWithCase(450*time.Millisecond, false))
assertEqual(t, "1 second", HumanDuration(1*time.Second))
assertEqual(t, "45 seconds", HumanDuration(45*time.Second))
assertEqual(t, "46 seconds", HumanDuration(46*time.Second))
assertEqual(t, "59 seconds", HumanDuration(59*time.Second))
assertEqual(t, "About a minute", HumanDuration(60*time.Second))
assertEqual(t, "About a minute", HumanDurationWithCase(1*time.Minute, true))
assertEqual(t, "about a minute", HumanDurationWithCase(1*time.Minute, false))
assertEqual(t, "3 minutes", HumanDuration(3*time.Minute))
assertEqual(t, "35 minutes", HumanDuration(35*time.Minute))
assertEqual(t, "35 minutes", HumanDuration(35*time.Minute+40*time.Second))
assertEqual(t, "45 minutes", HumanDuration(45*time.Minute))
assertEqual(t, "45 minutes", HumanDuration(45*time.Minute+40*time.Second))
assertEqual(t, "46 minutes", HumanDuration(46*time.Minute))
assertEqual(t, "59 minutes", HumanDuration(59*time.Minute))
assertEqual(t, "About an hour", HumanDuration(1*time.Hour))
assertEqual(t, "About an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, true))
assertEqual(t, "about an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, false))
assertEqual(t, "2 hours", HumanDuration(1*time.Hour+31*time.Minute))
assertEqual(t, "2 hours", HumanDuration(1*time.Hour+59*time.Minute))
assertEqual(t, "3 hours", HumanDuration(3*time.Hour))
assertEqual(t, "3 hours", HumanDuration(3*time.Hour+29*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+31*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+59*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+60*time.Minute))
assertEqual(t, "24 hours", HumanDuration(24*time.Hour))
assertEqual(t, "36 hours", HumanDuration(1*day+12*time.Hour))
assertEqual(t, "2 days", HumanDuration(2*day))
assertEqual(t, "7 days", HumanDuration(7*day))
assertEqual(t, "13 days", HumanDuration(13*day+5*time.Hour))
assertEqual(t, "2 weeks", HumanDuration(2*week))
assertEqual(t, "2 weeks", HumanDuration(2*week+4*day))
assertEqual(t, "3 weeks", HumanDuration(3*week))
assertEqual(t, "4 weeks", HumanDuration(4*week))
assertEqual(t, "4 weeks", HumanDuration(4*week+3*day))
assertEqual(t, "4 weeks", HumanDuration(1*month))
assertEqual(t, "6 weeks", HumanDuration(1*month+2*week))
assertEqual(t, "2 months", HumanDuration(2*month))
assertEqual(t, "2 months", HumanDuration(2*month+2*week))
assertEqual(t, "3 months", HumanDuration(3*month))
assertEqual(t, "3 months", HumanDuration(3*month+1*week))
assertEqual(t, "5 months", HumanDuration(5*month+2*week))
assertEqual(t, "13 months", HumanDuration(13*month))
assertEqual(t, "23 months", HumanDuration(23*month))
assertEqual(t, "24 months", HumanDuration(24*month))
assertEqual(t, "2 years", HumanDuration(24*month+2*week))
assertEqual(t, "3 years", HumanDuration(3*year+2*month))
}
func TestHumanTime(t *testing.T) {
now := time.Now()
t.Run("zero value", func(t *testing.T) {
assertEqual(t, HumanTime(time.Time{}, "never"), "never")
})
t.Run("time in the future", func(t *testing.T) {
v := now.Add(48 * time.Hour)
assertEqual(t, HumanTime(v, ""), "2 days from now")
})
t.Run("time in the past", func(t *testing.T) {
v := now.Add(-48 * time.Hour)
assertEqual(t, HumanTime(v, ""), "2 days ago")
})
}
func TestExactDuration(t *testing.T) {
assertEqual(t, "1 millisecond", ExactDuration(1*time.Millisecond))
assertEqual(t, "10 milliseconds", ExactDuration(10*time.Millisecond))
assertEqual(t, "1 second", ExactDuration(1*time.Second))
assertEqual(t, "10 seconds", ExactDuration(10*time.Second))
assertEqual(t, "1 minute", ExactDuration(1*time.Minute))
assertEqual(t, "10 minutes", ExactDuration(10*time.Minute))
assertEqual(t, "1 hour", ExactDuration(1*time.Hour))
assertEqual(t, "10 hours", ExactDuration(10*time.Hour))
assertEqual(t, "1 hour 1 second", ExactDuration(1*time.Hour+1*time.Second))
assertEqual(t, "1 hour 10 seconds", ExactDuration(1*time.Hour+10*time.Second))
assertEqual(t, "1 hour 1 minute", ExactDuration(1*time.Hour+1*time.Minute))
assertEqual(t, "1 hour 10 minutes", ExactDuration(1*time.Hour+10*time.Minute))
assertEqual(t, "1 hour 1 minute 1 second", ExactDuration(1*time.Hour+1*time.Minute+1*time.Second))
assertEqual(t, "10 hours 10 minutes 10 seconds", ExactDuration(10*time.Hour+10*time.Minute+10*time.Second))
t.Run("soon", func(t *testing.T) {
v := now.Add(800 * time.Millisecond)
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
})
}

go.mod (14 changed lines)
View File

@@ -4,12 +4,13 @@ go 1.20
require (
github.com/dustin/go-humanize v1.0.1
github.com/emirpasic/gods v1.18.1
github.com/gin-gonic/gin v1.9.1
github.com/mattn/go-runewidth v0.0.14
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
github.com/olekukonko/tablewriter v0.0.5
github.com/pdevine/readline v1.5.2
github.com/spf13/cobra v1.7.0
golang.org/x/sync v0.3.0
)
require github.com/rivo/uniseg v0.2.0 // indirect
@@ -38,13 +39,12 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.10.0
golang.org/x/crypto v0.14.0
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.10.0
golang.org/x/text v0.10.0 // indirect
gonum.org/v1/gonum v0.13.0
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0
golang.org/x/text v0.13.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go.sum (33 changed lines)
View File

@@ -4,10 +4,6 @@ github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZX
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM=
github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ=
github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04=
github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -15,6 +11,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
@@ -78,8 +76,6 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
github.com/pdevine/readline v1.5.2 h1:oz6Y5GdTmhPG+08hhxcAvtHitSANWuA2100Sppb38xI=
github.com/pdevine/readline v1.5.2/go.mod h1:na/LbuE5PYwxI7GyopWdIs3U8HVe89lYlNTFTXH3wOw=
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
@@ -118,33 +114,32 @@ golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUu
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c=
golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM=
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

View File

@@ -1,7 +1,5 @@
package llm
const ModelFamilyFalcon = "falcon"
const (
falconModelType7B = 32
falconModelType40B = 60
@@ -17,6 +15,6 @@ func falconModelType(numLayer uint32) string {
case 80:
return "180B"
default:
return "Unknown"
return "unknown"
}
}

View File

@@ -69,7 +69,7 @@ func fileType(fileType uint32) string {
case fileTypeQ6_K:
return "Q6_K"
default:
return "Unknown"
return "unknown"
}
}
@@ -77,6 +77,7 @@ type model interface {
ModelFamily() string
ModelType() string
FileType() string
NumLayers() int64
}
type container interface {
@@ -174,7 +175,8 @@ const (
// Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF = 0x46554747
FILE_MAGIC_GGUF_LE = 0x46554747
FILE_MAGIC_GGUF_BE = 0x47475546
)
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@@ -190,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{}
case FILE_MAGIC_GGUF:
ggml.container = &containerGGUF{}
case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian}
default:
return nil, errors.New("invalid file magic")
}
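
The hunk above replaces the single `FILE_MAGIC_GGUF` constant with little- and big-endian variants and threads a `binary.ByteOrder` through the GGUF container. The trick is that the ASCII magic "GGUF" read as a little-endian uint32 is 0x46554747, while a big-endian file read the same way yields 0x47475546. A self-contained sketch of that detection step (not the repo's code):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// Local copies of the two magic values from the diff above.
const (
	fileMagicGGUFLE = 0x46554747
	fileMagicGGUFBE = 0x47475546
)

// byteOrderFor reads the 4-byte magic and reports which byte order the
// rest of the file should be decoded with.
func byteOrderFor(r io.Reader) (binary.ByteOrder, error) {
	var magic uint32
	if err := binary.Read(r, binary.LittleEndian, &magic); err != nil {
		return nil, err
	}
	switch magic {
	case fileMagicGGUFLE:
		return binary.LittleEndian, nil
	case fileMagicGGUFBE:
		return binary.BigEndian, nil
	default:
		return nil, fmt.Errorf("invalid file magic %#x", magic)
	}
}

func main() {
	bo, err := byteOrderFor(bytes.NewReader([]byte("GGUF")))
	fmt.Println(bo, err) // LittleEndian <nil>
}
```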

View File

@@ -3,12 +3,15 @@ package llm
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/jmorganca/ollama/format"
)
type containerGGUF struct {
bo binary.ByteOrder
Version uint32
V1 struct {
@@ -20,6 +23,8 @@ type containerGGUF struct {
NumTensor uint64
NumKV uint64
}
parameters uint64
}
func (c *containerGGUF) Name() string {
@@ -27,15 +32,13 @@ func (c *containerGGUF) Name() string {
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version)
binary.Read(r, c.bo, &c.Version)
switch c.Version {
case 1:
binary.Read(r, binary.LittleEndian, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
binary.Read(r, c.bo, &c.V1)
default:
return nil, errors.New("invalid version")
binary.Read(r, c.bo, &c.V2)
}
model := newGGUFModel(c)
@@ -76,6 +79,14 @@ func newGGUFModel(container *containerGGUF) *ggufModel {
}
}
func (llm *ggufModel) NumTensor() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumTensor)
}
return llm.V2.NumTensor
}
func (llm *ggufModel) NumKV() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumKV)
@@ -94,6 +105,10 @@ func (llm *ggufModel) ModelFamily() string {
}
func (llm *ggufModel) ModelType() string {
if llm.parameters > 0 {
return format.HumanNumber(llm.parameters)
}
switch llm.ModelFamily() {
case "llama":
if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
@@ -109,9 +124,13 @@ func (llm *ggufModel) ModelType() string {
if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
return falconModelType(blocks)
}
case "starcoder":
if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
return starCoderModelType(blocks)
}
}
return "Unknown"
return "unknown"
}
func (llm *ggufModel) FileType() string {
@@ -120,17 +139,13 @@ func (llm *ggufModel) FileType() string {
return fileType(t)
}
return "Unknown"
return "unknown"
}
func (llm *ggufModel) Decode(r io.Reader) error {
read := llm.readString
if llm.Version == 1 {
read = llm.readStringV1
}
// decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := read(r)
k, err := llm.readString(r)
if err != nil {
return err
}
@@ -162,24 +177,14 @@ func (llm *ggufModel) Decode(r io.Reader) error {
case ggufTypeBool:
v = llm.readBool(r)
case ggufTypeString:
fn := llm.readString
if llm.Version == 1 {
fn = llm.readStringV1
}
s, err := fn(r)
s, err := llm.readString(r)
if err != nil {
return err
}
v = s
case ggufTypeArray:
fn := llm.readArray
if llm.Version == 1 {
fn = llm.readArrayV1
}
a, err := fn(r)
a, err := llm.readArray(r)
if err != nil {
return err
}
@@ -192,78 +197,107 @@ func (llm *ggufModel) Decode(r io.Reader) error {
llm.kv[k] = v
}
// decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil {
return err
}
dimensions := llm.readU32(r)
var elements uint64 = 1
for i := 0; uint32(i) < dimensions; i++ {
elements *= llm.readU64(r)
}
llm.readU32(r) // type
llm.readU64(r) // offset
llm.parameters += elements
}
return nil
}
func (ggufModel) readU8(r io.Reader) uint8 {
func (llm *ggufModel) NumLayers() int64 {
value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
if !exists {
return 0
}
v := value.(uint32)
return int64(v)
}
func (llm ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8
binary.Read(r, binary.LittleEndian, &u8)
binary.Read(r, llm.bo, &u8)
return u8
}
func (ggufModel) readI8(r io.Reader) int8 {
func (llm ggufModel) readI8(r io.Reader) int8 {
var i8 int8
binary.Read(r, binary.LittleEndian, &i8)
binary.Read(r, llm.bo, &i8)
return i8
}
func (ggufModel) readU16(r io.Reader) uint16 {
func (llm ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16
binary.Read(r, binary.LittleEndian, &u16)
binary.Read(r, llm.bo, &u16)
return u16
}
func (ggufModel) readI16(r io.Reader) int16 {
func (llm ggufModel) readI16(r io.Reader) int16 {
var i16 int16
binary.Read(r, binary.LittleEndian, &i16)
binary.Read(r, llm.bo, &i16)
return i16
}
func (ggufModel) readU32(r io.Reader) uint32 {
func (llm ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32
binary.Read(r, binary.LittleEndian, &u32)
binary.Read(r, llm.bo, &u32)
return u32
}
func (ggufModel) readI32(r io.Reader) int32 {
func (llm ggufModel) readI32(r io.Reader) int32 {
var i32 int32
binary.Read(r, binary.LittleEndian, &i32)
binary.Read(r, llm.bo, &i32)
return i32
}
func (ggufModel) readU64(r io.Reader) uint64 {
func (llm ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64
binary.Read(r, binary.LittleEndian, &u64)
binary.Read(r, llm.bo, &u64)
return u64
}
func (ggufModel) readI64(r io.Reader) int64 {
func (llm ggufModel) readI64(r io.Reader) int64 {
var i64 int64
binary.Read(r, binary.LittleEndian, &i64)
binary.Read(r, llm.bo, &i64)
return i64
}
func (ggufModel) readF32(r io.Reader) float32 {
func (llm ggufModel) readF32(r io.Reader) float32 {
var f32 float32
binary.Read(r, binary.LittleEndian, &f32)
binary.Read(r, llm.bo, &f32)
return f32
}
func (ggufModel) readF64(r io.Reader) float64 {
func (llm ggufModel) readF64(r io.Reader) float64 {
var f64 float64
binary.Read(r, binary.LittleEndian, &f64)
binary.Read(r, llm.bo, &f64)
return f64
}
func (ggufModel) readBool(r io.Reader) bool {
func (llm ggufModel) readBool(r io.Reader) bool {
var b bool
binary.Read(r, binary.LittleEndian, &b)
binary.Read(r, llm.bo, &b)
return b
}
func (ggufModel) readStringV1(r io.Reader) (string, error) {
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength)
binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -277,8 +311,12 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
}
func (llm ggufModel) readString(r io.Reader) (string, error) {
if llm.Version == 1 {
return llm.readStringV1(r)
}
var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength)
binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -326,6 +364,10 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
}
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
if llm.Version == 1 {
return llm.readArrayV1(r)
}
atype := llm.readU32(r)
n := llm.readU64(r)

Some files were not shown because too many files have changed in this diff.