add examples of streaming in python and node

Signed-off-by: Matt Williams <m@technovangelist.com>
Merge pull request #525 from jmorganca/mxyng/falcon-decode
2023-09-14 07:12:09 -07:00 · 2023-09-13 15:08:47 -07:00 · 2023-09-13 14:47:37 -07:00 · 2023-09-13 12:43:57 -07:00 · 2023-09-12 15:12:59 -07:00 · 2023-09-12 17:06:48 -04:00
28 changed files with 1097 additions and 429 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,8 +1,5 @@
 build
 llama/build
 .venv
 .vscode
 ollama
 app
-web
+llm/llama.cpp/ggml
-.env
+llm/llama.cpp/gguf
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,3 +2,8 @@
    path = llm/llama.cpp/ggml
    url = https://github.com/ggerganov/llama.cpp.git
    ignore = dirty
    shallow = true
 [submodule "llm/llama.cpp/gguf"]
    path = llm/llama.cpp/gguf
    url = https://github.com/ggerganov/llama.cpp.git
    shallow = true
--- a/18
+++ b/18
@@ -1,15 +1,21 @@
-FROM golang:1.20
+FROM golang:alpine
 WORKDIR /go/src/github.com/jmorganca/ollama
 RUN apk add --no-cache git build-base cmake
 COPY . .
-RUN CGO_ENABLED=1 go build -ldflags '-linkmode external -extldflags "-static"' .
+RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' .
 FROM alpine
-COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
+ENV OLLAMA_HOST 0.0.0.0
-EXPOSE 11434
+RUN apk add --no-cache libstdc++
 ARG USER=ollama
 ARG GROUP=ollama
-RUN addgroup -g 1000 $GROUP && adduser -u 1000 -DG $GROUP $USER
+RUN addgroup $GROUP && adduser -D -G $GROUP $USER
 COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 USER $USER:$GROUP
 ENTRYPOINT ["/bin/ollama"]
 ENV OLLAMA_HOST 0.0.0.0
 CMD ["serve"]
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -0,0 +1,22 @@
 FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
 WORKDIR /go/src/github.com/jmorganca/ollama
 RUN apt-get update && apt-get install -y git build-essential cmake
 ADD https://dl.google.com/go/go1.21.1.linux-amd64.tar.gz /tmp/go1.21.1.tar.gz
 RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
 COPY . .
 RUN /usr/local/go/bin/go generate ./... && /usr/local/go/bin/go build -ldflags '-linkmode external -extldflags "-static"' .
 FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
 ENV OLLAMA_HOST 0.0.0.0
 ARG USER=ollama
 ARG GROUP=ollama
 RUN groupadd $GROUP && useradd -m -g $GROUP $USER
 COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 USER $USER:$GROUP
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
--- a/README.md
+++ b/README.md
@@ -165,10 +165,11 @@ Ollama bundles model weights, configurations, and data into a single package, de
 ## Building
-Install `cmake`:
+Install `cmake` and `go`:
 ```
 brew install cmake
 brew install go
 ```
 Then generate dependencies and build:
--- a/api/types.go
+++ b/api/types.go
@@ -291,7 +291,7 @@ func DefaultOptions() Options {
 		NumCtx:             2048,
 		NumKeep:            -1,
 		NumBatch:           512,
-		NumGPU:             1,
+		NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 		NumGQA:             1,
 		LowVRAM:            false,
 		F16KV:              true,
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -672,6 +672,12 @@ func RunServer(cmd *cobra.Command, _ []string) error {
 		origins = strings.Split(o, ",")
 	}
 	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
 		if err := server.PruneLayers(); err != nil {
 			return err
 		}
 	}
 	return server.Serve(ln, origins)
 }
--- a/docs/api.md
+++ b/docs/api.md
@@ -20,6 +20,10 @@ Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` a
 All durations are returned in nanoseconds.
 ### Streams
 Many API responses are streams of JSON objects showing the current status. For examples of working with streams in various languages, see [streaming.md](./streaming.md).
 ## Generate a completion
 ```
--- a/docs/development.md
+++ b/docs/development.md
@@ -6,6 +6,10 @@
 Install required tools:
 - cmake version 3.24 or higher
 - go version 1.20 or higher
 - gcc version 11.4.0 or higher
 ```
 brew install go cmake gcc
 ```
@@ -27,3 +31,9 @@ Now you can run `ollama`:
 ```
 ./ollama
 ```
 ## Building on Linux with GPU support
 - Install `cmake` and `nvidia-cuda-toolkit`
 - Run `go generate ./...`
 - Run `go build .`
--- a/docs/streaming.md
+++ b/docs/streaming.md
@@ -0,0 +1,35 @@
 # Streaming responses in the Ollama Client API
 ## JavaScript / TypeScript / Deno
 ```javascript
 // Pull a model and log the download progress as the server streams status
 // updates back.
 const pull = async () => {
   const response = await fetch("http://localhost:11434/api/pull", {
     method: "POST",
     body: JSON.stringify({ name: "llama2:7b-q5_0" }),
   });
   const stream = response.body?.pipeThrough(new TextDecoderStream());
   if (!stream) throw new Error("No reader");
   // Status updates arrive as one JSON object per line, but a network chunk
   // may carry several lines or stop in the middle of one. Buffer the text and
   // only parse complete lines.
   let pending = "";
   const report = (line) => {
     if (!line.trim()) return;
     const out = JSON.parse(line);
     if (out.status?.startsWith("downloading")) {
       console.log(`${out.status} - ${(out.completed / out.total) * 100}%`);
     }
   };
   for await (const chunk of stream) {
     pending += chunk;
     const lines = pending.split("\n");
     // The last piece is either "" or an incomplete line; keep it for later.
     pending = lines.pop() ?? "";
     for (const line of lines) report(line);
   }
   // Handle a final line that was not newline-terminated.
   report(pending);
 }
 pull();
 ```
 ## Python
 ```python
 import requests
 import json
 # stream=True lets the status updates be consumed while the pull is running.
 response = requests.post("http://localhost:11434/api/pull", json={"name": "llama2:7b-q5_0"}, stream=True)
 for data in response.iter_lines():
   # iter_lines can yield empty keep-alive lines, which are not valid JSON.
   if not data:
     continue
   out = json.loads(data)
   if "completed" in out:
     print(out["completed"] / out["total"] * 100)
 ```
--- a/format/openssh.go
+++ b/format/openssh.go
@@ -10,15 +10,11 @@ package format
 import (
 	"crypto"
 	"crypto/ecdsa"
 	"crypto/ed25519"
 	"crypto/elliptic"
 	"crypto/rand"
 	"crypto/rsa"
 	"encoding/binary"
 	"encoding/pem"
 	"fmt"
 	"math/big"
 	"golang.org/x/crypto/ssh"
 )
@@ -41,25 +37,6 @@ type openSSHPrivateKey struct {
 	Rest    []byte `ssh:"rest"`
 }
 // openSSHRSAPrivateKey lists the RSA private key fields in the order
 // OpenSSHPrivateKey hands them to ssh.Marshal: N, E and D come from the
 // rsa.PrivateKey (E converted to a big.Int), Iqmp is k.Precomputed.Qinv, and
 // P and Q are k.Primes[0] and k.Primes[1]. Comment is the caller-supplied key
 // comment; Pad is tagged ssh:"rest" and is not set when marshalling.
 type openSSHRSAPrivateKey struct {
 	N       *big.Int
 	E       *big.Int
 	D       *big.Int
 	Iqmp    *big.Int
 	P       *big.Int
 	Q       *big.Int
 	Comment string
 	Pad     []byte `ssh:"rest"`
 }
 // openSSHECDSAPrivateKey lists the ECDSA private key fields in the order
 // OpenSSHPrivateKey hands them to ssh.Marshal: Curve is the SSH curve name
 // (for example "nistp256"), Pub is the public point encoded with
 // elliptic.Marshal, and D is the private key's D value. Comment is the
 // caller-supplied key comment; Pad is tagged ssh:"rest" and is not set when
 // marshalling.
 type openSSHECDSAPrivateKey struct {
 	Curve   string
 	Pub     []byte
 	D       *big.Int
 	Comment string
 	Pad     []byte `ssh:"rest"`
 }
 type openSSHEd25519PrivateKey struct {
 	Pub     []byte
 	Priv    []byte
@@ -85,64 +62,6 @@ func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error
 	}
 	switch k := key.(type) {
 	case *rsa.PrivateKey:
 		e := new(big.Int).SetInt64(int64(k.E))
 		key := openSSHRSAPrivateKey{
 			N:       k.N,
 			E:       e,
 			D:       k.D,
 			Iqmp:    k.Precomputed.Qinv,
 			P:       k.Primes[0],
 			Q:       k.Primes[1],
 			Comment: comment,
 		}
 		pk1.Keytype = ssh.KeyAlgoRSA
 		pk1.Rest = ssh.Marshal(key)
 		w.PubKey = ssh.Marshal(struct {
 			KeyType string
 			E       *big.Int
 			N       *big.Int
 		}{
 			ssh.KeyAlgoRSA, e, k.N,
 		})
 	case *ecdsa.PrivateKey:
 		var curve, keytype string
 		switch name := k.Curve.Params().Name; name {
 		case "P-256":
 			curve = "nistp256"
 			keytype = ssh.KeyAlgoECDSA256
 		case "P-384":
 			curve = "nistp384"
 			keytype = ssh.KeyAlgoECDSA384
 		case "P-521":
 			curve = "nistp521"
 			keytype = ssh.KeyAlgoECDSA521
 		default:
 			return nil, fmt.Errorf("ssh: unknown curve %q", name)
 		}
 		pub := elliptic.Marshal(k.Curve, k.X, k.Y)
 		key := openSSHECDSAPrivateKey{
 			Curve:   curve,
 			Pub:     pub,
 			D:       k.D,
 			Comment: comment,
 		}
 		pk1.Keytype = keytype
 		pk1.Rest = ssh.Marshal(key)
 		w.PubKey = ssh.Marshal(struct {
 			KeyType string
 			Curve   string
 			Pub     []byte
 		}{
 			keytype, curve, pub,
 		})
 	case ed25519.PrivateKey:
 		pub, priv := k[32:], k
 		key := openSSHEd25519PrivateKey{
--- a/llm/falcon.go
+++ b/llm/falcon.go
@@ -0,0 +1,22 @@
 package llm
 const ModelFamilyFalcon = "falcon"
 // Block counts of the known Falcon model sizes, as matched by falconModelType.
 const (
 	falconModelType7B   = 32
 	falconModelType40B  = 60
 	falconModelType180B = 80
 )
 // falconModelType maps a Falcon model's block count to its parameter-size
 // label ("7B", "40B" or "180B"). It returns "Unknown" for any other count.
 func falconModelType(numLayer uint32) string {
 	// Match against the named constants so the sizes are defined in one place.
 	switch numLayer {
 	case falconModelType7B:
 		return "7B"
 	case falconModelType40B:
 		return "40B"
 	case falconModelType180B:
 		return "180B"
 	default:
 		return "Unknown"
 	}
 }
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -3,72 +3,97 @@ package llm
 import (
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
 	"path"
 	"sync"
 )
 type ModelFamily string
 // ModelType identifies a model size by a numeric code.
 type ModelType uint32
 // Codes of the model sizes that have a known label.
 const (
 	ModelType3B  ModelType = 26
 	ModelType7B  ModelType = 32
 	ModelType13B ModelType = 40
 	ModelType34B ModelType = 48
 	ModelType30B ModelType = 60
 	ModelType65B ModelType = 80
 )
 // modelTypeNames holds the display label of every known ModelType.
 var modelTypeNames = map[ModelType]string{
 	ModelType3B:  "3B",
 	ModelType7B:  "7B",
 	ModelType13B: "13B",
 	ModelType34B: "34B",
 	ModelType30B: "30B",
 	ModelType65B: "65B",
 }
 // String returns the size label for mt, or "Unknown" when mt is not one of
 // the known model types.
 func (mt ModelType) String() string {
 	if name, ok := modelTypeNames[mt]; ok {
 		return name
 	}
 	return "Unknown"
 }
 // FileType is implemented by values that can describe a model file's type as
 // a string.
 type FileType interface {
 	String() string
 }
 // GGML is the result of decoding a model file: the magic number read from the
 // start of the file, the container chosen for that magic, and the model
 // description. The container and model are embedded, so their methods are
 // available directly on GGML.
 type GGML struct {
 	magic uint32
 	container
 	model
 }
 // Numeric file type identifiers understood by fileType. Q8_0 restarts the
 // sequence at iota + 2, so the values 5 and 6 are deliberately unassigned.
 const (
 	fileTypeF32 uint32 = iota
 	fileTypeF16
 	fileTypeQ4_0
 	fileTypeQ4_1
 	fileTypeQ4_1_F16
 	fileTypeQ8_0 uint32 = iota + 2
 	fileTypeQ5_0
 	fileTypeQ5_1
 	fileTypeQ2_K
 	fileTypeQ3_K_S
 	fileTypeQ3_K_M
 	fileTypeQ3_K_L
 	fileTypeQ4_K_S
 	fileTypeQ4_K_M
 	fileTypeQ5_K_S
 	fileTypeQ5_K_M
 	fileTypeQ6_K
 )
 // fileTypeNames is indexed by file type identifier. Identifiers without a
 // name (the gap at 5 and 6) hold the empty string.
 var fileTypeNames = [...]string{
 	fileTypeF32:      "F32",
 	fileTypeF16:      "F16",
 	fileTypeQ4_0:     "Q4_0",
 	fileTypeQ4_1:     "Q4_1",
 	fileTypeQ4_1_F16: "Q4_1_F16",
 	fileTypeQ8_0:     "Q8_0",
 	fileTypeQ5_0:     "Q5_0",
 	fileTypeQ5_1:     "Q5_1",
 	fileTypeQ2_K:     "Q2_K",
 	fileTypeQ3_K_S:   "Q3_K_S",
 	fileTypeQ3_K_M:   "Q3_K_M",
 	fileTypeQ3_K_L:   "Q3_K_L",
 	fileTypeQ4_K_S:   "Q4_K_S",
 	fileTypeQ4_K_M:   "Q4_K_M",
 	fileTypeQ5_K_S:   "Q5_K_S",
 	fileTypeQ5_K_M:   "Q5_K_M",
 	fileTypeQ6_K:     "Q6_K",
 }
 // fileType returns the name of the given file type identifier, or "Unknown"
 // when the identifier has no name.
 func fileType(fileType uint32) string {
 	if fileType < uint32(len(fileTypeNames)) {
 		if name := fileTypeNames[fileType]; name != "" {
 			return name
 		}
 	}
 	return "Unknown"
 }
 type model interface {
-	ModelFamily() ModelFamily
+	ModelFamily() string
-	ModelType() ModelType
+	ModelType() string
-	FileType() FileType
+	FileType() string
 }
 type container interface {
 	Name() string
-	Decode(io.Reader) error
+	Decode(io.Reader) (model, error)
 }
-type containerGGML struct {
+type containerGGML struct{}
 }
 func (c *containerGGML) Name() string {
 	return "ggml"
 }
-func (c *containerGGML) Decode(r io.Reader) error {
+func (c *containerGGML) Decode(r io.Reader) (model, error) {
-	return nil
+	return nil, nil
 }
 type containerGGMF struct {
@@ -79,18 +104,18 @@ func (c *containerGGMF) Name() string {
 	return "ggmf"
 }
-func (c *containerGGMF) Decode(r io.Reader) error {
+func (c *containerGGMF) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 	switch version {
 	case 1:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 	c.version = version
-	return nil
+	return nil, nil
 }
 type containerGGJT struct {
@@ -101,18 +126,22 @@ func (c *containerGGJT) Name() string {
 	return "ggjt"
 }
-func (c *containerGGJT) Decode(r io.Reader) error {
+func (c *containerGGJT) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 	switch version {
 	case 1, 2, 3:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 	c.version = version
-	return nil
+
 	// different model types may have different layouts for hyperparameters
 	var llama llamaModel
 	binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
 	return &llama, nil
 }
 type containerLORA struct {
@@ -123,32 +152,51 @@ func (c *containerLORA) Name() string {
 	return "ggla"
 }
-func (c *containerLORA) Decode(r io.Reader) error {
+func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	var version uint32
 	binary.Read(r, binary.LittleEndian, &version)
 	switch version {
 	case 1:
 	default:
-		return errors.New("invalid version")
+		return nil, errors.New("invalid version")
 	}
 	c.version = version
-	return nil
+	return nil, nil
 }
 var (
 	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
 	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
 )
 var (
 	ggmlInit       sync.Once
 	ggmlRunnerPath string
 )
 // ggmlRunner returns a ModelRunner whose Path is the ggml runner path. The
 // path is computed by chooseRunner(ggmlGPU, ggmlCPU) on the first call only
 // (guarded by ggmlInit) and reused by every later call.
 func ggmlRunner() ModelRunner {
 	ggmlInit.Do(func() {
 		ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
 	})
 	return ModelRunner{Path: ggmlRunnerPath}
 }
 const (
-	// / Magic constant for `ggml` files (unversioned).
+	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
-	// / Magic constant for `ggml` files (versioned, ggmf).
+	// Magic constant for `ggml` files (versioned, ggmf).
 	FILE_MAGIC_GGMF = 0x67676d66
-	// / Magic constant for `ggml` files (versioned, ggjt).
+	// Magic constant for `ggml` files (versioned, ggjt).
 	FILE_MAGIC_GGJT = 0x67676a74
-	// / Magic constant for `ggla` files (LoRA adapter).
+	// Magic constant for `ggla` files (LoRA adapter).
 	FILE_MAGIC_GGLA = 0x67676C61
 	// Magic constant for `gguf` files (versioned, gguf)
 	FILE_MAGIC_GGUF = 0x46554747
 )
-func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
+func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	var ggml GGML
 	binary.Read(r, binary.LittleEndian, &ggml.magic)
@@ -161,24 +209,18 @@ func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
 		ggml.container = &containerGGJT{}
 	case FILE_MAGIC_GGLA:
 		ggml.container = &containerLORA{}
 	case FILE_MAGIC_GGUF:
 		ggml.container = &containerGGUF{}
 	default:
 		return nil, errors.New("invalid file magic")
 	}
-	if err := ggml.Decode(r); err != nil {
+	model, err := ggml.Decode(r)
 	if err != nil {
 		return nil, err
 	}
-	// different model types may have different layouts for hyperparameters
+	ggml.model = model
 	switch hint {
 	case ModelFamilyLlama:
 		var llama llamaModel
 		binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
 		ggml.model = &llama
 		// TODO: sanity check hyperparameters
 	default:
 		return nil, fmt.Errorf("unsupported model type: %s", hint)
 	}
 	// final model type
 	return &ggml, nil
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -0,0 +1,389 @@
 package llm
 import (
 	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
 	"path"
 	"sync"
 )
 // containerGGUF holds the header fields Decode reads from a GGUF file: the
 // format version followed by the tensor and key/value counts. A version 1
 // header fills V1 (32-bit counts) and a version 2 header fills V2 (64-bit
 // counts); Decode does not touch the struct for the other version.
 type containerGGUF struct {
 	Version uint32
 	V1 struct {
 		NumTensor uint32
 		NumKV     uint32
 	}
 	V2 struct {
 		NumTensor uint64
 		NumKV     uint64
 	}
 }
 // Name returns the container format name, "gguf".
 func (c *containerGGUF) Name() string {
 	return "gguf"
 }
 // Decode reads the GGUF header from r into c and then decodes the metadata
 // that follows it.
 //
 // The version is read first and selects the header layout: V1 for version 1
 // and V2 for version 2. Any other version yields an "invalid version" error.
 // A header that cannot be read in full is reported as an error instead of
 // being treated as a model without metadata.
 func (c *containerGGUF) Decode(r io.Reader) (model, error) {
 	if err := binary.Read(r, binary.LittleEndian, &c.Version); err != nil {
 		return nil, err
 	}
 	var err error
 	switch c.Version {
 	case 1:
 		err = binary.Read(r, binary.LittleEndian, &c.V1)
 	case 2:
 		err = binary.Read(r, binary.LittleEndian, &c.V2)
 	default:
 		return nil, errors.New("invalid version")
 	}
 	if err != nil {
 		return nil, err
 	}
 	m := newGGUFModel(c)
 	if err := m.Decode(r); err != nil {
 		return nil, err
 	}
 	return m, nil
 }
 // Type tags for GGUF metadata values. ggufModel.Decode reads one of these as
 // a uint32 in front of every value, and the array readers read one as the
 // element type of an array.
 const (
 	ggufTypeUint8 uint32 = iota
 	ggufTypeInt8
 	ggufTypeUint16
 	ggufTypeInt16
 	ggufTypeUint32
 	ggufTypeInt32
 	ggufTypeFloat32
 	ggufTypeBool
 	ggufTypeString
 	ggufTypeArray
 	ggufTypeUint64
 	ggufTypeInt64
 	ggufTypeFloat64
 )
 // kv holds decoded GGUF metadata values keyed by their metadata key.
 type kv map[string]any
 // ggufModel describes a model decoded from a GGUF file. It embeds the
 // container header it was read from and the metadata key/value pairs.
 type ggufModel struct {
 	*containerGGUF
 	kv
 }
 // newGGUFModel returns a ggufModel bound to container with an empty,
 // ready-to-fill metadata map.
 func newGGUFModel(container *containerGGUF) *ggufModel {
 	return &ggufModel{
 		containerGGUF: container,
 		kv:            make(kv),
 	}
 }
 // NumKV reports how many metadata key/value pairs the header declares. The
 // count is taken from V1 for version 1 files and from V2 otherwise.
 func (llm *ggufModel) NumKV() uint64 {
 	switch llm.Version {
 	case 1:
 		return uint64(llm.V1.NumKV)
 	default:
 		return llm.V2.NumKV
 	}
 }
 // ModelFamily returns the string stored under "general.architecture", or
 // "unknown" when that key is missing or is not a string.
 func (llm *ggufModel) ModelFamily() string {
 	if arch, ok := llm.kv["general.architecture"].(string); ok {
 		return arch
 	}
 	return "unknown"
 }
 // ModelType returns the parameter-size label of the model ("7B", "70B", ...)
 // derived from the metadata of its family:
 //
 //   - "llama": "70B" when head_count is eight times head_count_kv (integer
 //     division), otherwise the label llamaModelType gives for block_count.
 //   - "falcon": the label falconModelType gives for block_count.
 //
 // It returns "Unknown" for other families and when the block count is missing
 // or is not a uint32.
 func (llm *ggufModel) ModelType() string {
 	switch llm.ModelFamily() {
 	case "llama":
 		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
 			heads, headsOK := llm.kv["llama.head_count"].(uint32)
 			headKVs, headKVsOK := llm.kv["llama.head_count_kv"].(uint32)
 			// headKVs comes straight from the file; a stored zero must not
 			// reach the division, which would panic.
 			if headsOK && headKVsOK && headKVs != 0 && heads/headKVs == 8 {
 				return "70B"
 			}
 			return llamaModelType(blocks)
 		}
 	case "falcon":
 		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
 			return falconModelType(blocks)
 		}
 	}
 	return "Unknown"
 }
 // FileType returns the name of the file type stored under
 // "general.file_type", or "Unknown" when that key is missing or is not a
 // uint32.
 func (llm *ggufModel) FileType() string {
 	if ft, ok := llm.kv["general.file_type"].(uint32); ok {
 		return fileType(ft)
 	}
 	return "Unknown"
 }
 // Decode reads NumKV metadata entries from r into llm.kv. Every entry is a
 // key string, a uint32 type tag and a value encoded according to that tag.
 // Version 1 files use the V1 string and array readers; all other versions use
 // the current ones. Decode fails when a string or array cannot be read or
 // when a type tag is not recognised.
 func (llm *ggufModel) Decode(r io.Reader) error {
 	// Pick the version-specific readers once instead of for every entry.
 	readStr, readArr := llm.readString, llm.readArray
 	if llm.Version == 1 {
 		readStr, readArr = llm.readStringV1, llm.readArrayV1
 	}
 	for n, i := llm.NumKV(), uint64(0); i < n; i++ {
 		key, err := readStr(r)
 		if err != nil {
 			return err
 		}
 		var value any
 		switch vtype := llm.readU32(r); vtype {
 		case ggufTypeUint8:
 			value = llm.readU8(r)
 		case ggufTypeInt8:
 			value = llm.readI8(r)
 		case ggufTypeUint16:
 			value = llm.readU16(r)
 		case ggufTypeInt16:
 			value = llm.readI16(r)
 		case ggufTypeUint32:
 			value = llm.readU32(r)
 		case ggufTypeInt32:
 			value = llm.readI32(r)
 		case ggufTypeUint64:
 			value = llm.readU64(r)
 		case ggufTypeInt64:
 			value = llm.readI64(r)
 		case ggufTypeFloat32:
 			value = llm.readF32(r)
 		case ggufTypeFloat64:
 			value = llm.readF64(r)
 		case ggufTypeBool:
 			value = llm.readBool(r)
 		case ggufTypeString:
 			value, err = readStr(r)
 		case ggufTypeArray:
 			value, err = readArr(r)
 		default:
 			return fmt.Errorf("invalid type: %d", vtype)
 		}
 		// Only the string and array readers can set err here.
 		if err != nil {
 			return err
 		}
 		llm.kv[key] = value
 	}
 	return nil
 }
 // readScalar reads one fixed-size little-endian value of type T from r. The
 // error from binary.Read is discarded, so a failed read is not reported to
 // the caller.
 func readScalar[T any](r io.Reader) T {
 	var v T
 	binary.Read(r, binary.LittleEndian, &v)
 	return v
 }
 // readU8 reads a uint8 from r.
 func (ggufModel) readU8(r io.Reader) uint8 {
 	return readScalar[uint8](r)
 }
 // readI8 reads an int8 from r.
 func (ggufModel) readI8(r io.Reader) int8 {
 	return readScalar[int8](r)
 }
 // readU16 reads a little-endian uint16 from r.
 func (ggufModel) readU16(r io.Reader) uint16 {
 	return readScalar[uint16](r)
 }
 // readI16 reads a little-endian int16 from r.
 func (ggufModel) readI16(r io.Reader) int16 {
 	return readScalar[int16](r)
 }
 // readU32 reads a little-endian uint32 from r.
 func (ggufModel) readU32(r io.Reader) uint32 {
 	return readScalar[uint32](r)
 }
 // readI32 reads a little-endian int32 from r.
 func (ggufModel) readI32(r io.Reader) int32 {
 	return readScalar[int32](r)
 }
 // readU64 reads a little-endian uint64 from r.
 func (ggufModel) readU64(r io.Reader) uint64 {
 	return readScalar[uint64](r)
 }
 // readI64 reads a little-endian int64 from r.
 func (ggufModel) readI64(r io.Reader) int64 {
 	return readScalar[int64](r)
 }
 // readF32 reads a little-endian float32 from r.
 func (ggufModel) readF32(r io.Reader) float32 {
 	return readScalar[float32](r)
 }
 // readF64 reads a little-endian float64 from r.
 func (ggufModel) readF64(r io.Reader) float64 {
 	return readScalar[float64](r)
 }
 // readBool reads a one-byte bool from r.
 func (ggufModel) readBool(r io.Reader) bool {
 	return readScalar[bool](r)
 }
 // readStringV1 reads a GGUF v1 string from r: a little-endian uint32 byte
 // length followed by that many bytes, the last of which is a terminator that
 // is dropped from the result.
 //
 // It returns an error when the length or the string bytes cannot be read. A
 // zero-length string is returned as "".
 func (ggufModel) readStringV1(r io.Reader) (string, error) {
 	var nameLength uint32
 	if err := binary.Read(r, binary.LittleEndian, &nameLength); err != nil {
 		return "", err
 	}
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
 		return "", err
 	}
 	// gguf v1 strings are null-terminated. An empty buffer has no terminator
 	// to strip, and Truncate would panic on a negative length.
 	if b.Len() > 0 {
 		b.Truncate(b.Len() - 1)
 	}
 	return b.String(), nil
 }
 // readString reads a length-prefixed string from r: a little-endian uint64
 // byte length followed by that many bytes.
 //
 // It returns an error when the length or the string bytes cannot be read, or
 // when the length does not fit in an int64.
 func (ggufModel) readString(r io.Reader) (string, error) {
 	var nameLength uint64
 	if err := binary.Read(r, binary.LittleEndian, &nameLength); err != nil {
 		return "", err
 	}
 	// io.CopyN takes an int64; a length that wraps negative would otherwise
 	// copy nothing and silently produce an empty string.
 	if int64(nameLength) < 0 {
 		return "", fmt.Errorf("invalid string length: %d", nameLength)
 	}
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
 		return "", err
 	}
 	return b.String(), nil
 }
 // readArrayV1 reads a GGUF v1 array from r: a uint32 element type tag, a
 // uint32 element count, and then that many elements of the tagged type. Each
 // element is appended to the result with its own Go type (uint8, int8, ...,
 // string).
 //
 // It returns an error when a string element cannot be read or when the
 // element type is not one handled here and the array is non-empty.
 func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
 	atype := llm.readU32(r)
 	n := llm.readU32(r)
 	for i := uint32(0); i < n; i++ {
 		switch atype {
 		case ggufTypeUint8:
 			arr = append(arr, llm.readU8(r))
 		case ggufTypeInt8:
 			// Signed bytes must go through readI8 to keep their sign.
 			arr = append(arr, llm.readI8(r))
 		case ggufTypeUint16:
 			arr = append(arr, llm.readU16(r))
 		case ggufTypeInt16:
 			arr = append(arr, llm.readI16(r))
 		case ggufTypeUint32:
 			arr = append(arr, llm.readU32(r))
 		case ggufTypeInt32:
 			arr = append(arr, llm.readI32(r))
 		case ggufTypeFloat32:
 			arr = append(arr, llm.readF32(r))
 		case ggufTypeBool:
 			arr = append(arr, llm.readBool(r))
 		case ggufTypeString:
 			s, err := llm.readStringV1(r)
 			if err != nil {
 				return nil, err
 			}
 			arr = append(arr, s)
 		default:
 			return nil, fmt.Errorf("invalid array type: %d", atype)
 		}
 	}
 	return arr, nil
 }
 // readArray reads an array from r: a uint32 element type tag, a uint64
 // element count, and then that many elements of the tagged type. Each element
 // is appended to the result with its own Go type (uint8, int8, ..., string).
 //
 // It returns an error when a string element cannot be read or when the
 // element type is not one handled here and the array is non-empty.
 func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 	atype := llm.readU32(r)
 	n := llm.readU64(r)
 	for i := uint64(0); i < n; i++ {
 		switch atype {
 		case ggufTypeUint8:
 			arr = append(arr, llm.readU8(r))
 		case ggufTypeInt8:
 			// Signed bytes must go through readI8 to keep their sign.
 			arr = append(arr, llm.readI8(r))
 		case ggufTypeUint16:
 			arr = append(arr, llm.readU16(r))
 		case ggufTypeInt16:
 			arr = append(arr, llm.readI16(r))
 		case ggufTypeUint32:
 			arr = append(arr, llm.readU32(r))
 		case ggufTypeInt32:
 			arr = append(arr, llm.readI32(r))
 		case ggufTypeUint64:
 			arr = append(arr, llm.readU64(r))
 		case ggufTypeInt64:
 			arr = append(arr, llm.readI64(r))
 		case ggufTypeFloat32:
 			arr = append(arr, llm.readF32(r))
 		case ggufTypeFloat64:
 			arr = append(arr, llm.readF64(r))
 		case ggufTypeBool:
 			arr = append(arr, llm.readBool(r))
 		case ggufTypeString:
 			s, err := llm.readString(r)
 			if err != nil {
 				return nil, err
 			}
 			arr = append(arr, s)
 		default:
 			return nil, fmt.Errorf("invalid array type: %d", atype)
 		}
 	}
 	return arr, nil
 }
 var (
 	ggufGPU = path.Join("llama.cpp", "gguf", "build", "gpu", "bin")
 	ggufCPU = path.Join("llama.cpp", "gguf", "build", "cpu", "bin")
 )
 var (
 	ggufInit       sync.Once
 	ggufRunnerPath string
 )
 // ggufRunner returns a ModelRunner whose Path is the gguf runner path. The
 // path is computed by chooseRunner(ggufGPU, ggufCPU) on the first call only
 // (guarded by ggufInit) and reused by every later call.
 func ggufRunner() ModelRunner {
 	ggufInit.Do(func() {
 		ggufRunnerPath = chooseRunner(ggufGPU, ggufCPU)
 	})
 	return ModelRunner{Path: ggufRunnerPath}
 }
--- a/llm/llama.cpp/generate.go
+++ b/llm/llama.cpp/generate.go
@@ -4,10 +4,14 @@
 package llm
 //go:generate git submodule init
 //go:generate git submodule update --force ggml
-//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
+//go:generate -command git-apply git -C ggml apply
-//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
+//go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch
-//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git-apply ../ggml_patch/0002-34B-model-support.patch
-//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
 //go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
 //go:generate cmake --build ggml/build/cpu --target server --config Release
 //go:generate git submodule update --force gguf
 //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
 //go:generate cmake --build gguf/build/cpu --target server --config Release
--- a/llm/llama.cpp/generate_darwin_amd64.go
+++ b/llm/llama.cpp/generate_darwin_amd64.go
@@ -1,10 +1,16 @@
 package llm
 //go:generate git submodule init
 //go:generate git submodule update --force ggml
-//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
+//go:generate -command git-apply git -C ggml apply
-//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
+//go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch
-//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git-apply ../ggml_patch/0002-34B-model-support.patch
-//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate git-apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
-//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=10.11
+//go:generate git-apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
 //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/cpu --target server --config Release
 //go:generate git submodule update --force gguf
 //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build gguf/build/cpu --target server --config Release
--- a/llm/llama.cpp/generate_darwin_arm64.go
+++ b/llm/llama.cpp/generate_darwin_arm64.go
@@ -1,10 +1,16 @@
 package llm
 //go:generate git submodule init
 //go:generate git submodule update --force ggml
-//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
+//go:generate -command git-apply git -C ggml apply
-//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
+//go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch
-//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git-apply ../ggml_patch/0002-34B-model-support.patch
-//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate git-apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
-//go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate git-apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
 //go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/gpu --target server --config Release
 //go:generate git submodule update --force gguf
 //go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build gguf/build/gpu --target server --config Release
--- a/llm/llama.cpp/generate_linux.go
+++ b/llm/llama.cpp/generate_linux.go
@@ -0,0 +1,15 @@
 package llm
 //go:generate git submodule init
 //go:generate git submodule update --force ggml
 //go:generate -command git-apply git -C ggml apply
 //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch
 //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch
 //go:generate git-apply ../ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
 //go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
 //go:generate cmake --build ggml/build/gpu --target server --config Release
 //go:generate git submodule update --force gguf
 //go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
 //go:generate cmake --build gguf/build/gpu --target server --config Release
--- a/llm/llama.cpp/ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
+++ b/llm/llama.cpp/ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
@@ -0,0 +1,32 @@
 From 1e3bc523d8053a77df3ac7126a84d0297ee97ef6 Mon Sep 17 00:00:00 2001
 From: Kylin <56434533+KyL0N@users.noreply.github.com>
 Date: Tue, 22 Aug 2023 15:14:23 +0800
 Subject: [PATCH] ggml : support CUDA's half type for aarch64(#1455) (#2670)
 * ggml: support CUDA's half type for aarch64(#1455)
 support CUDA's half type for aarch64 in ggml_fp16_t definition
 * ggml: use __CUDACC__ to recognise nvcc compiler
 ---
 ggml.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
 diff --git a/ggml.h b/ggml.h
 index 544ad2d..0ec7ec5 100644
 --- a/ggml.h
 +++ b/ggml.h
@@ -259,8 +259,9 @@
 extern "C" {
 #endif
 -#ifdef __ARM_NEON
 -    // we use the built-in 16-bit float type
 +#if defined(__ARM_NEON) && defined(__CUDACC__)
 +    typedef half ggml_fp16_t;
 +#elif defined(__ARM_NEON)
     typedef __fp16 ggml_fp16_t;
 #else
     typedef uint16_t ggml_fp16_t;
 -- 
 2.39.2 (Apple Git-143)
--- a/llm/llama.cpp/gguf
+++ b/llm/llama.cpp/gguf
--- a/llm/ggml_llama.go
+++ b/llm/ggml_llama.go
@@ -20,27 +20,14 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"time"
 	"github.com/jmorganca/ollama/api"
 )
-const ModelFamilyLlama ModelFamily = "llama"
+//go:embed llama.cpp/*/build/*/bin/*
 //go:embed llama.cpp/ggml/build/*/bin/*
 var llamaCppEmbed embed.FS
 var (
 	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
 	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
 )
 var (
 	ggmlInit       sync.Once
 	ggmlRunnerPath string
 )
 func osPath(llamaPath string) string {
 	if runtime.GOOS == "windows" {
 		return path.Join(llamaPath, "Release")
@@ -49,16 +36,15 @@ func osPath(llamaPath string) string {
 	return llamaPath
 }
-func initGGML() {
+func chooseRunner(gpuPath, cpuPath string) string {
 	ggmlInit.Do(func() {
 	tmpDir, err := os.MkdirTemp("", "llama-*")
 	if err != nil {
 		log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
 	}
-		llamaPath := osPath(ggmlGPU)
+	llamaPath := osPath(gpuPath)
 	if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
-			llamaPath = osPath(ggmlCPU)
+		llamaPath = osPath(cpuPath)
 		if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
 			log.Fatalf("llama.cpp executable not found")
 		}
@@ -69,9 +55,15 @@ func initGGML() {
 	case "windows":
 		files = []string{"server.exe"}
 	case "darwin":
-			if llamaPath == osPath(ggmlGPU) {
+		if llamaPath == osPath(gpuPath) {
 			files = append(files, "ggml-metal.metal")
 		}
 	case "linux":
 		// check if there is a GPU available
 		if _, err := CheckVRAM(); errors.Is(err, errNoGPU) {
 			// this error was logged on start-up, so we don't need to log it again
 			llamaPath = osPath(cpuPath)
 		}
 	}
 	for _, f := range files {
@@ -95,52 +87,47 @@ func initGGML() {
 		}
 	}
-		ggmlRunnerPath = filepath.Join(tmpDir, "server")
+	runPath := filepath.Join(tmpDir, "server")
 	if runtime.GOOS == "windows" {
-			ggmlRunnerPath = filepath.Join(tmpDir, "server.exe")
+		runPath = filepath.Join(tmpDir, "server.exe")
 		}
 	})
 	}
-type ModelRunner struct {
+	return runPath
 	Path string // path to the model runner executable
 }
 func ggmlRunner() ModelRunner {
 	initGGML()
 	return ModelRunner{Path: ggmlRunnerPath}
 }
 type llamaModel struct {
 	hyperparameters llamaHyperparameters
 }
-func (llm *llamaModel) ModelFamily() ModelFamily {
+func (llm *llamaModel) ModelFamily() string {
-	return ModelFamilyLlama
+	return "llama"
 }
-func (llm *llamaModel) ModelType() ModelType {
+func llamaModelType(numLayer uint32) string {
-	switch llm.hyperparameters.NumLayer {
+	switch numLayer {
 	case 26:
-		return ModelType3B
+		return "3B"
 	case 32:
-		return ModelType7B
+		return "7B"
 	case 40:
-		return ModelType13B
+		return "13B"
 	case 48:
-		return ModelType34B
+		return "34B"
 	case 60:
-		return ModelType30B
+		return "30B"
 	case 80:
-		return ModelType65B
+		return "65B"
 	default:
 		return "Unknown"
 	}
 }
-	// TODO: find a better default
+func (llm *llamaModel) ModelType() string {
-	return ModelType7B
+	return llamaModelType(llm.hyperparameters.NumLayer)
 }
-func (llm *llamaModel) FileType() FileType {
+func (llm *llamaModel) FileType() string {
-	return llm.hyperparameters.FileType
+	return fileType(llm.hyperparameters.FileType)
 }
 type llamaHyperparameters struct {
@@ -157,70 +144,7 @@ type llamaHyperparameters struct {
 	NumRot   uint32
 	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
-	FileType llamaFileType
+	FileType uint32
 }
 // llamaFileType is the numeric file-type code stored in a llama model's
 // hyperparameters; it names the quantization level (e.g. Q4_0, Q5_K).
 type llamaFileType uint32
 // Known file-type codes. The numbering is contiguous from 0 through 4, skips
 // 5 and 6, and resumes at 7 with Q8_0 (hence the "iota + 2").
 const (
 	llamaFileTypeF32 llamaFileType = iota
 	llamaFileTypeF16
 	llamaFileTypeQ4_0
 	llamaFileTypeQ4_1
 	llamaFileTypeQ4_1_F16
 	llamaFileTypeQ8_0 llamaFileType = iota + 2
 	llamaFileTypeQ5_0
 	llamaFileTypeQ5_1
 	llamaFileTypeQ2_K
 	llamaFileTypeQ3_K_S
 	llamaFileTypeQ3_K_M
 	llamaFileTypeQ3_K_L
 	llamaFileTypeQ4_K_S
 	llamaFileTypeQ4_K_M
 	llamaFileTypeQ5_K_S
 	llamaFileTypeQ5_K_M
 	llamaFileTypeQ6_K
 )
 // llamaFileTypeNames maps every known file-type code to its display name.
 var llamaFileTypeNames = map[llamaFileType]string{
 	llamaFileTypeF32:      "F32",
 	llamaFileTypeF16:      "F16",
 	llamaFileTypeQ4_0:     "Q4_0",
 	llamaFileTypeQ4_1:     "Q4_1",
 	llamaFileTypeQ4_1_F16: "Q4_1_F16",
 	llamaFileTypeQ8_0:     "Q8_0",
 	llamaFileTypeQ5_0:     "Q5_0",
 	llamaFileTypeQ5_1:     "Q5_1",
 	llamaFileTypeQ2_K:     "Q2_K",
 	llamaFileTypeQ3_K_S:   "Q3_K_S",
 	llamaFileTypeQ3_K_M:   "Q3_K_M",
 	llamaFileTypeQ3_K_L:   "Q3_K_L",
 	llamaFileTypeQ4_K_S:   "Q4_K_S",
 	llamaFileTypeQ4_K_M:   "Q4_K_M",
 	llamaFileTypeQ5_K_S:   "Q5_K_S",
 	llamaFileTypeQ5_K_M:   "Q5_K_M",
 	llamaFileTypeQ6_K:     "Q6_K",
 }
 // String returns the display name of the file type, or "Unknown" for any
 // code that is not one of the declared constants.
 func (ft llamaFileType) String() string {
 	if name, known := llamaFileTypeNames[ft]; known {
 		return name
 	}
 	return "Unknown"
 }
 type Running struct {
@@ -229,11 +153,81 @@ type Running struct {
 	Cancel context.CancelFunc
 }
 // ModelRunner identifies the external executable used to run a model.
 type ModelRunner struct {
 	Path string // path to the model runner executable
 }
 // llama combines the api.Options a model was started with and the Running
 // state (both embedded, so their fields are promoted onto llama).
 type llama struct {
 	api.Options
 	Running
 }
 // errNoGPU is returned by CheckVRAM when the nvidia-smi command cannot be run
 // successfully.
 var errNoGPU = errors.New("nvidia-smi command failed")
 // CheckVRAM returns the total VRAM in MiB on Linux machines with NVIDIA GPUs.
 //
 // It runs nvidia-smi querying memory.total, which prints one number per line,
 // and sums those numbers. It returns errNoGPU if the command fails to run,
 // and a descriptive error if a non-empty line is not an integer or the output
 // cannot be read.
 func CheckVRAM() (int, error) {
 	cmd := exec.Command("nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits")
 	var stdout bytes.Buffer
 	cmd.Stdout = &stdout
 	if err := cmd.Run(); err != nil {
 		return 0, errNoGPU
 	}
 	var total int
 	scanner := bufio.NewScanner(&stdout)
 	for scanner.Scan() {
 		// tolerate padding and blank lines instead of failing the whole check
 		field := bytes.TrimSpace(scanner.Bytes())
 		if len(field) == 0 {
 			continue
 		}
 		vram, err := strconv.Atoi(string(field))
 		if err != nil {
 			return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
 		}
 		total += vram
 	}
 	// Scan returning false can mean a read error rather than end of input
 	if err := scanner.Err(); err != nil {
 		return 0, fmt.Errorf("failed to read nvidia-smi output: %v", err)
 	}
 	return total, nil
 }
 // NumGPU returns the number of layers to offload to the GPU.
 //
 // An explicit opts.NumGPU (anything other than -1) is returned unchanged.
 // Otherwise the value is chosen automatically: 1 on non-Linux systems (which
 // enables Metal on macOS), and on Linux a value scaled by the VRAM reported
 // by CheckVRAM, or 0 when CheckVRAM fails.
 func NumGPU(opts api.Options) int {
 	if opts.NumGPU != -1 {
 		return opts.NumGPU
 	}
 	if runtime.GOOS != "linux" {
 		// default to enable metal on macOS
 		return 1
 	}
 	vram, err := CheckVRAM()
 	if err != nil {
 		// match the sentinel by identity rather than by message text, so the
 		// check keeps working if the error is ever wrapped or reworded
 		if !errors.Is(err, errNoGPU) {
 			log.Print(err.Error())
 		}
 		// nvidia driver not installed or no nvidia GPU found
 		return 0
 	}
 	// TODO: this is a very rough heuristic, better would be to calculate this based on number of layers and context size
 	var n int
 	switch {
 	case vram < 500:
 		log.Printf("WARNING: Low VRAM detected, disabling GPU")
 		n = 0
 	case vram < 1000:
 		n = 4
 	case vram < 2000:
 		n = 8
 	case vram < 4000:
 		n = 12
 	case vram < 8000:
 		n = 16
 	case vram < 12000:
 		n = 24
 	case vram < 16000:
 		n = 32
 	default:
 		n = 48
 	}
 	log.Printf("%d MB VRAM available, loading %d GPU layers", vram, n)
 	return n
 }
 func newLlama(model string, adapters []string, runner ModelRunner, opts api.Options) (*llama, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
@@ -250,14 +244,17 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
 		"--gqa", fmt.Sprintf("%d", opts.NumGQA),
 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
-		"--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU),
+		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(opts)),
 		"--embedding",
 	}
 	if opts.NumGQA > 0 {
 		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
 	}
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
@@ -289,17 +286,25 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 			runner.Path,
 			append(params, "--port", strconv.Itoa(port))...,
 		)
 		cmd.Stdout = os.Stderr
 		cmd.Stderr = os.Stderr
 		llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
 		log.Print("starting llama.cpp server")
 		if err := llm.Cmd.Start(); err != nil {
 			log.Printf("error starting the external llama.cpp server: %v", err)
 			continue
 		}
 		if err := waitForServer(llm); err != nil {
 			log.Printf("error starting llama.cpp server: %v", err)
 			llm.Close()
 			// try again
 			continue
 		}
 		// server started successfully
 		return llm, nil
 	}
@@ -308,48 +313,31 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 }
 func waitForServer(llm *llama) error {
 	log.Print("starting llama.cpp server")
 	var stderr bytes.Buffer
 	llm.Cmd.Stderr = &stderr
 	err := llm.Cmd.Start()
 	if err != nil {
 		return fmt.Errorf("error starting the external llama.cpp server: %w", err)
 	}
 	exitChan := make(chan error, 1)
 	// the server is a long running process, watch for it exiting to keep track of something going wrong
 	go func() {
 		err := llm.Cmd.Wait()
 		log.Print(stderr.String())
 		exitChan <- err
 	}()
 	// wait for the server to start responding
 	start := time.Now()
-	expiresAt := time.Now().Add(30 * time.Second)
+	expiresAt := time.Now().Add(45 * time.Second)
-	ticker := time.NewTicker(100 * time.Millisecond)
+	ticker := time.NewTicker(200 * time.Millisecond)
 	log.Print("waiting for llama.cpp server to start responding")
-
+	for range ticker.C {
 	for {
 		select {
 		case <-ticker.C:
 		if time.Now().After(expiresAt) {
-				return fmt.Errorf("llama.cpp server did not start responding within 30 seconds, retrying")
+			return fmt.Errorf("llama.cpp server did not start within alloted time, retrying")
 		}
 		if err := llm.Ping(context.Background()); err == nil {
 			break
 		}
 	}
 	log.Printf("llama.cpp server started in %f seconds", time.Since(start).Seconds())
 	return nil
 }
 		case err := <-exitChan:
 			return fmt.Errorf("llama.cpp server exited unexpectedly: %w", err)
 		}
 	}
 }
 func (llm *llama) Close() {
-	llm.Running.Cmd.Cancel()
+	llm.Cancel()
 	if err := llm.Cmd.Wait(); err != nil {
 		log.Printf("llama.cpp server exited with error: %v", err)
 	}
 }
 func (llm *llama) SetOptions(opts api.Options) {
@@ -676,7 +664,7 @@ func (llm *llama) Embedding(ctx context.Context, input string) ([]float64, error
 // Ping checks that the server subprocess is still running and responding to requests
 func (llm *llama) Ping(ctx context.Context) error {
-	resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Running.Port))
+	resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Port))
 	if err != nil {
 		return fmt.Errorf("ping resp: %w", err)
 	}
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -32,15 +32,22 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	}
 	defer f.Close()
-	ggml, err := DecodeGGML(f, ModelFamilyLlama)
+	ggml, err := DecodeGGML(f)
 	if err != nil {
 		return nil, err
 	}
-	switch ggml.FileType().String() {
+	switch ggml.FileType() {
-	case "F32", "Q5_0", "Q5_1", "Q8_0":
+	case "Q8_0":
 		if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 			// GGML Q8_0 models do not support the Metal API and will
 			// cause the runner to segmentation fault, so disable the GPU
 			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 			opts.NumGPU = 0
 		}
 	case "F32", "Q5_0", "Q5_1":
 		if opts.NumGPU != 0 {
-			// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
+			// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
 			// cause the runner to segmentation fault so disable GPU
 			log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 			opts.NumGPU = 0
@@ -49,34 +56,43 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	totalResidentMemory := memory.TotalMemory()
 	switch ggml.ModelType() {
-	case ModelType3B, ModelType7B:
+	case "3B", "7B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
 		} else if totalResidentMemory < 8*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 8GB of memory")
 		}
-	case ModelType13B:
+	case "13B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
 		} else if totalResidentMemory < 16*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 16GB of memory")
 		}
-	case ModelType30B, ModelType34B:
+	case "30B", "34B", "40B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
 		} else if totalResidentMemory < 32*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 32GB of memory")
 		}
-	case ModelType65B:
+	case "65B", "70B":
-		if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
+		if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
 		} else if totalResidentMemory < 64*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 64GB of memory")
 		}
 	case "180B":
 		if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 {
 			return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
 		} else if totalResidentMemory < 128*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 128GB of memory")
 		}
 	}
-	switch ggml.ModelFamily() {
+	switch ggml.Name() {
-	case ModelFamilyLlama:
+	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
 		return newLlama(model, adapters, ggufRunner(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
 		return newLlama(model, adapters, ggmlRunner(), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -8,7 +8,7 @@ GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
 # build universal binary
 GOARCH=arm64 go generate ./...
 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
-rm -rf llm/llama.cpp/ggml/build/*/bin
+rm -rf llm/llama.cpp/*/build/*/bin
 GOARCH=amd64 go generate ./...
 GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
 lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
--- a/server/auth.go
+++ b/server/auth.go
@@ -103,7 +103,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect, regOpts *Registry
 	headers := make(http.Header)
 	headers.Set("Authorization", sig)
-	resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, regOpts)
+	resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, nil)
 	if err != nil {
 		log.Printf("couldn't get token: %q", err)
 	}
--- a/server/images.go
+++ b/server/images.go
@@ -114,7 +114,8 @@ type LayerReader struct {
 }
 type ConfigV2 struct {
-	ModelFamily llm.ModelFamily `json:"model_family"`
+	ModelFormat string `json:"model_format"`
 	ModelFamily string `json:"model_family"`
 	ModelType   string `json:"model_type"`
 	FileType    string `json:"file_type"`
 	RootFS      RootFS `json:"rootfs"`
@@ -268,6 +269,29 @@ func filenameWithPath(path, f string) (string, error) {
 }
 func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
 	mp := ParseModelPath(name)
 	var manifest *ManifestV2
 	var err error
 	var noprune string
 	// build deleteMap to prune unused layers
 	deleteMap := make(map[string]bool)
 	if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
 		manifest, _, err = GetManifest(mp)
 		if err != nil && !errors.Is(err, os.ErrNotExist) {
 			return err
 		}
 		if manifest != nil {
 			for _, l := range manifest.Layers {
 				deleteMap[l.Digest] = true
 			}
 			deleteMap[manifest.Config.Digest] = true
 		}
 	}
 	mf, err := os.Open(path)
 	if err != nil {
 		fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't open modelfile '%s'", path)})
@@ -328,14 +352,15 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 					}
 					defer file.Close()
-					ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
+					ggml, err := llm.DecodeGGML(file)
 					if err != nil {
 						return err
 					}
 					config.ModelFormat = ggml.Name()
 					config.ModelFamily = ggml.ModelFamily()
-					config.ModelType = ggml.ModelType().String()
+					config.ModelType = ggml.ModelType()
-					config.FileType = ggml.FileType().String()
+					config.FileType = ggml.FileType()
 					// reset the file
 					file.Seek(0, io.SeekStart)
@@ -369,6 +394,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 				// copy the model metadata
 				config.ModelFamily = source.ModelFamily
 				config.ModelType = source.ModelType
 				config.ModelFormat = source.ModelFormat
 				config.FileType = source.FileType
 				for _, l := range mf.Layers {
@@ -472,6 +498,12 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			}
 		}
 		if config.ModelType == "65B" {
 			if numGQA, ok := formattedParams["num_gqa"].(int); ok && numGQA == 8 {
 				config.ModelType = "70B"
 			}
 		}
 		bts, err := json.Marshal(formattedParams)
 		if err != nil {
 			return err
@@ -503,6 +535,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 	var manifestLayers []*Layer
 	for _, l := range layers {
 		manifestLayers = append(manifestLayers, &l.Layer)
 		delete(deleteMap, l.Layer.Digest)
 	}
 	// Create a layer for the config object
@@ -512,6 +545,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 	layers = append(layers, cfg)
 	delete(deleteMap, cfg.Layer.Digest)
 	if err := SaveLayers(layers, fn, false); err != nil {
 		return err
@@ -524,6 +558,14 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 	if noprune == "" {
 		fn(api.ProgressResponse{Status: "removing any unused layers"})
 		err = deleteUnusedLayers(nil, deleteMap, false)
 		if err != nil {
 			return err
 		}
 	}
 	fn(api.ProgressResponse{Status: "success"})
 	return nil
 }
@@ -779,14 +821,14 @@ func formatParams(params map[string][]string) (map[string]interface{}, error) {
 						return nil, fmt.Errorf("invalid float value %s", vals)
 					}
-					out[key] = floatVal
+					out[key] = float32(floatVal)
 				case reflect.Int:
-					intVal, err := strconv.ParseInt(vals[0], 10, 0)
+					intVal, err := strconv.ParseInt(vals[0], 10, 64)
 					if err != nil {
 						return nil, fmt.Errorf("invalid int value %s", vals)
 					}
-					out[key] = intVal
+					out[key] = int(intVal)
 				case reflect.Bool:
 					boolVal, err := strconv.ParseBool(vals[0])
 					if err != nil {
@@ -866,18 +908,7 @@ func CopyModel(src, dest string) error {
 	return nil
 }
-func DeleteModel(name string) error {
+func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dryRun bool) error {
 	mp := ParseModelPath(name)
 	manifest, _, err := GetManifest(mp)
 	if err != nil {
 		return err
 	}
 	deleteMap := make(map[string]bool)
 	for _, layer := range manifest.Layers {
 		deleteMap[layer.Digest] = true
 	}
 	deleteMap[manifest.Config.Digest] = true
 	fp, err := GetManifestPath()
 	if err != nil {
 		return err
@@ -894,14 +925,13 @@ func DeleteModel(name string) error {
 		fmp := ParseModelPath(tag)
 		// skip the manifest we're trying to delete
-		if mp.GetFullTagname() == fmp.GetFullTagname() {
+		if skipModelPath != nil && skipModelPath.GetFullTagname() == fmp.GetFullTagname() {
 			return nil
 		}
 		// save (i.e. delete from the deleteMap) any files used in other manifests
 		manifest, _, err := GetManifest(fmp)
 		if err != nil {
 			log.Printf("skipping file: %s", fp)
 			return nil
 		}
@@ -925,14 +955,72 @@ func DeleteModel(name string) error {
 				log.Printf("couldn't get file path for '%s': %v", k, err)
 				continue
 			}
 			if !dryRun {
 				if err := os.Remove(fp); err != nil {
 					log.Printf("couldn't remove file '%s': %v", fp, err)
 					continue
 				}
 			} else {
 				log.Printf("wanted to remove: %s", fp)
 			}
 		}
 	}
-	fp, err = mp.GetManifestPath(false)
+	return nil
 }
 // PruneLayers treats every entry in the blobs directory as a deletion
 // candidate and hands the candidate set to deleteUnusedLayers. It logs the
 // candidate count before the call and the size of the set after it.
 // NOTE(review): the final count assumes deleteUnusedLayers removes
 // still-referenced digests from the map it is given — confirm.
 func PruneLayers() error {
 	blobsDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}
 	entries, err := os.ReadDir(blobsDir)
 	if err != nil {
 		log.Printf("couldn't read dir '%s': %v", blobsDir, err)
 		return err
 	}
 	onWindows := runtime.GOOS == "windows"
 	candidates := make(map[string]bool, len(entries))
 	for _, entry := range entries {
 		digest := entry.Name()
 		if onWindows {
 			// on Windows the file name uses "-" where the map key uses ":"
 			digest = strings.ReplaceAll(digest, "-", ":")
 		}
 		candidates[digest] = true
 	}
 	log.Printf("total blobs: %d", len(candidates))
 	if err := deleteUnusedLayers(nil, candidates, false); err != nil {
 		return err
 	}
 	log.Printf("total unused blobs removed: %d", len(candidates))
 	return nil
 }
 func DeleteModel(name string) error {
 	mp := ParseModelPath(name)
 	manifest, _, err := GetManifest(mp)
 	if err != nil {
 		return err
 	}
 	deleteMap := make(map[string]bool)
 	for _, layer := range manifest.Layers {
 		deleteMap[layer.Digest] = true
 	}
 	deleteMap[manifest.Config.Digest] = true
 	err = deleteUnusedLayers(&mp, deleteMap, false)
 	if err != nil {
 		return err
 	}
 	fp, err := mp.GetManifestPath(false)
 	if err != nil {
 		return err
 	}
@@ -1111,13 +1199,34 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
 	mp := ParseModelPath(name)
 	var manifest *ManifestV2
 	var err error
 	var noprune string
 	// build deleteMap to prune unused layers
 	deleteMap := make(map[string]bool)
 	if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
 		manifest, _, err = GetManifest(mp)
 		if err != nil && !errors.Is(err, os.ErrNotExist) {
 			return err
 		}
 		if manifest != nil {
 			for _, l := range manifest.Layers {
 				deleteMap[l.Digest] = true
 			}
 			deleteMap[manifest.Config.Digest] = true
 		}
 	}
 	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return fmt.Errorf("insecure protocol http")
 	}
 	fn(api.ProgressResponse{Status: "pulling manifest"})
-	manifest, err := pullModelManifest(ctx, mp, regOpts)
+	manifest, err = pullModelManifest(ctx, mp, regOpts)
 	if err != nil {
 		return fmt.Errorf("pull model manifest: %s", err)
 	}
@@ -1137,7 +1246,9 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 			}); err != nil {
 			return err
 		}
 		delete(deleteMap, layer.Digest)
 	}
 	delete(deleteMap, manifest.Config.Digest)
 	fn(api.ProgressResponse{Status: "verifying sha256 digest"})
 	for _, layer := range layers {
@@ -1175,6 +1286,14 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 		return err
 	}
 	if noprune == "" {
 		fn(api.ProgressResponse{Status: "removing any unused layers"})
 		err = deleteUnusedLayers(nil, deleteMap, false)
 		if err != nil {
 			return err
 		}
 	}
 	fn(api.ProgressResponse{Status: "success"})
 	return nil
@@ -1300,7 +1419,7 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 }
 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
-	if requestURL.Scheme != "http" && regOpts.Insecure {
+	if requestURL.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		requestURL.Scheme = "http"
 	}
@@ -1313,11 +1432,13 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
 		req.Header = headers
 	}
 	if regOpts != nil {
 		if regOpts.Token != "" {
 			req.Header.Set("Authorization", "Bearer "+regOpts.Token)
 		} else if regOpts.Username != "" && regOpts.Password != "" {
 			req.SetBasicAuth(regOpts.Username, regOpts.Password)
 		}
 	}
 	req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -133,7 +133,12 @@ func GetBlobsPath(digest string) (string, error) {
 	}
 	path := filepath.Join(home, ".ollama", "models", "blobs", digest)
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+	dirPath := filepath.Dir(path)
 	if digest == "" {
 		dirPath = path
 	}
 	if err := os.MkdirAll(dirPath, 0o755); err != nil {
 		return "", err
 	}
--- a/server/routes.go
+++ b/server/routes.go
@@ -12,6 +12,7 @@ import (
 	"os/signal"
 	"path/filepath"
 	"reflect"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
@@ -363,6 +364,7 @@ func DeleteModelHandler(c *gin.Context) {
 		}
 		return
 	}
 	c.JSON(http.StatusOK, nil)
 }
 func ShowModelHandler(c *gin.Context) {
@@ -547,6 +549,13 @@ func Serve(ln net.Listener, origins []string) error {
 		os.Exit(0)
 	}()
 	if runtime.GOOS == "linux" {
 		// check compatibility to log warnings
 		if _, err := llm.CheckVRAM(); err != nil {
 			log.Printf("Warning: GPU support not enabled, you may need to install GPU drivers: %v", err)
 		}
 	}
 	return s.Serve(ln)
 }
--- a/server/upload.go
+++ b/server/upload.go
@@ -66,12 +66,19 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 		sectionReader := io.NewSectionReader(f, int64(offset), chunk)
 		for try := 0; try < MaxRetries; try++ {
 			ch := make(chan error, 1)
 			r, w := io.Pipe()
 			defer r.Close()
 			go func() {
 				defer w.Close()
 				for chunked := int64(0); chunked < chunk; {
 					select {
 					case err := <-ch:
 						log.Printf("chunk interrupted: %v", err)
 						return
 					default:
 						n, err := io.CopyN(w, sectionReader, 1024*1024)
 						if err != nil && !errors.Is(err, io.EOF) {
 							fn(api.ProgressResponse{
@@ -92,6 +99,7 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 							Completed: int(offset) + int(chunked),
 						})
 					}
 				}
 			}()
 			headers := make(http.Header)
@@ -113,6 +121,8 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 			switch {
 			case resp.StatusCode == http.StatusUnauthorized:
 				ch <- errors.New("unauthorized")
 				auth := resp.Header.Get("www-authenticate")
 				authRedir := ParseAuthRedirectString(auth)
 				token, err := getAuthToken(ctx, authRedir, regOpts)
@@ -121,10 +131,7 @@ func uploadBlobChunked(ctx context.Context, requestURL *url.URL, layer *Layer, r
 				}
 				regOpts.Token = token
-				if _, err := sectionReader.Seek(0, io.SeekStart); err != nil {
+				sectionReader = io.NewSectionReader(f, int64(offset), chunk)
 					return err
 				}
 				continue
 			case resp.StatusCode >= http.StatusBadRequest:
 				body, _ := io.ReadAll(resp.Body)
Author	SHA1	Message	Date
Matt Williams	e2389b63aa	add examples of streaming in python and node Signed-off-by: Matt Williams <m@technovangelist.com>	2023-09-14 07:12:09 -07:00
Michael Yang	f89c23764b	Merge pull request #525 from jmorganca/mxyng/falcon-decode fix: add falcon.go	2023-09-13 15:08:47 -07:00
Michael Yang	d028853879	fix: add falcon.go	2023-09-13 14:47:37 -07:00
Michael Yang	949553db23	Merge pull request #519 from jmorganca/mxyng/decode Mxyng/decode	2023-09-13 12:43:57 -07:00
Michael Yang	0c5a454361	fix model type for 70b	2023-09-12 15:12:59 -07:00
Bruce MacDonald	f59c4d03f7	fix ggml arm64 cuda build (#520 )	2023-09-12 17:06:48 -04:00
Michael Yang	7dee25a07f	fix falcon decode get model and file type from bin file	2023-09-12 12:34:53 -07:00
Bruce MacDonald	f221637053	first pass at linux gpu support (#454 ) * linux gpu support * handle multiple gpus * add cuda docker image (#488) --------- Co-authored-by: Michael Yang <mxyng@pm.me>	2023-09-12 11:04:35 -04:00
Patrick Devine	45ac07cd02	create the blobs directory correctly (#508 )	2023-09-11 14:54:52 -07:00
Jeffrey Morgan	7d749cc787	fix darwin build script	2023-09-11 16:31:46 -04:00
Patrick Devine	e7e91cd71c	add autoprune to remove unused layers (#491 )	2023-09-11 11:46:35 -07:00
Jeffrey Morgan	3920e15386	add model format to config layer (#497 )	2023-09-09 17:53:44 -04:00
Michael Yang	41e976edde	Merge pull request #492 from jmorganca/mxyng/nil-pointer fix nil pointer dereference	2023-09-07 17:25:23 -07:00
Michael Yang	de227b620f	fix nil pointer dereference	2023-09-07 17:24:31 -07:00
Michael Yang	63def6ca49	Merge pull request #487 from jmorganca/mxyng/dockerignore update dockerignore	2023-09-07 14:16:17 -07:00
Michael Yang	738fe9c4aa	Merge pull request #486 from jmorganca/mxyng/fix-push fix: retry push on expired token	2023-09-07 13:58:34 -07:00
Michael Yang	a8da0bacbe	update dockerignore	2023-09-07 13:36:25 -07:00
Michael Yang	bf146fb072	fix retry on unauthorized chunk	2023-09-07 12:02:04 -07:00
Michael Yang	f0f4943577	fix get auth token	2023-09-07 12:01:56 -07:00
Bruce MacDonald	09dd2aeff9	GGUF support (#441 )	2023-09-07 13:55:37 -04:00
Alexander Pepper	07b4074e7b	[docs] Improve build instructions (#482 ) Go is required and not installed by default.	2023-09-07 06:43:26 -04:00
Jeffrey Morgan	61dda6a5e0	set minimum `CMAKE_OSX_DEPLOYMENT_TARGET` to 11.0	2023-09-06 19:56:50 -04:00
Michael Yang	e1f9ced568	Merge pull request #479 from jmorganca/mxyng/dockerfile update dockerfile	2023-09-06 15:44:24 -07:00
Michael Yang	9795b43d93	update dockerfile	2023-09-06 15:31:25 -07:00
Michael Yang	0980d5c7e3	Merge pull request #478 from jmorganca/mxyng/cleanup remove unused openssh key types	2023-09-06 15:18:54 -07:00
Michael Yang	0dae34b6a7	remove unused openssh key types	2023-09-06 14:34:09 -07:00