From 2fdebffc8d675f198af8f4f07e9fffa2f0f42549 Mon Sep 17 00:00:00 2001
From: Josh Yan
Date: Thu, 11 Jul 2024 18:18:26 -0700
Subject: [PATCH] sawp

---
 llm/ggml.go     | 25 ++++++++++++++++
 llm/gguf.go     | 78 ++++++++++++++++++++++++++++++++++++++++++++++++-
 server/model.go | 23 ++++++++++++++-
 3 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/llm/ggml.go b/llm/ggml.go
index fddb50391..126139641 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -1,6 +1,7 @@
 package llm
 
 import (
+	"cmp"
 	"encoding/binary"
 	"errors"
 	"fmt"
@@ -114,6 +115,30 @@ func (kv KV) ChatTemplate() string {
 
 type Tensors []*Tensor
 
+// Less reports whether the tensor at index i sorts before the tensor at index
+// j. Names beginning with "blk.N" are compared by their numeric index N; if
+// either name does not parse that way, the names are compared as strings.
+func (ts Tensors) Less(i, j int) bool {
+	var x, y int
+	if n, err := fmt.Sscanf(ts[i].Name, "blk.%d", &x); err != nil || n != 1 {
+		return cmp.Less(ts[i].Name, ts[j].Name)
+	} else if n, err := fmt.Sscanf(ts[j].Name, "blk.%d", &y); err != nil || n != 1 {
+		return cmp.Less(ts[i].Name, ts[j].Name)
+	}
+
+	return cmp.Less(x, y)
+}
+
+// Len returns the number of tensors.
+func (ts Tensors) Len() int {
+	return len(ts)
+}
+
+// Swap exchanges the tensors at indexes i and j.
+func (ts Tensors) Swap(i, j int) {
+	ts[i], ts[j] = ts[j], ts[i]
+}
+
 func (ts Tensors) Layers() map[string]Layer {
 	layers := make(map[string]Layer)
 	for _, t := range ts {
diff --git a/llm/gguf.go b/llm/gguf.go
index e35222cf4..4a3f23e51 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -2,11 +2,15 @@ package llm
 
 import (
 	"bytes"
+	"cmp"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
+	"slices"
 	"strings"
+
+	"golang.org/x/exp/maps"
 )
 
 type containerGGUF struct {
@@ -711,5 +715,77 @@ func (GGUFWriter) Read([]byte) (int, error) {
 }
 
+// WriteTo writes the GGUF magic, the uint32 value 3 (presumably the format
+// version), the tensor and KV counts, the KV pairs in sorted key order, and
+// then the tensor infos followed by the tensor data, with tensors ordered by
+// their "blk.N" index.
+//
+// NOTE(review): the returned byte count is always 0, which does not follow the
+// io.WriterTo contract. TODO: count the bytes written to w.
 func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
-
+	if err := binary.Write(w, binary.LittleEndian, []byte("GGUF")); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint32(3)); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint64(len(gguf.T))); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint64(len(gguf.KV))); err != nil {
+		return 0, err
+	}
+
+	keys := maps.Keys(gguf.KV)
+	slices.Sort(keys)
+
+	for _, key := range keys {
+		if err := ggufWriteKV(w, key, gguf.KV[key]); err != nil {
+			return 0, err
+		}
+	}
+
+	slices.SortFunc(gguf.T, func(a, b *Tensor) int {
+		var i, j int
+		if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
+			return cmp.Compare(a.Name, b.Name)
+		} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
+			return cmp.Compare(a.Name, b.Name)
+		}
+
+		return cmp.Compare(i, j)
+	})
+
+	var s uint64
+	for _, t := range gguf.T {
+		t.Offset = s
+		if err := ggufWriteTensorInfo(w, t); err != nil {
+			return 0, err
+		}
+		s += t.Size()
+	}
+
+	var alignment int64 = 32
+	for _, t := range gguf.T {
+		if err := ggufWriteTensor(w, t, alignment); err != nil {
+			return 0, err
+		}
+	}
+
+	return 0, nil
+}
+
+// ggufWriteTensor is a placeholder for writing one tensor's data; it is not
+// implemented yet and reports that instead of silently writing nothing.
+func ggufWriteTensor(io.Writer, *Tensor, int64) error {
+	return fmt.Errorf("ggufWriteTensor: not implemented")
+}
+
+// ggufWriteTensorInfo is a placeholder for writing one tensor's metadata
+// entry; it is not implemented yet and reports that instead of silently
+// writing nothing.
+func ggufWriteTensorInfo(io.Writer, *Tensor) error {
+	return fmt.Errorf("ggufWriteTensorInfo: not implemented")
 }
diff --git a/server/model.go b/server/model.go
index a79f549a3..c200b909b 100644
--- a/server/model.go
+++ b/server/model.go
@@ -3,6 +3,7 @@ package server
 import (
 	"archive/zip"
 	"bytes"
+	"cmp"
 	"context"
 	"errors"
 	"fmt"
@@ -11,6 +12,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"slices"
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/convert"
@@ -241,7 +243,26 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 			mediatype = "application/vnd.ollama.image.projector"
 		}
 
-		layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
+		// Tensors that are not already in block order are re-serialized
+		// through GGUFWriter, which sorts them while writing.
+		var reader io.Reader = io.NewSectionReader(file, offset, n)
+		if !slices.IsSortedFunc(ggml.Tensors(), func(a, b *llm.Tensor) int {
+			var i, j int
+			if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
+				return cmp.Compare(a.Name, b.Name)
+			} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
+				return cmp.Compare(a.Name, b.Name)
+			}
+
+			return cmp.Compare(i, j)
+		}) {
+			reader = &llm.GGUFWriter{
+				KV: ggml.KV(),
+				T:  ggml.Tensors(),
+			}
+		}
+
+		layer, err := NewLayer(reader, mediatype)
 		if err != nil {
 			return nil, err
 		}