From 873f334783ecd59886a158aac7e20627fb751c28 Mon Sep 17 00:00:00 2001 From: Josh Yan Date: Tue, 16 Jul 2024 14:12:07 -0700 Subject: [PATCH] IT WORKS --- llm/ggla.go | 6 +++++- llm/ggml.go | 21 +++++++++++++-------- llm/gguf.go | 33 +++++++++++++++++++++++---------- llm/gguf_test.go | 27 ++++++++++++++++----------- server/model.go | 12 ++++++++---- 5 files changed, 65 insertions(+), 34 deletions(-) diff --git a/llm/ggla.go b/llm/ggla.go index 34c4f6ca3..7a172581b 100644 --- a/llm/ggla.go +++ b/llm/ggla.go @@ -36,6 +36,7 @@ type ggla struct { kv KV tensors []*Tensor + offset int64 } func newGGLA(container *containerGGLA) *ggla { @@ -50,7 +51,10 @@ func (llm *ggla) KV() KV { } func (llm *ggla) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.offset, + } } func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) { diff --git a/llm/ggml.go b/llm/ggml.go index b2973250b..072d28216 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -113,32 +113,37 @@ func (kv KV) ChatTemplate() string { } // Tensors type as a slice of pointers to Tensor -type Tensors []*Tensor +// type Tensors []*Tensor + +type Tensors struct { + Items []*Tensor + Offset int64 +} // Implement the Len method func (ts Tensors) Len() int { - return len(ts) + return len(ts.Items) } // Implement the Swap method func (ts Tensors) Swap(i, j int) { - ts[i], ts[j] = ts[j], ts[i] + ts.Items[i], ts.Items[j] = ts.Items[j], ts.Items[i] } // Implement the Less method func (ts Tensors) Less(i, j int) bool { var x, y int - if n, err := fmt.Sscanf(ts[i].Name, "blk.%d", &x); err != nil || n != 1 { - return ts[i].Name < ts[j].Name - } else if n, err := fmt.Sscanf(ts[j].Name, "blk.%d", &y); err != nil || n != 1 { - return ts[i].Name < ts[j].Name + if n, err := fmt.Sscanf(ts.Items[i].Name, "blk.%d", &x); err != nil || n != 1 { + return ts.Items[i].Name < ts.Items[j].Name + } else if n, err := fmt.Sscanf(ts.Items[j].Name, "blk.%d", &y); err != nil || n != 1 { + return ts.Items[i].Name 
< ts.Items[j].Name } return x < y } func (ts Tensors) Layers() map[string]Layer { layers := make(map[string]Layer) - for _, t := range ts { + for _, t := range ts.Items { parts := strings.Split(t.Name, ".") if parts[0] == "blk" { // join first and second part, e.g. blk.%d diff --git a/llm/gguf.go b/llm/gguf.go index 9339488e0..cccfc3686 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -8,6 +8,7 @@ import ( "io" "log/slog" "slices" + "sort" "strings" "golang.org/x/exp/maps" @@ -91,6 +92,7 @@ type gguf struct { kv KV tensors []*Tensor + offset int64 parameters uint64 @@ -113,7 +115,10 @@ func (llm *gguf) KV() KV { } func (llm *gguf) Tensors() Tensors { - return llm.tensors + return Tensors{ + Items: llm.tensors, + Offset: llm.offset, + } } func (llm *gguf) numTensor() uint64 { @@ -242,6 +247,15 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { alignment = 32 } + offset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return fmt.Errorf("failed to get current offset: %w", err) + } + + // ADD PADDING + + llm.offset = offset + llm.padding(offset, int64(alignment)) + for _, tensor := range llm.tensors { offset, err := rs.Seek(0, io.SeekCurrent) if err != nil { @@ -703,7 +717,7 @@ func (gguf) padding(offset, align int64) int64 { return (align - offset%align) % align } -// Reader and WriterTo +// Reader and WriterTo type GGUFWriter struct { KV Tensors @@ -739,7 +753,7 @@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) { return 0, err } - if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors))); err != nil { + if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors.Items))); err != nil { return 0, err } @@ -761,10 +775,10 @@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) { } } } - //sort.Sort(gguf.Tensors) + sort.Sort(gguf.Tensors) var s uint64 - for _, t := range gguf.Tensors { + for _, t := range gguf.Tensors.Items { t.Offset = s if err := ggufWriteTensorInfo(wo, t); err != nil { return 0, err @@ -773,7 +787,7 
@@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) { } tensorOffset := wo.offset - for _, t := range gguf.Tensors { + for _, t := range gguf.Tensors.Items { if err := ggufWriteTensor(wo, t, wo.offset); err != nil { return 0, err } @@ -810,10 +824,9 @@ func ggufWriteTensorInfo(ws io.Writer, t *Tensor) error { func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error { slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset) - fmt.Println(int(ggufPadding(int64(offset), 32))) - /* if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil { + if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil { return err - } */ + } _, err := t.WriteTo(ws) return err @@ -906,5 +919,5 @@ func ggufWriteKV(ws io.Writer, k string, v any) error { } func ggufPadding(offset, align int64) int64 { - return align - offset%align + return (align - offset%align) % align } diff --git a/llm/gguf_test.go b/llm/gguf_test.go index 5c4ee805d..244549d48 100644 --- a/llm/gguf_test.go +++ b/llm/gguf_test.go @@ -16,6 +16,7 @@ import ( func TestGGUFRewrite(t *testing.T) { tests := []string{ "phi3.gguf", + "nutiny.gguf", } for i := range tests { @@ -112,13 +113,13 @@ func compareGGML(n int64, ggml1, ggml2 *GGML, f *os.File, f2 *os.File) (map[stri t1 := ggml1.Tensors() t2 := ggml2.Tensors() - if len(t1) != len(t2) { - diff["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(t1), len(t2)) + if len(t1.Items) != len(t2.Items) { + diff["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(t1.Items), len(t2.Items)) } - for _, tensor := range t1 { + for _, tensor := range t1.Items { sha256sum := sha256.New() - sr := io.NewSectionReader(f, n+int64(tensor.Offset), int64(tensor.Size())) + sr := io.NewSectionReader(f, t1.Offset+int64(tensor.Offset), int64(tensor.Size())) var s int64 s, err := io.Copy(sha256sum, sr) if err != nil { @@ -147,10 +148,10 @@ func 
compareGGML(n int64, ggml1, ggml2 *GGML, f *os.File, f2 *os.File) (map[stri diff["sha"] = fmt.Sprintf("%d", s1) diff2["sha"] = fmt.Sprintf("%d", s2) */ - for _, tensor := range t2 { + for _, tensor := range t2.Items { sha256sum := sha256.New() var s int64 - sr := io.NewSectionReader(f2, n+int64(tensor.Offset), int64(tensor.Size())) + sr := io.NewSectionReader(f2, t2.Offset+int64(tensor.Offset), int64(tensor.Size())) s, err := io.Copy(sha256sum, sr) if err != nil { fmt.Println(err) @@ -173,23 +174,24 @@ func decodeGGML(t *testing.T, f *os.File) (*GGML, int64, error) { } func rewriteGGML(t *testing.T, ggml *GGML, temp *os.File, f *os.File) (int64, *GGML, error) { - var tensors Tensors + var tensors []*Tensor fmt.Println("11111111111111111111111111111111111111111") - for _, tensor := range ggml.Tensors() { + for _, tensor := range ggml.Tensors().Items { shape := make([]uint64, len(tensor.Shape)) for i := range len(tensor.Shape) { shape[i] = tensor.Shape[len(tensor.Shape)-i-1] } - fmt.Println("tensors", tensor.Name, shape, tensor.Kind, 737414+int64(tensor.Offset)) + fmt.Println("tensors", tensor.Name, shape, tensor.Kind, tensor.Offset) + fmt.Println(ggml.Tensors().Offset) tensors = append(tensors, &Tensor{ Name: tensor.Name, Kind: tensor.Kind, Shape: shape, WriterTo: TensorWriter{ - Reader: io.NewSectionReader(f, 737414+int64(tensor.Offset), int64(tensor.Size())), + Reader: io.NewSectionReader(f, ggml.Tensors().Offset+int64(tensor.Offset), int64(tensor.Size())), }, }) } @@ -197,7 +199,10 @@ func rewriteGGML(t *testing.T, ggml *GGML, temp *os.File, f *os.File) (int64, *G reader := &GGUFWriter{ KV: ggml.KV(), // Update .Tensors - Tensors: tensors, + Tensors: Tensors{ + Items: tensors, + Offset: ggml.Tensors().Offset, + }, } n, err := io.Copy(temp, reader) diff --git a/server/model.go b/server/model.go index 8cc5f4e35..9ecf3a0ed 100644 --- a/server/model.go +++ b/server/model.go @@ -245,9 +245,10 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn 
func(ap var reader io.Reader = io.NewSectionReader(file, offset, n) if !sort.IsSorted(ggml.Tensors()) { // create a new Tensors containing Tensors that have a writeTo - var tensors llm.Tensors + var tensors []*llm.Tensor + ggmlTensors := ggml.Tensors() - for _, tensor := range ggml.Tensors() { + for _, tensor := range ggmlTensors.Items { shape := make([]uint64, len(tensor.Shape)) for i := range len(tensor.Shape) { shape[i] = tensor.Shape[len(tensor.Shape)-i-1] @@ -260,7 +261,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap WriterTo: &llm.TensorWriter{ // This needs offset + tensors.Offset int64(tensor.Offset) to be correct - Reader: io.NewSectionReader(file, offset + int64(tensor.Offset), int64(tensor.Size())), + Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())), }, }) } @@ -268,7 +269,10 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap reader = &llm.GGUFWriter{ KV: ggml.KV(), // Update .Tensors - Tensors: tensors, + Tensors: llm.Tensors{ + Items: tensors, + Offset: ggml.Tensors().Offset, + }, } }