From 2fec73eef6e9482f606f185ebb2ae4f75ad1a37c Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 11 Apr 2025 13:39:51 -0700 Subject: [PATCH] fix write gguf padding --- convert/convert_llama.go | 14 +++++++------- convert/convert_phi3.go | 3 +-- fs/ggml/gguf.go | 12 +++++------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/convert/convert_llama.go b/convert/convert_llama.go index e4422f41a..679d062ea 100644 --- a/convert/convert_llama.go +++ b/convert/convert_llama.go @@ -28,12 +28,12 @@ type llamaModel struct { NumKeyValueHeads uint32 `json:"num_key_value_heads"` RopeTheta float32 `json:"rope_theta"` RopeScaling struct { - Type string `json:"type"` - RopeType string `json:"rope_type"` - Factor float32 `json:"factor"` - LowFrequencyFactor float32 `json:"low_freq_factor"` - HighFrequencyFactor float32 `json:"high_freq_factor"` - OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"` + Type string `json:"type"` + RopeType string `json:"rope_type"` + Factor float32 `json:"factor"` + LowFrequencyFactor float32 `json:"low_freq_factor"` + HighFrequencyFactor float32 `json:"high_freq_factor"` + OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"` factors ropeFactor } `json:"rope_scaling"` @@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV { factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0) factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0) - original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192) + original := cmp.Or(p.RopeScaling.OriginalMaxPositionEmbeddings, 8192) lambdaLow := float32(original) / factorLow lambdaHigh := float32(original) / factorHigh diff --git a/convert/convert_phi3.go b/convert/convert_phi3.go index 4f25737b1..d1c13795a 100644 --- a/convert/convert_phi3.go +++ b/convert/convert_phi3.go @@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string { type ropeFactor []float32 func (r ropeFactor) WriteTo(w io.Writer) (int64, error) { - err := binary.Write(w, binary.LittleEndian, r) - return 0, err + return 0, binary.Write(w, binary.LittleEndian, r) } diff --git a/fs/ggml/gguf.go b/fs/ggml/gguf.go index 8954cb37d..0c6a8eb2c 100644 --- a/fs/ggml/gguf.go +++ b/fs/ggml/gguf.go @@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { // patch KV with parameter count llm.kv["general.parameter_count"] = llm.parameters - alignment, ok := llm.kv["general.alignment"].(uint32) - if !ok { - alignment = 32 - } + alignment := llm.kv.Uint("general.alignment", 32) offset, err := rs.Seek(0, io.SeekCurrent) if err != nil { @@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error { } func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { + alignment := kv.Uint("general.alignment", 32) + if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { return err } @@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { var s uint64 for _, t := range ts { - t.Offset = s + t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment))) if err := ggufWriteTensorInfo(ws, t); err != nil { return err } s += t.Size() } - var alignment int64 = 32 for _, t := range ts { - if err := ggufWriteTensor(ws, t, alignment); err != nil { + if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil { return err } }