fix write gguf padding

This commit is contained in:
Michael Yang 2025-04-11 13:39:51 -07:00 committed by Michael Yang
parent 1e7f62cb42
commit 2fec73eef6
3 changed files with 13 additions and 16 deletions

View File

@ -28,12 +28,12 @@ type llamaModel struct {
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling struct {
Type string `json:"type"`
RopeType string `json:"rope_type"`
Factor float32 `json:"factor"`
LowFrequencyFactor float32 `json:"low_freq_factor"`
HighFrequencyFactor float32 `json:"high_freq_factor"`
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
Type string `json:"type"`
RopeType string `json:"rope_type"`
Factor float32 `json:"factor"`
LowFrequencyFactor float32 `json:"low_freq_factor"`
HighFrequencyFactor float32 `json:"high_freq_factor"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
factors ropeFactor
} `json:"rope_scaling"`
@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
original := cmp.Or(p.RopeScaling.OriginalMaxPositionEmbeddings, 8192)
lambdaLow := float32(original) / factorLow
lambdaHigh := float32(original) / factorHigh

View File

@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string {
type ropeFactor []float32
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
err := binary.Write(w, binary.LittleEndian, r)
return 0, err
return 0, binary.Write(w, binary.LittleEndian, r)
}

View File

@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
// patch KV with parameter count
llm.kv["general.parameter_count"] = llm.parameters
alignment, ok := llm.kv["general.alignment"].(uint32)
if !ok {
alignment = 32
}
alignment := llm.kv.Uint("general.alignment", 32)
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
}
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
alignment := kv.Uint("general.alignment", 32)
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
return err
}
@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
var s uint64
for _, t := range ts {
t.Offset = s
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
if err := ggufWriteTensorInfo(ws, t); err != nil {
return err
}
s += t.Size()
}
var alignment int64 = 32
for _, t := range ts {
if err := ggufWriteTensor(ws, t, alignment); err != nil {
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
return err
}
}