fix write gguf padding
parent 1e7f62cb42
commit 2fec73eef6
@@ -33,7 +33,7 @@ type llamaModel struct {
 		Factor float32 `json:"factor"`
 		LowFrequencyFactor float32 `json:"low_freq_factor"`
 		HighFrequencyFactor float32 `json:"high_freq_factor"`
-		OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
+		OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
 
 		factors ropeFactor
 	} `json:"rope_scaling"`
@@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
 		factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
 		factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
 
-		original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
+		original := cmp.Or(p.RopeScaling.OriginalMaxPositionEmbeddings, 8192)
 		lambdaLow := float32(original) / factorLow
 		lambdaHigh := float32(original) / factorHigh
 
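For reference, plugging the cmp.Or defaults from this hunk into the two divisions gives lambdaLow = 8192 / 1.0 = 8192 and lambdaHigh = 8192 / 4.0 = 2048. A standalone sketch of that arithmetic (the values come from the fallbacks above, nothing here is new to the commit):

package main

import "fmt"

func main() {
	// Defaults taken from the cmp.Or fallbacks in the hunk above.
	original := uint32(8192)   // original_max_position_embeddings
	factorLow := float32(1.0)  // low_freq_factor
	factorHigh := float32(4.0) // high_freq_factor

	lambdaLow := float32(original) / factorLow   // 8192
	lambdaHigh := float32(original) / factorHigh // 2048
	fmt.Println(lambdaLow, lambdaHigh)
}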
@@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string {
 type ropeFactor []float32
 
 func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
-	err := binary.Write(w, binary.LittleEndian, r)
-	return 0, err
+	return 0, binary.Write(w, binary.LittleEndian, r)
 }
@@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	// patch KV with parameter count
 	llm.kv["general.parameter_count"] = llm.parameters
 
-	alignment, ok := llm.kv["general.alignment"].(uint32)
-	if !ok {
-		alignment = 32
-	}
+	alignment := llm.kv.Uint("general.alignment", 32)
 
 	offset, err := rs.Seek(0, io.SeekCurrent)
 	if err != nil {
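The open-coded type assertion with a hard-coded 32 fallback becomes a call to a KV helper that takes a default. The helper itself is not part of this diff; a minimal sketch of what such a method could look like, assuming KV is a map[string]any (the real method may accept more numeric types):

package main

import "fmt"

// KV is assumed here to be a map-backed key/value store, as suggested by
// the llm.kv["general.parameter_count"] assignment above.
type KV map[string]any

// Uint returns the value stored under key as a uint32, falling back to
// defaultValue when the key is missing or holds a different type,
// mirroring the old alignment/ok branch.
func (kv KV) Uint(key string, defaultValue uint32) uint32 {
	if v, ok := kv[key].(uint32); ok {
		return v
	}
	return defaultValue
}

func main() {
	kv := KV{"general.alignment": uint32(64)}
	fmt.Println(kv.Uint("general.alignment", 32)) // 64
	fmt.Println(kv.Uint("general.missing", 32))   // 32
}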
@@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 }
 
 func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
+	alignment := kv.Uint("general.alignment", 32)
+
 	if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
 		return err
 	}
@@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 
 	var s uint64
 	for _, t := range ts {
-		t.Offset = s
+		t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
 		if err := ggufWriteTensorInfo(ws, t); err != nil {
 			return err
 		}
 		s += t.Size()
 	}
 
-	var alignment int64 = 32
 	for _, t := range ts {
-		if err := ggufWriteTensor(ws, t, alignment); err != nil {
+		if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
 			return err
 		}
 	}
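With this change the offsets recorded in the tensor-info section account for the alignment padding inserted before each tensor's data, and both loops use the alignment read from general.alignment instead of a hard-coded 32. ggufPadding itself is not shown in the diff; the sketch below assumes it returns the number of bytes needed to round an offset up to the next multiple of the alignment:

package main

import "fmt"

// ggufPadding, as assumed here, returns how many bytes must be added to
// offset so that it lands on the next multiple of align.
func ggufPadding(offset, align int64) int64 {
	return (align - offset%align) % align
}

func main() {
	align := int64(32)
	for _, offset := range []int64{0, 1, 31, 32, 100} {
		pad := ggufPadding(offset, align)
		fmt.Printf("offset %3d -> pad %2d -> aligned %3d\n", offset, pad, offset+pad)
	}
}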