fix write gguf padding
This commit is contained in:
parent
1e7f62cb42
commit
2fec73eef6
@ -28,12 +28,12 @@ type llamaModel struct {
|
||||
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
|
||||
RopeTheta float32 `json:"rope_theta"`
|
||||
RopeScaling struct {
|
||||
Type string `json:"type"`
|
||||
RopeType string `json:"rope_type"`
|
||||
Factor float32 `json:"factor"`
|
||||
LowFrequencyFactor float32 `json:"low_freq_factor"`
|
||||
HighFrequencyFactor float32 `json:"high_freq_factor"`
|
||||
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
|
||||
Type string `json:"type"`
|
||||
RopeType string `json:"rope_type"`
|
||||
Factor float32 `json:"factor"`
|
||||
LowFrequencyFactor float32 `json:"low_freq_factor"`
|
||||
HighFrequencyFactor float32 `json:"high_freq_factor"`
|
||||
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
|
||||
|
||||
factors ropeFactor
|
||||
} `json:"rope_scaling"`
|
||||
@ -84,7 +84,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
|
||||
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
|
||||
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
|
||||
|
||||
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
|
||||
original := cmp.Or(p.RopeScaling.OriginalMaxPositionEmbeddings, 8192)
|
||||
lambdaLow := float32(original) / factorLow
|
||||
lambdaHigh := float32(original) / factorHigh
|
||||
|
||||
|
@ -118,6 +118,5 @@ func (p *phi3Model) Replacements() []string {
|
||||
type ropeFactor []float32
|
||||
|
||||
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
|
||||
err := binary.Write(w, binary.LittleEndian, r)
|
||||
return 0, err
|
||||
return 0, binary.Write(w, binary.LittleEndian, r)
|
||||
}
|
||||
|
@ -235,10 +235,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
// patch KV with parameter count
|
||||
llm.kv["general.parameter_count"] = llm.parameters
|
||||
|
||||
alignment, ok := llm.kv["general.alignment"].(uint32)
|
||||
if !ok {
|
||||
alignment = 32
|
||||
}
|
||||
alignment := llm.kv.Uint("general.alignment", 32)
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
@ -506,6 +503,8 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
||||
}
|
||||
|
||||
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
||||
alignment := kv.Uint("general.alignment", 32)
|
||||
|
||||
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -543,16 +542,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
||||
|
||||
var s uint64
|
||||
for _, t := range ts {
|
||||
t.Offset = s
|
||||
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
|
||||
if err := ggufWriteTensorInfo(ws, t); err != nil {
|
||||
return err
|
||||
}
|
||||
s += t.Size()
|
||||
}
|
||||
|
||||
var alignment int64 = 32
|
||||
for _, t := range ts {
|
||||
if err := ggufWriteTensor(ws, t, alignment); err != nil {
|
||||
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user