diff --git a/llm/gguf.go b/llm/gguf.go index d9a949f94..6ee1c054b 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -252,8 +252,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error { return fmt.Errorf("failed to get current offset: %w", err) } - // ADD PADDING - + // align to the next alignment boundary llm.offset = offset + llm.padding(offset, int64(alignment)) for _, tensor := range llm.tensors { diff --git a/server/model.go b/server/model.go index 9ecf3a0ed..3101a662e 100644 --- a/server/model.go +++ b/server/model.go @@ -260,7 +260,6 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap Shape: shape, WriterTo: &llm.TensorWriter{ - // This needs offset + tensors.Offset int64(tensor.Offset) to be correct Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())), }, }) @@ -268,10 +267,9 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap reader = &llm.GGUFWriter{ KV: ggml.KV(), - // Update .Tensors Tensors: llm.Tensors{ Items: tensors, - Offset: ggml.Tensors().Offset, + Offset: ggmlTensors.Offset, }, } }