wip: write tensors in parallel
This commit is contained in:
parent
b0f28d178a
commit
34ae8077d1
@ -4,9 +4,9 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ollama/ollama/fs/ggml"
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
@ -84,12 +84,12 @@ func (ModelParameters) specialTokenTypes() []string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
func (ModelParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
|
||||||
return ggml.WriteGGUF(ws, kv, ts)
|
return ggml.WriteGGUF(f, kv, ts)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
|
func (AdapterParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
|
||||||
return ggml.WriteGGUF(ws, kv, ts)
|
return ggml.WriteGGUF(f, kv, ts)
|
||||||
}
|
}
|
||||||
|
|
||||||
type ModelConverter interface {
|
type ModelConverter interface {
|
||||||
@ -104,7 +104,7 @@ type ModelConverter interface {
|
|||||||
// specialTokenTypes returns any special token types the model uses
|
// specialTokenTypes returns any special token types the model uses
|
||||||
specialTokenTypes() []string
|
specialTokenTypes() []string
|
||||||
// writeFile writes the model to the provided io.WriteSeeker
|
// writeFile writes the model to the provided *os.File
|
||||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type moreParser interface {
|
type moreParser interface {
|
||||||
@ -120,10 +120,10 @@ type AdapterConverter interface {
|
|||||||
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
|
||||||
Replacements() []string
|
Replacements() []string
|
||||||
|
|
||||||
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
|
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
|
||||||
}
|
}
|
||||||
|
|
||||||
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
|
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
|
||||||
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
bts, err := fs.ReadFile(fsys, "adapter_config.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -158,14 +158,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
|
return conv.writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
|
// ConvertModel writes an Ollama compatible model to the provided *os.File based on configurations
|
||||||
// and files it finds in the input path.
|
// and files it finds in the input path.
|
||||||
// Supported input model formats include safetensors.
|
// Supported input model formats include safetensors.
|
||||||
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
|
||||||
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
func ConvertModel(fsys fs.FS, f *os.File) error {
|
||||||
bts, err := fs.ReadFile(fsys, "config.json")
|
bts, err := fs.ReadFile(fsys, "config.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -248,5 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
|
return conv.writeFile(f, conv.KV(t), conv.Tensors(ts))
|
||||||
}
|
}
|
||||||
|
@ -9,8 +9,12 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"maps"
|
"maps"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
type containerGGUF struct {
|
type containerGGUF struct {
|
||||||
@ -502,22 +506,22 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
|
|||||||
return binary.Write(w, binary.LittleEndian, s)
|
return binary.Write(w, binary.LittleEndian, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
func WriteGGUF(f *os.File, kv KV, ts []Tensor) error {
|
||||||
alignment := kv.Uint("general.alignment", 32)
|
alignment := kv.Uint("general.alignment", 32)
|
||||||
|
|
||||||
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
|
if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
|
if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
|
if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
|
if err := binary.Write(f, binary.LittleEndian, uint64(len(kv))); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -525,7 +529,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
|||||||
slices.Sort(keys)
|
slices.Sort(keys)
|
||||||
|
|
||||||
for _, key := range keys {
|
for _, key := range keys {
|
||||||
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
|
if err := ggufWriteKV(f, key, kv[key]); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -541,21 +545,34 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
|
|||||||
})
|
})
|
||||||
|
|
||||||
var s uint64
|
var s uint64
|
||||||
for _, t := range ts {
|
for i := range ts {
|
||||||
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
|
ts[i].Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
|
||||||
if err := ggufWriteTensorInfo(ws, t); err != nil {
|
if err := ggufWriteTensorInfo(f, ts[i]); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
s += t.Size()
|
s += ts[i].Size()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
offset, err := f.Seek(0, io.SeekCurrent)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += ggufPadding(offset, int64(alignment))
|
||||||
|
slog.Debug("gguf", "offset", offset, "size", s, "alignment", alignment)
|
||||||
|
|
||||||
|
var g errgroup.Group
|
||||||
|
g.SetLimit(runtime.GOMAXPROCS(0))
|
||||||
for _, t := range ts {
|
for _, t := range ts {
|
||||||
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
|
t := t
|
||||||
|
w := io.NewOffsetWriter(f, offset+int64(t.Offset))
|
||||||
|
g.Go(func() error {
|
||||||
|
_, err := t.WriteTo(w)
|
||||||
return err
|
return err
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return g.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
|
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
|
||||||
@ -640,20 +657,6 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
|
|||||||
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
return binary.Write(ws, binary.LittleEndian, t.Offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
|
|
||||||
offset, err := ws.Seek(0, io.SeekCurrent)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = t.WriteTo(ws)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func ggufPadding(offset, align int64) int64 {
|
func ggufPadding(offset, align int64) int64 {
|
||||||
return (align - offset%align) % align
|
return (align - offset%align) % align
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user