wip: write tensors in parallel

This commit is contained in:
Michael Yang 2025-04-21 14:40:28 -07:00
parent b0f28d178a
commit 34ae8077d1
2 changed files with 41 additions and 38 deletions

View File

@ -4,9 +4,9 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io"
"io/fs" "io/fs"
"log/slog" "log/slog"
"os"
"strings" "strings"
"github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/fs/ggml"
@ -84,12 +84,12 @@ func (ModelParameters) specialTokenTypes() []string {
} }
} }
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error { func (ModelParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts) return ggml.WriteGGUF(f, kv, ts)
} }
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error { func (AdapterParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts) return ggml.WriteGGUF(f, kv, ts)
} }
type ModelConverter interface { type ModelConverter interface {
@ -104,7 +104,7 @@ type ModelConverter interface {
// specialTokenTypes returns any special token types the model uses // specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker // writeFile writes the model to the provided *os.File
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error writeFile(*os.File, ggml.KV, []ggml.Tensor) error
} }
type moreParser interface { type moreParser interface {
@ -120,10 +120,10 @@ type AdapterConverter interface {
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string Replacements() []string
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error writeFile(*os.File, ggml.KV, []ggml.Tensor) error
} }
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error { func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json") bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil { if err != nil {
return err return err
@ -158,14 +158,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
return err return err
} }
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts)) return conv.writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
} }
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations // ConvertModel writes an Ollama compatible model to the provided *os.File based on configurations
// and files it finds in the input path. // and files it finds in the input path.
// Supported input model formats include safetensors. // Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. // Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { func ConvertModel(fsys fs.FS, f *os.File) error {
bts, err := fs.ReadFile(fsys, "config.json") bts, err := fs.ReadFile(fsys, "config.json")
if err != nil { if err != nil {
return err return err
@ -248,5 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
return err return err
} }
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) return conv.writeFile(f, conv.KV(t), conv.Tensors(ts))
} }

View File

@ -9,8 +9,12 @@ import (
"io" "io"
"log/slog" "log/slog"
"maps" "maps"
"os"
"runtime"
"slices" "slices"
"strings" "strings"
"golang.org/x/sync/errgroup"
) )
type containerGGUF struct { type containerGGUF struct {
@ -502,22 +506,22 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
return binary.Write(w, binary.LittleEndian, s) return binary.Write(w, binary.LittleEndian, s)
} }
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { func WriteGGUF(f *os.File, kv KV, ts []Tensor) error {
alignment := kv.Uint("general.alignment", 32) alignment := kv.Uint("general.alignment", 32)
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
return err return err
} }
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil { if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
return err return err
} }
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil { if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil {
return err return err
} }
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil { if err := binary.Write(f, binary.LittleEndian, uint64(len(kv))); err != nil {
return err return err
} }
@ -525,7 +529,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
slices.Sort(keys) slices.Sort(keys)
for _, key := range keys { for _, key := range keys {
if err := ggufWriteKV(ws, key, kv[key]); err != nil { if err := ggufWriteKV(f, key, kv[key]); err != nil {
return err return err
} }
} }
@ -541,21 +545,34 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
}) })
var s uint64 var s uint64
for _, t := range ts { for i := range ts {
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment))) ts[i].Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
if err := ggufWriteTensorInfo(ws, t); err != nil { if err := ggufWriteTensorInfo(f, ts[i]); err != nil {
return err return err
} }
s += t.Size() s += ts[i].Size()
} }
offset, err := f.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
offset += ggufPadding(offset, int64(alignment))
slog.Debug("gguf", "offset", offset, "size", s, "alignment", alignment)
var g errgroup.Group
g.SetLimit(runtime.GOMAXPROCS(0))
for _, t := range ts { for _, t := range ts {
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil { t := t
w := io.NewOffsetWriter(f, offset+int64(t.Offset))
g.Go(func() error {
_, err := t.WriteTo(w)
return err return err
} })
} }
return nil return g.Wait()
} }
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error { func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
@ -640,20 +657,6 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
return binary.Write(ws, binary.LittleEndian, t.Offset) return binary.Write(ws, binary.LittleEndian, t.Offset)
} }
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
return err
}
_, err = t.WriteTo(ws)
return err
}
func ggufPadding(offset, align int64) int64 { func ggufPadding(offset, align int64) int64 {
return (align - offset%align) % align return (align - offset%align) % align
} }