Compare commits

...

3 Commits

Author SHA1 Message Date
Michael Yang
34ae8077d1 wip: write tensors in parallel 2025-04-25 13:39:12 -07:00
Michael Yang
b0f28d178a default max term height 2025-04-25 12:54:07 -07:00
Michael Yang
588a97dbef create blobs in parallel 2025-04-25 12:54:07 -07:00
6 changed files with 92 additions and 56 deletions

View File

@ -22,6 +22,7 @@ import (
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
@ -31,6 +32,7 @@ import (
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/sync/errgroup"
"golang.org/x/term"
"github.com/ollama/ollama/api"
@ -106,7 +108,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
spinner.Stop()
req.Name = args[0]
req.Model = args[0]
quantize, _ := cmd.Flags().GetString("quantize")
if quantize != "" {
req.Quantize = quantize
@ -117,26 +119,43 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
if len(req.Files) > 0 {
fileMap := map[string]string{}
for f, digest := range req.Files {
var mu sync.Mutex
var g errgroup.Group
g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1))
// copy files since we'll be modifying the map
temp := req.Files
req.Files = make(map[string]string, len(temp))
for f, digest := range temp {
g.Go(func() error {
if _, err := createBlob(cmd, client, f, digest, p); err != nil {
return err
}
fileMap[filepath.Base(f)] = digest
}
req.Files = fileMap
mu.Lock()
req.Files[filepath.Base(f)] = digest
mu.Unlock()
return nil
})
}
if len(req.Adapters) > 0 {
fileMap := map[string]string{}
for f, digest := range req.Adapters {
// copy files since we'll be modifying the map
temp = req.Adapters
req.Adapters = make(map[string]string, len(temp))
for f, digest := range temp {
g.Go(func() error {
if _, err := createBlob(cmd, client, f, digest, p); err != nil {
return err
}
fileMap[filepath.Base(f)] = digest
}
req.Adapters = fileMap
mu.Lock()
req.Adapters[filepath.Base(f)] = digest
mu.Unlock()
return nil
})
}
if err := g.Wait(); err != nil {
return err
}
bars := make(map[string]*progress.Bar)
@ -213,7 +232,7 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, digest stri
}
}()
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
if err := client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
return "", err
}
return digest, nil

View File

@ -690,7 +690,7 @@ func TestCreateHandler(t *testing.T) {
return
}
if req.Name != "test-model" {
if req.Model != "test-model" {
t.Errorf("expected model name 'test-model', got %s", req.Name)
}

View File

@ -4,9 +4,9 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"log/slog"
"os"
"strings"
"github.com/ollama/ollama/fs/ggml"
@ -84,12 +84,12 @@ func (ModelParameters) specialTokenTypes() []string {
}
}
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(f, kv, ts)
}
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(f *os.File, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(f, kv, ts)
}
type ModelConverter interface {
@ -104,7 +104,7 @@ type ModelConverter interface {
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
}
type moreParser interface {
@ -120,10 +120,10 @@ type AdapterConverter interface {
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
writeFile(*os.File, ggml.KV, []ggml.Tensor) error
}
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil {
return err
@ -158,14 +158,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
return err
}
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
return conv.writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
func ConvertModel(fsys fs.FS, f *os.File) error {
bts, err := fs.ReadFile(fsys, "config.json")
if err != nil {
return err
@ -248,5 +248,5 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
return err
}
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
return conv.writeFile(f, conv.KV(t), conv.Tensors(ts))
}

View File

@ -9,8 +9,12 @@ import (
"io"
"log/slog"
"maps"
"os"
"runtime"
"slices"
"strings"
"golang.org/x/sync/errgroup"
)
type containerGGUF struct {
@ -502,22 +506,22 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
return binary.Write(w, binary.LittleEndian, s)
}
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
func WriteGGUF(f *os.File, kv KV, ts []Tensor) error {
alignment := kv.Uint("general.alignment", 32)
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
if err := binary.Write(f, binary.LittleEndian, uint32(3)); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
if err := binary.Write(f, binary.LittleEndian, uint64(len(ts))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
if err := binary.Write(f, binary.LittleEndian, uint64(len(kv))); err != nil {
return err
}
@ -525,7 +529,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
slices.Sort(keys)
for _, key := range keys {
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
if err := ggufWriteKV(f, key, kv[key]); err != nil {
return err
}
}
@ -541,21 +545,34 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
})
var s uint64
for _, t := range ts {
t.Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
if err := ggufWriteTensorInfo(ws, t); err != nil {
for i := range ts {
ts[i].Offset = s + uint64(ggufPadding(int64(s), int64(alignment)))
if err := ggufWriteTensorInfo(f, ts[i]); err != nil {
return err
}
s += t.Size()
s += ts[i].Size()
}
offset, err := f.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
offset += ggufPadding(offset, int64(alignment))
slog.Debug("gguf", "offset", offset, "size", s, "alignment", alignment)
var g errgroup.Group
g.SetLimit(runtime.GOMAXPROCS(0))
for _, t := range ts {
if err := ggufWriteTensor(ws, t, int64(alignment)); err != nil {
t := t
w := io.NewOffsetWriter(f, offset+int64(t.Offset))
g.Go(func() error {
_, err := t.WriteTo(w)
return err
}
})
}
return nil
return g.Wait()
}
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
@ -640,20 +657,6 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
return binary.Write(ws, binary.LittleEndian, t.Offset)
}
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
return err
}
_, err = t.WriteTo(ws)
return err
}
func ggufPadding(offset, align int64) int64 {
return (align - offset%align) % align
}

View File

@ -64,7 +64,7 @@ func formatDuration(d time.Duration) string {
func (b *Bar) String() string {
termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termWidth = 80
termWidth = defaultTermWidth
}
var pre strings.Builder

View File

@ -4,8 +4,16 @@ import (
"bufio"
"fmt"
"io"
"os"
"sync"
"time"
"golang.org/x/term"
)
const (
defaultTermWidth = 80
defaultTermHeight = 24
)
type State interface {
@ -83,6 +91,11 @@ func (p *Progress) Add(key string, state State) {
}
func (p *Progress) render() {
_, termHeight, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termHeight = defaultTermHeight
}
p.mu.Lock()
defer p.mu.Unlock()
@ -102,8 +115,9 @@ func (p *Progress) render() {
fmt.Fprint(p.w, "\033[1G")
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String(), "\033[K")
maxHeight := min(len(p.states), termHeight)
for i := len(p.states) - maxHeight; i < len(p.states); i++ {
fmt.Fprint(p.w, p.states[i].String(), "\033[K")
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}