Compare commits

...

6 Commits

Author SHA1 Message Date
Josh Yan
0e1ec461f9 import 2024-08-28 11:18:23 -07:00
Josh Yan
52ef79bb7d last lint (hopefully) 2024-08-28 11:12:39 -07:00
Josh Yan
800edd7884 lint again 2024-08-28 11:10:03 -07:00
Josh Yan
01b20fe6f1 lint 2024-08-28 11:07:43 -07:00
Josh Yan
340162fbc3 convert progress 2024-08-28 10:54:52 -07:00
Patrick Devine
6c1c1ad6a9 throw an error when encountering unsupport tensor sizes (#6538) 2024-08-27 17:54:04 -07:00
10 changed files with 158 additions and 23 deletions

View File

@@ -124,6 +124,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bars := make(map[string]*progress.Bar)
var convertSpin *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
@@ -136,6 +137,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
bar.Set(resp.Completed)
} else if strings.Contains(resp.Status, "converting") {
spinner.Stop()
if convertSpin != nil {
convertSpin.SetMessage(resp.Status)
} else {
status = resp.Status
convertSpin = progress.NewSpinner(resp.Status)
p.Add("convert", convertSpin)
}
} else if status != resp.Status {
spinner.Stop()

View File

@@ -9,6 +9,7 @@ import (
"log/slog"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
)
@@ -79,12 +80,12 @@ func (ModelParameters) specialTokenTypes() []string {
}
}
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
return llm.WriteGGUF(ws, kv, ts, fn)
}
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
return llm.WriteGGUF(ws, kv, ts, fn)
}
type ModelConverter interface {
@@ -99,7 +100,7 @@ type ModelConverter interface {
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
}
type moreParser interface {
@@ -115,10 +116,10 @@ type AdapterConverter interface {
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
}
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV, fn func(api.ProgressResponse)) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil {
return err
@@ -153,14 +154,17 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
return err
}
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting adapter 0%%"),
})
return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts), fn)
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
func ConvertModel(fsys fs.FS, ws io.WriteSeeker, fn func(api.ProgressResponse)) error {
bts, err := fs.ReadFile(fsys, "config.json")
if err != nil {
return err
@@ -224,5 +228,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
return err
}
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting model 0%%"),
})
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts), fn)
}

View File

@@ -19,6 +19,7 @@ import (
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
)
@@ -31,7 +32,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
}
defer f.Close()
if err := ConvertModel(fsys, f); err != nil {
if err := ConvertModel(fsys, f, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}
@@ -140,6 +141,107 @@ func TestConvertFull(t *testing.T) {
}
}
func TestConvertInvalidDatatype(t *testing.T) {
f, err := os.CreateTemp(t.TempDir(), "testmodel")
if err != nil {
t.Fatal(err)
}
defer f.Close()
tempDir := t.TempDir()
generateSafetensorTestData(t, tempDir)
err = ConvertModel(os.DirFS(tempDir), f, func(api.ProgressResponse) {})
if err == nil || err.Error() != "unsupported safetensors model" {
t.Errorf("expected error but didn't get one")
}
}
func generateSafetensorTestData(t *testing.T, tempDir string) {
type tensorData struct {
Offsets []int `json:"data_offsets"`
Type string `json:"dtype"`
Shape []int `json:"shape"`
}
offset := 4096 * 14336
td := map[string]*tensorData{}
td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
Offsets: []int{0, offset},
Type: "I8",
Shape: []int{4096, 14336},
}
td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
Offsets: []int{offset, offset},
Type: "U8",
Shape: []int{},
}
data, err := json.Marshal(td)
if err != nil {
t.Fatal(err)
}
var buf bytes.Buffer
l := int64(len(data))
err = binary.Write(&buf, binary.LittleEndian, l)
if err != nil {
t.Fatal(err)
}
_, err = buf.Write(data)
if err != nil {
t.Fatal(err)
}
fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
if err != nil {
t.Fatal(err)
}
defer fdata.Close()
_, err = fdata.Write(buf.Bytes())
if err != nil {
t.Fatal(err)
}
configData := `
{
"architectures": [
"LlamaForCausalLM"
]
}
`
f, err := os.Create(filepath.Join(tempDir, "config.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(configData)
if err != nil {
t.Fatal(err)
}
tokenizerData := `
{
}
`
f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
if err != nil {
t.Fatal(err)
}
defer f.Close()
_, err = f.WriteString(tokenizerData)
if err != nil {
t.Fatal(err)
}
}
func TestConvertAdapter(t *testing.T) {
type AdapterCase struct {
Name string
@@ -186,7 +288,7 @@ func TestConvertAdapter(t *testing.T) {
tempDir := t.TempDir()
generateLoraTestData(t, tempDir)
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
@@ -50,6 +51,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
for _, key := range keys {
if value := headers[key]; value.Type != "" {
// bitsandbytes quantized models are unsupported
if len(value.Shape) == 0 {
return nil, errors.New("unsupported safetensors model")
}
ts = append(ts, safetensor{
fs: fsys,
path: p,

View File

@@ -12,6 +12,8 @@ import (
"strings"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
)
type containerGGUF struct {
@@ -506,7 +508,7 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
return binary.Write(w, binary.LittleEndian, s)
}
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor, fn func(api.ProgressResponse)) error {
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
return err
}
@@ -552,7 +554,10 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
}
var alignment int64 = 32
for _, t := range ts {
for i, t := range ts {
fn(api.ProgressResponse{
Status: fmt.Sprintf("converting model %d%%", 100*(i+1)/len(ts)),
})
if err := ggufWriteTensor(ws, t, alignment); err != nil {
return err
}

View File

@@ -41,7 +41,7 @@ func TestEstimateGPULayers(t *testing.T) {
"tokenizer.ggml.tokens": []string{" "},
"tokenizer.ggml.scores": []float32{0},
"tokenizer.ggml.token_type": []int32{0},
}, tensors)
}, tensors, func(api.ProgressResponse) {})
require.NoError(t, err)
ggml, err := LoadModel(f.Name(), 0)

View File

@@ -98,7 +98,6 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
}
defer os.RemoveAll(p)
fn(api.ProgressResponse{Status: "converting model"})
// TODO(mxyng): this should write directly into a layer
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
t, err := os.CreateTemp(p, "fp16")
@@ -123,13 +122,18 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
if baseModel == nil {
return nil, fmt.Errorf("no base model specified for the adapter")
}
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
fn(api.ProgressResponse{
Status: "converting adapter",
})
if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV(), fn); err != nil {
return nil, err
}
layerType = "application/vnd.ollama.image.adapter"
case "model":
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
fn(api.ProgressResponse{
Status: "converting model",
})
if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t, fn); err != nil {
return nil, err
}
layerType = "application/vnd.ollama.image.model"

View File

@@ -145,7 +145,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
t.Fatalf("failed to open file: %v", err)
}
defer file.Close()
if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse) {}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
@@ -197,7 +197,7 @@ func TestParseLayerFromCopy(t *testing.T) {
defer file2.Close()
for range 5 {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse) {}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
}

View File

@@ -30,7 +30,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
}
defer f.Close()
if err := llm.WriteGGUF(f, kv, ti); err != nil {
if err := llm.WriteGGUF(f, kv, ti, func(api.ProgressResponse) {}); err != nil {
t.Fatal(err)
}

View File

@@ -128,7 +128,8 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
}, []llm.Tensor{
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
}))
},
func(api.ProgressResponse) {}))
require.NoError(t, err)
fname := f.Name()