change
This commit is contained in:
parent
e59453982d
commit
b0e4e8d76c
@ -28,7 +28,7 @@ func SystemInfo() string {
|
|||||||
return C.GoString(C.llama_print_system_info())
|
return C.GoString(C.llama_print_system_info())
|
||||||
}
|
}
|
||||||
|
|
||||||
func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse) ) error {
|
func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse), tensorCount int) error {
|
||||||
cinfile := C.CString(infile)
|
cinfile := C.CString(infile)
|
||||||
defer C.free(unsafe.Pointer(cinfile))
|
defer C.free(unsafe.Pointer(cinfile))
|
||||||
|
|
||||||
@ -59,7 +59,7 @@ func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressR
|
|||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
fn(api.ProgressResponse{
|
fn(api.ProgressResponse{
|
||||||
Status: fmt.Sprintf("quantizing model %d%%", int(*((*C.float)(store))*100)),
|
Status: fmt.Sprintf("quantizing model %d/%d", int(*((*C.float)(store))), tensorCount),
|
||||||
Quantize: "quant",
|
Quantize: "quant",
|
||||||
})
|
})
|
||||||
fmt.Println("Progress: ", *((*C.float)(store)))
|
fmt.Println("Progress: ", *((*C.float)(store)))
|
||||||
|
@ -422,6 +422,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
tensorCount := len(baseLayer.GGML.Tensors())
|
||||||
|
|
||||||
ft := baseLayer.GGML.KV().FileType()
|
ft := baseLayer.GGML.KV().FileType()
|
||||||
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
||||||
@ -441,7 +442,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||||||
|
|
||||||
// Quantizes per layer
|
// Quantizes per layer
|
||||||
// Save total quantized tensors
|
// Save total quantized tensors
|
||||||
if err := llm.Quantize(blob, temp.Name(), want, fn); err != nil {
|
if err := llm.Quantize(blob, temp.Name(), want, fn, tensorCount); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user