move llama.h
This commit is contained in:
parent
d82d25d70c
commit
d352c68ffc
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +1,2 @@
|
||||
llm/ext_server/* linguist-vendored
|
||||
llm/*.h linguist-vendored
|
1227
llm/llama.h
vendored
Normal file
1227
llm/llama.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
package llm
|
||||
|
||||
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
|
||||
// #cgo CPPFLAGS: -Illama.cpp/ggml/include
|
||||
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
||||
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
||||
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
||||
@@ -61,13 +61,13 @@ func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressR
|
||||
select {
|
||||
case <-ticker.C:
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("quantizing model %d/%d", int(*((*C.float)(store))), tensorCount),
|
||||
Status: fmt.Sprintf("quantizing model tensors %d/%d", int(*((*C.float)(store))), tensorCount),
|
||||
Quantize: "quant",
|
||||
})
|
||||
fmt.Println("Progress: ", *((*C.float)(store)))
|
||||
case <-done:
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("quantizing model %d/%d", tensorCount, tensorCount),
|
||||
Status: fmt.Sprintf("quantizing model tensors %d/%d", tensorCount, tensorCount),
|
||||
Quantize: "quant",
|
||||
})
|
||||
return
|
||||
|
@@ -428,7 +428,10 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
||||
return errors.New("quantization is only supported for F16 and F32 models")
|
||||
} else if want != ft {
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
|
||||
fn(api.ProgressResponse{
|
||||
Status: "quantizing model tensors",
|
||||
Quantize: "quant",
|
||||
})
|
||||
|
||||
blob, err := GetBlobsPath(baseLayer.Digest)
|
||||
if err != nil {
|
||||
|
Loading…
x
Reference in New Issue
Block a user