move llama.h
This commit is contained in:
parent
d82d25d70c
commit
d352c68ffc
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1 +1,2 @@
|
|||||||
llm/ext_server/* linguist-vendored
|
llm/ext_server/* linguist-vendored
|
||||||
|
llm/*.h linguist-vendored
|
1227
llm/llama.h
vendored
Normal file
1227
llm/llama.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
package llm
|
package llm
|
||||||
|
|
||||||
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
|
// #cgo CPPFLAGS: -Illama.cpp/ggml/include
|
||||||
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
||||||
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
||||||
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
||||||
@ -61,13 +61,13 @@ func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressR
|
|||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
fn(api.ProgressResponse{
|
fn(api.ProgressResponse{
|
||||||
Status: fmt.Sprintf("quantizing model %d/%d", int(*((*C.float)(store))), tensorCount),
|
Status: fmt.Sprintf("quantizing model tensors %d/%d", int(*((*C.float)(store))), tensorCount),
|
||||||
Quantize: "quant",
|
Quantize: "quant",
|
||||||
})
|
})
|
||||||
fmt.Println("Progress: ", *((*C.float)(store)))
|
fmt.Println("Progress: ", *((*C.float)(store)))
|
||||||
case <-done:
|
case <-done:
|
||||||
fn(api.ProgressResponse{
|
fn(api.ProgressResponse{
|
||||||
Status: fmt.Sprintf("quantizing model %d/%d", tensorCount, tensorCount),
|
Status: fmt.Sprintf("quantizing model tensors %d/%d", tensorCount, tensorCount),
|
||||||
Quantize: "quant",
|
Quantize: "quant",
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
@ -428,7 +428,10 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
|||||||
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
||||||
return errors.New("quantization is only supported for F16 and F32 models")
|
return errors.New("quantization is only supported for F16 and F32 models")
|
||||||
} else if want != ft {
|
} else if want != ft {
|
||||||
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
|
fn(api.ProgressResponse{
|
||||||
|
Status: "quantizing model tensors",
|
||||||
|
Quantize: "quant",
|
||||||
|
})
|
||||||
|
|
||||||
blob, err := GetBlobsPath(baseLayer.Digest)
|
blob, err := GetBlobsPath(baseLayer.Digest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user