move llama.h

2024-07-10 14:20:04 -07:00 · 2024-07-10 14:20:04 -07:00 · d352c68ffc
commit d352c68ffc
parent d82d25d70c
4 changed files with 1235 additions and 4 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 llm/ext_server/* linguist-vendored
+llm/*.h linguist-vendored
--- a/llm/llama.h
+++ b/llm/llama.h
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -1,6 +1,6 @@
 package llm

-// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
+// #cgo CPPFLAGS: -Illama.cpp/ggml/include
 // #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
 // #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
 // #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
@@ -61,13 +61,13 @@ func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressR
 			select {
 			case <-ticker.C:
 				fn(api.ProgressResponse{
-					Status:   fmt.Sprintf("quantizing model %d/%d", int(*((*C.float)(store))), tensorCount),
+					Status:   fmt.Sprintf("quantizing model tensors %d/%d", int(*((*C.float)(store))), tensorCount),
 					Quantize: "quant",
 				})			
 				fmt.Println("Progress: ", *((*C.float)(store)))
 			case <-done:
 				fn(api.ProgressResponse{
-					Status:   fmt.Sprintf("quantizing model %d/%d", tensorCount, tensorCount),
+					Status:   fmt.Sprintf("quantizing model tensors %d/%d", tensorCount, tensorCount),
 					Quantize: "quant",
 				})
 				return
--- a/server/images.go
+++ b/server/images.go
@@ -428,7 +428,10 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 					if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
 						return errors.New("quantization is only supported for F16 and F32 models")
 					} else if want != ft {
-						fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
+						fn(api.ProgressResponse{
+							Status: "quantizing model tensors",
+							Quantize: "quant",
+						})

 						blob, err := GetBlobsPath(baseLayer.Digest)
 						if err != nil {