Compare commits

...

8 Commits

Author SHA1 Message Date
Matt Williams
a314b6c2a9 add faq on models downloaded from hf
Signed-off-by: Matt Williams <m@technovangelist.com>
2024-01-04 16:55:56 -08:00
Daniel Hiltgen
cd8fad3398 Merge pull request #1790 from dhiltgen/llm_code_shuffle
Cleaup stale submodule
2024-01-04 13:47:25 -08:00
Daniel Hiltgen
9983fa5f4e Cleaup stale submodule
If the tree has a stale submodule, make sure we clean it up first
2024-01-04 13:40:16 -08:00
Daniel Hiltgen
dfda91c2ee Merge pull request #1788 from dhiltgen/llm_code_shuffle
Revamp code layout for the llm directory and llama.cpp submodule
2024-01-04 13:14:28 -08:00
Daniel Hiltgen
fac9060da5 Init submodule with new path
2024-01-04 13:00:13 -08:00
Daniel Hiltgen
a554616f8e remove old llama.cpp submodule path
2024-01-04 12:12:21 -08:00
Daniel Hiltgen
77d96da94b Code shuffle to clean up the llm dir
2024-01-04 12:12:05 -08:00
Brian Murray
0d6e3565ae Add embeddings to API (#1773)
2024-01-04 15:00:52 -05:00
21 changed files with 79 additions and 51 deletions

@@ -2,7 +2,7 @@
 ollama
 app
 dist
-llm/llama.cpp/gguf
+llm/llama.cpp
 .env
 .cache
 test_data

.gitmodules

@@ -1,5 +1,4 @@
-[submodule "llm/llama.cpp/gguf"]
-	path = llm/llama.cpp/gguf
+[submodule "llama.cpp"]
+	path = llm/llama.cpp
 	url = https://github.com/ggerganov/llama.cpp.git
-	ignore = dirty
 	shallow = true


@@ -309,6 +309,13 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	}
 	return nil
 }
 
+func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
+	var resp EmbeddingResponse
+	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
 func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
 	if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {

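For context, a minimal sketch of calling the new Embeddings method from application code. Only Client.Embeddings itself appears in the diff above; the ClientFromEnvironment constructor and the Model, Prompt, and Embedding field names are assumptions about the surrounding api package:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// Assumed constructor; the diff above only confirms Client.Embeddings.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Field names on EmbeddingRequest/EmbeddingResponse are assumptions here.
	resp, err := client.Embeddings(context.Background(), &api.EmbeddingRequest{
		Model:  "llama2",
		Prompt: "why is the sky blue?",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("got a %d-dimensional embedding\n", len(resp.Embedding))
}
```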

@@ -66,6 +66,16 @@ Refer to the section above for how to use environment variables on your platform
 If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory. Refer to the section above for how to use environment variables on your platform.
 
+## Can I use models I downloaded from Hugging Face in Ollama?
+
+There are a lot of models available on Hugging Face. Many of them will work with Ollama, but not all of them yet. You can look for models that use the library **PyTorch**, then in the repo look at the `config.json` file. In there you should see an architecture. For now, we support models that use the following architectures: Llama, Mistral, Falcon, RW, and BigCode.
+
+## Can I use models I downloaded in Ollama in other applications?
+
+Yes, as long as those applications work with GGUF models. You can find the models in the directories listed above. Under `models`, there is a manifests directory. Follow that path down to find the model you want to use. There will be a file for the model and tag you intend to use. In that file, you will see a layer called: `application/vnd.ollama.image.model`.
+
+The next line will show a sha256 hash. That happens to also be the filename for the model weights file that you can find in `.ollama/models/blobs`. You can use that file in any application that supports gguf. But it is important not to move the file from this location otherwise Ollama won't be able to use it.
+
 ## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
 
 No, Ollama runs entirely locally, and conversation data will never leave your machine.

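A minimal Go sketch of the manifest-to-blob lookup the new FAQ entry describes. The manifest location, the JSON field names, and the llama2:latest tag are illustrative assumptions; only the application/vnd.ollama.image.model media type comes from the text above:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// Just the fields this sketch reads; the real manifest has more.
type layer struct {
	MediaType string `json:"mediaType"`
	Digest    string `json:"digest"`
}

type manifest struct {
	Layers []layer `json:"layers"`
}

func main() {
	home, err := os.UserHomeDir()
	if err != nil {
		log.Fatal(err)
	}
	// Hypothetical manifest location for the llama2:latest tag.
	path := filepath.Join(home, ".ollama", "models", "manifests",
		"registry.ollama.ai", "library", "llama2", "latest")

	data, err := os.ReadFile(path)
	if err != nil {
		log.Fatal(err)
	}
	var m manifest
	if err := json.Unmarshal(data, &m); err != nil {
		log.Fatal(err)
	}
	for _, l := range m.Layers {
		if l.MediaType == "application/vnd.ollama.image.model" {
			// Per the FAQ, the digest doubles as the blob filename on disk.
			fmt.Println("GGUF weights:", filepath.Join(home, ".ollama", "models", "blobs", l.Digest))
		}
	}
}
```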

@@ -2,7 +2,7 @@
 set(TARGET ext_server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-add_library(${TARGET} STATIC ../../../ext_server.cpp)
+add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_include_directories(${TARGET} PRIVATE ../..)
 target_include_directories(${TARGET} PRIVATE ../../..)

llm/ext_server/README.md (new file)

@@ -0,0 +1,4 @@
+# Extern C Server
+
+This directory contains a thin facade we layer on top of the Llama.cpp server
+to expose `extern C` interfaces to access the functionality through direct API calls in-process


@@ -1,7 +1,7 @@
 package llm
 
 /*
-#cgo CFLAGS: -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
+#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
 #cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
 #cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
@@ -10,17 +10,17 @@ package llm
 #cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 #cgo darwin LDFLAGS: -lc++ -framework Accelerate
 #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libcommon.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libext_server.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libllama.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libggml_static.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
 #cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libext_server.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libcommon.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libllama.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libggml_static.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
 #cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
 #cgo linux windows LDFLAGS: -lpthread


@@ -1,7 +1,7 @@
 # common logic accross linux and darwin
 
 init_vars() {
-  LLAMACPP_DIR=gguf
+  LLAMACPP_DIR=../llama.cpp
   PATCHES="0001-Expose-callable-API-for-server.patch"
   CMAKE_DEFS=""
   CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
@@ -18,19 +18,24 @@ git_module_setup() {
     echo "Skipping submodule initialization"
     return
   fi
+  # Make sure the tree is clean after the directory moves
+  if [ -d "${LLAMACPP_DIR}/gguf" ]; then
+    echo "Cleaning up old submodule"
+    rm -rf ${LLAMACPP_DIR}
+  fi
   git submodule init
-  git submodule update --force gguf
+  git submodule update --force ${LLAMACPP_DIR}
 }
 
 apply_patches() {
   # Wire up our CMakefile
-  if ! grep ollama gguf/examples/server/CMakeLists.txt; then
-    echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt
+  if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
+    echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
   fi
   # Avoid duplicate main symbols when we link into the cgo binary
-  sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
-    mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
+  sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
+    mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
 }
 
 build() {
@@ -49,5 +54,5 @@ install() {
 
 # Keep the local tree clean after we're done with the build
 cleanup() {
-  (cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
+  (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
 }


@@ -1,6 +1,6 @@
 #!/bin/bash
 
 # This script is intended to run inside the go generate
-# working directory must be ../llm/llama.cpp
+# working directory must be ./llm/generate/
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
@@ -10,7 +10,7 @@
 echo "Starting darwin generate script"
 source $(dirname $0)/gen_common.sh
 init_vars
 CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/darwin/metal"
+BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
 case "${GOARCH}" in
 "amd64")
 	CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"


@@ -1,6 +1,6 @@
 #!/bin/bash
 
 # This script is intended to run inside the go generate
-# working directory must be llm/llama.cpp
+# working directory must be llm/generate/
 
 # First we build our default built-in library which will be linked into the CGO
 # binary as a normal dependency. This default build is CPU based.
@@ -52,7 +52,7 @@ apply_patches
 # CPU first for the default library
 #
 CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/linux/cpu"
+BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
 build
 install
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
 	echo "CUDA libraries detected - building dynamic CUDA library"
 	init_vars
 	CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-	BUILD_DIR="gguf/build/linux/cuda"
+	BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
 	CUDA_LIB_DIR=/usr/local/cuda/lib64
 	build
 	install
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
 	echo "ROCm libraries detected - building dynamic ROCm library"
 	init_vars
 	CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
-	BUILD_DIR="gguf/build/linux/rocm"
+	BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
 	build
 	install
 	gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \


@@ -3,6 +3,7 @@
 $ErrorActionPreference = "Stop"
 
 function init_vars {
+    $script:llamacppDir = "../llama.cpp"
     $script:patches = @("0001-Expose-callable-API-for-server.patch")
     $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
     $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
@@ -19,25 +20,25 @@ function git_module_setup {
     # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
     & git submodule init
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    & git submodule update --force gguf
+    & git submodule update --force "${script:llamacppDir}"
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 }
 
 function apply_patches {
     # Wire up our CMakefile
-    if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
-        Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'
+    if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
+        Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
     }
     # Avoid duplicate main symbols when we link into the cgo binary
-    $content = Get-Content -Path "./gguf/examples/server/server.cpp"
+    $content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
     $content = $content -replace 'int main\(', 'int __main('
-    Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content
+    Set-Content -Path "${script:llamacppDir}/examples/server/server.cpp" -Value $content
 }
 
 function build {
-    write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
+    write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
     & cmake --version
-    & cmake -S gguf -B $script:buildDir $script:cmakeDefs
+    & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
@@ -55,7 +56,7 @@ function install {
 }
 
 function cleanup {
-    Set-Location "gguf/examples/server"
+    Set-Location "${script:llamacppDir}/examples/server"
     git checkout CMakeLists.txt server.cpp
 }
@@ -64,20 +65,20 @@
 git_module_setup
 apply_patches
 
 # first build CPU based
-$script:buildDir="gguf/build/windows/cpu"
+$script:buildDir="${script:llamacppDir}/build/windows/cpu"
 build
 install
 
 # Then build cuda as a dynamically loaded library
 init_vars
-$script:buildDir="gguf/build/windows/cuda"
+$script:buildDir="${script:llamacppDir}/build/windows/cuda"
 $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
 build
 install
 
 # TODO - actually implement ROCm support on windows
-$script:buildDir="gguf/build/windows/rocm"
+$script:buildDir="${script:llamacppDir}/build/windows/rocm"
 
 rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
 md "${script:buildDir}/lib" -ea 0 > $null


@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate sh ./gen_darwin.sh


@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate bash ./gen_linux.sh


@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1


@@ -13,7 +13,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-//go:embed llama.cpp/gguf/ggml-metal.metal
+//go:embed llama.cpp/ggml-metal.metal
 var libEmbed embed.FS
 
 func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
 }
 
 func nativeInit(workdir string) error {
-	err := extractPayloadFiles(workdir, "llama.cpp/gguf/ggml-metal.metal")
+	err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
 	if err != nil {
 		if err == payloadMissing {
 			// TODO perhaps consider this a hard failure on arm macs?


@@ -34,6 +34,8 @@ type shimExtServer struct {
 var shimMutex sync.Mutex
 var llm *shimExtServer
 
+const pathComponentCount = 6
+
 func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
 	C.dynamic_shim_llama_server_init(llm.s, sparams, err)
 }
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
 }
 
 func nativeInit(workdir string) error {
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/*/*/lib/*")
+	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
 	if err != nil {
 		if err == payloadMissing {
 			log.Printf("%s", payloadMissing)
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
-		if len(pathComps) != 7 {
+		if len(pathComps) != pathComponentCount {
 			log.Printf("unexpected payload components: %v", pathComps)
 			continue
 		}
 
-		// llama.cpp/gguf/build/$OS/$VARIANT/lib/$LIBRARY
+		// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
 		// Include the variant in the path to avoid conflicts between multiple server libs
-		targetDir := filepath.Join(workDir, pathComps[4])
+		targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
 		srcFile, err := libEmbed.Open(file)
 		if err != nil {
 			return nil, fmt.Errorf("read payload %s: %v", file, err)

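As a standalone illustration of the pathComponentCount arithmetic above (the example payload path is assumed for demonstration):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// With the gguf directory removed, embedded payload paths have six
	// components; the variant directory sits three from the end.
	const pathComponentCount = 6
	file := "llama.cpp/build/linux/cpu/lib/libext_server.so" // hypothetical example
	pathComps := strings.Split(file, "/")
	fmt.Println(len(pathComps) == pathComponentCount) // true
	fmt.Println(pathComps[pathComponentCount-3])      // "cpu", the variant used as the target subdirectory
}
```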

@@ -10,7 +10,7 @@ import (
 	"strings"
 )
 
-//go:embed llama.cpp/gguf/build/*/*/lib/*.so
+//go:embed llama.cpp/build/*/*/lib/*.so
 var libEmbed embed.FS
 
 func updatePath(dir string) {


@@ -8,7 +8,7 @@ import (
 	"strings"
 )
 
-//go:embed llama.cpp/gguf/build/windows/*/lib/*.dll
+//go:embed llama.cpp/build/windows/*/lib/*.dll
 var libEmbed embed.FS
 
 func updatePath(dir string) {