Compare commits

...

8 Commits

Author | SHA1 | Message | Date
Matt Williams | a314b6c2a9 | add faq on models downloaded from hf | 2024-01-04 16:55:56 -08:00
    Signed-off-by: Matt Williams <m@technovangelist.com>
Daniel Hiltgen | cd8fad3398 | Merge pull request #1790 from dhiltgen/llm_code_shuffle | 2024-01-04 13:47:25 -08:00
    Cleaup stale submodule
Daniel Hiltgen | 9983fa5f4e | Cleaup stale submodule | 2024-01-04 13:40:16 -08:00
    If the tree has a stale submodule, make sure we clean it up first
Daniel Hiltgen | dfda91c2ee | Merge pull request #1788 from dhiltgen/llm_code_shuffle | 2024-01-04 13:14:28 -08:00
    Revamp code layout for the llm directory and llama.cpp submodule
Daniel Hiltgen | fac9060da5 | Init submodule with new path | 2024-01-04 13:00:13 -08:00
Daniel Hiltgen | a554616f8e | remove old llama.cpp submodule path | 2024-01-04 12:12:21 -08:00
Daniel Hiltgen | 77d96da94b | Code shuffle to clean up the llm dir | 2024-01-04 12:12:05 -08:00
Brian Murray | 0d6e3565ae | Add embeddings to API (#1773) | 2024-01-04 15:00:52 -05:00
21 changed files with 79 additions and 51 deletions

View File

@@ -2,7 +2,7 @@
ollama
app
dist
llm/llama.cpp/gguf
llm/llama.cpp
.env
.cache
test_data

.gitmodules (vendored): 9 changed lines
View File

@@ -1,5 +1,4 @@
[submodule "llm/llama.cpp/gguf"]
path = llm/llama.cpp/gguf
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
shallow = true
[submodule "llama.cpp"]
path = llm/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
shallow = true

View File

@@ -309,6 +309,13 @@ func (c *Client) Heartbeat(ctx context.Context) error {
}
return nil
}
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
var resp EmbeddingResponse
if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
return nil, err
}
return &resp, nil
}
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
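For reference, a minimal sketch of calling the new method through the Go client. Only the `Embeddings` method and the `/api/embeddings` route appear in this diff; `ClientFromEnvironment` and the `Model`/`Prompt`/`Embedding` field names are assumptions about the `api` package, not something this changeset shows.

```go
// Hedged sketch: exercise the new Client.Embeddings method.
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment() // assumed constructor
	if err != nil {
		log.Fatal(err)
	}

	resp, err := client.Embeddings(context.Background(), &api.EmbeddingRequest{
		Model:  "llama2",              // assumed field name
		Prompt: "Why is the sky blue?", // assumed field name
	})
	if err != nil {
		log.Fatal(err)
	}

	// Embedding is assumed to be a []float64 vector.
	fmt.Printf("received a %d-dimensional embedding\n", len(resp.Embedding))
}
```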

View File

@@ -66,6 +66,16 @@ Refer to the section above for how to use environment variables on your platform
If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory. Refer to the section above for how to use environment variables on your platform.
## Can I use models I downloaded from Hugging Face in Ollama?
There are a lot of models available on Hugging Face. Many of them will work with Ollama, but not all of them yet. You can look for models that use the library **PyTorch**, then in the repo look at the `config.json` file. In there you should see an architecture. For now, we support models that use the following architectures: Llama, Mistral, Falcon, RW, and BigCode.
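As an illustration of the architecture check described above, this is roughly the relevant fragment of a `config.json` for a Llama-family model; the example is not taken from this diff and exact values vary by model:

```json
{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "model_type": "llama"
}
```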
## Can I use models I downloaded in Ollama in other applications?
Yes, as long as those applications work with GGUF models. You can find the models in the directories listed above. Under `models`, there is a manifests directory. Follow that path down to find the model you want to use. There will be a file for the model and tag you intend to use. In that file, you will see a layer called: `application/vnd.ollama.image.model`.
The next line will show a sha256 hash. That also happens to be the filename of the model weights file, which you can find in `.ollama/models/blobs`. You can use that file in any application that supports GGUF, but it is important not to move the file from this location; otherwise Ollama won't be able to use it.
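A hedged sketch of the relevant portion of such a manifest file: the `application/vnd.ollama.image.model` media type is named in the FAQ above, while the surrounding field names are assumed to follow the OCI image-manifest layout and the digest is a placeholder:

```json
{
  "layers": [
    {
      "mediaType": "application/vnd.ollama.image.model",
      "digest": "sha256:<64-hex-character hash>"
    }
  ]
}
```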
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
No, Ollama runs entirely locally, and conversation data will never leave your machine.

View File

@@ -2,7 +2,7 @@
set(TARGET ext_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
add_library(${TARGET} STATIC ../../../ext_server.cpp)
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
target_include_directories(${TARGET} PRIVATE ../../common)
target_include_directories(${TARGET} PRIVATE ../..)
target_include_directories(${TARGET} PRIVATE ../../..)

llm/ext_server/README.md (new file): 4 changed lines
View File

@@ -0,0 +1,4 @@
# Extern C Server
This directory contains a thin facade we layer on top of the llama.cpp server to expose `extern C` interfaces, so its functionality can be accessed through direct in-process API calls.

View File

@@ -1,7 +1,7 @@
package llm
/*
#cgo CFLAGS: -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
@@ -10,17 +10,17 @@ package llm
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libcommon.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libext_server.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libllama.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libggml_static.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
#cgo linux CFLAGS: -D_GNU_SOURCE
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libggml_static.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
#cgo linux windows LDFLAGS: -lpthread

View File

@@ -1,7 +1,7 @@
# common logic across linux and darwin
init_vars() {
LLAMACPP_DIR=gguf
LLAMACPP_DIR=../llama.cpp
PATCHES="0001-Expose-callable-API-for-server.patch"
CMAKE_DEFS=""
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
@@ -18,19 +18,24 @@ git_module_setup() {
echo "Skipping submodule initialization"
return
fi
# Make sure the tree is clean after the directory moves
if [ -d "${LLAMACPP_DIR}/gguf" ]; then
echo "Cleaning up old submodule"
rm -rf ${LLAMACPP_DIR}
fi
git submodule init
git submodule update --force gguf
git submodule update --force ${LLAMACPP_DIR}
}
apply_patches() {
# Wire up our CMakefile
if ! grep ollama gguf/examples/server/CMakeLists.txt; then
echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt
if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
fi
# Avoid duplicate main symbols when we link into the cgo binary
sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
}
build() {
@@ -49,5 +54,5 @@ install() {
# Keep the local tree clean after we're done with the build
cleanup() {
(cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
(cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
}

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be ../llm/llama.cpp
# working directory must be ./llm/generate/
# TODO - add hardening to detect missing tools (cmake, etc.)
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/darwin/metal"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
case "${GOARCH}" in
"amd64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/llama.cpp
# working directory must be llm/generate/
# First we build our default built-in library which will be linked into the CGO
# binary as a normal dependency. This default build is CPU based.
@@ -52,7 +52,7 @@ apply_patches
# CPU first for the default library
#
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/linux/cpu"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
build
install
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
echo "CUDA libraries detected - building dynamic CUDA library"
init_vars
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/linux/cuda"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
CUDA_LIB_DIR=/usr/local/cuda/lib64
build
install
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="gguf/build/linux/rocm"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \

View File

@@ -3,6 +3,7 @@
$ErrorActionPreference = "Stop"
function init_vars {
$script:llamacppDir = "../llama.cpp"
$script:patches = @("0001-Expose-callable-API-for-server.patch")
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
@@ -19,25 +20,25 @@ function git_module_setup {
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
& git submodule init
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& git submodule update --force gguf
& git submodule update --force "${script:llamacppDir}"
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
function apply_patches {
# Wire up our CMakefile
if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'
if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
}
# Avoid duplicate main symbols when we link into the cgo binary
$content = Get-Content -Path "./gguf/examples/server/server.cpp"
$content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
$content = $content -replace 'int main\(', 'int __main('
Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content
Set-Content -Path "${script:llamacppDir}/examples/server/server.cpp" -Value $content
}
function build {
write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
& cmake --version
& cmake -S gguf -B $script:buildDir $script:cmakeDefs
& cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
@@ -55,7 +56,7 @@ function install {
}
function cleanup {
Set-Location "gguf/examples/server"
Set-Location "${script:llamacppDir}/examples/server"
git checkout CMakeLists.txt server.cpp
}
@@ -64,20 +65,20 @@ git_module_setup
apply_patches
# first build CPU based
$script:buildDir="gguf/build/windows/cpu"
$script:buildDir="${script:llamacppDir}/build/windows/cpu"
build
install
# Then build cuda as a dynamically loaded library
init_vars
$script:buildDir="gguf/build/windows/cuda"
$script:buildDir="${script:llamacppDir}/build/windows/cuda"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
build
install
# TODO - actually implement ROCm support on windows
$script:buildDir="gguf/build/windows/rocm"
$script:buildDir="${script:llamacppDir}/build/windows/rocm"
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null

View File

@@ -1,3 +1,3 @@
package llm
package generate
//go:generate sh ./gen_darwin.sh

View File

@@ -1,3 +1,3 @@
package llm
package generate
//go:generate bash ./gen_linux.sh

View File

@@ -1,3 +1,3 @@
package llm
package generate
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1

View File

@@ -13,7 +13,7 @@ import (
"github.com/jmorganca/ollama/api"
)
//go:embed llama.cpp/gguf/ggml-metal.metal
//go:embed llama.cpp/ggml-metal.metal
var libEmbed embed.FS
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
}
func nativeInit(workdir string) error {
err := extractPayloadFiles(workdir, "llama.cpp/gguf/ggml-metal.metal")
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
if err != nil {
if err == payloadMissing {
// TODO perhaps consider this a hard failure on arm macs?

View File

@@ -34,6 +34,8 @@ type shimExtServer struct {
var shimMutex sync.Mutex
var llm *shimExtServer
const pathComponentCount = 6
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
}
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
}
func nativeInit(workdir string) error {
libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/*/*/lib/*")
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
if err != nil {
if err == payloadMissing {
log.Printf("%s", payloadMissing)
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
for _, file := range files {
pathComps := strings.Split(file, "/")
if len(pathComps) != 7 {
if len(pathComps) != pathComponentCount {
log.Printf("unexpected payload components: %v", pathComps)
continue
}
// llama.cpp/gguf/build/$OS/$VARIANT/lib/$LIBRARY
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs
targetDir := filepath.Join(workDir, pathComps[4])
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
srcFile, err := libEmbed.Open(file)
if err != nil {
return nil, fmt.Errorf("read payload %s: %v", file, err)
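A worked example of the new payload-path arithmetic above: the embed glob now matches `llama.cpp/build/*/*/lib/*`, so each payload path splits into exactly `pathComponentCount` (6) components, and the build variant sits three components from the end. This is a sketch; the workdir name is illustrative, and the library filename is borrowed from the Linux build script above.

```go
// Sketch of the path handling used when extracting embedded server libraries.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

const pathComponentCount = 6

func main() {
	// Hypothetical payload path matching the new glob llama.cpp/build/*/*/lib/*
	file := "llama.cpp/build/linux/cuda/lib/libext_server.so"

	pathComps := strings.Split(file, "/")
	fmt.Println(len(pathComps) == pathComponentCount) // true

	// pathComps[pathComponentCount-3] is the build variant ("cuda" here), so each
	// variant's libraries land in their own subdirectory and multiple server
	// builds do not overwrite each other.
	targetDir := filepath.Join("workdir", pathComps[pathComponentCount-3])
	fmt.Println(targetDir) // workdir/cuda
}
```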

View File

@@ -10,7 +10,7 @@ import (
"strings"
)
//go:embed llama.cpp/gguf/build/*/*/lib/*.so
//go:embed llama.cpp/build/*/*/lib/*.so
var libEmbed embed.FS
func updatePath(dir string) {

View File

@@ -8,7 +8,7 @@ import (
"strings"
)
//go:embed llama.cpp/gguf/build/windows/*/lib/*.dll
//go:embed llama.cpp/build/windows/*/lib/*.dll
var libEmbed embed.FS
func updatePath(dir string) {