Compare commits
49 Commits
mattw/allm
...
cuda-searc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be721ca0df | ||
|
|
34344d801c | ||
|
|
e868c8a5c7 | ||
|
|
c336693f07 | ||
|
|
e89dc1d54b | ||
|
|
1961a81f03 | ||
|
|
8a8c7e7f8d | ||
|
|
6df83e6daa | ||
|
|
62023177f6 | ||
|
|
6164f378f2 | ||
|
|
f387e9631b | ||
|
|
6566387ae3 | ||
|
|
37708931fb | ||
|
|
f6cb0a553c | ||
|
|
2680078c13 | ||
|
|
f1b7e5f560 | ||
|
|
cb534e6ac2 | ||
|
|
58ce2d8273 | ||
|
|
18ddf6d57d | ||
|
|
61e6502449 | ||
|
|
08f1e18965 | ||
|
|
7e8f7c8358 | ||
|
|
3f3eb19a3b | ||
|
|
059ae4585e | ||
|
|
6347f501ca | ||
|
|
5feec959ad | ||
|
|
dbdd50b283 | ||
|
|
d74ce6bd4f | ||
|
|
57942b4676 | ||
|
|
e0d05b0f1e | ||
|
|
2d9dd14f27 | ||
|
|
1caa56128f | ||
|
|
0101e76dbe | ||
|
|
2ef9352b94 | ||
|
|
5580ae2472 | ||
|
|
3a9f447141 | ||
|
|
9c2941e61b | ||
|
|
238ac5e765 | ||
|
|
4f4980b66b | ||
|
|
22e93efa41 | ||
|
|
2909dce894 | ||
|
|
df32537312 | ||
|
|
3367b5f3df | ||
|
|
46edbbc518 | ||
|
|
d2ff18cd6b | ||
|
|
df086d3c8c | ||
|
|
8baaaa39c0 | ||
|
|
f9961c70ae | ||
|
|
e201efa14b |
159
Dockerfile.build
159
Dockerfile.build
@@ -1,74 +1,101 @@
|
||||
# Ubuntu 20.04 amd64 dependencies
|
||||
FROM --platform=linux/amd64 ubuntu:20.04 AS base-amd64
|
||||
ARG CUDA_VERSION=11.3.1-1
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
# ROCm only supports amd64
|
||||
ARG ROCM_VERSION=6.0
|
||||
ARG CLBLAST_VER=1.6.1
|
||||
|
||||
# Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget gnupg && \
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
|
||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
|
||||
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" > /etc/apt/sources.list.d/cuda.list && \
|
||||
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh" -O /tmp/cmake-installer.sh && \
|
||||
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
|
||||
mkdir --parents --mode=0755 /etc/apt/keyrings && \
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} focal main" > /etc/apt/sources.list.d/rocm.list && \
|
||||
echo "Package: *" > /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
echo "Pin: release o=repo.radeon.com" >> /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
echo "Pin-Priority: 600" >> /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
|
||||
|
||||
# CLBlast
|
||||
RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
|
||||
cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
|
||||
|
||||
ENV ROCM_PATH=/opt/rocm
|
||||
|
||||
# Ubuntu 22.04 arm64 dependencies
|
||||
FROM --platform=linux/arm64 ubuntu:20.04 AS base-arm64
|
||||
ARG CUDA_VERSION=11.3.1-1
|
||||
ARG CMAKE_VERSION=3.27.6
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget gnupg && \
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-ubuntu2004.pin && \
|
||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa//3bf863cc.pub && \
|
||||
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/ /" > /etc/apt/sources.list.d/cuda.list && \
|
||||
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.sh" -O /tmp/cmake-installer.sh && \
|
||||
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
|
||||
apt-get update && \
|
||||
apt-cache madison cuda && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION}
|
||||
|
||||
FROM base-${TARGETARCH}
|
||||
ARG TARGETARCH
|
||||
ARG GOFLAGS="'-ldflags -w -s'"
|
||||
ARG CGO_CFLAGS
|
||||
ARG GOLANG_VERSION=1.21.3
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
ARG ROCM_VERSION=5.7.1
|
||||
|
||||
# Common toolchain
|
||||
RUN apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
|
||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
||||
|
||||
# install go
|
||||
ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
|
||||
# build the final binary
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
ENV GOOS=linux
|
||||
ENV GOARCH=$TARGETARCH
|
||||
ENV GOFLAGS=$GOFLAGS
|
||||
ENV CGO_CFLAGS=${CGO_CFLAGS}
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
RUN /usr/local/go/bin/go generate ./... && \
|
||||
/usr/local/go/bin/go build .
|
||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN dnf install -y git cmake
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:$ROCM_VERSION-complete AS rocm-build-amd64
|
||||
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum remove -y git \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||
|
||||
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 centos:7 AS build-amd64
|
||||
ENV CGO_ENABLED 1
|
||||
|
||||
ARG GOLANG_VERSION
|
||||
ARG GOFLAGS
|
||||
ARG CGO_FLAGS
|
||||
|
||||
RUN yum install -y centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-amd64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
|
||||
ENV PATH /usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
|
||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
|
||||
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/rocm/lib llm/llama.cpp/build/linux/rocm/lib
|
||||
RUN go build .
|
||||
|
||||
FROM --platform=linux/arm64 centos:7 AS build-arm64
|
||||
ENV CGO_ENABLED 1
|
||||
|
||||
ARG GOLANG_VERSION
|
||||
ARG GOFLAGS
|
||||
ARG CGO_FLAGS
|
||||
|
||||
RUN yum install -y centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-arm64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
|
||||
ENV PATH /usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
|
||||
RUN go build .
|
||||
|
||||
FROM build-$TARGETARCH
|
||||
|
||||
@@ -292,6 +292,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
|
||||
- [LiteLLM](https://github.com/BerriAI/litellm)
|
||||
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
|
||||
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
|
||||
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
|
||||
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
|
||||
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
|
||||
|
||||
@@ -148,7 +148,12 @@ type DeleteRequest struct {
|
||||
}
|
||||
|
||||
type ShowRequest struct {
|
||||
Name string `json:"name"`
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
System string `json:"system"`
|
||||
Template string `json:"template"`
|
||||
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
|
||||
type ShowResponse struct {
|
||||
|
||||
527
cmd/cmd.go
527
cmd/cmd.go
@@ -17,7 +17,6 @@ import (
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -26,14 +25,12 @@ import (
|
||||
"github.com/olekukonko/tablewriter"
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"golang.org/x/exp/slices"
|
||||
"golang.org/x/term"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/format"
|
||||
"github.com/jmorganca/ollama/parser"
|
||||
"github.com/jmorganca/ollama/progress"
|
||||
"github.com/jmorganca/ollama/readline"
|
||||
"github.com/jmorganca/ollama/server"
|
||||
"github.com/jmorganca/ollama/version"
|
||||
)
|
||||
@@ -156,7 +153,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
var statusError api.StatusError
|
||||
switch {
|
||||
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
|
||||
if err := PullHandler(cmd, args); err != nil {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return err
|
||||
}
|
||||
case err != nil:
|
||||
@@ -572,31 +569,12 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
}
|
||||
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
case strings.Contains(err.Error(), "unsupported model format"):
|
||||
// pull and retry to see if the model has been updated
|
||||
parts := strings.Split(opts.Model, string(os.PathSeparator))
|
||||
if len(parts) == 1 {
|
||||
// this is a library model, log some info
|
||||
fmt.Fprintln(os.Stderr, "This model is no longer compatible with Ollama. Pulling a new version...")
|
||||
}
|
||||
if err := PullHandler(cmd, []string{opts.Model}); err != nil {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
return fmt.Errorf("unsupported model, please update this model to gguf format") // relay the original error
|
||||
}
|
||||
// retry
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if opts.Prompt != "" {
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
@@ -621,459 +599,6 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type MultilineState int
|
||||
|
||||
const (
|
||||
MultilineNone MultilineState = iota
|
||||
MultilinePrompt
|
||||
MultilineSystem
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func modelIsMultiModal(cmd *cobra.Command, name string) bool {
|
||||
// get model details
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return false
|
||||
}
|
||||
|
||||
req := api.ShowRequest{Name: name}
|
||||
resp, err := client.Show(cmd.Context(), &req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.Contains(resp.Details.Families, "clip")
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
multiModal := modelIsMultiModal(cmd, opts.Model)
|
||||
|
||||
// load the model
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
Images: []ImageData{},
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usage := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShortcuts := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
|
||||
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
|
||||
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShow := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /show license Show model license")
|
||||
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system message")
|
||||
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
// only list out the most common parameters
|
||||
usageParameters := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Parameters:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
scanner, err := readline.New(readline.Prompt{
|
||||
Prompt: ">>> ",
|
||||
AltPrompt: "... ",
|
||||
Placeholder: "Send a message (/? for help)",
|
||||
AltPlaceholder: `Use """ to end multi-line input`,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Print(readline.StartBracketedPaste)
|
||||
defer fmt.Printf(readline.EndBracketedPaste)
|
||||
|
||||
var multiline MultilineState
|
||||
var prompt string
|
||||
|
||||
for {
|
||||
line, err := scanner.Readline()
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
fmt.Println()
|
||||
return nil
|
||||
case errors.Is(err, readline.ErrInterrupt):
|
||||
if line == "" {
|
||||
fmt.Println("\nUse Ctrl + d or /bye to exit.")
|
||||
}
|
||||
|
||||
scanner.Prompt.UseAlt = false
|
||||
prompt = ""
|
||||
|
||||
continue
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(prompt, `"""`):
|
||||
// if the prompt so far starts with """ then we're in multiline mode
|
||||
// and we need to keep reading until we find a line that ends with """
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
|
||||
if !found {
|
||||
prompt += "\n"
|
||||
continue
|
||||
}
|
||||
|
||||
prompt = strings.TrimPrefix(prompt, `"""`)
|
||||
scanner.Prompt.UseAlt = false
|
||||
|
||||
switch multiline {
|
||||
case MultilineSystem:
|
||||
opts.System = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set system message.")
|
||||
case MultilineTemplate:
|
||||
opts.Template = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
multiline = MultilineNone
|
||||
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
|
||||
scanner.Prompt.UseAlt = true
|
||||
multiline = MultilinePrompt
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case scanner.Pasting:
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case strings.HasPrefix(line, "/list"):
|
||||
args := strings.Fields(line)
|
||||
if err := ListHandler(cmd, args[1:]); err != nil {
|
||||
return err
|
||||
}
|
||||
case strings.HasPrefix(line, "/set"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "history":
|
||||
scanner.HistoryEnable()
|
||||
case "nohistory":
|
||||
scanner.HistoryDisable()
|
||||
case "wordwrap":
|
||||
opts.WordWrap = true
|
||||
fmt.Println("Set 'wordwrap' mode.")
|
||||
case "nowordwrap":
|
||||
opts.WordWrap = false
|
||||
fmt.Println("Set 'nowordwrap' mode.")
|
||||
case "verbose":
|
||||
cmd.Flags().Set("verbose", "true")
|
||||
fmt.Println("Set 'verbose' mode.")
|
||||
case "quiet":
|
||||
cmd.Flags().Set("verbose", "false")
|
||||
fmt.Println("Set 'quiet' mode.")
|
||||
case "format":
|
||||
if len(args) < 3 || args[2] != "json" {
|
||||
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
|
||||
} else {
|
||||
opts.Format = args[2]
|
||||
fmt.Printf("Set format to '%s' mode.\n", args[2])
|
||||
}
|
||||
case "noformat":
|
||||
opts.Format = ""
|
||||
fmt.Println("Disabled format.")
|
||||
case "parameter":
|
||||
if len(args) < 4 {
|
||||
usageParameters()
|
||||
continue
|
||||
}
|
||||
var params []string
|
||||
for _, p := range args[3:] {
|
||||
params = append(params, p)
|
||||
}
|
||||
fp, err := api.FormatParams(map[string][]string{args[2]: params})
|
||||
if err != nil {
|
||||
fmt.Printf("Couldn't set parameter: %q\n\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
|
||||
opts.Options[args[2]] = fp[args[2]]
|
||||
case "system", "template":
|
||||
if len(args) < 3 {
|
||||
usageSet()
|
||||
continue
|
||||
}
|
||||
line := strings.Join(args[2:], " ")
|
||||
line = strings.TrimPrefix(line, `"""`)
|
||||
if strings.HasPrefix(args[2], `"""`) {
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
if found {
|
||||
if args[1] == "system" {
|
||||
opts.System = prompt
|
||||
fmt.Println("Set system message.")
|
||||
} else {
|
||||
opts.Template = prompt
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
prompt = ""
|
||||
} else {
|
||||
prompt = `"""` + prompt + "\n"
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else {
|
||||
multiline = MultilineTemplate
|
||||
}
|
||||
scanner.Prompt.UseAlt = true
|
||||
}
|
||||
} else {
|
||||
opts.System = line
|
||||
fmt.Println("Set system message.")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageSet()
|
||||
}
|
||||
case strings.HasPrefix(line, "/show"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: opts.Model})
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get model")
|
||||
return err
|
||||
}
|
||||
|
||||
switch args[1] {
|
||||
case "license":
|
||||
if resp.License == "" {
|
||||
fmt.Print("No license was specified for this model.\n\n")
|
||||
} else {
|
||||
fmt.Println(resp.License)
|
||||
}
|
||||
case "modelfile":
|
||||
fmt.Println(resp.Modelfile)
|
||||
case "parameters":
|
||||
if resp.Parameters == "" {
|
||||
fmt.Print("No parameters were specified for this model.\n\n")
|
||||
} else {
|
||||
if len(opts.Options) > 0 {
|
||||
fmt.Println("User defined parameters:")
|
||||
for k, v := range opts.Options {
|
||||
fmt.Printf("%-*s %v\n", 30, k, v)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Model defined parameters:")
|
||||
fmt.Println(resp.Parameters)
|
||||
}
|
||||
case "system":
|
||||
switch {
|
||||
case opts.System != "":
|
||||
fmt.Println(opts.System + "\n")
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system message was specified for this model.\n\n")
|
||||
}
|
||||
case "template":
|
||||
switch {
|
||||
case opts.Template != "":
|
||||
fmt.Println(opts.Template + "\n")
|
||||
case resp.Template != "":
|
||||
fmt.Println(resp.Template)
|
||||
default:
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageShow()
|
||||
}
|
||||
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "set", "/set":
|
||||
usageSet()
|
||||
case "show", "/show":
|
||||
usageShow()
|
||||
case "shortcut", "shortcuts":
|
||||
usageShortcuts()
|
||||
}
|
||||
} else {
|
||||
usage()
|
||||
}
|
||||
case line == "/exit", line == "/bye":
|
||||
return nil
|
||||
case strings.HasPrefix(line, "/"):
|
||||
args := strings.Fields(line)
|
||||
isFile := false
|
||||
|
||||
if multiModal {
|
||||
for _, f := range extractFileNames(line) {
|
||||
if strings.HasPrefix(f, args[0]) {
|
||||
isFile = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isFile {
|
||||
prompt += line
|
||||
} else {
|
||||
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
|
||||
continue
|
||||
}
|
||||
default:
|
||||
prompt += line
|
||||
}
|
||||
|
||||
if len(prompt) > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = prompt
|
||||
if multiModal {
|
||||
newPrompt, images, err := extractFileData(prompt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Prompt = newPrompt
|
||||
|
||||
// reset the context if we find another image
|
||||
if len(images) > 0 {
|
||||
opts.Images = images
|
||||
ctx := cmd.Context()
|
||||
ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
|
||||
cmd.SetContext(ctx)
|
||||
}
|
||||
if len(opts.Images) == 0 {
|
||||
fmt.Println("This model requires you to add a jpeg, png, or svg image.")
|
||||
fmt.Println()
|
||||
prompt = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prompt = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeFilePath(fp string) string {
|
||||
// Define a map of escaped characters and their replacements
|
||||
replacements := map[string]string{
|
||||
"\\ ": " ", // Escaped space
|
||||
"\\(": "(", // Escaped left parenthesis
|
||||
"\\)": ")", // Escaped right parenthesis
|
||||
"\\[": "[", // Escaped left square bracket
|
||||
"\\]": "]", // Escaped right square bracket
|
||||
"\\{": "{", // Escaped left curly brace
|
||||
"\\}": "}", // Escaped right curly brace
|
||||
"\\$": "$", // Escaped dollar sign
|
||||
"\\&": "&", // Escaped ampersand
|
||||
"\\;": ";", // Escaped semicolon
|
||||
"\\'": "'", // Escaped single quote
|
||||
"\\\\": "\\", // Escaped backslash
|
||||
"\\*": "*", // Escaped asterisk
|
||||
"\\?": "?", // Escaped question mark
|
||||
}
|
||||
|
||||
for escaped, actual := range replacements {
|
||||
fp = strings.ReplaceAll(fp, escaped, actual)
|
||||
}
|
||||
return fp
|
||||
}
|
||||
|
||||
func extractFileNames(input string) []string {
|
||||
// Regex to match file paths starting with / or ./ and include escaped spaces (\ or %20)
|
||||
// and followed by more characters and a file extension
|
||||
regexPattern := `(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
|
||||
re := regexp.MustCompile(regexPattern)
|
||||
|
||||
return re.FindAllString(input, -1)
|
||||
}
|
||||
|
||||
func extractFileData(input string) (string, []ImageData, error) {
|
||||
filePaths := extractFileNames(input)
|
||||
var imgs []ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
nfp := normalizeFilePath(fp)
|
||||
data, err := getImageData(nfp)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Couldn't process image: %q\n", err)
|
||||
return "", imgs, err
|
||||
}
|
||||
fmt.Printf("Added image '%s'\n", nfp)
|
||||
input = strings.ReplaceAll(input, fp, "")
|
||||
imgs = append(imgs, data)
|
||||
}
|
||||
return input, imgs, nil
|
||||
}
|
||||
|
||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
|
||||
if err != nil {
|
||||
@@ -1095,50 +620,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
return server.Serve(ln)
|
||||
}
|
||||
|
||||
func getImageData(filePath string) ([]byte, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
buf := make([]byte, 512)
|
||||
_, err = file.Read(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
contentType := http.DetectContentType(buf)
|
||||
allowedTypes := []string{"image/jpeg", "image/jpg", "image/svg+xml", "image/png"}
|
||||
if !slices.Contains(allowedTypes, contentType) {
|
||||
return nil, fmt.Errorf("invalid image type: %s", contentType)
|
||||
}
|
||||
|
||||
info, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check if the file size exceeds 100MB
|
||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||
if info.Size() > maxSize {
|
||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
||||
}
|
||||
|
||||
buf = make([]byte, info.Size())
|
||||
_, err = file.Seek(0, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
_, err = io.ReadFull(file, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func initializeKeypair() error {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
|
||||
545
cmd/interactive.go
Normal file
545
cmd/interactive.go
Normal file
@@ -0,0 +1,545 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/readline"
|
||||
)
|
||||
|
||||
type MultilineState int
|
||||
|
||||
const (
|
||||
MultilineNone MultilineState = iota
|
||||
MultilinePrompt
|
||||
MultilineSystem
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func modelIsMultiModal(cmd *cobra.Command, name string) bool {
|
||||
// get model details
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return false
|
||||
}
|
||||
|
||||
req := api.ShowRequest{Name: name}
|
||||
resp, err := client.Show(cmd.Context(), &req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.Contains(resp.Details.Families, "clip")
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
multiModal := modelIsMultiModal(cmd, opts.Model)
|
||||
|
||||
// load the model
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
Images: []ImageData{},
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usage := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShortcuts := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
|
||||
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
|
||||
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShow := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /show info Show details for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show license Show model license")
|
||||
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system message")
|
||||
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
// only list out the most common parameters
|
||||
usageParameters := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Parameters:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
scanner, err := readline.New(readline.Prompt{
|
||||
Prompt: ">>> ",
|
||||
AltPrompt: "... ",
|
||||
Placeholder: "Send a message (/? for help)",
|
||||
AltPlaceholder: `Use """ to end multi-line input`,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Print(readline.StartBracketedPaste)
|
||||
defer fmt.Printf(readline.EndBracketedPaste)
|
||||
|
||||
var sb strings.Builder
|
||||
var multiline MultilineState
|
||||
|
||||
for {
|
||||
line, err := scanner.Readline()
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
fmt.Println()
|
||||
return nil
|
||||
case errors.Is(err, readline.ErrInterrupt):
|
||||
if line == "" {
|
||||
fmt.Println("\nUse Ctrl + d or /bye to exit.")
|
||||
}
|
||||
|
||||
scanner.Prompt.UseAlt = false
|
||||
sb.Reset()
|
||||
|
||||
continue
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
switch {
|
||||
case multiline != MultilineNone:
|
||||
// check if there's a multiline terminating string
|
||||
before, ok := strings.CutSuffix(line, `"""`)
|
||||
sb.WriteString(before)
|
||||
if !ok {
|
||||
fmt.Fprintln(&sb)
|
||||
continue
|
||||
}
|
||||
|
||||
switch multiline {
|
||||
case MultilineSystem:
|
||||
opts.System = sb.String()
|
||||
fmt.Println("Set system message.")
|
||||
sb.Reset()
|
||||
case MultilineTemplate:
|
||||
opts.Template = sb.String()
|
||||
fmt.Println("Set prompt template.")
|
||||
sb.Reset()
|
||||
}
|
||||
|
||||
multiline = MultilineNone
|
||||
scanner.Prompt.UseAlt = false
|
||||
case strings.HasPrefix(line, `"""`):
|
||||
line := strings.TrimPrefix(line, `"""`)
|
||||
line, ok := strings.CutSuffix(line, `"""`)
|
||||
sb.WriteString(line)
|
||||
if !ok {
|
||||
// no multiline terminating string; need more input
|
||||
fmt.Fprintln(&sb)
|
||||
multiline = MultilinePrompt
|
||||
scanner.Prompt.UseAlt = true
|
||||
break
|
||||
}
|
||||
case scanner.Pasting:
|
||||
fmt.Fprintln(&sb, line)
|
||||
continue
|
||||
case strings.HasPrefix(line, "/list"):
|
||||
args := strings.Fields(line)
|
||||
if err := ListHandler(cmd, args[1:]); err != nil {
|
||||
return err
|
||||
}
|
||||
case strings.HasPrefix(line, "/set"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "history":
|
||||
scanner.HistoryEnable()
|
||||
case "nohistory":
|
||||
scanner.HistoryDisable()
|
||||
case "wordwrap":
|
||||
opts.WordWrap = true
|
||||
fmt.Println("Set 'wordwrap' mode.")
|
||||
case "nowordwrap":
|
||||
opts.WordWrap = false
|
||||
fmt.Println("Set 'nowordwrap' mode.")
|
||||
case "verbose":
|
||||
cmd.Flags().Set("verbose", "true")
|
||||
fmt.Println("Set 'verbose' mode.")
|
||||
case "quiet":
|
||||
cmd.Flags().Set("verbose", "false")
|
||||
fmt.Println("Set 'quiet' mode.")
|
||||
case "format":
|
||||
if len(args) < 3 || args[2] != "json" {
|
||||
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
|
||||
} else {
|
||||
opts.Format = args[2]
|
||||
fmt.Printf("Set format to '%s' mode.\n", args[2])
|
||||
}
|
||||
case "noformat":
|
||||
opts.Format = ""
|
||||
fmt.Println("Disabled format.")
|
||||
case "parameter":
|
||||
if len(args) < 4 {
|
||||
usageParameters()
|
||||
continue
|
||||
}
|
||||
var params []string
|
||||
for _, p := range args[3:] {
|
||||
params = append(params, p)
|
||||
}
|
||||
fp, err := api.FormatParams(map[string][]string{args[2]: params})
|
||||
if err != nil {
|
||||
fmt.Printf("Couldn't set parameter: %q\n\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
|
||||
opts.Options[args[2]] = fp[args[2]]
|
||||
case "system", "template":
|
||||
if len(args) < 3 {
|
||||
usageSet()
|
||||
continue
|
||||
}
|
||||
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else if args[1] == "template" {
|
||||
multiline = MultilineTemplate
|
||||
}
|
||||
|
||||
line := strings.Join(args[2:], " ")
|
||||
line, ok := strings.CutPrefix(line, `"""`)
|
||||
if !ok {
|
||||
multiline = MultilineNone
|
||||
} else {
|
||||
// only cut suffix if the line is multiline
|
||||
line, ok = strings.CutSuffix(line, `"""`)
|
||||
if ok {
|
||||
multiline = MultilineNone
|
||||
}
|
||||
}
|
||||
|
||||
sb.WriteString(line)
|
||||
if multiline != MultilineNone {
|
||||
scanner.Prompt.UseAlt = true
|
||||
continue
|
||||
}
|
||||
|
||||
if args[1] == "system" {
|
||||
opts.System = sb.String()
|
||||
fmt.Println("Set system message.")
|
||||
} else if args[1] == "template" {
|
||||
opts.Template = sb.String()
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
|
||||
sb.Reset()
|
||||
continue
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageSet()
|
||||
}
|
||||
case strings.HasPrefix(line, "/show"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
req := &api.ShowRequest{
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Template: opts.Template,
|
||||
Options: opts.Options,
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), req)
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get model")
|
||||
return err
|
||||
}
|
||||
|
||||
switch args[1] {
|
||||
case "info":
|
||||
fmt.Println("Model details:")
|
||||
if len(resp.Details.Families) > 0 {
|
||||
fmt.Printf("Family %s\n", strings.Join(resp.Details.Families, ", "))
|
||||
} else if resp.Details.Family != "" {
|
||||
fmt.Printf("Family %s\n", resp.Details.Family)
|
||||
}
|
||||
fmt.Printf("Parameter Size %s\n", resp.Details.ParameterSize)
|
||||
fmt.Printf("Quantization Level %s\n", resp.Details.QuantizationLevel)
|
||||
fmt.Println("")
|
||||
case "license":
|
||||
if resp.License == "" {
|
||||
fmt.Print("No license was specified for this model.\n\n")
|
||||
} else {
|
||||
fmt.Println(resp.License)
|
||||
}
|
||||
case "modelfile":
|
||||
fmt.Println(resp.Modelfile)
|
||||
case "parameters":
|
||||
if resp.Parameters == "" {
|
||||
fmt.Print("No parameters were specified for this model.\n\n")
|
||||
} else {
|
||||
if len(opts.Options) > 0 {
|
||||
fmt.Println("User defined parameters:")
|
||||
for k, v := range opts.Options {
|
||||
fmt.Printf("%-*s %v\n", 30, k, v)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Model defined parameters:")
|
||||
fmt.Println(resp.Parameters)
|
||||
}
|
||||
case "system":
|
||||
switch {
|
||||
case opts.System != "":
|
||||
fmt.Println(opts.System + "\n")
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system message was specified for this model.\n\n")
|
||||
}
|
||||
case "template":
|
||||
switch {
|
||||
case opts.Template != "":
|
||||
fmt.Println(opts.Template + "\n")
|
||||
case resp.Template != "":
|
||||
fmt.Println(resp.Template)
|
||||
default:
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageShow()
|
||||
}
|
||||
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "set", "/set":
|
||||
usageSet()
|
||||
case "show", "/show":
|
||||
usageShow()
|
||||
case "shortcut", "shortcuts":
|
||||
usageShortcuts()
|
||||
}
|
||||
} else {
|
||||
usage()
|
||||
}
|
||||
case line == "/exit", line == "/bye":
|
||||
return nil
|
||||
case strings.HasPrefix(line, "/"):
|
||||
args := strings.Fields(line)
|
||||
isFile := false
|
||||
|
||||
if multiModal {
|
||||
for _, f := range extractFileNames(line) {
|
||||
if strings.HasPrefix(f, args[0]) {
|
||||
isFile = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !isFile {
|
||||
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
|
||||
continue
|
||||
}
|
||||
|
||||
sb.WriteString(line)
|
||||
default:
|
||||
sb.WriteString(line)
|
||||
}
|
||||
|
||||
if sb.Len() > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = sb.String()
|
||||
if multiModal {
|
||||
newPrompt, images, err := extractFileData(sb.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Prompt = newPrompt
|
||||
|
||||
// reset the context if we find another image
|
||||
if len(images) > 0 {
|
||||
opts.Images = images
|
||||
ctx := cmd.Context()
|
||||
ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
|
||||
cmd.SetContext(ctx)
|
||||
}
|
||||
if len(opts.Images) == 0 {
|
||||
fmt.Println("This model requires you to add a jpeg, png, or svg image.")
|
||||
fmt.Println()
|
||||
sb.Reset()
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sb.Reset()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeFilePath(fp string) string {
|
||||
// Define a map of escaped characters and their replacements
|
||||
replacements := map[string]string{
|
||||
"\\ ": " ", // Escaped space
|
||||
"\\(": "(", // Escaped left parenthesis
|
||||
"\\)": ")", // Escaped right parenthesis
|
||||
"\\[": "[", // Escaped left square bracket
|
||||
"\\]": "]", // Escaped right square bracket
|
||||
"\\{": "{", // Escaped left curly brace
|
||||
"\\}": "}", // Escaped right curly brace
|
||||
"\\$": "$", // Escaped dollar sign
|
||||
"\\&": "&", // Escaped ampersand
|
||||
"\\;": ";", // Escaped semicolon
|
||||
"\\'": "'", // Escaped single quote
|
||||
"\\\\": "\\", // Escaped backslash
|
||||
"\\*": "*", // Escaped asterisk
|
||||
"\\?": "?", // Escaped question mark
|
||||
}
|
||||
|
||||
for escaped, actual := range replacements {
|
||||
fp = strings.ReplaceAll(fp, escaped, actual)
|
||||
}
|
||||
return fp
|
||||
}
|
||||
|
||||
func extractFileNames(input string) []string {
|
||||
// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
|
||||
// and followed by more characters and a file extension
|
||||
// This will capture non filename strings, but we'll check for file existence to remove mismatches
|
||||
regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
|
||||
re := regexp.MustCompile(regexPattern)
|
||||
|
||||
return re.FindAllString(input, -1)
|
||||
}
|
||||
|
||||
func extractFileData(input string) (string, []ImageData, error) {
|
||||
filePaths := extractFileNames(input)
|
||||
var imgs []ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
nfp := normalizeFilePath(fp)
|
||||
data, err := getImageData(nfp)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Couldn't process image: %q\n", err)
|
||||
return "", imgs, err
|
||||
}
|
||||
fmt.Printf("Added image '%s'\n", nfp)
|
||||
input = strings.ReplaceAll(input, fp, "")
|
||||
imgs = append(imgs, data)
|
||||
}
|
||||
return input, imgs, nil
|
||||
}
|
||||
|
||||
func getImageData(filePath string) ([]byte, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
buf := make([]byte, 512)
|
||||
_, err = file.Read(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
contentType := http.DetectContentType(buf)
|
||||
allowedTypes := []string{"image/jpeg", "image/jpg", "image/svg+xml", "image/png"}
|
||||
if !slices.Contains(allowedTypes, contentType) {
|
||||
return nil, fmt.Errorf("invalid image type: %s", contentType)
|
||||
}
|
||||
|
||||
info, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check if the file size exceeds 100MB
|
||||
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
|
||||
if info.Size() > maxSize {
|
||||
return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
|
||||
}
|
||||
|
||||
buf = make([]byte, info.Size())
|
||||
_, err = file.Seek(0, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
_, err = io.ReadFull(file, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf, nil
|
||||
}
|
||||
51
cmd/interactive_test.go
Normal file
51
cmd/interactive_test.go
Normal file
@@ -0,0 +1,51 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestExtractFilenames(t *testing.T) {
|
||||
// Unix style paths
|
||||
input := ` some preamble
|
||||
./relative\ path/one.png inbetween1 ./not a valid two.jpg inbetween2
|
||||
/unescaped space /three.jpeg inbetween3 /valid\ path/dir/four.png "./quoted with spaces/five.svg`
|
||||
res := extractFileNames(input)
|
||||
assert.Len(t, res, 5)
|
||||
assert.Contains(t, res[0], "one.png")
|
||||
assert.Contains(t, res[1], "two.jpg")
|
||||
assert.Contains(t, res[2], "three.jpeg")
|
||||
assert.Contains(t, res[3], "four.png")
|
||||
assert.Contains(t, res[4], "five.svg")
|
||||
assert.NotContains(t, res[4], '"')
|
||||
assert.NotContains(t, res, "inbtween")
|
||||
|
||||
// Windows style paths
|
||||
input = ` some preamble
|
||||
c:/users/jdoe/one.png inbetween1 c:/program files/someplace/two.jpg inbetween2
|
||||
/absolute/nospace/three.jpeg inbetween3 /absolute/with space/four.png inbetween4
|
||||
./relative\ path/five.svg inbetween5 "./relative with/spaces/six.png inbetween6
|
||||
d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
|
||||
d:\program files\someplace\nine.png inbetween9 "E:\program files\someplace\ten.svg some ending
|
||||
`
|
||||
res = extractFileNames(input)
|
||||
assert.Len(t, res, 10)
|
||||
assert.NotContains(t, res, "inbtween")
|
||||
assert.Contains(t, res[0], "one.png")
|
||||
assert.Contains(t, res[0], "c:")
|
||||
assert.Contains(t, res[1], "two.jpg")
|
||||
assert.Contains(t, res[1], "c:")
|
||||
assert.Contains(t, res[2], "three.jpeg")
|
||||
assert.Contains(t, res[3], "four.png")
|
||||
assert.Contains(t, res[4], "five.svg")
|
||||
assert.Contains(t, res[5], "six.png")
|
||||
assert.Contains(t, res[6], "seven.svg")
|
||||
assert.Contains(t, res[6], "d:")
|
||||
assert.Contains(t, res[7], "eight.png")
|
||||
assert.Contains(t, res[7], "c:")
|
||||
assert.Contains(t, res[8], "nine.png")
|
||||
assert.Contains(t, res[8], "d:")
|
||||
assert.Contains(t, res[9], "ten.svg")
|
||||
assert.Contains(t, res[9], "E:")
|
||||
}
|
||||
@@ -12,7 +12,7 @@ Import models using source model weights found on Hugging Face and similar sites
|
||||
|
||||
Installing on Linux in most cases is easy using the script on Ollama.ai. To get more detail about the install, including CUDA drivers, see the **[Linux Documentation](./linux.md)**.
|
||||
|
||||
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](./docker.md)**.
|
||||
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](https://hub.docker.com/r/ollama/ollama)**.
|
||||
|
||||
It is easy to install on Linux and Mac, but many users will choose to build Ollama on their own. To do this, refer to the **[Development Documentation](./development.md)**.
|
||||
|
||||
|
||||
@@ -409,7 +409,7 @@ A stream of JSON objects is returned:
|
||||
"model": "llama2",
|
||||
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
||||
"message": {
|
||||
"role": "assisant",
|
||||
"role": "assistant",
|
||||
"content": "The",
|
||||
"images": null
|
||||
},
|
||||
@@ -505,7 +505,7 @@ A stream of JSON objects is returned:
|
||||
"model": "llama2",
|
||||
"created_at": "2023-08-04T08:52:19.385406455-07:00",
|
||||
"message": {
|
||||
"role": "assisant",
|
||||
"role": "assistant",
|
||||
"content": "The"
|
||||
},
|
||||
"done": false
|
||||
|
||||
@@ -38,37 +38,71 @@ Now you can run `ollama`:
|
||||
./ollama
|
||||
```
|
||||
|
||||
## Building on Linux with GPU support
|
||||
### Linux
|
||||
|
||||
#### Linux CUDA (NVIDIA)
|
||||
|
||||
### Linux/Windows CUDA (NVIDIA)
|
||||
*Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
|
||||
|
||||
Install `cmake` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) development and runtime packages.
|
||||
Then generate dependencies:
|
||||
|
||||
```
|
||||
go generate ./...
|
||||
```
|
||||
|
||||
Then build the binary:
|
||||
|
||||
```
|
||||
go build .
|
||||
```
|
||||
|
||||
### Linux ROCm (AMD)
|
||||
#### Linux ROCm (AMD)
|
||||
|
||||
*Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
|
||||
|
||||
Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) developement packages first, as well as `cmake` and `golang`.
|
||||
Adjust the paths below (correct for Arch) as appropriate for your distributions install locations and generate dependencies:
|
||||
|
||||
```
|
||||
CLBlast_DIR=/usr/lib/cmake/CLBlast ROCM_PATH=/opt/rocm go generate ./...
|
||||
```
|
||||
|
||||
Then build the binary:
|
||||
|
||||
```
|
||||
go build .
|
||||
```
|
||||
|
||||
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
|
||||
|
||||
## Containerized Build
|
||||
#### Containerized Linux Build
|
||||
|
||||
If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included.
|
||||
If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
|
||||
|
||||
|
||||
### Windows
|
||||
|
||||
Note: The windows build for Ollama is still under development.
|
||||
|
||||
Install required tools:
|
||||
|
||||
- MSVC toolchain - C/C++ and cmake as minimal requirements
|
||||
- go version 1.20 or higher
|
||||
- MinGW (pick one variant) with GCC.
|
||||
- <https://www.mingw-w64.org/>
|
||||
- <https://www.msys2.org/>
|
||||
|
||||
```powershell
|
||||
$env:CGO_ENABLED="1"
|
||||
|
||||
go generate ./...
|
||||
|
||||
go build .
|
||||
```
|
||||
|
||||
#### Windows CUDA (NVIDIA)
|
||||
|
||||
In addition to the common Windows development tools described above, install:
|
||||
|
||||
- [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
|
||||
|
||||
@@ -156,11 +156,12 @@ PARAMETER <parameter> <parametervalue>
|
||||
|
||||
#### Template Variables
|
||||
|
||||
| Variable | Description |
|
||||
| --------------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `{{ .System }}` | The system message used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
|
||||
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
|
||||
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
|
||||
| Variable | Description |
|
||||
| ----------------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `{{ .System }}` | The system message used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
|
||||
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
|
||||
| `{{ .Response }}` | The response from the LLM, if not specified response is appended to the end of the template. |
|
||||
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
|
||||
|
||||
```modelfile
|
||||
TEMPLATE """
|
||||
|
||||
52
gpu/gpu.go
52
gpu/gpu.go
@@ -16,8 +16,6 @@ import (
|
||||
"runtime"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
type handles struct {
|
||||
@@ -28,6 +26,9 @@ type handles struct {
|
||||
var gpuMutex sync.Mutex
|
||||
var gpuHandles *handles = nil
|
||||
|
||||
// With our current CUDA compile flags, 5.2 and older will not work properly
|
||||
const CudaComputeMajorMin = 6
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initGPUHandles() {
|
||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||
@@ -73,7 +74,18 @@ func GetGPUInfo() GpuInfo {
|
||||
log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
|
||||
C.free(unsafe.Pointer(memInfo.err))
|
||||
} else {
|
||||
resp.Library = "cuda"
|
||||
// Verify minimum compute capability
|
||||
var cc C.cuda_compute_capability_t
|
||||
C.cuda_compute_capability(*gpuHandles.cuda, &cc)
|
||||
if cc.err != nil {
|
||||
log.Printf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err))
|
||||
C.free(unsafe.Pointer(cc.err))
|
||||
} else if cc.major >= CudaComputeMajorMin {
|
||||
log.Printf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor)
|
||||
resp.Library = "cuda"
|
||||
} else {
|
||||
log.Printf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor)
|
||||
}
|
||||
}
|
||||
} else if gpuHandles.rocm != nil {
|
||||
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
||||
@@ -98,6 +110,8 @@ func GetGPUInfo() GpuInfo {
|
||||
C.free(unsafe.Pointer(memInfo.err))
|
||||
return resp
|
||||
}
|
||||
|
||||
resp.DeviceCount = uint32(memInfo.count)
|
||||
resp.FreeMemory = uint64(memInfo.free)
|
||||
resp.TotalMemory = uint64(memInfo.total)
|
||||
return resp
|
||||
@@ -119,31 +133,13 @@ func getCPUMem() (memInfo, error) {
|
||||
func CheckVRAM() (int64, error) {
|
||||
gpuInfo := GetGPUInfo()
|
||||
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
||||
return int64(gpuInfo.FreeMemory), nil
|
||||
// leave 10% or 384Mi of VRAM free for unaccounted for overhead
|
||||
overhead := gpuInfo.FreeMemory * uint64(gpuInfo.DeviceCount) / 10
|
||||
if overhead < 384*1024*1024 {
|
||||
overhead = 384 * 1024 * 1024
|
||||
}
|
||||
return int64(gpuInfo.FreeMemory - overhead), nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
|
||||
}
|
||||
|
||||
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
|
||||
if opts.NumGPU != -1 {
|
||||
return opts.NumGPU
|
||||
}
|
||||
info := GetGPUInfo()
|
||||
if info.Library == "cpu" || info.Library == "default" {
|
||||
return 0
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers.
|
||||
We can store the model weights and the kv cache in vram,
|
||||
to enable kv chache vram storage add two additional layers to the number of layers retrieved from the model file.
|
||||
*/
|
||||
bytesPerLayer := uint64(fileSizeBytes / numLayer)
|
||||
|
||||
// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
|
||||
layers := int(info.FreeMemory/bytesPerLayer) * 3 / 4
|
||||
|
||||
log.Printf("%d MB VRAM available, loading up to %d %s GPU layers out of %d", info.FreeMemory/(1024*1024), layers, info.Library, numLayer)
|
||||
|
||||
return layers
|
||||
}
|
||||
|
||||
@@ -6,18 +6,31 @@ import "C"
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/pbnjay/memory"
|
||||
)
|
||||
|
||||
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
|
||||
func CheckVRAM() (int64, error) {
|
||||
// TODO - assume metal, and return free memory?
|
||||
return 0, nil
|
||||
if runtime.GOARCH == "amd64" {
|
||||
// gpu not supported, this may not be metal
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// on macOS, there's already buffer for available vram (see below) so just return the total
|
||||
systemMemory := int64(memory.TotalMemory())
|
||||
|
||||
// macOS limits how much memory is available to the GPU based on the amount of system memory
|
||||
// TODO: handle case where iogpu.wired_limit_mb is set to a higher value
|
||||
if systemMemory <= 36*1024*1024*1024 {
|
||||
systemMemory = systemMemory * 2 / 3
|
||||
} else {
|
||||
systemMemory = systemMemory * 3 / 4
|
||||
}
|
||||
|
||||
return systemMemory, nil
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
// TODO - Metal vs. x86 macs...
|
||||
mem, _ := getCPUMem()
|
||||
return GpuInfo{
|
||||
Library: "default",
|
||||
@@ -29,22 +42,10 @@ func getCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: 0,
|
||||
FreeMemory: 0,
|
||||
DeviceCount: 0,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
|
||||
if opts.NumGPU != -1 {
|
||||
return opts.NumGPU
|
||||
}
|
||||
|
||||
// metal only supported on arm64
|
||||
if runtime.GOARCH == "arm64" {
|
||||
return 1
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func nativeInit() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ extern "C" {
|
||||
typedef struct mem_info {
|
||||
uint64_t total;
|
||||
uint64_t free;
|
||||
unsigned int count;
|
||||
char *err; // If non-nill, caller responsible for freeing
|
||||
} mem_info_t;
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ void cpu_check_ram(mem_info_t *resp) {
|
||||
MEMORYSTATUSEX info;
|
||||
info.dwLength = sizeof(info);
|
||||
if (GlobalMemoryStatusEx(&info) != 0) {
|
||||
resp->count = 1;
|
||||
resp->total = info.ullTotalPhys;
|
||||
resp->free = info.ullAvailPhys;
|
||||
} else {
|
||||
@@ -26,6 +27,7 @@ void cpu_check_ram(mem_info_t *resp) {
|
||||
if (sysinfo(&info) != 0) {
|
||||
resp->err = strdup(strerror(errno));
|
||||
} else {
|
||||
resp->count = 1;
|
||||
resp->total = info.totalram * info.mem_unit;
|
||||
resp->free = info.freeram * info.mem_unit;
|
||||
}
|
||||
|
||||
@@ -7,9 +7,17 @@
|
||||
#ifndef _WIN32
|
||||
const char *cuda_lib_paths[] = {
|
||||
"libnvidia-ml.so",
|
||||
"/usr/lib/wsl/lib/libnvidia-ml.so", // TODO Maybe glob?
|
||||
"/usr/lib/wsl/lib/libnvidia-ml.so.1",
|
||||
"/usr/local/cuda/lib64/libnvidia-ml.so",
|
||||
"/usr/lib/libnvidia-ml.so",
|
||||
"/usr/lib/libnvidia-ml.so.1",
|
||||
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so",
|
||||
"/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob?
|
||||
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so",
|
||||
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
|
||||
"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so",
|
||||
"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so",
|
||||
"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
|
||||
NULL,
|
||||
};
|
||||
#else
|
||||
@@ -20,6 +28,8 @@ const char *cuda_lib_paths[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
#define CUDA_LOOKUP_SIZE 6
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp) {
|
||||
nvmlReturn_t ret;
|
||||
resp->err = NULL;
|
||||
@@ -30,11 +40,13 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[4] = {
|
||||
} l[CUDA_LOOKUP_SIZE] = {
|
||||
{"nvmlInit_v2", (void *)&resp->ch.initFn},
|
||||
{"nvmlShutdown", (void *)&resp->ch.shutdownFn},
|
||||
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
|
||||
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
|
||||
{"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
|
||||
{"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
|
||||
};
|
||||
|
||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
||||
@@ -52,7 +64,7 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list
|
||||
for (i = 0; i < CUDA_LOOKUP_SIZE; i++) { // TODO - fix this to use a null terminated list
|
||||
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
UNLOAD_LIBRARY(resp->ch.handle);
|
||||
@@ -89,22 +101,81 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO - handle multiple GPUs
|
||||
ret = (*h.getHandle)(0, &device);
|
||||
ret = (*h.getCount)(&resp->count);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device handle: %d", ret);
|
||||
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.getMemInfo)(device, &memInfo);
|
||||
resp->total = 0;
|
||||
resp->free = 0;
|
||||
for (i = 0; i < resp->count; i++) {
|
||||
ret = (*h.getHandle)(i, &device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.getMemInfo)(device, &memInfo);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "device memory info lookup failure %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
resp->total += memInfo.total;
|
||||
resp->free += memInfo.free;
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_compute_capability(cuda_handle_t h, cuda_compute_capability_t *resp) {
|
||||
resp->err = NULL;
|
||||
resp->major = 0;
|
||||
resp->minor = 0;
|
||||
nvmlDevice_t device;
|
||||
int major = 0;
|
||||
int minor = 0;
|
||||
nvmlReturn_t ret;
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
|
||||
if (h.handle == NULL) {
|
||||
resp->err = strdup("nvml handle not initialized");
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int devices;
|
||||
ret = (*h.getCount)(&devices);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "device memory info lookup failure: %d", ret);
|
||||
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
resp->total = memInfo.total;
|
||||
resp->free = memInfo.free;
|
||||
return;
|
||||
|
||||
for (i = 0; i < devices; i++) {
|
||||
ret = (*h.getHandle)(i, &device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.getComputeCapability)(device, &major, &minor);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "device compute capability lookup failure %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
// Report the lowest major.minor we detect as that limits our compatibility
|
||||
if (resp->major == 0 || resp->major > major ) {
|
||||
resp->major = major;
|
||||
resp->minor = minor;
|
||||
} else if ( resp->major == major && resp->minor > minor ) {
|
||||
resp->minor = minor;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // __APPLE__
|
||||
@@ -21,6 +21,8 @@ typedef struct cuda_handle {
|
||||
nvmlReturn_t (*shutdownFn)(void);
|
||||
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
|
||||
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
|
||||
nvmlReturn_t (*getCount)(unsigned int *);
|
||||
nvmlReturn_t (*getComputeCapability)(nvmlDevice_t, int* major, int* minor);
|
||||
} cuda_handle_t;
|
||||
|
||||
typedef struct cuda_init_resp {
|
||||
@@ -28,8 +30,15 @@ typedef struct cuda_init_resp {
|
||||
cuda_handle_t ch;
|
||||
} cuda_init_resp_t;
|
||||
|
||||
typedef struct cuda_compute_capability {
|
||||
char *err;
|
||||
int major;
|
||||
int minor;
|
||||
} cuda_compute_capability_t;
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp);
|
||||
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
||||
void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc);
|
||||
|
||||
#endif // __GPU_INFO_CUDA_H__
|
||||
#endif // __APPLE__
|
||||
@@ -110,6 +110,8 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: set this to the actual number of devices
|
||||
resp->count = 1;
|
||||
resp->total = totalMem;
|
||||
resp->free = totalMem - usedMem;
|
||||
return;
|
||||
|
||||
@@ -18,6 +18,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
|
||||
case "linux", "windows":
|
||||
assert.Greater(t, info.TotalMemory, uint64(0))
|
||||
assert.Greater(t, info.FreeMemory, uint64(0))
|
||||
assert.Greater(t, info.DeviceCount, uint64(0))
|
||||
default:
|
||||
return
|
||||
}
|
||||
@@ -35,7 +36,6 @@ func TestCPUMemInfo(t *testing.T) {
|
||||
default:
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
||||
|
||||
@@ -3,6 +3,7 @@ package gpu
|
||||
type memInfo struct {
|
||||
TotalMemory uint64 `json:"total_memory,omitempty"`
|
||||
FreeMemory uint64 `json:"free_memory,omitempty"`
|
||||
DeviceCount uint32 `json:"device_count,omitempty"`
|
||||
}
|
||||
|
||||
// Beginning of an `ollama info` command
|
||||
|
||||
@@ -12,6 +12,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
#if SERVER_VERBOSE != 1
|
||||
log_disable();
|
||||
#endif
|
||||
LOG_TEE("system info: %s", llama_print_system_info());
|
||||
assert(err != NULL && sparams != NULL);
|
||||
err->id = 0;
|
||||
err->msg[0] = '\0';
|
||||
|
||||
@@ -3,8 +3,8 @@ package llm
|
||||
/*
|
||||
#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
|
||||
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
|
||||
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
|
||||
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
|
||||
#cgo CFLAGS: -Wmissing-noreturn -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
|
||||
#cgo CPPFLAGS: -Ofast -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
|
||||
#cgo darwin CFLAGS: -D_DARWIN_C_SOURCE
|
||||
#cgo darwin CPPFLAGS: -DGGML_USE_ACCELERATE
|
||||
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
|
||||
@@ -35,14 +35,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/gpu"
|
||||
)
|
||||
|
||||
type extServer interface {
|
||||
@@ -82,25 +80,20 @@ func extServerResponseToErr(resp C.ext_server_resp_t) error {
|
||||
return fmt.Errorf(C.GoString(resp.msg))
|
||||
}
|
||||
|
||||
func newExtServer(server extServer, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newExtServer(server extServer, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
if !mutex.TryLock() {
|
||||
log.Printf("concurrent llm servers not yet supported, waiting for prior server to complete")
|
||||
mutex.Lock()
|
||||
}
|
||||
fileInfo, err := os.Stat(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var sparams C.ext_server_params_t
|
||||
sparams.model = C.CString(model)
|
||||
defer C.free(unsafe.Pointer(sparams.model))
|
||||
|
||||
numGPU := gpu.NumGPU(numLayers, fileInfo.Size(), opts)
|
||||
|
||||
sparams.embedding = true
|
||||
sparams.n_ctx = C.uint(opts.NumCtx)
|
||||
sparams.n_batch = C.uint(opts.NumBatch)
|
||||
sparams.n_gpu_layers = C.int(numGPU)
|
||||
sparams.n_gpu_layers = C.int(opts.NumGPU)
|
||||
sparams.main_gpu = C.int(opts.MainGPU)
|
||||
sparams.n_parallel = 1 // TODO - wire up concurrency
|
||||
|
||||
|
||||
@@ -54,9 +54,9 @@ func (llm *llamaExtServer) llama_server_release_json_resp(json_resp **C.char) {
|
||||
C.llama_server_release_json_resp(json_resp)
|
||||
}
|
||||
|
||||
func newDefaultExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
server := &llamaExtServer{opts}
|
||||
return newExtServer(server, model, adapters, projectors, numLayers, opts)
|
||||
return newExtServer(server, model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
func (llm *llamaExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
func newDefaultExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
// On windows we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies
|
||||
// This ensures we can update the PATH at runtime to get everything loaded
|
||||
|
||||
return newDynamicShimExtServer(AvailableShims["cpu"], model, adapters, projectors, numLayers, opts)
|
||||
return newDynamicShimExtServer(AvailableShims["cpu"], model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
init_vars() {
|
||||
LLAMACPP_DIR=../llama.cpp
|
||||
PATCHES="0001-Expose-callable-API-for-server.patch"
|
||||
CMAKE_DEFS=""
|
||||
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
|
||||
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
||||
|
||||
@@ -9,14 +9,14 @@ set -o pipefail
|
||||
echo "Starting darwin generate script"
|
||||
source $(dirname $0)/gen_common.sh
|
||||
init_vars
|
||||
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
||||
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
|
||||
case "${GOARCH}" in
|
||||
"amd64")
|
||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||
;;
|
||||
"arm64")
|
||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
|
||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
|
||||
;;
|
||||
*)
|
||||
echo "GOARCH must be set"
|
||||
|
||||
@@ -4,7 +4,6 @@ $ErrorActionPreference = "Stop"
|
||||
|
||||
function init_vars {
|
||||
$script:llamacppDir = "../llama.cpp"
|
||||
$script:patches = @("0001-Expose-callable-API-for-server.patch")
|
||||
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
|
||||
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
|
||||
if ($env:CGO_CFLAGS -contains "-g") {
|
||||
|
||||
@@ -78,7 +78,11 @@ type model interface {
|
||||
ModelFamily() string
|
||||
ModelType() string
|
||||
FileType() string
|
||||
NumLayers() int64
|
||||
NumLayers() uint32
|
||||
NumGQA() uint32
|
||||
NumEmbed() uint32
|
||||
NumHead() uint32
|
||||
NumHeadKv() uint32
|
||||
}
|
||||
|
||||
type container interface {
|
||||
|
||||
41
llm/gguf.go
41
llm/gguf.go
@@ -272,14 +272,49 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumLayers() int64 {
|
||||
func (llm *ggufModel) NumLayers() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
|
||||
v := value.(uint32)
|
||||
return int64(v)
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumHead() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumEmbed() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumHeadKv() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumGQA() uint32 {
|
||||
numHeadKv := llm.NumHeadKv()
|
||||
if numHeadKv == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
return llm.NumHead() / numHeadKv
|
||||
}
|
||||
|
||||
func (llm ggufModel) readU8(r io.Reader) uint8 {
|
||||
|
||||
61
llm/llama.go
61
llm/llama.go
@@ -8,7 +8,6 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
@@ -43,69 +42,11 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||
ws ::= ([ \t\n] ws)?
|
||||
`
|
||||
|
||||
type llamaModel struct {
|
||||
hyperparameters llamaHyperparameters
|
||||
}
|
||||
|
||||
func (llm *llamaModel) ModelFamily() string {
|
||||
return "llama"
|
||||
}
|
||||
|
||||
func llamaModelType(numLayer uint32) string {
|
||||
switch numLayer {
|
||||
case 26:
|
||||
return "3B"
|
||||
case 32:
|
||||
return "7B"
|
||||
case 40:
|
||||
return "13B"
|
||||
case 48:
|
||||
return "34B"
|
||||
case 60:
|
||||
return "30B"
|
||||
case 80:
|
||||
return "65B"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *llamaModel) ModelType() string {
|
||||
return llamaModelType(llm.hyperparameters.NumLayer)
|
||||
}
|
||||
|
||||
func (llm *llamaModel) FileType() string {
|
||||
return fileType(llm.hyperparameters.FileType)
|
||||
}
|
||||
|
||||
func (llm *llamaModel) NumLayers() int64 {
|
||||
return int64(llm.hyperparameters.NumLayer)
|
||||
}
|
||||
|
||||
type llamaHyperparameters struct {
|
||||
// NumVocab is the size of the model's vocabulary.
|
||||
NumVocab uint32
|
||||
|
||||
// NumEmbd is the size of the model's embedding layer.
|
||||
NumEmbd uint32
|
||||
NumMult uint32
|
||||
NumHead uint32
|
||||
|
||||
// NumLayer is the number of layers in the model.
|
||||
NumLayer uint32
|
||||
NumRot uint32
|
||||
|
||||
// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
|
||||
FileType uint32
|
||||
}
|
||||
|
||||
type Running struct {
|
||||
Port int
|
||||
Cmd *exec.Cmd
|
||||
Cancel context.CancelFunc
|
||||
exitOnce sync.Once
|
||||
exitCh chan error // channel to receive the exit status of the subprocess
|
||||
*StatusWriter // captures error messages from the llama runner process
|
||||
*StatusWriter // captures error messages from the llama runner process
|
||||
}
|
||||
|
||||
type ImageData struct {
|
||||
|
||||
115
llm/llm.go
115
llm/llm.go
@@ -7,10 +7,7 @@ import (
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/pbnjay/memory"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/format"
|
||||
"github.com/jmorganca/ollama/gpu"
|
||||
)
|
||||
|
||||
@@ -40,40 +37,95 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if runtime.GOOS == "darwin" {
|
||||
var requiredMemory int64
|
||||
var f16Multiplier int64 = 2
|
||||
if opts.NumCtx < 4 {
|
||||
opts.NumCtx = 4
|
||||
}
|
||||
|
||||
switch ggml.ModelType() {
|
||||
case "3B", "7B":
|
||||
requiredMemory = 8 * format.GigaByte
|
||||
case "13B":
|
||||
requiredMemory = 16 * format.GigaByte
|
||||
case "30B", "34B", "40B":
|
||||
requiredMemory = 32 * format.GigaByte
|
||||
case "47B":
|
||||
requiredMemory = 48 * format.GigaByte
|
||||
case "65B", "70B":
|
||||
requiredMemory = 64 * format.GigaByte
|
||||
case "180B":
|
||||
requiredMemory = 128 * format.GigaByte
|
||||
f16Multiplier = 4
|
||||
}
|
||||
fmt.Println("size", ggml.Size)
|
||||
fmt.Println("filetype", ggml.FileType())
|
||||
fmt.Println("architecture", ggml.ModelFamily())
|
||||
fmt.Println("type", ggml.ModelType())
|
||||
fmt.Println("name", ggml.Name())
|
||||
fmt.Println("embd", ggml.NumEmbed())
|
||||
fmt.Println("head", ggml.NumHead())
|
||||
fmt.Println("head_kv", ggml.NumHeadKv())
|
||||
fmt.Println("gqa", ggml.NumGQA())
|
||||
|
||||
systemMemory := int64(memory.TotalMemory())
|
||||
available, _ := gpu.CheckVRAM()
|
||||
|
||||
if ggml.FileType() == "F16" && requiredMemory*f16Multiplier > systemMemory {
|
||||
return nil, fmt.Errorf("F16 model requires at least %s of memory", format.HumanBytes(requiredMemory))
|
||||
} else if requiredMemory > systemMemory {
|
||||
return nil, fmt.Errorf("model requires at least %s of memory", format.HumanBytes(requiredMemory))
|
||||
// For now assume filesize = model size
|
||||
// TODO: use actual model size
|
||||
requiredModel := ggml.Size
|
||||
|
||||
// fp16 k,v matrices require = n_ctx * n_layer * n_embd / n_head * n_head_kv * 2 bytes each * 2 key and value
|
||||
requiredKv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(ggml.NumHead())
|
||||
|
||||
// this amount is the overhead + tensors in memory
|
||||
// TODO: get this from the llama.cpp's graph calcluations instead of
|
||||
// estimating it's 1/6 * kv_cache_size * num_gqa
|
||||
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
|
||||
|
||||
requiredTotal := requiredModel + requiredKv + requiredAlloc
|
||||
|
||||
log.Println("system memory bytes:", available)
|
||||
log.Println("required model bytes:", requiredModel)
|
||||
log.Println("required kv bytes:", requiredKv)
|
||||
log.Println("required alloc bytes:", requiredAlloc)
|
||||
log.Println("required total bytes:", requiredTotal)
|
||||
|
||||
info := gpu.GetGPUInfo()
|
||||
library := info.Library
|
||||
|
||||
if opts.NumGPU == -1 {
|
||||
// default to offloading all layers
|
||||
opts.NumGPU = int(ggml.NumLayers()) + 1
|
||||
}
|
||||
|
||||
// decide how many layers to put on the GPU
|
||||
if opts.NumGPU > 0 {
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
if requiredTotal > available {
|
||||
log.Println("not enough vram available, falling back to CPU only")
|
||||
opts.NumGPU = 0
|
||||
}
|
||||
default:
|
||||
if library == "cpu" || library == "default" {
|
||||
opts.NumGPU = 0
|
||||
break
|
||||
}
|
||||
|
||||
// no offloading required
|
||||
if requiredTotal <= available {
|
||||
break
|
||||
}
|
||||
|
||||
// requiredAlloc is always loaded for the CUDA runner, so don't load it if it won't fit
|
||||
if requiredAlloc > available {
|
||||
log.Printf("not enough vram available, falling back to CPU only")
|
||||
library = "cpu"
|
||||
opts.NumGPU = 0
|
||||
break
|
||||
}
|
||||
|
||||
available -= requiredAlloc
|
||||
|
||||
// fill remaining vram with layers
|
||||
log.Println("splitting", available, "of available memory bytes into layers")
|
||||
bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
|
||||
log.Println("bytes per layer:", bytesPerLayer)
|
||||
layers := available / bytesPerLayer
|
||||
log.Println("total required with split:", requiredAlloc+(layers*bytesPerLayer))
|
||||
if layers < int64(opts.NumGPU) {
|
||||
opts.NumGPU = int(layers)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
opts.NumGQA = 0
|
||||
opts.RopeFrequencyBase = 0.0
|
||||
opts.RopeFrequencyScale = 0.0
|
||||
gpuInfo := gpu.GetGPUInfo()
|
||||
return newLlmServer(gpuInfo.Library, model, adapters, projectors, ggml.NumLayers(), opts)
|
||||
return newLlmServer(library, model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
// Give any native cgo implementations an opportunity to initialize
|
||||
@@ -81,9 +133,9 @@ func Init(workdir string) error {
|
||||
return nativeInit(workdir)
|
||||
}
|
||||
|
||||
func newLlmServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newLlmServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
if _, libPresent := AvailableShims[library]; libPresent && library != "default" {
|
||||
srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, numLayers, opts)
|
||||
srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, opts)
|
||||
if err == nil {
|
||||
return srv, nil
|
||||
}
|
||||
@@ -91,6 +143,5 @@ func newLlmServer(library, model string, adapters, projectors []string, numLayer
|
||||
// TODO - update some state to indicate we were unable to load the GPU library for future "info" ux
|
||||
}
|
||||
|
||||
return newDefaultExtServer(model, adapters, projectors, numLayers, opts)
|
||||
|
||||
return newDefaultExtServer(model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
//go:embed llama.cpp/ggml-metal.metal
|
||||
var libEmbed embed.FS
|
||||
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
// should never happen...
|
||||
return nil, fmt.Errorf("Dynamic library loading not supported on Mac")
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ func (llm *shimExtServer) llama_server_release_json_resp(json_resp **C.char) {
|
||||
C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)
|
||||
}
|
||||
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
shimMutex.Lock()
|
||||
defer shimMutex.Unlock()
|
||||
updatePath(filepath.Dir(library))
|
||||
@@ -90,7 +90,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
|
||||
options: opts,
|
||||
}
|
||||
log.Printf("Loading Dynamic Shim llm server: %s", library)
|
||||
return newExtServer(llm, model, adapters, projectors, numLayers, opts)
|
||||
return newExtServer(llm, model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
func (llm *shimExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
|
||||
|
||||
63
parser/parser_test.go
Normal file
63
parser/parser_test.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_Parser(t *testing.T) {
|
||||
|
||||
input := `
|
||||
FROM model1
|
||||
ADAPTER adapter1
|
||||
LICENSE MIT
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
TEMPLATE template1
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
commands, err := Parse(reader)
|
||||
assert.Nil(t, err)
|
||||
|
||||
expectedCommands := []Command{
|
||||
{Name: "model", Args: "model1"},
|
||||
{Name: "adapter", Args: "adapter1"},
|
||||
{Name: "license", Args: "MIT"},
|
||||
{Name: "param1", Args: "value1"},
|
||||
{Name: "param2", Args: "value2"},
|
||||
{Name: "template", Args: "template1"},
|
||||
}
|
||||
|
||||
assert.Equal(t, expectedCommands, commands)
|
||||
}
|
||||
|
||||
func Test_Parser_NoFromLine(t *testing.T) {
|
||||
|
||||
input := `
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "no FROM line")
|
||||
}
|
||||
|
||||
func Test_Parser_MissingValue(t *testing.T) {
|
||||
|
||||
input := `
|
||||
FROM foo
|
||||
PARAMETER param1
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "missing value for [param1]")
|
||||
|
||||
}
|
||||
@@ -25,10 +25,7 @@ func NewBuffer(prompt *Prompt) (*Buffer, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lwidth := width - len(prompt.Prompt)
|
||||
if prompt.UseAlt {
|
||||
lwidth = width - len(prompt.AltPrompt)
|
||||
}
|
||||
lwidth := width - len(prompt.prompt())
|
||||
|
||||
b := &Buffer{
|
||||
Pos: 0,
|
||||
@@ -78,7 +75,7 @@ func (b *Buffer) MoveRight() {
|
||||
if b.Pos < b.Size() {
|
||||
b.Pos += 1
|
||||
if b.Pos%b.LineWidth == 0 {
|
||||
fmt.Printf(CursorDown + CursorBOL + cursorRightN(b.PromptSize()))
|
||||
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
|
||||
} else {
|
||||
fmt.Print(CursorRight)
|
||||
}
|
||||
@@ -109,7 +106,7 @@ func (b *Buffer) MoveToStart() {
|
||||
fmt.Print(CursorUp)
|
||||
}
|
||||
}
|
||||
fmt.Printf(CursorBOL + cursorRightN(b.PromptSize()))
|
||||
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())))
|
||||
b.Pos = 0
|
||||
}
|
||||
}
|
||||
@@ -123,7 +120,7 @@ func (b *Buffer) MoveToEnd() {
|
||||
fmt.Print(CursorDown)
|
||||
}
|
||||
remainder := b.Size() % b.LineWidth
|
||||
fmt.Printf(CursorBOL + cursorRightN(b.PromptSize()+remainder))
|
||||
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder))
|
||||
} else {
|
||||
fmt.Print(cursorRightN(b.Size() - b.Pos))
|
||||
}
|
||||
@@ -143,13 +140,6 @@ func min(n, m int) int {
|
||||
return n
|
||||
}
|
||||
|
||||
func (b *Buffer) PromptSize() int {
|
||||
if b.Prompt.UseAlt {
|
||||
return len(b.Prompt.AltPrompt)
|
||||
}
|
||||
return len(b.Prompt.Prompt)
|
||||
}
|
||||
|
||||
func (b *Buffer) Add(r rune) {
|
||||
if b.Pos == b.Buf.Size() {
|
||||
fmt.Printf("%c", r)
|
||||
@@ -232,7 +222,7 @@ func (b *Buffer) Remove() {
|
||||
remainingLines := (b.Size() - b.Pos) / b.LineWidth
|
||||
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
|
||||
place := b.Pos % b.LineWidth
|
||||
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.Prompt)))
|
||||
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt())))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -247,7 +237,7 @@ func (b *Buffer) Delete() {
|
||||
remainingLines := (b.Size() - b.Pos) / b.LineWidth
|
||||
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
|
||||
place := b.Pos % b.LineWidth
|
||||
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.Prompt)))
|
||||
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt())))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -294,15 +284,15 @@ func (b *Buffer) DeleteWord() {
|
||||
}
|
||||
|
||||
func (b *Buffer) ClearScreen() {
|
||||
fmt.Printf(ClearScreen + CursorReset + b.Prompt.Prompt)
|
||||
fmt.Printf(ClearScreen + CursorReset + b.Prompt.prompt())
|
||||
if b.IsEmpty() {
|
||||
ph := b.Prompt.Placeholder
|
||||
ph := b.Prompt.placeholder()
|
||||
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
|
||||
} else {
|
||||
currPos := b.Pos
|
||||
b.Pos = 0
|
||||
b.drawRemaining()
|
||||
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.Prompt)))
|
||||
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt())))
|
||||
if currPos > 0 {
|
||||
targetLine := currPos / b.LineWidth
|
||||
if targetLine > 0 {
|
||||
@@ -329,7 +319,7 @@ func (b *Buffer) IsEmpty() bool {
|
||||
func (b *Buffer) Replace(r []rune) {
|
||||
b.Pos = 0
|
||||
b.Buf.Clear()
|
||||
fmt.Printf(ClearLine + CursorBOL + b.Prompt.Prompt)
|
||||
fmt.Printf(ClearLine + CursorBOL + b.Prompt.prompt())
|
||||
for _, c := range r {
|
||||
b.Add(c)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,20 @@ type Prompt struct {
|
||||
UseAlt bool
|
||||
}
|
||||
|
||||
func (p *Prompt) prompt() string {
|
||||
if p.UseAlt {
|
||||
return p.AltPrompt
|
||||
}
|
||||
return p.Prompt
|
||||
}
|
||||
|
||||
func (p *Prompt) placeholder() string {
|
||||
if p.UseAlt {
|
||||
return p.AltPlaceholder
|
||||
}
|
||||
return p.Placeholder
|
||||
}
|
||||
|
||||
type Terminal struct {
|
||||
outchan chan rune
|
||||
}
|
||||
@@ -46,8 +60,9 @@ func New(prompt Prompt) (*Instance, error) {
|
||||
}
|
||||
|
||||
func (i *Instance) Readline() (string, error) {
|
||||
prompt := i.Prompt.Prompt
|
||||
if i.Prompt.UseAlt || i.Pasting {
|
||||
prompt := i.Prompt.prompt()
|
||||
if i.Pasting {
|
||||
// force alt prompt when pasting
|
||||
prompt = i.Prompt.AltPrompt
|
||||
}
|
||||
fmt.Print(prompt)
|
||||
@@ -71,10 +86,7 @@ func (i *Instance) Readline() (string, error) {
|
||||
// don't show placeholder when pasting unless we're in multiline mode
|
||||
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
|
||||
if buf.IsEmpty() && showPlaceholder {
|
||||
ph := i.Prompt.Placeholder
|
||||
if i.Prompt.UseAlt {
|
||||
ph = i.Prompt.AltPlaceholder
|
||||
}
|
||||
ph := i.Prompt.placeholder()
|
||||
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
|
||||
}
|
||||
|
||||
|
||||
@@ -8,9 +8,9 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
|
||||
mkdir -p dist
|
||||
|
||||
for TARGETARCH in arm64 amd64; do
|
||||
rm -rf llm/llama.cpp/build
|
||||
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
|
||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
|
||||
rm -rf llm/llama.cpp/*/build
|
||||
done
|
||||
|
||||
lipo -create -output dist/ollama dist/ollama-darwin-*
|
||||
|
||||
@@ -8,7 +8,7 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
|
||||
mkdir -p dist
|
||||
|
||||
for TARGETARCH in amd64 arm64; do
|
||||
docker build --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
||||
docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
|
||||
docker rm builder-$TARGETARCH
|
||||
|
||||
@@ -478,32 +478,6 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
return err
|
||||
}
|
||||
|
||||
// if the model is not in gguf format, pull the base model to try and get it in gguf format
|
||||
if fromConfig.ModelFormat != "gguf" {
|
||||
fn(api.ProgressResponse{Status: "updating base model"})
|
||||
parent, err := GetModel(c.Args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
originalModel := parent.OriginalModel
|
||||
if originalModel == "" {
|
||||
originalModel = parent.ShortName
|
||||
}
|
||||
if err := PullModel(ctx, originalModel, &RegistryOptions{}, fn); err != nil {
|
||||
log.Printf("error pulling parent model: %v", err)
|
||||
}
|
||||
|
||||
// Reset the file pointer to the beginning of the file
|
||||
_, err = fromConfigFile.Seek(0, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("update from config after pull: %w", err)
|
||||
}
|
||||
if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// if the model is still not in gguf format, error out
|
||||
if fromConfig.ModelFormat != "gguf" {
|
||||
return fmt.Errorf("%s is not in gguf format, this base model is not compatible with this version of ollama", c.Args)
|
||||
|
||||
@@ -610,12 +610,18 @@ func ShowModelHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if req.Name == "" {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
|
||||
switch {
|
||||
case req.Model == "" && req.Name == "":
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
||||
return
|
||||
case req.Model != "" && req.Name != "":
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "both model and name are set"})
|
||||
return
|
||||
case req.Model == "" && req.Name != "":
|
||||
req.Model = req.Name
|
||||
}
|
||||
|
||||
resp, err := GetModelInfo(req.Name)
|
||||
resp, err := GetModelInfo(req)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
|
||||
@@ -628,8 +634,8 @@ func ShowModelHandler(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
model, err := GetModel(name)
|
||||
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
model, err := GetModel(req.Model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -642,6 +648,14 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
QuantizationLevel: model.Config.FileType,
|
||||
}
|
||||
|
||||
if req.System != "" {
|
||||
model.System = req.System
|
||||
}
|
||||
|
||||
if req.Template != "" {
|
||||
model.Template = req.Template
|
||||
}
|
||||
|
||||
resp := &api.ShowResponse{
|
||||
License: strings.Join(model.License, "\n"),
|
||||
System: model.System,
|
||||
@@ -649,13 +663,6 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
Details: modelDetails,
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Modelfile = mf
|
||||
|
||||
var params []string
|
||||
cs := 30
|
||||
for k, v := range model.Options {
|
||||
@@ -685,6 +692,19 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
}
|
||||
resp.Parameters = strings.Join(params, "\n")
|
||||
|
||||
for k, v := range req.Options {
|
||||
if _, ok := req.Options[k]; ok {
|
||||
model.Options[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Modelfile = mf
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
@@ -837,6 +857,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||
|
||||
config := cors.DefaultConfig()
|
||||
config.AllowWildcard = true
|
||||
config.AllowBrowserExtensions = true
|
||||
|
||||
config.AllowOrigins = origins
|
||||
for _, allowOrigin := range defaultAllowOrigins {
|
||||
|
||||
Reference in New Issue
Block a user