Compare commits
26 Commits
- 5306b0269d
- 7de0c8345d
- 1b9dcab3ab
- 86279f4ae3
- b934bf23e6
- 2b8ef455ad
- 0c5f47177c
- 1210db2924
- d0854bf1e6
- 8396463255
- a027bbf4d7
- ed94a3dd02
- f14f62ab3b
- 0fb5268496
- c65edb1506
- 1605af32ec
- ee3032ad89
- 5b7a27281d
- d2a784e33e
- 413a2e4f91
- b5614f3ebc
- 8b2ba9cab8
- e29662ab5c
- cbc40aa996
- 5cb82540c9
- d7849a1dc9
@@ -5,3 +5,4 @@ dist
 scripts
 llm/llama.cpp/ggml
 llm/llama.cpp/gguf
+.env
@@ -214,6 +214,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
 | [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) | Interact with Ollama as a chatbot on Discord. |
 | [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) | Raycast extension to use Ollama for local llama inference on Raycast. |
 | [Simple HTML UI](https://github.com/rtcfirefly/ollama-ui) | Also, there is a Chrome extension. |
+| [Ollama-GUI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file) | 🖥️ Mac Chat Interface ⚡️ |
 | [Emacs client](https://github.com/zweifisch/ollama) | |

 [1]: https://python.langchain.com/docs/integrations/llms/ollama
cmd/cmd.go (17 lines changed)

@@ -33,15 +33,22 @@ import (
 )

 type Painter struct {
-	HideHint bool
+	IsMultiLine bool
 }

 func (p Painter) Paint(line []rune, _ int) []rune {
 	termType := os.Getenv("TERM")
-	if termType == "xterm-256color" && len(line) == 0 && !p.HideHint {
-		prompt := "Send a message (/? for help)"
+	if termType == "xterm-256color" && len(line) == 0 {
+		var prompt string
+		if p.IsMultiLine {
+			prompt = "Use \"\"\" to end multi-line input"
+		} else {
+			prompt = "Send a message (/? for help)"
+		}
 		return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt)))
 	}
+	// add a space and a backspace to prevent the cursor from walking up the screen
+	line = append(line, []rune(" \b")...)
 	return line
 }

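For readers unfamiliar with the escape sequence used in `Paint`, here is a minimal, self-contained sketch of the same placeholder technique (illustrative only, not part of the diff): the hint is printed in a dim 256-color gray, then the cursor is moved back by the hint's length so whatever comes next overwrites it.

```go
package main

import "fmt"

// hint renders text in dim gray (SGR 38;5;245), moves the cursor back by
// len(text) columns (CSI nD), and resets attributes, so the next characters
// printed or typed start where the hint begins and overwrite it.
func hint(text string) string {
	return fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", text, len(text))
}

func main() {
	fmt.Print(">>> ", hint("Send a message (/? for help)"))
	fmt.Println("hello") // overwrites the start of the hint
}
```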
@@ -579,7 +586,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
 		case isMultiLine:
 			if strings.HasSuffix(line, `"""`) {
 				isMultiLine = false
-				painter.HideHint = false
+				painter.IsMultiLine = isMultiLine
 				multiLineBuffer += strings.TrimSuffix(line, `"""`)
 				line = multiLineBuffer
 				multiLineBuffer = ""
@@ -590,9 +597,9 @@ func generateInteractive(cmd *cobra.Command, model string) error {
 			}
 		case strings.HasPrefix(line, `"""`):
 			isMultiLine = true
+			painter.IsMultiLine = isMultiLine
 			multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
 			scanner.SetPrompt("... ")
-			painter.HideHint = true
 			continue
 		case strings.HasPrefix(line, "/list"):
 			args := strings.Fields(line)
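Taken together, the two hunks above replace the `HideHint` flag with an `IsMultiLine` flag that both drives the hint text and tracks the `"""` buffering state. A rough, standalone sketch of that flow, with invented I/O around it (not the diff itself):

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

func main() {
	scanner := bufio.NewScanner(os.Stdin)
	isMultiLine := false
	buffer := ""

	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case isMultiLine:
			if strings.HasSuffix(line, `"""`) {
				// closing """ flushes the buffered lines as a single prompt
				isMultiLine = false
				buffer += strings.TrimSuffix(line, `"""`)
				fmt.Printf("submitting: %q\n", buffer)
				buffer = ""
				continue
			}
			buffer += line + " "
		case strings.HasPrefix(line, `"""`):
			// opening """ starts buffering; this is where the hint text switches
			isMultiLine = true
			buffer = strings.TrimPrefix(line, `"""`) + " "
		default:
			fmt.Printf("submitting: %q\n", line)
		}
	}
}
```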
docs/linux.md (new file, 83 lines)

@@ -0,0 +1,83 @@
+# Installing Ollama on Linux
+
+> Note: A one line installer for Ollama is available by running:
+>
+> ```
+> curl https://ollama.ai/install.sh | sh
+> ```
+
+## Download the `ollama` binary
+
+Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
+
+```
+sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
+sudo chmod +x /usr/bin/ollama
+```
+
+## Start Ollama
+
+Start Ollama by running `ollama serve`:
+
+```
+ollama serve
+```
+
+Once Ollama is running, run a model in another terminal session:
+
+```
+ollama run llama2
+```
+
+## Install CUDA drivers (optional – for Nvidia GPUs)
+
+[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
+
+Verify that the drivers are installed by running the following command, which should print details about your GPU:
+
+```
+nvidia-smi
+```
+
+## Adding Ollama as a startup service (optional)
+
+Create a user for Ollama:
+
+```
+sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
+```
+
+Create a service file in `/etc/systemd/system/ollama.service`:
+
+```ini
+[Unit]
+Description=Ollama Service
+After=network-online.target
+
+[Service]
+ExecStart=/usr/bin/ollama serve
+User=ollama
+Group=ollama
+Restart=always
+RestartSec=3
+Environment="HOME=/usr/share/ollama"
+
+[Install]
+WantedBy=default.target
+```
+
+Then start the service:
+
+```
+sudo systemctl daemon-reload
+sudo systemctl enable ollama
+```
+
+### Viewing logs
+
+To view logs of Ollama running as a startup service, run:
+
+```
+journalctl -u ollama
+```
@@ -77,6 +77,7 @@ type model interface {
 	ModelFamily() string
 	ModelType() string
 	FileType() string
+	NumLayers() int64
 }

 type container interface {
llm/gguf.go (10 lines changed)

@@ -195,6 +195,16 @@ func (llm *ggufModel) Decode(r io.Reader) error {
 	return nil
 }

+func (llm *ggufModel) NumLayers() int64 {
+	value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
+	if !exists {
+		return 0
+	}
+
+	v := value.(uint32)
+	return int64(v)
+}
+
 func (ggufModel) readU8(r io.Reader) uint8 {
 	var u8 uint8
 	binary.Read(r, binary.LittleEndian, &u8)
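The key looked up above is built from the model family, so for a llama-family GGUF it would be `llama.block_count`. A hedged, standalone illustration of that lookup (the map contents here are invented for the example, standing in for `ggufModel.kv`):

```go
package main

import "fmt"

func main() {
	// invented metadata map standing in for the decoded GGUF key/value pairs
	kv := map[string]interface{}{
		"general.architecture": "llama",
		"llama.block_count":    uint32(32),
	}

	family := "llama"
	value, exists := kv[fmt.Sprintf("%s.block_count", family)]
	if !exists {
		fmt.Println("layers:", 0)
		return
	}
	fmt.Println("layers:", int64(value.(uint32))) // layers: 32
}
```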
llm/llama.go (50 lines changed)

@@ -152,6 +152,10 @@ func (llm *llamaModel) FileType() string {
 	return fileType(llm.hyperparameters.FileType)
 }

+func (llm *llamaModel) NumLayers() int64 {
+	return int64(llm.hyperparameters.NumLayer)
+}
+
 type llamaHyperparameters struct {
 	// NumVocab is the size of the model's vocabulary.
 	NumVocab uint32

@@ -207,13 +211,13 @@ func CheckVRAM() (int, error) {
 	return total, nil
 }

-func NumGPU(opts api.Options) int {
+func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 	if opts.NumGPU != -1 {
 		return opts.NumGPU
 	}
 	n := 1 // default to enable metal on macOS
 	if runtime.GOOS == "linux" {
-		vram, err := CheckVRAM()
+		vramMib, err := CheckVRAM()
 		if err != nil {
 			if err.Error() != "nvidia-smi command failed" {
 				log.Print(err.Error())

@@ -221,33 +225,25 @@ func NumGPU(opts api.Options) int {
 			// nvidia driver not installed or no nvidia GPU found
 			return 0
 		}
-		// TODO: this is a very rough heuristic, better would be to calculate this based on number of layers and context size
-		switch {
-		case vram < 500:
-			log.Printf("WARNING: Low VRAM detected, disabling GPU")
-			n = 0
-		case vram < 1000:
-			n = 4
-		case vram < 2000:
-			n = 8
-		case vram < 4000:
-			n = 12
-		case vram < 8000:
-			n = 16
-		case vram < 12000:
-			n = 24
-		case vram < 16000:
-			n = 32
-		default:
-			n = 48
-		}
-		log.Printf("%d MB VRAM available, loading %d GPU layers", vram, n)
+
+		totalVramBytes := int64(vramMib) * 1024 * 1024 // 1 MiB = 1024^2 bytes
+
+		// Calculate bytes per layer
+		// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
+		bytesPerLayer := fileSizeBytes / numLayer
+
+		// set n to the max number of layers we can fit in VRAM
+		return int(totalVramBytes / bytesPerLayer)
+
+		log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, n)
 	}
-	return n
+	// default to enable metal on macOS
+	return 1
 }

-func newLlama(model string, adapters []string, runners []ModelRunner, opts api.Options) (*llama, error) {
-	if _, err := os.Stat(model); err != nil {
+func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
+	fileInfo, err := os.Stat(model)
+	if err != nil {
 		return nil, err
 	}

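To make the new heuristic concrete, here is a small, self-contained sketch with invented numbers (none of these values come from the diff): the available VRAM is converted from MiB to bytes, the model file size is divided by its layer count to approximate bytes per layer, and the quotient is the number of layers to offload.

```go
package main

import "fmt"

// numGPULayers mirrors the arithmetic of the NumGPU heuristic above.
func numGPULayers(numLayer, fileSizeBytes, vramMib int64) int64 {
	totalVramBytes := vramMib * 1024 * 1024 // MiB -> bytes
	bytesPerLayer := fileSizeBytes / numLayer
	return totalVramBytes / bytesPerLayer
}

func main() {
	// Invented example: a ~3.8 GiB model file with 32 layers is ~119 MiB per layer.
	fileSize := int64(3800) * 1024 * 1024
	fmt.Println(numGPULayers(32, fileSize, 2048)) // 17 layers fit in 2 GiB of VRAM
	fmt.Println(numGPULayers(32, fileSize, 4096)) // 34, i.e. more than the model's 32 layers
}
```

Note that the result can exceed the model's actual layer count (34 in the second case), consistent with the TODO in the hunk calling this a rough heuristic; in practice llama.cpp simply offloads every layer when `--n-gpu-layers` is larger than the layer count.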
@@ -261,7 +257,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, opts api.O
 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
-		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(opts)),
+		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)),
 		"--embedding",
 	}

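For context, a hedged sketch of how a computed layer count ends up on a runner command line like the one above (the binary path and values are invented for illustration):

```go
package main

import (
	"fmt"
	"os/exec"
)

func main() {
	numGPULayers := int64(17) // e.g. the value produced by a NumGPU-style heuristic
	args := []string{
		"--model", "/path/to/model.gguf",
		"--n-gpu-layers", fmt.Sprintf("%d", numGPULayers),
		"--embedding",
	}
	cmd := exec.Command("/path/to/runner", args...) // invented path
	fmt.Println(cmd.String())
}
```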
@@ -91,9 +91,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 	switch ggml.Name() {
 	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
-		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
@@ -4,123 +4,73 @@

 set -eu

-check_os() {
-    if [ "$(uname -s)" != "Linux" ]; then
-        echo "This script is intended to run on Linux only."
-        exit 1
-    fi
-}
-
-determine_architecture() {
-    ARCH=$(uname -m)
-    case $ARCH in
-        x86_64)
-            ARCH_SUFFIX="amd64"
-            ;;
-        aarch64|arm64)
-            ARCH_SUFFIX="arm64"
-            ;;
-        *)
-            echo "Unsupported architecture: $ARCH"
-            exit 1
-            ;;
-    esac
-}
-
-check_sudo() {
-    if [ "$(id -u)" -ne 0 ]; then
-        if command -v sudo >/dev/null 2>&1; then
-            SUDO_CMD="sudo"
-            echo "Downloading the ollama executable to the PATH, this will require sudo permissions."
-        else
-            echo "Error: sudo is not available. Please run as root or install sudo."
-            exit 1
-        fi
-    else
-        SUDO_CMD=""
-    fi
-}
-
-install_cuda_drivers() {
-    local os_name os_version
-    if [ -f "/etc/os-release" ]; then
-        . /etc/os-release
-        os_name=$ID
-        os_version=$VERSION_ID
-    else
-        echo "Unable to detect operating system. Skipping CUDA installation."
-        return 1
-    fi
-
-    # based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#package-manager-installation
-    case $os_name in
-        CentOS)
-            $SUDO_CMD yum install yum-utils
-            $SUDO_CMD yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
-            $SUDO_CMD yum clean all
-            $SUDO_CMD yum -y install nvidia-driver-latest-dkms
-            $SUDO_CMD yum -y install cuda-driver
-            $SUDO_CMD yum install kernel-devel-$(uname -r) kernel-headers-$(uname -r)
-            $SUDO_CMD dkms status | awk -F: '/added/ { print $1 }' | xargs -n1 $SUDO_CMD dkms install
-            $SUDO_CMD modprobe nvidia
-            ;;
-        ubuntu)
-            case $os_version in
-                20.04)
-                    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
-                    ;;
-                22.04)
-                    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-                    ;;
-                *)
-                    echo "Skipping automatic CUDA installation, not supported for Ubuntu ($os_version)."
-                    return
-                    ;;
-            esac
-            $SUDO_CMD dpkg -i cuda-keyring_1.1-1_all.deb
-            $SUDO_CMD apt-get update
-            $SUDO_CMD apt-get -y install cuda-drivers
-            ;;
-        RedHatEnterprise*|Kylin|Fedora|SLES|openSUSE*|Microsoft|Debian)
-            echo "NVIDIA CUDA drivers may not be installed, you can install them from: https://developer.nvidia.com/cuda-downloads"
-            ;;
-        *)
-            echo "Unsupported or unknown distribution, skipping GPU CUDA driver install: $os_name"
-            ;;
-    esac
-}
-
-check_install_cuda_drivers() {
-    if lspci -d '10de:' | grep 'NVIDIA' >/dev/null; then
-        # NVIDIA Corporation [10de] device is available
-        if command -v nvidia-smi >/dev/null 2>&1; then
-            CUDA_VERSION=$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")
-            if [ -z "$CUDA_VERSION" ]; then
-                echo "Warning: NVIDIA-SMI is available, but the CUDA version cannot be detected. Installing CUDA drivers..."
-                install_cuda_drivers
-            else
-                echo "Detected CUDA version $CUDA_VERSION"
-            fi
-        else
-            echo "Warning: NVIDIA GPU detected but NVIDIA-SMI is not available. Installing CUDA drivers..."
-            install_cuda_drivers
-        fi
-    else
-        echo "No NVIDIA GPU detected. Skipping driver installation."
-    fi
-}
-
-download_ollama() {
-    $SUDO_CMD mkdir -p /usr/bin
-    $SUDO_CMD curl -fsSL -o /usr/bin/ollama "https://ollama.ai/download/latest/ollama-linux-$ARCH_SUFFIX"
-}
+status() { echo ">>> $*" >&2; }
+error() { echo "ERROR $*"; exit 1; }
+warning() { echo "WARNING: $*"; }
+
+TEMP_DIR=$(mktemp -d)
+cleanup() { rm -rf $TEMP_DIR; }
+trap cleanup EXIT
+
+available() { command -v $1 >/dev/null; }
+require() {
+    local MISSING=''
+    for TOOL in $*; do
+        if ! available $TOOL; then
+            MISSING="$MISSING $TOOL"
+        fi
+    done
+
+    echo $MISSING
+}
+
+[ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
+
+case "$(uname -m)" in
+    x86_64) ARCH="amd64" ;;
+    aarch64|arm64) ARCH="arm64" ;;
+    *) error "Unsupported architecture: $ARCH" ;;
+esac
+
+SUDO=
+if [ "$(id -u)" -ne 0 ]; then
+    # Running as root, no need for sudo
+    if ! available sudo; then
+        error "This script requires superuser permissions. Please re-run as root."
+    fi
+
+    SUDO="sudo"
+fi
+
+NEEDS=$(require curl awk grep sed tee xargs)
+if [ -n "$NEEDS" ]; then
+    status "ERROR: The following tools are required but missing:"
+    for NEED in $NEEDS; do
+        echo " - $NEED"
+    done
+    exit 1
+fi
+
+status "Downloading ollama..."
+$SUDO curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"
+
+status "Installing ollama to /usr/bin..."
+$SUDO install -o0 -g0 -m755 -d /usr/bin
+$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama /usr/bin/ollama
+
+install_success() { status 'Install complete. Run "ollama" from the command line.'; }
+trap install_success EXIT
+
+# Everything from this point onwards is optional.
+
 configure_systemd() {
-    if command -v systemctl >/dev/null 2>&1; then
-        $SUDO_CMD useradd -r -s /bin/false -m -d /home/ollama ollama 2>/dev/null
-        echo "Creating systemd service file for ollama..."
-        cat <<EOF | $SUDO_CMD tee /etc/systemd/system/ollama.service >/dev/null
+    if ! id ollama >/dev/null 2>&1; then
+        status "Creating ollama user..."
+        $SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama
+    fi
+
+    status "Creating ollama systemd service..."
+    cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
 [Unit]
 Description=Ollama Service
 After=network-online.target
@@ -131,30 +81,147 @@ User=ollama
 Group=ollama
 Restart=always
 RestartSec=3
-Environment="HOME=/home/ollama"
+Environment="HOME=/usr/share/ollama"

 [Install]
 WantedBy=default.target
 EOF
-        echo "Reloading systemd and enabling ollama service..."
-        if [ "$(systemctl is-system-running || echo 'not running')" = 'running' ]; then
-            $SUDO_CMD systemctl daemon-reload
-            $SUDO_CMD systemctl enable ollama
-            $SUDO_CMD systemctl restart ollama
-        fi
-    else
-        echo "Run 'ollama serve' from the command line to start the service."
-    fi
+    SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
+    case $SYSTEMCTL_RUNNING in
+        running|degraded)
+            status "Enabling and starting ollama service..."
+            $SUDO systemctl daemon-reload
+            $SUDO systemctl enable ollama
+            $SUDO systemctl restart ollama
+            ;;
+    esac
 }

-main() {
-    check_os
-    determine_architecture
-    check_sudo
-    download_ollama
+if available systemctl; then
     configure_systemd
-    check_install_cuda_drivers
-    echo "Installation complete. You can now run 'ollama' from the command line."
+fi
+
+if ! available lspci && ! available lshw; then
+    warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
+    exit 0
+fi
+
+check_gpu() {
+    case $1 in
+        lspci) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
+        lshw) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
+        nvidia-smi) available nvidia-smi || return 1 ;;
+    esac
 }

-main
+if ! check_gpu lspci && ! check_gpu lshw; then
+    warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
+    exit 0
+fi
+
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-8-rocky-8
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora
+install_cuda_driver_yum() {
+    status 'Installing NVIDIA repository...'
+    case $PACKAGE_MANAGER in
+        yum)
+            $SUDO $PACKAGE_MANAGER -y install yum-utils
+            $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
+            ;;
+        dnf)
+            $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
+            ;;
+    esac
+
+    case $1 in
+        rhel)
+            status 'Installing EPEL repository...'
+            # EPEL is required for third-party dependencies such as dkms and libvdpau
+            $SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true
+            ;;
+    esac
+
+    status 'Installing CUDA driver...'
+
+    if [ "$1" = 'centos' ] || [ "$1$2" = 'rhel7' ]; then
+        $SUDO $PACKAGE_MANAGER -y install nvidia-driver-latest-dkms
+    fi
+
+    $SUDO $PACKAGE_MANAGER -y install cuda-drivers
+}
+
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
+# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
+install_cuda_driver_apt() {
+    status 'Installing NVIDIA repository...'
+    curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb
+
+    case $1 in
+        debian)
+            status 'Enabling contrib sources...'
+            $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | sudo tee /etc/apt/sources.list.d/contrib.list > /dev/null
+            ;;
+    esac
+
+    status 'Installing CUDA driver...'
+    $SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb
+    $SUDO apt-get update
+
+    [ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E=
+    DEBIAN_FRONTEND=noninteractive $SUDO_E apt-get -y install cuda-drivers -q
+}
+
+if [ ! -f "/etc/os-release" ]; then
+    error "Unknown distribution. Skipping CUDA installation."
+fi
+
+. /etc/os-release
+
+OS_NAME=$ID
+OS_VERSION=$VERSION_ID
+
+PACKAGE_MANAGER=
+for PACKAGE_MANAGER in dnf yum apt-get; do
+    if available $PACKAGE_MANAGER; then
+        break
+    fi
+done
+
+if [ -z "$PACKAGE_MANAGER" ]; then
+    error "Unknown package manager. Skipping CUDA installation."
+fi
+
+if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
+    case $OS_NAME in
+        centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
+        rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
+        fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
+        amzn) install_cuda_driver_yum 'fedora' '35' ;;
+        debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
+        ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
+        *) exit ;;
+    esac
+fi
+
+if ! lsmod | grep -q nvidia; then
+    KERNEL_RELEASE="$(uname -r)"
+    case $OS_NAME in
+        centos|rhel|rocky|fedora|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
+        debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
+        *) exit ;;
+    esac
+
+    NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }')
+    if [ -n "$NVIDIA_CUDA_VERSION" ]; then
+        $SUDO dkms install $NVIDIA_CUDA_VERSION
+    fi
+
+    if lsmod | grep -q nouveau; then
+        status "Removing nouveau..."
+        $SUDO rmmod nouveau
+    fi
+
+    $SUDO modprobe nvidia
+fi