diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 29adf56f3..ac9af6411 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -73,12 +73,12 @@ jobs:
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$gccpath;$env:PATH"
echo $env:PATH
- go generate -x ./...
+ $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
if: ${{ startsWith(matrix.os, 'windows-') }}
- name: 'Windows Go Generate'
- - run: go generate -x ./...
+ name: 'Windows Generate'
+ - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
if: ${{ ! startsWith(matrix.os, 'windows-') }}
- name: 'Unix Go Generate'
+ name: 'Unix Generate'
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -184,7 +184,7 @@ jobs:
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
go generate -x ./...
- name: go generate
+ name: go generate -x ./...
env:
OLLAMA_SKIP_CPU_GENERATE: '1'
# TODO - do we need any artifacts?
@@ -217,7 +217,7 @@ jobs:
- name: 'Verify CUDA'
run: nvcc -V
- run: go get ./...
- - name: go generate
+ - name: go generate -x ./...
run: |
$gopath=(get-command go).source | split-path -parent
$cudabin=(get-command nvcc).source | split-path
@@ -312,7 +312,10 @@ jobs:
touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
if: ${{ startsWith(matrix.os, 'macos-') }}
shell: bash
- - run: go generate ./...
+ - run: $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
+ if: ${{ startsWith(matrix.os, 'windows-') }}
+ - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
+ if: ${{ ! startsWith(matrix.os, 'windows-') }}
- run: go build
- run: go test -v ./...
- uses: actions/upload-artifact@v4
diff --git a/README.md b/README.md
index 62f5cd65c..4bbf38556 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-

+
# Ollama
[](https://discord.gg/ollama)
-Get up and running with large language models.
+Get up and running with large language models locally.
### macOS
@@ -51,17 +51,15 @@ Here are some example models that can be downloaded:
| ------------------ | ---------- | ----- | ------------------------------ |
| Llama 3 | 8B | 4.7GB | `ollama run llama3` |
| Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
-| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
-| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
-| Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
-| Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
+| Phi-3 | 3.8B | 2.3GB | `ollama run phi3` |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
-| Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` |
+| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
+| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -175,19 +173,13 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
The image features a yellow smiley face, which is likely the central focus of the picture.
```
-### Pass the prompt as an argument
+### Pass in prompt as arguments
```
$ ollama run llama3 "Summarize this file: $(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
-### Show model information
-
-```
-ollama show llama3
-```
-
### List models on your computer
```
@@ -200,7 +192,19 @@ ollama list
## Building
-See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
+Install `cmake` and `go`:
+
+```
+brew install cmake go
+```
+
+Then build the binary:
+
+```
+go run build.go
+```
+
+More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
### Running local builds
@@ -248,7 +252,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Open WebUI](https://github.com/open-webui/open-webui)
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
-- [Hollama](https://github.com/fmaclen/hollama)
- [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
- [LibreChat](https://github.com/danny-avila/LibreChat)
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
@@ -275,24 +278,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
- [OpenAOE](https://github.com/InternLM/OpenAOE)
- [Odin Runes](https://github.com/leonid20000/OdinRunes)
-- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
+- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
-- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
-- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
-- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
-- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
-- [chat](https://github.com/swuecho/chat) (chat web app for teams)
+- [QA-Pilot: Chat with Code Repository](https://github.com/reid41/QA-Pilot)
+- [ChatOllama: Open Source Chatbot based on Ollama with Knowledge Bases](https://github.com/sugarforever/chat-ollama)
+- [CRAG Ollama Chat: Simple Web Search with Corrective RAG](https://github.com/Nagi-ovo/CRAG-Ollama-Chat)
+- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
+- [chat: chat web app for teams](https://github.com/swuecho/chat)
- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
-- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
-- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
-- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
-- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
-- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
-- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
-- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Ollama RAG Chatbot: Local Chat with multiple PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
### Terminal
@@ -315,7 +311,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ShellOracle](https://github.com/djcopley/ShellOracle)
- [tlm](https://github.com/yusufcanb/tlm)
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
-- [gollama](https://github.com/sammcj/gollama)
### Database
@@ -326,20 +321,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
-- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
-- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
-- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -350,13 +342,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
- [Elixir LangChain](https://github.com/brainlid/langchain)
- [Ollama for R - rollama](https://github.com/JBGruber/rollama)
-- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
- [Testcontainers](https://testcontainers.com/modules/ollama/)
-- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
-- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
-- [LlamaScript](https://github.com/Project-Llama/llamascript)
### Mobile
@@ -376,23 +364,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
-- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
-- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
-
-### Supported backends
-
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
+### Supported backends
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
diff --git a/build.go b/build.go
new file mode 100644
index 000000000..e6446d2a3
--- /dev/null
+++ b/build.go
@@ -0,0 +1,199 @@
+//go:build ignore
+
+package main
+
+import (
+ "cmp"
+ "errors"
+ "flag"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+)
+
+// Flags
+var (
+ flagRegenerateDestroy = flag.Bool("d", false, "force regenerate the dependencies (destructive)")
+ flagRegenerateGently = flag.Bool("g", false, "regenerate the dependencies (non-destructive)")
+ flagSkipBuild = flag.Bool("s", false, "generate dependencies only (e.g. skip 'go build .')")
+
+ // Flag to set GOARCH explicitly for cross-platform builds,
+ // e.g., in CI to target a different platform than the build matrix
+ // default. This lets us run generate without a separate build
+ // step for compiling the script binary for the host ARCH and then
+ // running the generate script for the target ARCH. Instead, we can
+ // just run `go run build.go -target=$GOARCH` to generate the
+ // deps.
+ flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
+)
+
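+// buildEnv returns the environment for the generate and build commands,
+// forcing GOARCH to the -target flag value if set, then to
+// OLLAMA_BUILD_TARGET_ARCH (set by CI), and finally to the host architecture.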
+func buildEnv() []string {
+ return append(os.Environ(), "GOARCH="+cmp.Or(
+ *flagGOARCH,
+ os.Getenv("OLLAMA_BUILD_TARGET_ARCH"),
+ runtime.GOARCH,
+ ))
+}
+
+func main() {
+ log.SetFlags(0)
+ flag.Usage = func() {
+ log.Printf("Usage: go run build.go [flags]")
+ log.Println()
+ log.Println("Flags:")
+ flag.PrintDefaults()
+ log.Println()
+ log.Println("This script builds the Ollama server binary and generates the llama.cpp")
+ log.Println("bindings for the current platform. It assumes that the current working")
+ log.Println("directory is the root directory of the Ollama project.")
+ log.Println()
+ log.Println("If the -d flag is provided, the script will force regeneration of the")
+ log.Println("dependencies; removing the 'llm/build' directory before starting.")
+ log.Println()
+ log.Println("If the -g flag is provided, the script will regenerate the dependencies")
+ log.Println("without removing the 'llm/build' directory.")
+ log.Println()
+ log.Println("If the -s flag is provided, the script will skip building the Ollama binary")
+ log.Println()
+ log.Println("If the -target flag is provided, the script will set GOARCH to the value")
+ log.Println("of the flag. This is useful for cross-platform builds.")
+ log.Println()
+ log.Println("The script will check for the required dependencies (cmake, gcc) and")
+ log.Println("print their version.")
+ log.Println()
+ log.Println("The script will also check if it is being run from the root directory of")
+ log.Println("the Ollama project.")
+ log.Println()
+ os.Exit(1)
+ }
+ flag.Parse()
+
+ log.Printf("=== Building Ollama ===")
+ defer func() {
+ log.Printf("=== Done building Ollama ===")
+ if !*flagSkipBuild {
+ log.Println()
+ log.Println("To run the Ollama server, use:")
+ log.Println()
+ log.Println(" ./ollama serve")
+ log.Println()
+ }
+ }()
+
+ if flag.NArg() > 0 {
+ flag.Usage()
+ }
+
+ if !inRootDir() {
+ log.Fatalf("Please run this script from the root directory of the Ollama project.")
+ }
+
+ if err := checkDependencies(); err != nil {
+ log.Fatalf("Failed dependency check: %v", err)
+ }
+ if err := buildLlamaCPP(); err != nil {
+ log.Fatalf("Failed to build llama.cpp: %v", err)
+ }
+ if err := goBuildOllama(); err != nil {
+ log.Fatalf("Failed to build ollama Go binary: %v", err)
+ }
+}
+
+// checkDependencies does a quick check to see if the required dependencies are
+// installed on the system and functioning enough to print their version.
+//
+// TODO(bmizerany): Check the actual version of the dependencies? Seems a
+// little daunting given diff versions might print diff things. This should
+// be good enough for now.
+func checkDependencies() error {
+ var err error
+ check := func(name string, args ...string) {
+ log.Printf("=== Checking for %s ===", name)
+ defer log.Printf("=== Done checking for %s ===\n\n", name)
+ cmd := exec.Command(name, args...)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ err = errors.Join(err, cmd.Run())
+ }
+
+ check("cmake", "--version")
+ check("gcc", "--version")
+ return err
+}
+
+func goBuildOllama() error {
+ log.Println("=== Building Ollama binary ===")
+ defer log.Printf("=== Done building Ollama binary ===\n\n")
+ if *flagSkipBuild {
+ log.Println("Skipping 'go build -o ollama .'")
+ return nil
+ }
+ cmd := exec.Command("go", "build", "-o", "ollama", ".")
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ cmd.Env = buildEnv()
+ return cmd.Run()
+}
+
+// buildLlamaCPP generates the llama.cpp bindings for the current platform.
+//
+// It assumes that the current working directory is the root directory of the
+// Ollama project.
+func buildLlamaCPP() error {
+ log.Println("=== Generating dependencies ===")
+ defer log.Printf("=== Done generating dependencies ===\n\n")
+ if *flagRegenerateDestroy {
+ if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
+ return err
+ }
+ }
+ if isDirectory(filepath.Join("llm", "build")) && !*flagRegenerateGently {
+ log.Println("llm/build already exists; skipping. Use -d or -g to re-generate.")
+ return nil
+ }
+
+ scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
+ if err != nil {
+ return err
+ }
+
+ var cmd *exec.Cmd
+ switch runtime.GOOS {
+ case "windows":
+ script := filepath.Join(scriptDir, "gen_windows.ps1")
+ cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
+ case "linux":
+ script := filepath.Join(scriptDir, "gen_linux.sh")
+ cmd = exec.Command("bash", script)
+ case "darwin":
+ script := filepath.Join(scriptDir, "gen_darwin.sh")
+ cmd = exec.Command("bash", script)
+ default:
+ log.Fatalf("Unsupported OS: %s", runtime.GOOS)
+ }
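+ // The gen_*.sh and gen_windows.ps1 scripts expect to be invoked from the
+ // llm/generate directory, so run the command from there.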
+ cmd.Dir = filepath.Join("llm", "generate")
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ cmd.Env = buildEnv()
+
+ log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, cmp.Or(*flagGOARCH, os.Getenv("OLLAMA_BUILD_TARGET_ARCH"), runtime.GOARCH), cmd.Args)
+
+ return cmd.Run()
+}
+
+func isDirectory(path string) bool {
+ info, err := os.Stat(path)
+ if err != nil {
+ return false
+ }
+ return info.IsDir()
+}
+
+// inRootDir returns true if the current working directory is the root
+// directory of the Ollama project. It looks for a file named "go.mod".
+func inRootDir() bool {
+ _, err := os.Stat("go.mod")
+ return err == nil
+}
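
The GOARCH selection in `buildEnv` leans on `cmp.Or` from Go 1.22+, which returns the first non-zero value among its arguments. Below is a minimal, stand-alone sketch of that precedence (not part of the patch itself); the `flagTarget` local stands in for the `-target` flag value, and `OLLAMA_BUILD_TARGET_ARCH` is the variable the workflow above exports:

```go
package main

import (
	"cmp"
	"fmt"
	"os"
	"runtime"
)

func main() {
	// Same precedence as build.go's buildEnv: an explicit -target value wins,
	// then OLLAMA_BUILD_TARGET_ARCH (set by CI), then the host architecture.
	flagTarget := "" // stands in for the -target flag value
	arch := cmp.Or(
		flagTarget,
		os.Getenv("OLLAMA_BUILD_TARGET_ARCH"),
		runtime.GOARCH,
	)
	fmt.Println("GOARCH=" + arch)
}
```

With `GOARCH` cleared and `OLLAMA_BUILD_TARGET_ARCH` set to the matrix arch, as the workflow steps above do, the generate step can target arm64 even on an amd64 runner.
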
diff --git a/docs/development.md b/docs/development.md
index 2a6886a43..1f54e0320 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -25,13 +25,7 @@ export OLLAMA_DEBUG=1
Get the required libraries and build the native LLM code:
```bash
-go generate ./...
-```
-
-Then build ollama:
-
-```bash
-go build .
+go run build.go
```
Now you can run `ollama`:
@@ -40,6 +34,16 @@ Now you can run `ollama`:
./ollama
```
+### Rebuilding the native code
+
+If at any point you need to rebuild the native code, you can run the
+build.go script again using the `-d` flag to force regeneration of the
+dependencies and, optionally, the `-s` flag to skip building the Go binary:
+
+```bash
+go run build.go -d -s
+```
+
### Linux
#### Linux CUDA (NVIDIA)
@@ -55,16 +59,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
-Then generate dependencies:
-
-```
-go generate ./...
-```
-
Then build the binary:
```
-go build .
+go run build.go
```
#### Linux ROCm (AMD)
@@ -80,21 +78,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
-```
-go generate ./...
-```
-
Then build the binary:
```
-go build .
+go run build.go
```
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
#### Advanced CPU Settings
-By default, running `go generate ./...` will compile a few different variations
+By default, running `go run build.go` will compile a few different variations
of the LLM library based on common CPU families and vector math capabilities,
including a lowest-common-denominator which should run on almost any 64 bit CPU
somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -104,8 +98,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
you might use:
```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
-go build .
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
```
#### Containerized Linux Build
@@ -129,8 +122,7 @@ Then, build the `ollama` binary:
```powershell
$env:CGO_ENABLED="1"
-go generate ./...
-go build .
+go run build.go
```
#### Windows CUDA (NVIDIA)
diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh
index 721a9ae80..7a019942a 100755
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be ./llm/generate/
+# This script is intended to be run by the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
# TODO - add hardening to detect missing tools (cmake, etc.)
@@ -92,10 +92,10 @@ case "${GOARCH}" in
;;
*)
echo "GOARCH must be set"
- echo "this script is meant to be run from within go generate"
+ echo "this script is meant to be run from within 'go run build.go'"
exit 1
;;
esac
cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index 28ce1f21d..b2cd76a6f 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be llm/generate/
+# This script is intended to be run by the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
# First we build one or more CPU based LLM libraries
#
@@ -281,4 +281,4 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
fi
cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index e217a0382..8c07ad065 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -26,26 +26,15 @@ function amdGPUs {
$GPU_LIST -join ';'
}
-
function init_vars {
- if (!$script:SRC_DIR) {
- $script:SRC_DIR = $(resolve-path "..\..\")
- }
- if (!$script:llamacppDir) {
- $script:llamacppDir = "../llama.cpp"
- }
- if (!$script:cmakeTargets) {
- $script:cmakeTargets = @("ollama_llama_server")
- }
+ $script:SRC_DIR = $(resolve-path "..\..\")
+ $script:llamacppDir = "../llama.cpp"
$script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
- "-DLLAMA_NATIVE=off",
- "-DLLAMA_OPENMP=off"
+ "-DLLAMA_NATIVE=off"
)
- $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
- $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
- $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
- md "$script:DIST_BASE" -ea 0 > $null
+ $script:cmakeTargets = @("ollama_llama_server")
+ $script:ARCH = "amd64" # arm not yet supported.
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
@@ -66,6 +55,7 @@ function init_vars {
} else {
$script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
}
+ $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path
$script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
$script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
@@ -123,13 +113,8 @@ function build {
& cmake --version
& cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
- if ($cmakeDefs -contains "-G") {
- $extra=@("-j8")
- } else {
- $extra= @("--", "/p:CL_MPcount=8")
- }
- write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
- & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
+ write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })"
+ & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# Rearrange output to be consistent between different generators
if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
@@ -149,18 +134,21 @@ function sign {
}
}
-function install {
- write-host "Installing binaries to dist dir ${script:distDir}"
- mkdir ${script:distDir} -ErrorAction SilentlyContinue
+function compress {
+ if ($script:GZIP -eq $null) {
+ write-host "gzip not installed, not compressing files"
+ return
+ }
+ write-host "Compressing binaries..."
$binaries = dir "${script:buildDir}/bin/*.exe"
foreach ($file in $binaries) {
- copy-item -Path $file -Destination ${script:distDir} -Force
+ & "$script:GZIP" --best -f $file
}
- write-host "Installing dlls to dist dir ${script:distDir}"
+ write-host "Compressing dlls..."
$dlls = dir "${script:buildDir}/bin/*.dll"
foreach ($file in $dlls) {
- copy-item -Path $file -Destination ${script:distDir} -Force
+ & "$script:GZIP" --best -f $file
}
}
@@ -181,252 +169,132 @@ function cleanup {
}
}
+init_vars
+git_module_setup
+apply_patches
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
+$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-function build_static() {
- if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
- # GCC build for direct linking into the Go binary
- init_vars
- # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
- # as we need this to be compiled by gcc for golang to be able to link with itx
- write-host "Checking for MinGW..."
- # error action ensures we exit on failure
- get-command gcc
- get-command mingw32-make
- $oldTargets = $script:cmakeTargets
- $script:cmakeTargets = @("llama", "ggml")
- $script:cmakeDefs = @(
- "-G", "MinGW Makefiles"
- "-DCMAKE_C_COMPILER=gcc.exe",
- "-DCMAKE_CXX_COMPILER=g++.exe",
- "-DBUILD_SHARED_LIBS=off",
- "-DLLAMA_NATIVE=off",
- "-DLLAMA_AVX=off",
- "-DLLAMA_AVX2=off",
- "-DLLAMA_AVX512=off",
- "-DLLAMA_F16C=off",
- "-DLLAMA_FMA=off",
- "-DLLAMA_OPENMP=off")
- $script:buildDir="../build/windows/${script:ARCH}_static"
- write-host "Building static library"
- build
- $script:cmakeTargets = $oldTargets
- } else {
- write-host "Skipping CPU generation step as requested"
- }
+if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
+
+# GCC build for direct linking into the Go binary
+init_vars
+# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
+# as we need this to be compiled by gcc for golang to be able to link with itx
+write-host "Checking for MinGW..."
+# error action ensures we exit on failure
+get-command gcc
+get-command mingw32-make
+$script:cmakeTargets = @("llama", "ggml")
+$script:cmakeDefs = @(
+ "-G", "MinGW Makefiles"
+ "-DCMAKE_C_COMPILER=gcc.exe",
+ "-DCMAKE_CXX_COMPILER=g++.exe",
+ "-DBUILD_SHARED_LIBS=off",
+ "-DLLAMA_NATIVE=off",
+ "-DLLAMA_AVX=off",
+ "-DLLAMA_AVX2=off",
+ "-DLLAMA_AVX512=off",
+ "-DLLAMA_F16C=off",
+ "-DLLAMA_FMA=off")
+$script:buildDir="../build/windows/${script:ARCH}_static"
+write-host "Building static library"
+build
+
+# remaining llama.cpp builds use MSVC
+ init_vars
+ $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+ $script:buildDir="../build/windows/${script:ARCH}/cpu"
+ write-host "Building LCD CPU"
+ build
+ sign
+ compress
+
+ init_vars
+ $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+ $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+ write-host "Building AVX CPU"
+ build
+ sign
+ compress
+
+ init_vars
+ $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+ $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+ write-host "Building AVX2 CPU"
+ build
+ sign
+ compress
+} else {
+ write-host "Skipping CPU generation step as requested"
}
-function build_cpu($gen_arch) {
- if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
- # remaining llama.cpp builds use MSVC
- init_vars
- $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
- $script:buildDir="../build/windows/${script:ARCH}/cpu"
- $script:distDir="$script:DIST_BASE\cpu"
- write-host "Building LCD CPU"
- build
- sign
- install
- } else {
- write-host "Skipping CPU generation step as requested"
- }
-}
-
-function build_cpu_avx() {
- if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
- init_vars
- $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
- $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
- $script:distDir="$script:DIST_BASE\cpu_avx"
- write-host "Building AVX CPU"
- build
- sign
- install
- } else {
- write-host "Skipping CPU AVX generation step as requested"
- }
-}
-
-function build_cpu_avx2() {
- if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
- init_vars
- $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
- $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
- $script:distDir="$script:DIST_BASE\cpu_avx2"
- write-host "Building AVX2 CPU"
- build
- sign
- install
- } else {
- write-host "Skipping CPU AVX2 generation step as requested"
- }
-}
-
-function build_cuda() {
- if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
- # Then build cuda as a dynamically loaded library
- $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
- $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
- if ($null -ne $script:CUDA_VERSION) {
- $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
- }
- init_vars
- $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
- $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
- $script:cmakeDefs += @(
- "-A", "x64",
- "-DLLAMA_CUDA=ON",
- "-DLLAMA_AVX=on",
- "-DLLAMA_AVX2=off",
- "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
- "-DCMAKE_CUDA_FLAGS=-t8",
- "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
- )
- if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
- write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
- $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
- write-host "building custom CUDA GPU"
- }
- build
- sign
- install
-
- rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
- md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
- write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
- cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
- cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
- cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
- } else {
- write-host "Skipping CUDA generation step"
- }
-}
-
-function build_oneapi() {
- if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
- # Get oneAPI version
- $script:ONEAPI_VERSION = icpx --version
- $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?\d+\.\d+\.\d+)').Value
- if ($null -ne $script:ONEAPI_VERSION) {
- $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
+if ($null -ne $script:CUDA_LIB_DIR) {
+ # Then build cuda as a dynamically loaded library
+ $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+ $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+ if ($null -ne $script:CUDA_VERSION) {
+ $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
- $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
- $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
- $script:cmakeDefs += @(
- "-G", "MinGW Makefiles",
- "-DLLAMA_SYCL=ON",
- "-DCMAKE_C_COMPILER=icx",
- "-DCMAKE_CXX_COMPILER=icx",
- "-DCMAKE_BUILD_TYPE=Release"
- )
+ $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+ $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+ if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+ write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+ $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+ write-host "building custom CUDA GPU"
+ }
+ build
+ sign
+ compress
+}
- Write-Host "Building oneAPI"
+if ($null -ne $env:HIP_PATH) {
+ $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+ if ($null -ne $script:ROCM_VERSION) {
+ $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+ }
+
+ init_vars
+ $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+ $script:cmakeDefs += @(
+ "-G", "Ninja",
+ "-DCMAKE_C_COMPILER=clang.exe",
+ "-DCMAKE_CXX_COMPILER=clang++.exe",
+ "-DLLAMA_HIPBLAS=on",
+ "-DHIP_PLATFORM=amd",
+ "-DLLAMA_AVX=on",
+ "-DLLAMA_AVX2=off",
+ "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+ "-DAMDGPU_TARGETS=$(amdGPUs)",
+ "-DGPU_TARGETS=$(amdGPUs)"
+ )
+
+ # Make sure the ROCm binary dir is first in the path
+ $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+ # We have to clobber the LIB var from the developer shell for clang to work properly
+ $env:LIB=""
+ if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+ write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+ $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+ write-host "building custom ROCM GPU"
+ }
+ write-host "Building ROCm"
build
# Ninja doesn't prefix with config name
+ ${script:config}=""
if ($null -ne $script:DUMPBIN) {
- & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
+ & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
}
sign
- install
-
- rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
- } else {
- Write-Host "Skipping oneAPI generation step"
- }
+ compress
}
-function build_rocm() {
- if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
- $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
- if ($null -ne $script:ROCM_VERSION) {
- $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
- }
- init_vars
- $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
- $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
- $script:cmakeDefs += @(
- "-G", "Ninja",
- "-DCMAKE_C_COMPILER=clang.exe",
- "-DCMAKE_CXX_COMPILER=clang++.exe",
- "-DLLAMA_HIPBLAS=on",
- "-DHIP_PLATFORM=amd",
- "-DLLAMA_AVX=on",
- "-DLLAMA_AVX2=off",
- "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
- "-DAMDGPU_TARGETS=$(amdGPUs)",
- "-DGPU_TARGETS=$(amdGPUs)"
- )
-
- # Make sure the ROCm binary dir is first in the path
- $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
- # We have to clobber the LIB var from the developer shell for clang to work properly
- $env:LIB=""
- if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
- write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
- $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
- write-host "building custom ROCM GPU"
- }
- write-host "Building ROCm"
- build
- # Ninja doesn't prefix with config name
- ${script:config}=""
- if ($null -ne $script:DUMPBIN) {
- & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
- }
- sign
- install
-
- # Assumes v5.7, may need adjustments for v6
- rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
- md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
- cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
- cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
- # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
- cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
- } else {
- write-host "Skipping ROCm generation step"
- }
-}
-
-init_vars
-if ($($args.count) -eq 0) {
- git_module_setup
- apply_patches
- build_static
- if ($script:ARCH -eq "arm64") {
- build_cpu("ARM64")
- } else { # amd64
- build_cpu("x64")
- build_cpu_avx
- build_cpu_avx2
- build_cuda
- build_oneapi
- build_rocm
- }
-
- cleanup
- write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
-} else {
- for ( $i = 0; $i -lt $args.count; $i++ ) {
- write-host "performing $($args[$i])"
- & $($args[$i])
- }
-}
\ No newline at end of file
+cleanup
+write-host "`ncode generation completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
diff --git a/llm/generate/generate_darwin.go b/llm/generate/generate_darwin.go
deleted file mode 100644
index 776852342..000000000
--- a/llm/generate/generate_darwin.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_darwin.sh
diff --git a/llm/generate/generate_linux.go b/llm/generate/generate_linux.go
deleted file mode 100644
index 2b7e116db..000000000
--- a/llm/generate/generate_linux.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_linux.sh
diff --git a/llm/generate/generate_windows.go b/llm/generate/generate_windows.go
deleted file mode 100644
index d2ee5428a..000000000
--- a/llm/generate/generate_windows.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
diff --git a/main.go b/main.go
index 158f0063c..4f7e26158 100644
--- a/main.go
+++ b/main.go
@@ -1,5 +1,7 @@
package main
+//go:generate go run build.go -g -s
+
import (
"context"