Compare commits
86 Commits
whitespace
...
mxyng/tune
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39374984ef | ||
|
|
daf928fe1a | ||
|
|
2ada81e068 | ||
|
|
f678f5c5c3 | ||
|
|
2cb74e23fb | ||
|
|
3c8df3808b | ||
|
|
7d564835c2 | ||
|
|
72431031d9 | ||
|
|
6041abb5b2 | ||
|
|
6c5ccb11f9 | ||
|
|
2e20110e50 | ||
|
|
82ddc3e441 | ||
|
|
d481fb3cc8 | ||
|
|
23ee633252 | ||
|
|
23ebe8fe11 | ||
|
|
2c017ca441 | ||
|
|
be330174dd | ||
|
|
0ded7fdc4b | ||
|
|
2103a5073c | ||
|
|
ce9f7c4674 | ||
|
|
e5596c1944 | ||
|
|
9bc3fee694 | ||
|
|
21347e1ed6 | ||
|
|
3b4bab3dc5 | ||
|
|
cbd6e3b38e | ||
|
|
b830afa716 | ||
|
|
bd1d8b0d14 | ||
|
|
25c2912120 | ||
|
|
0e19476b56 | ||
|
|
fa2f2b3563 | ||
|
|
cbf4970e0f | ||
|
|
74468513bd | ||
|
|
794a916a72 | ||
|
|
76e5d9ec88 | ||
|
|
076237b8ea | ||
|
|
53d694c67f | ||
|
|
5aa6bfea94 | ||
|
|
1cde63dd64 | ||
|
|
98e0b7e94f | ||
|
|
061e8f6abc | ||
|
|
a189810df6 | ||
|
|
e95b896790 | ||
|
|
1f087c4d26 | ||
|
|
5d7ea6616f | ||
|
|
2a4b128ae3 | ||
|
|
fc483274ad | ||
|
|
fd10a2ad4b | ||
|
|
b291f63188 | ||
|
|
f58856bf6f | ||
|
|
275ea01587 | ||
|
|
8782dd5628 | ||
|
|
11bfff8ee1 | ||
|
|
7c0167a8f6 | ||
|
|
74d898e37d | ||
|
|
c6e8b00718 | ||
|
|
be9980ef13 | ||
|
|
646a0dedb9 | ||
|
|
7f964d938c | ||
|
|
e6b8a139ff | ||
|
|
bdc0ea1ba5 | ||
|
|
7fab7918cc | ||
|
|
74c1bdba0d | ||
|
|
f983ef7f5f | ||
|
|
1ae1c33651 | ||
|
|
084d846621 | ||
|
|
6a4b994433 | ||
|
|
bea007deb7 | ||
|
|
074934be03 | ||
|
|
0de12368a0 | ||
|
|
917bd61084 | ||
|
|
efe040f8c0 | ||
|
|
2a7553ce09 | ||
|
|
10af6070a9 | ||
|
|
92423b0600 | ||
|
|
b3eac61cac | ||
|
|
287ba11500 | ||
|
|
63861f58cc | ||
|
|
f0425d3de9 | ||
|
|
210b65268e | ||
|
|
949d7b1c48 | ||
|
|
897b213468 | ||
|
|
4613a080e7 | ||
|
|
ace2cdf1c6 | ||
|
|
eed92bc19a | ||
|
|
e0a2f46466 | ||
|
|
01ff2e14db |
43
.github/workflows/test.yaml
vendored
43
.github/workflows/test.yaml
vendored
@@ -2,17 +2,22 @@ name: test
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- '**/*'
|
||||
- '!docs/**'
|
||||
- '!examples/**'
|
||||
- '!README.md'
|
||||
|
||||
jobs:
|
||||
generate:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64, arm64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-latest
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
@@ -21,10 +26,21 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21'
|
||||
go-version: '1.22'
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
$gopath=(get-command go).source | split-path -parent
|
||||
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
|
||||
cd $env:GITHUB_WORKSPACE
|
||||
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
|
||||
$env:PATH="$gopath;$env:PATH"
|
||||
go generate -x ./...
|
||||
if: ${{ startsWith(matrix.os, 'windows-') }}
|
||||
name: "Windows Go Generate"
|
||||
- run: go generate -x ./...
|
||||
if: ${{ ! startsWith(matrix.os, 'windows-') }}
|
||||
name: "Unix Go Generate"
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
|
||||
@@ -46,7 +62,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.21'
|
||||
go-version: '1.22'
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -62,7 +78,6 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
rocm-version:
|
||||
- '5.7.1'
|
||||
- '6.0'
|
||||
runs-on: linux
|
||||
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
|
||||
@@ -76,7 +91,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.21'
|
||||
go-version: '1.22'
|
||||
cache: true
|
||||
- run: go get ./...
|
||||
- run: |
|
||||
@@ -91,26 +106,26 @@ jobs:
|
||||
lint:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64, arm64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-latest
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
- os: macos-latest
|
||||
arch: amd64
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: "1"
|
||||
CGO_ENABLED: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21'
|
||||
go-version: '1.22'
|
||||
cache: false
|
||||
- run: |
|
||||
mkdir -p llm/llama.cpp/build/linux/${{ matrix.arch }}/stub/lib/
|
||||
@@ -130,24 +145,24 @@ jobs:
|
||||
needs: generate
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
arch: [amd64]
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
arch: arm64
|
||||
- os: windows-latest
|
||||
- os: windows-2019
|
||||
arch: arm64
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: "1"
|
||||
CGO_ENABLED: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21'
|
||||
go-version: '1.22'
|
||||
cache: true
|
||||
- run: go get
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
30
Dockerfile
30
Dockerfile
@@ -1,6 +1,7 @@
|
||||
ARG GOLANG_VERSION=1.21.3
|
||||
ARG GOLANG_VERSION=1.22.1
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
ARG ROCM_VERSION=6.0
|
||||
|
||||
# Copy the minimal context we need to run the generate scripts
|
||||
FROM scratch AS llm-code
|
||||
@@ -28,7 +29,7 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./scripts/rh_linux_deps.sh /
|
||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||
@@ -39,18 +40,14 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
ARG AMDGPU_TARGETS
|
||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||
RUN mkdir /tmp/scratch && \
|
||||
for dep in $(cat /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
|
||||
cp ${dep} /tmp/scratch/ || exit 1 ; \
|
||||
done && \
|
||||
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
|
||||
mkdir -p /go/src/github.com/jmorganca/ollama/dist/deps/ && \
|
||||
(cd /tmp/scratch/ && tar czvf /go/src/github.com/jmorganca/ollama/dist/deps/rocm-amd64-deps.tgz . )
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./scripts/rh_linux_deps.sh /
|
||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||
ARG CGO_CFLAGS
|
||||
ARG AMDGPU_TARGETS
|
||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
|
||||
ARG CMAKE_VERSION
|
||||
@@ -91,8 +88,8 @@ COPY . .
|
||||
COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/deps/
|
||||
ARG GOFLAGS
|
||||
ARG CGO_CFLAGS
|
||||
RUN go build .
|
||||
@@ -117,7 +114,7 @@ RUN apt-get update && apt-get install -y ca-certificates
|
||||
COPY --from=build-arm64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
|
||||
|
||||
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete as runtime-rocm
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
|
||||
RUN update-pciids
|
||||
COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
|
||||
EXPOSE 11434
|
||||
@@ -132,6 +129,7 @@ ENV OLLAMA_HOST 0.0.0.0
|
||||
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
ENTRYPOINT ["/bin/ollama"]
|
||||
CMD ["serve"]
|
||||
|
||||
18
README.md
18
README.md
@@ -62,6 +62,8 @@ Here are some example models that can be downloaded:
|
||||
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
|
||||
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
|
||||
| LLaVA | 7B | 4.5GB | `ollama run llava` |
|
||||
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
|
||||
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
|
||||
|
||||
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
|
||||
@@ -258,19 +260,26 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
### Web & Desktop
|
||||
|
||||
- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
|
||||
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
|
||||
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
|
||||
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
|
||||
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
|
||||
- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
|
||||
- [Open WebUI](https://github.com/open-webui/open-webui)
|
||||
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
|
||||
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
|
||||
- [big-AGI](https://github.com/enricoros/big-AGI/blob/main/docs/config-local-ollama.md)
|
||||
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
|
||||
- [Amica](https://github.com/semperai/amica)
|
||||
- [chatd](https://github.com/BruceMacD/chatd)
|
||||
- [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI)
|
||||
- [MindMac](https://mindmac.app)
|
||||
- [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui)
|
||||
- [Msty](https://msty.app)
|
||||
- [Chatbox](https://github.com/Bin-Huang/Chatbox)
|
||||
- [WinForm Ollama Copilot](https://github.com/tgraupmann/WinForm_Ollama_Copilot)
|
||||
- [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama)
|
||||
- [Odin Runes](https://github.com/leonid20000/OdinRunes)
|
||||
- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
|
||||
|
||||
### Terminal
|
||||
|
||||
@@ -301,6 +310,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
|
||||
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
|
||||
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
|
||||
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
|
||||
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
|
||||
- [LangChain4j](https://github.com/langchain4j/langchain4j/tree/main/langchain4j-ollama)
|
||||
- [LiteLLM](https://github.com/BerriAI/litellm)
|
||||
@@ -315,8 +325,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [LangChainDart](https://github.com/davidmigloz/langchain_dart)
|
||||
- [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama)
|
||||
- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
|
||||
- [Elixir LangChain](https://github.com/brainlid/langchain)
|
||||
- [Ollama for R - rollama](https://github.com/JBGruber/rollama)
|
||||
- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
|
||||
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
|
||||
|
||||
### Mobile
|
||||
|
||||
@@ -330,6 +342,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Continue](https://github.com/continuedev/continue)
|
||||
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
|
||||
- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
|
||||
- [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin)
|
||||
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
|
||||
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
|
||||
- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
|
||||
@@ -337,6 +350,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
|
||||
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
|
||||
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
|
||||
- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
|
||||
- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
|
||||
- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
|
||||
- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
|
||||
- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
|
||||
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
|
||||
type Client struct {
|
||||
base *url.URL
|
||||
http http.Client
|
||||
http *http.Client
|
||||
}
|
||||
|
||||
func checkError(resp *http.Response, body []byte) error {
|
||||
@@ -66,30 +66,13 @@ func ClientFromEnvironment() (*Client, error) {
|
||||
}
|
||||
}
|
||||
|
||||
client := Client{
|
||||
return &Client{
|
||||
base: &url.URL{
|
||||
Scheme: scheme,
|
||||
Host: net.JoinHostPort(host, port),
|
||||
},
|
||||
}
|
||||
|
||||
mockRequest, err := http.NewRequest(http.MethodHead, client.base.String(), nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
proxyURL, err := http.ProxyFromEnvironment(mockRequest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client.http = http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
|
||||
return &client, nil
|
||||
http: http.DefaultClient,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
|
||||
|
||||
@@ -83,7 +83,7 @@ type Metrics struct {
|
||||
EvalDuration time.Duration `json:"eval_duration,omitempty"`
|
||||
}
|
||||
|
||||
// Options specfied in GenerateRequest, if you add a new option here add it to the API docs also
|
||||
// Options specified in GenerateRequest, if you add a new option here add it to the API docs also
|
||||
type Options struct {
|
||||
Runner
|
||||
|
||||
@@ -121,7 +121,6 @@ type Runner struct {
|
||||
VocabOnly bool `json:"vocab_only,omitempty"`
|
||||
UseMMap bool `json:"use_mmap,omitempty"`
|
||||
UseMLock bool `json:"use_mlock,omitempty"`
|
||||
EmbeddingOnly bool `json:"embedding_only,omitempty"`
|
||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
@@ -395,7 +394,6 @@ func DefaultOptions() Options {
|
||||
UseMLock: false,
|
||||
UseMMap: true,
|
||||
UseNUMA: false,
|
||||
EmbeddingOnly: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,20 +34,6 @@ type UpdateResponse struct {
|
||||
UpdateVersion string `json:"version"`
|
||||
}
|
||||
|
||||
func getClient(req *http.Request) http.Client {
|
||||
proxyURL, err := http.ProxyFromEnvironment(req)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to handle proxy: %s", err))
|
||||
return http.Client{}
|
||||
}
|
||||
|
||||
return http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
var updateResp UpdateResponse
|
||||
|
||||
@@ -83,10 +69,9 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
}
|
||||
req.Header.Set("Authorization", signature)
|
||||
req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
||||
client := getClient(req)
|
||||
|
||||
slog.Debug("checking for available update", "requestURL", requestURL)
|
||||
resp, err := client.Do(req)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to check for update: %s", err))
|
||||
return false, updateResp
|
||||
@@ -101,6 +86,11 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
|
||||
return false, updateResp
|
||||
}
|
||||
err = json.Unmarshal(body, &updateResp)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("malformed response checking for update: %s", err))
|
||||
@@ -119,8 +109,8 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
client := getClient(req)
|
||||
resp, err := client.Do(req)
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
@@ -151,7 +141,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
|
||||
cleanupOldDownloads()
|
||||
|
||||
req.Method = http.MethodGet
|
||||
resp, err = client.Do(req)
|
||||
resp, err = http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error checking update: %w", err)
|
||||
}
|
||||
|
||||
@@ -49,9 +49,6 @@ SetupLogging=yes
|
||||
CloseApplications=yes
|
||||
RestartApplications=no
|
||||
|
||||
; Make sure they can at least download llama2 as a minimum
|
||||
ExtraDiskSpaceRequired=3826806784
|
||||
|
||||
; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
|
||||
WizardSmallImageFile=.\assets\setup.bmp
|
||||
|
||||
@@ -94,6 +91,14 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||
Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
|
||||
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
|
||||
; Assumes v5.7, may need adjustments for v6
|
||||
#if GetEnv("HIP_PATH") != ""
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
|
||||
; amdhip64.dll dependency comes from the driver and must be installed already
|
||||
Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
|
||||
#endif
|
||||
|
||||
|
||||
[Icons]
|
||||
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
|
||||
@@ -116,7 +121,8 @@ Filename: "{cmd}"; Parameters: "/c timeout 5"; Flags: runhidden
|
||||
Type: filesandordirs; Name: "{%TEMP}\ollama*"
|
||||
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Ollama"
|
||||
Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
|
||||
Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
|
||||
; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved
|
||||
|
||||
[Messages]
|
||||
|
||||
152
cmd/cmd.go
152
cmd/cmd.go
@@ -1,6 +1,7 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
@@ -87,22 +88,82 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
path = filepath.Join(filepath.Dir(filename), path)
|
||||
}
|
||||
|
||||
bin, err := os.Open(path)
|
||||
fi, err := os.Stat(path)
|
||||
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, bin); err != nil {
|
||||
// TODO make this work w/ adapters
|
||||
if fi.IsDir() {
|
||||
tf, err := os.CreateTemp("", "ollama-tf")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
bin.Seek(0, io.SeekStart)
|
||||
defer os.RemoveAll(tf.Name())
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
||||
zf := zip.NewWriter(tf)
|
||||
|
||||
files, err := filepath.Glob(filepath.Join(path, "model-*.safetensors"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(files) == 0 {
|
||||
return fmt.Errorf("no safetensors files were found in '%s'", path)
|
||||
}
|
||||
|
||||
// add the safetensor config file + tokenizer
|
||||
files = append(files, filepath.Join(path, "config.json"))
|
||||
files = append(files, filepath.Join(path, "added_tokens.json"))
|
||||
files = append(files, filepath.Join(path, "tokenizer.model"))
|
||||
|
||||
for _, fn := range files {
|
||||
f, err := os.Open(fn)
|
||||
if os.IsNotExist(err) && strings.HasSuffix(fn, "added_tokens.json") {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h, err := zip.FileInfoHeader(fi)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
h.Name = filepath.Base(fn)
|
||||
h.Method = zip.Store
|
||||
|
||||
w, err := zf.CreateHeader(h)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = io.Copy(w, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if err := zf.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := tf.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
path = tf.Name()
|
||||
}
|
||||
|
||||
digest, err := createBlob(cmd, client, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -141,6 +202,26 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
||||
bin, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, bin); err != nil {
|
||||
return "", err
|
||||
}
|
||||
bin.Seek(0, io.SeekStart)
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
@@ -718,39 +799,36 @@ func initializeKeypair() error {
|
||||
_, err = os.Stat(privKeyPath)
|
||||
if os.IsNotExist(err) {
|
||||
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
|
||||
_, privKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
privKeyBytes, err := format.OpenSSHPrivateKey(privKey, "")
|
||||
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = os.MkdirAll(filepath.Dir(privKeyPath), 0o755)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
|
||||
return fmt.Errorf("could not create directory %w", err)
|
||||
}
|
||||
|
||||
err = os.WriteFile(privKeyPath, pem.EncodeToMemory(privKeyBytes), 0o600)
|
||||
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sshPrivateKey, err := ssh.NewSignerFromKey(privKey)
|
||||
if err != nil {
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
|
||||
|
||||
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pubKeyData := ssh.MarshalAuthorizedKey(sshPrivateKey.PublicKey())
|
||||
|
||||
err = os.WriteFile(pubKeyPath, pubKeyData, 0o644)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", string(pubKeyData))
|
||||
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -809,6 +887,14 @@ func versionHandler(cmd *cobra.Command, _ []string) {
|
||||
}
|
||||
}
|
||||
|
||||
func appendHostEnvDocs(cmd *cobra.Command) {
|
||||
const hostEnvDocs = `
|
||||
Environment Variables:
|
||||
OLLAMA_HOST The host:port or base URL of the Ollama server (e.g. http://localhost:11434)
|
||||
`
|
||||
cmd.SetUsageTemplate(cmd.UsageTemplate() + hostEnvDocs)
|
||||
}
|
||||
|
||||
func NewCLI() *cobra.Command {
|
||||
log.SetFlags(log.LstdFlags | log.Lshortfile)
|
||||
cobra.EnableCommandSorting = false
|
||||
@@ -874,7 +960,6 @@ func NewCLI() *cobra.Command {
|
||||
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
|
||||
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
|
||||
runCmd.Flags().String("format", "", "Response format (e.g. json)")
|
||||
|
||||
serveCmd := &cobra.Command{
|
||||
Use: "serve",
|
||||
Aliases: []string{"start"},
|
||||
@@ -882,6 +967,13 @@ func NewCLI() *cobra.Command {
|
||||
Args: cobra.ExactArgs(0),
|
||||
RunE: RunServer,
|
||||
}
|
||||
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
||||
Environment Variables:
|
||||
|
||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
||||
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
||||
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
||||
`)
|
||||
|
||||
pullCmd := &cobra.Command{
|
||||
Use: "pull MODEL",
|
||||
@@ -910,7 +1002,6 @@ func NewCLI() *cobra.Command {
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ListHandler,
|
||||
}
|
||||
|
||||
copyCmd := &cobra.Command{
|
||||
Use: "cp SOURCE TARGET",
|
||||
Short: "Copy a model",
|
||||
@@ -927,6 +1018,19 @@ func NewCLI() *cobra.Command {
|
||||
RunE: DeleteHandler,
|
||||
}
|
||||
|
||||
for _, cmd := range []*cobra.Command{
|
||||
createCmd,
|
||||
showCmd,
|
||||
runCmd,
|
||||
pullCmd,
|
||||
pushCmd,
|
||||
listCmd,
|
||||
copyCmd,
|
||||
deleteCmd,
|
||||
} {
|
||||
appendHostEnvDocs(cmd)
|
||||
}
|
||||
|
||||
rootCmd.AddCommand(
|
||||
serveCmd,
|
||||
createCmd,
|
||||
|
||||
331
convert/convert.go
Normal file
331
convert/convert.go
Normal file
@@ -0,0 +1,331 @@
|
||||
package convert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/jmorganca/ollama/convert/sentencepiece"
|
||||
"github.com/jmorganca/ollama/llm"
|
||||
)
|
||||
|
||||
type Params struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
VocabSize int `json:"vocab_size"`
|
||||
HiddenSize int `json:"hidden_size"` // n_embd
|
||||
HiddenLayers int `json:"num_hidden_layers"` // n_layer
|
||||
ContextSize int `json:"max_position_embeddings"`
|
||||
IntermediateSize int `json:"intermediate_size"`
|
||||
AttentionHeads int `json:"num_attention_heads"` // n_head
|
||||
KeyValHeads int `json:"num_key_value_heads"`
|
||||
NormEPS float64 `json:"rms_norm_eps"`
|
||||
RopeFreqBase float64 `json:"rope_theta"`
|
||||
BoSTokenID int `json:"bos_token_id"`
|
||||
EoSTokenID int `json:"eos_token_id"`
|
||||
}
|
||||
|
||||
type MetaData struct {
|
||||
Type string `mapstructure:"dtype"`
|
||||
Shape []int `mapstructure:"shape"`
|
||||
Offsets []int `mapstructure:"data_offsets"`
|
||||
}
|
||||
|
||||
func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
|
||||
f, err := os.Open(fn)
|
||||
if err != nil {
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var jsonSize uint64
|
||||
binary.Read(f, binary.LittleEndian, &jsonSize)
|
||||
|
||||
buf := make([]byte, jsonSize)
|
||||
_, err = io.ReadFull(f, buf)
|
||||
if err != nil {
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
|
||||
d := json.NewDecoder(bytes.NewBuffer(buf))
|
||||
d.UseNumber()
|
||||
var parsed map[string]interface{}
|
||||
if err = d.Decode(&parsed); err != nil {
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
|
||||
var keys []string
|
||||
for k := range parsed {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
slices.Sort(keys)
|
||||
|
||||
slog.Info("converting layers")
|
||||
|
||||
var tensors []llm.Tensor
|
||||
for _, k := range keys {
|
||||
vals := parsed[k].(map[string]interface{})
|
||||
var data MetaData
|
||||
if err = mapstructure.Decode(vals, &data); err != nil {
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
|
||||
var size uint64
|
||||
var kind uint32
|
||||
switch len(data.Shape) {
|
||||
case 0:
|
||||
// metadata
|
||||
continue
|
||||
case 1:
|
||||
// convert to float32
|
||||
kind = 0
|
||||
size = uint64(data.Shape[0] * 4)
|
||||
case 2:
|
||||
// convert to float16
|
||||
kind = 1
|
||||
size = uint64(data.Shape[0] * data.Shape[1] * 2)
|
||||
}
|
||||
|
||||
ggufName, err := GetTensorName(k)
|
||||
if err != nil {
|
||||
slog.Error("%v", err)
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
|
||||
shape := [4]uint64{0, 0, 0, 0}
|
||||
for cnt, s := range data.Shape {
|
||||
shape[cnt] = uint64(s)
|
||||
}
|
||||
|
||||
t := llm.Tensor{
|
||||
Name: ggufName,
|
||||
Kind: kind,
|
||||
Offset: offset,
|
||||
Shape: shape,
|
||||
FileName: fn,
|
||||
OffsetPadding: 8 + jsonSize,
|
||||
FileOffsets: []uint64{uint64(data.Offsets[0]), uint64(data.Offsets[1])},
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("%v", t))
|
||||
tensors = append(tensors, t)
|
||||
offset += size
|
||||
}
|
||||
return tensors, offset, nil
|
||||
}
|
||||
|
||||
func GetSafeTensors(dirpath string) ([]llm.Tensor, error) {
|
||||
var tensors []llm.Tensor
|
||||
files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
|
||||
if err != nil {
|
||||
return []llm.Tensor{}, err
|
||||
}
|
||||
|
||||
var offset uint64
|
||||
for _, f := range files {
|
||||
var t []llm.Tensor
|
||||
var err error
|
||||
t, offset, err = ReadSafeTensors(f, offset)
|
||||
if err != nil {
|
||||
slog.Error("%v", err)
|
||||
return []llm.Tensor{}, err
|
||||
}
|
||||
tensors = append(tensors, t...)
|
||||
}
|
||||
return tensors, nil
|
||||
}
|
||||
|
||||
func GetParams(dirpath string) (*Params, error) {
|
||||
f, err := os.Open(filepath.Join(dirpath, "config.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var params Params
|
||||
|
||||
d := json.NewDecoder(f)
|
||||
err = d.Decode(¶ms)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ¶ms, nil
|
||||
}
|
||||
|
||||
// Details on gguf's tokenizer can be found at:
|
||||
// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
|
||||
type Vocab struct {
|
||||
Tokens []string
|
||||
Scores []float32
|
||||
Types []int32
|
||||
}
|
||||
|
||||
func LoadTokens(dirpath string) (*Vocab, error) {
|
||||
slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
|
||||
in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// To regenerate sentencepiece from the protobufs use:
|
||||
// protoc -I=./ --go_out=./ sentencepiece_model.proto
|
||||
modelProto := &sentencepiece.ModelProto{}
|
||||
if err := proto.Unmarshal(in, modelProto); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
v := &Vocab{
|
||||
Tokens: make([]string, 0),
|
||||
Scores: make([]float32, 0),
|
||||
Types: make([]int32, 0),
|
||||
}
|
||||
|
||||
pieces := modelProto.GetPieces()
|
||||
for _, p := range pieces {
|
||||
v.Tokens = append(v.Tokens, p.GetPiece())
|
||||
v.Scores = append(v.Scores, p.GetScore())
|
||||
t := p.GetType()
|
||||
v.Types = append(v.Types, int32(t))
|
||||
}
|
||||
|
||||
slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
|
||||
|
||||
// add any additional tokens
|
||||
addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
|
||||
if os.IsNotExist(err) {
|
||||
return v, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
slog.Info("reading user defined tokens")
|
||||
|
||||
var extraTokenData map[string]int
|
||||
if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
type token struct {
|
||||
key string
|
||||
pos int
|
||||
}
|
||||
|
||||
extraTokens := make([]token, 0)
|
||||
for k, id := range extraTokenData {
|
||||
extraTokens = append(extraTokens, token{k, id})
|
||||
}
|
||||
|
||||
slices.SortFunc(extraTokens, func(a, b token) int {
|
||||
return cmp.Compare(a.pos, b.pos)
|
||||
})
|
||||
|
||||
numToks := len(v.Tokens)
|
||||
|
||||
for cnt, t := range extraTokens {
|
||||
// the token id should match the specific index for the total number of tokens
|
||||
if t.pos != cnt+numToks {
|
||||
return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
|
||||
}
|
||||
v.Tokens = append(v.Tokens, t.key)
|
||||
v.Scores = append(v.Scores, -1000.0)
|
||||
v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
|
||||
}
|
||||
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
|
||||
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func GetTensorName(n string) (string, error) {
|
||||
tMap := map[string]string{
|
||||
"model.embed_tokens.weight": "token_embd.weight",
|
||||
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
|
||||
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
|
||||
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
|
||||
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
|
||||
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
|
||||
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
|
||||
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
|
||||
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
|
||||
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
|
||||
"lm_head.weight": "output.weight",
|
||||
"model.norm.weight": "output_norm.weight",
|
||||
}
|
||||
|
||||
v, ok := tMap[n]
|
||||
if ok {
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// quick hack to rename the layers to gguf format
|
||||
for k, v := range tMap {
|
||||
re := regexp.MustCompile(k)
|
||||
newName := re.ReplaceAllString(n, v)
|
||||
if newName != n {
|
||||
return newName, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
|
||||
}
|
||||
|
||||
func WriteGGUF(name string, tensors []llm.Tensor, params *Params, vocab *Vocab) (string, error) {
|
||||
c := llm.ContainerGGUF{
|
||||
ByteOrder: binary.LittleEndian,
|
||||
}
|
||||
|
||||
m := llm.NewGGUFModel(&c)
|
||||
m.Tensors = tensors
|
||||
m.KV["general.architecture"] = "llama"
|
||||
m.KV["general.name"] = name
|
||||
m.KV["llama.context_length"] = uint32(params.ContextSize)
|
||||
m.KV["llama.embedding_length"] = uint32(params.HiddenSize)
|
||||
m.KV["llama.block_count"] = uint32(params.HiddenLayers)
|
||||
m.KV["llama.feed_forward_length"] = uint32(params.IntermediateSize)
|
||||
m.KV["llama.rope.dimension_count"] = uint32(128)
|
||||
m.KV["llama.attention.head_count"] = uint32(params.AttentionHeads)
|
||||
m.KV["llama.attention.head_count_kv"] = uint32(params.KeyValHeads)
|
||||
m.KV["llama.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
|
||||
m.KV["llama.rope.freq_base"] = float32(params.RopeFreqBase)
|
||||
m.KV["general.file_type"] = uint32(1)
|
||||
m.KV["tokenizer.ggml.model"] = "llama"
|
||||
|
||||
m.KV["tokenizer.ggml.tokens"] = vocab.Tokens
|
||||
m.KV["tokenizer.ggml.scores"] = vocab.Scores
|
||||
m.KV["tokenizer.ggml.token_type"] = vocab.Types
|
||||
|
||||
m.KV["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
|
||||
m.KV["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
|
||||
m.KV["tokenizer.ggml.unknown_token_id"] = uint32(0)
|
||||
m.KV["tokenizer.ggml.add_bos_token"] = true
|
||||
m.KV["tokenizer.ggml.add_eos_token"] = false
|
||||
|
||||
// llamacpp sets the chat template, however we don't need to set it since we pass it in through a layer
|
||||
// m.KV["tokenizer.chat_template"] = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" // XXX removeme
|
||||
|
||||
c.V3.NumTensor = uint64(len(tensors))
|
||||
c.V3.NumKV = uint64(len(m.KV))
|
||||
|
||||
f, err := os.CreateTemp("", "ollama-gguf")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
err = m.Encode(f)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return f.Name(), nil
|
||||
}
|
||||
1497
convert/sentencepiece/sentencepiece_model.pb.go
Normal file
1497
convert/sentencepiece/sentencepiece_model.pb.go
Normal file
File diff suppressed because it is too large
Load Diff
333
convert/sentencepiece_model.proto
Normal file
333
convert/sentencepiece_model.proto
Normal file
@@ -0,0 +1,333 @@
|
||||
// Copyright 2016 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.!
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
// TODO(taku): Needs to use LITE RUNTIME in OSS release.
|
||||
option optimize_for = LITE_RUNTIME;
|
||||
option go_package = "./sentencepiece";
|
||||
|
||||
package sentencepiece;
|
||||
|
||||
// TrainerSpec encodes a various parameters for SentencePiece training.
|
||||
// Next id: 55
|
||||
message TrainerSpec {
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// General parameters
|
||||
//
|
||||
// Input corpus files.
|
||||
// Trainer accepts the following two formats:
|
||||
// A) Monolingual: plain text, one sentence per line.
|
||||
// B) Bilingual: TSV, source sentence <tab> target sentence
|
||||
// When bilingual data is passed, shared vocabulary model is built.
|
||||
// Note that the input file must be raw corpus, not a preprocessed corpus.
|
||||
// Trainer only loads the first `input_sentence_size` sentences specified
|
||||
// with this parameter.
|
||||
repeated string input = 1;
|
||||
|
||||
// Input corpus format:
|
||||
// "text": one-sentence-per-line text format (default)
|
||||
// "tsv": sentence <tab> freq
|
||||
optional string input_format = 7;
|
||||
|
||||
// Output model file prefix.
|
||||
// <model_prefix>.model and <model_prefix>.vocab are generated.
|
||||
optional string model_prefix = 2;
|
||||
|
||||
// Model type. only have UNIGRAM now.
|
||||
enum ModelType {
|
||||
UNIGRAM = 1; // Unigram language model with dynamic algorithm
|
||||
BPE = 2; // Byte Pair Encoding
|
||||
WORD = 3; // Delimitered by whitespace.
|
||||
CHAR = 4; // tokenizes into character sequence
|
||||
}
|
||||
optional ModelType model_type = 3 [default = UNIGRAM];
|
||||
|
||||
// Vocabulary size. 8k is the default size.
|
||||
optional int32 vocab_size = 4 [default = 8000];
|
||||
|
||||
// List of the languages this model can accept.
|
||||
// Since the model is language-agnostic, this field is used as a reference.
|
||||
repeated string accept_language = 5;
|
||||
|
||||
// Size of self-test samples, which are encoded in the model file.
|
||||
optional int32 self_test_sample_size = 6 [default = 0];
|
||||
|
||||
// Whether to use DP version of sentencepiece. Use it with TSV input format
|
||||
// (requires precomputed word tab counts to work).
|
||||
optional bool enable_differential_privacy = 50 [default = false];
|
||||
// Set these parameters if you need DP version of sentencepiece.
|
||||
// std of noise to add.
|
||||
optional float differential_privacy_noise_level = 51 [default = 0.0];
|
||||
// Clipping threshold to apply after adding noise. All the words with
|
||||
// frequency less than this value are dropped.
|
||||
optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Training parameters.
|
||||
//
|
||||
// Uses characters which cover the corpus with the ratio of `chars_coverage`.
|
||||
// This parameter determines the set of basic Alphabet of sentence piece.
|
||||
// 1.0 - `chars_coverage` characters are treated as UNK.
|
||||
// See also required_chars field.
|
||||
optional float character_coverage = 10 [default = 0.9995];
|
||||
|
||||
// Maximum size of sentences the trainer loads from `input` parameter.
|
||||
// Trainer simply loads the `input` files in sequence.
|
||||
// It is better to shuffle the input corpus randomly.
|
||||
optional uint64 input_sentence_size = 11 [default = 0];
|
||||
optional bool shuffle_input_sentence = 19 [default = true];
|
||||
|
||||
// Maximum size of sentences to make seed sentence pieces.
|
||||
// Extended suffix array is constructed to extract frequent
|
||||
// sub-strings from the corpus. This uses 20N working space,
|
||||
// where N is the size of corpus.
|
||||
optional int32 mining_sentence_size = 12 [deprecated = true];
|
||||
|
||||
// Maximum size of sentences to train sentence pieces.
|
||||
optional int32 training_sentence_size = 13 [deprecated = true];
|
||||
|
||||
// The size of seed sentencepieces.
|
||||
// `seed_sentencepiece_size` must be larger than `vocab_size`.
|
||||
optional int32 seed_sentencepiece_size = 14 [default = 1000000];
|
||||
|
||||
// In every EM sub-iterations, keeps top
|
||||
// `shrinking_factor` * `current sentencepieces size` with respect to
|
||||
// the loss of the sentence piece. This value should be smaller than 1.0.
|
||||
optional float shrinking_factor = 15 [default = 0.75];
|
||||
|
||||
// The maximum sentence length in byte. The sentences with the length
|
||||
// larger than `max_sentence_length` is simply ignored.
|
||||
// Longer input tends to bring the following risks:
|
||||
// * Overflow during EM training (unigram language model only)
|
||||
// * Performance drop because of O(n log n) cost in BPE.
|
||||
optional int32 max_sentence_length = 18 [default = 4192];
|
||||
|
||||
// Number of threads in the training.
|
||||
optional int32 num_threads = 16 [default = 16];
|
||||
|
||||
// Number of EM sub iterations.
|
||||
optional int32 num_sub_iterations = 17 [default = 2];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// SentencePiece parameters which control the shapes of sentence piece.
|
||||
//
|
||||
// Maximum length of sentencepiece.
|
||||
optional int32 max_sentencepiece_length = 20 [default = 16];
|
||||
|
||||
// Uses Unicode script to split sentence pieces.
|
||||
// When `split_by_unicode_script` is true, we do not allow sentence piece to
|
||||
// include multiple Unicode scripts, e.g. "F1" is not a valid piece.
|
||||
// Exception: CJ characters (Hiragana/Katakana/Han) are all handled
|
||||
// as one script type, since Japanese word can consist of multiple scripts.
|
||||
// This exception is always applied regardless of the accept-language
|
||||
// parameter.
|
||||
optional bool split_by_unicode_script = 21 [default = true];
|
||||
|
||||
// When `split_by_number` is true, put a boundary between number and
|
||||
// non-number transition. If we want to treat "F1" is one token, set this flag
|
||||
// to be false.
|
||||
optional bool split_by_number = 23 [default = true];
|
||||
|
||||
// Use a white space to split sentence pieces.
|
||||
// When `split_by_whitespace` is false, we may have the piece containing
|
||||
// a white space in the middle. e.g., "in_the".
|
||||
optional bool split_by_whitespace = 22 [default = true];
|
||||
|
||||
// Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
|
||||
// hello_. When `treat_whitespace_as_suffix` is true,
|
||||
// NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
|
||||
// of sentence.
|
||||
optional bool treat_whitespace_as_suffix = 24 [default = false];
|
||||
|
||||
// Allows pieces that only contain whitespaces instead of appearing only as
|
||||
// prefix or suffix of other pieces.
|
||||
optional bool allow_whitespace_only_pieces = 26 [default = false];
|
||||
|
||||
// Split all digits (0-9) into separate pieces.
|
||||
optional bool split_digits = 25 [default = false];
|
||||
|
||||
// Defines the pre-tokenization delimiter.
|
||||
// When specified, no pieces crossing this delimiter is not included
|
||||
// in the vocab. Then the delimiter string is virtually ignored
|
||||
// during the training. This field can allows constraints on the vocabulary
|
||||
// selection. Note that this field is available on unigram mode.
|
||||
optional string pretokenization_delimiter = 53 [ default = ""];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Vocabulary management
|
||||
//
|
||||
// Defines control symbols used as an indicator to
|
||||
// change the behavior of the decoder. <s> and </s> are pre-defined.
|
||||
// We can use this field to encode various meta information,
|
||||
// including language indicator in multilingual model.
|
||||
// These symbols are not visible to users, but visible to
|
||||
// the decoder. Note that when the input sentence contains control symbols,
|
||||
// they are not treated as one token, but segmented into normal pieces.
|
||||
// Control symbols must be inserted independently from the segmentation.
|
||||
repeated string control_symbols = 30;
|
||||
|
||||
// Defines user defined symbols.
|
||||
// These symbols are added with extremely high score
|
||||
// so they are always treated as one unique symbol in any context.
|
||||
// Typical usage of user_defined_symbols is placeholder for named entities.
|
||||
repeated string user_defined_symbols = 31;
|
||||
|
||||
// Defines required characters. Each UTF8 character in this string is included
|
||||
// in the character set regardless of character_coverage value. Unlike
|
||||
// user_defined_symbols, these characters have scores based on the frequency
|
||||
// on input sentences, and the model can form subwords using characters
|
||||
// in this field.
|
||||
optional string required_chars = 36;
|
||||
|
||||
// Decomposes unknown pieces into UTF-8 bytes.
|
||||
optional bool byte_fallback = 35 [default = false];
|
||||
|
||||
// When creating the vocabulary file, defines whether or not to additionally
|
||||
// output the score for each piece.
|
||||
optional bool vocabulary_output_piece_score = 32 [default = true];
|
||||
|
||||
// `vocab_size` is treated as hard limit. Crash if
|
||||
// the model can not produce the vocab of size `vocab_size`,
|
||||
// When `hard_vocab_limit` is false, vocab_size is treated
|
||||
// as soft limit. Note that when model_type=char,
|
||||
// always assumes hard_vocab_limit = false.
|
||||
optional bool hard_vocab_limit = 33 [default = true];
|
||||
|
||||
// use all symbols for vocab extraction. This flag is valid
|
||||
// if model type is either CHAR or WORD
|
||||
optional bool use_all_vocab = 34 [default = false];
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Reserved special meta tokens.
|
||||
// * -1 is not used.
|
||||
// * unk_id must not be -1.
|
||||
// Id must starts with 0 and be contigous.
|
||||
optional int32 unk_id = 40 [default = 0]; // <unk>
|
||||
optional int32 bos_id = 41 [default = 1]; // <s>
|
||||
optional int32 eos_id = 42 [default = 2]; // </s>
|
||||
optional int32 pad_id = 43 [default = -1]; // <pad> (padding)
|
||||
optional string unk_piece = 45 [default = "<unk>"];
|
||||
optional string bos_piece = 46 [default = "<s>"];
|
||||
optional string eos_piece = 47 [default = "</s>"];
|
||||
optional string pad_piece = 48 [default = "<pad>"];
|
||||
|
||||
// Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
|
||||
// since this character can be useful both for user and
|
||||
// developer. We can easily figure out that <unk> is emitted.
|
||||
optional string unk_surface = 44 [default = " \xE2\x81\x87 "];
|
||||
|
||||
// Increase bit depth to allow unigram model training on large
|
||||
// (>10M sentences) corpora. A Side-effect of enabling this flag
|
||||
// is increased memory usage.
|
||||
optional bool train_extremely_large_corpus = 49 [default = false];
|
||||
|
||||
// Path to a seed sentencepieces file, with one tab-separated
|
||||
// seed sentencepiece <tab> frequency per line.
|
||||
optional string seed_sentencepieces_file = 54 [default = ""];
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// NormalizerSpec encodes a various parameters for string normalizaiton
|
||||
message NormalizerSpec {
|
||||
// name of normalization rule.
|
||||
optional string name = 1;
|
||||
|
||||
// Pre-compiled normalization rule created by
|
||||
// Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
|
||||
// Usually this field is set by Builder::GetNormalizerSpec() method.
|
||||
optional bytes precompiled_charsmap = 2;
|
||||
|
||||
// Adds dummy whitespace at the beginning of text in order to
|
||||
// treat "world" in "world" and "hello world" in the same way.
|
||||
optional bool add_dummy_prefix = 3 [default = true];
|
||||
|
||||
// Removes leading, trailing, and duplicate internal whitespace.
|
||||
optional bool remove_extra_whitespaces = 4 [default = true];
|
||||
|
||||
// Replaces whitespace with meta symbol.
|
||||
// This field must be true to train sentence piece model.
|
||||
optional bool escape_whitespaces = 5 [default = true];
|
||||
|
||||
// Custom normalization rule file in TSV format.
|
||||
// https://github.com/google/sentencepiece/blob/master/doc/normalization.md
|
||||
// This field is only used in SentencePieceTrainer::Train() method, which
|
||||
// compiles the rule into the binary rule stored in `precompiled_charsmap`.
|
||||
optional string normalization_rule_tsv = 6;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// Proto to store samples for self-testing.
|
||||
message SelfTestData {
|
||||
message Sample {
|
||||
optional string input = 1;
|
||||
optional string expected = 2;
|
||||
}
|
||||
repeated Sample samples = 1;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// ModelProto stores model parameters.
|
||||
// SentencePieceProcessor is supposed to be self-contained.
|
||||
// All settings/parameters which may change the behavior must be encoded
|
||||
// in ModelProto.
|
||||
message ModelProto {
|
||||
message SentencePiece {
|
||||
enum Type {
|
||||
NORMAL = 1; // normal symbol
|
||||
UNKNOWN = 2; // unknown symbol. only <unk> for now.
|
||||
CONTROL = 3; // control symbols. </s>, <s>, <2ja> etc.
|
||||
USER_DEFINED = 4; // user defined symbols.
|
||||
// Typical usage of USER_DEFINED symbol
|
||||
// is placeholder.
|
||||
BYTE = 6; // byte symbols. Used when `byte_fallback` is true.
|
||||
UNUSED = 5; // this piece is not used.
|
||||
}
|
||||
optional string piece = 1; // piece must not be empty.
|
||||
optional float score = 2;
|
||||
optional Type type = 3 [default = NORMAL];
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
|
||||
// Sentence pieces with scores.
|
||||
repeated SentencePiece pieces = 1;
|
||||
|
||||
// Spec used to generate this model file.
|
||||
optional TrainerSpec trainer_spec = 2;
|
||||
|
||||
// Spec for text normalization.
|
||||
optional NormalizerSpec normalizer_spec = 3;
|
||||
|
||||
// Stores sample input and its expected segmentation to verify the model.
|
||||
optional SelfTestData self_test_data = 4;
|
||||
|
||||
// Spec for text de-normalization.
|
||||
optional NormalizerSpec denormalizer_spec = 5;
|
||||
|
||||
// Customized extensions: the range of field numbers
|
||||
// are open to third-party extensions.
|
||||
extensions 200 to max;
|
||||
}
|
||||
@@ -54,7 +54,7 @@ Advanced parameters (optional):
|
||||
|
||||
#### JSON mode
|
||||
|
||||
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#generate-request-json-mode) below.
|
||||
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
|
||||
|
||||
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
|
||||
|
||||
@@ -256,9 +256,9 @@ For reproducible outputs, set `temperature` to 0 and `seed` to a number:
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
"model": "mistral",
|
||||
"prompt": "[INST] why is the sky blue? [/INST]",
|
||||
"prompt": "Why is the sky blue?",
|
||||
"options": {
|
||||
"seed": 101,
|
||||
"seed": 123,
|
||||
"temperature": 0
|
||||
}
|
||||
}'
|
||||
@@ -321,7 +321,6 @@ curl http://localhost:11434/api/generate -d '{
|
||||
"vocab_only": false,
|
||||
"use_mmap": true,
|
||||
"use_mlock": false,
|
||||
"embedding_only": false,
|
||||
"rope_frequency_base": 1.1,
|
||||
"rope_frequency_scale": 0.8,
|
||||
"num_thread": 8
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
Install required tools:
|
||||
|
||||
- cmake version 3.24 or higher
|
||||
- go version 1.21 or higher
|
||||
- go version 1.22 or higher
|
||||
- gcc version 11.4.0 or higher
|
||||
|
||||
```bash
|
||||
@@ -42,7 +42,7 @@ Now you can run `ollama`:
|
||||
|
||||
#### Linux CUDA (NVIDIA)
|
||||
|
||||
*Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
|
||||
_Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
|
||||
|
||||
Install `cmake` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
|
||||
development and runtime packages.
|
||||
@@ -67,9 +67,9 @@ go build .
|
||||
|
||||
#### Linux ROCm (AMD)
|
||||
|
||||
*Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
|
||||
_Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
|
||||
|
||||
Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) developement packages first, as well as `cmake` and `golang`.
|
||||
Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) development packages first, as well as `cmake` and `golang`.
|
||||
|
||||
Typically the build scripts will auto-detect ROCm, however, if your Linux distro
|
||||
or installation approach uses unusual paths, you can specify the location by
|
||||
@@ -110,15 +110,14 @@ go build .
|
||||
|
||||
If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
|
||||
|
||||
|
||||
### Windows
|
||||
|
||||
Note: The windows build for Ollama is still under development.
|
||||
|
||||
Install required tools:
|
||||
|
||||
- MSVC toolchain - C/C++ and cmake as minimal requirements
|
||||
- go version 1.21 or higher
|
||||
- MSVC toolchain - C/C++ and cmake as minimal requirements - You must build from a "Developer Shell" with the environment variables set
|
||||
- go version 1.22 or higher
|
||||
- MinGW (pick one variant) with GCC.
|
||||
- <https://www.mingw-w64.org/>
|
||||
- <https://www.msys2.org/>
|
||||
@@ -133,6 +132,6 @@ go build .
|
||||
|
||||
#### Windows CUDA (NVIDIA)
|
||||
|
||||
In addition to the common Windows development tools described above, install:
|
||||
In addition to the common Windows development tools described above, install CUDA **AFTER** you install MSVC.
|
||||
|
||||
- [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
|
||||
|
||||
@@ -113,9 +113,9 @@ If a different directory needs to be used, set the environment variable `OLLAMA_
|
||||
|
||||
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
|
||||
|
||||
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
|
||||
## Does Ollama send my prompts and answers back to ollama.com?
|
||||
|
||||
No, Ollama runs entirely locally, and conversation data will never leave your machine.
|
||||
No. Ollama runs locally, and conversation data does not leave your machine.
|
||||
|
||||
## How can I use Ollama in Visual Studio Code?
|
||||
|
||||
|
||||
@@ -124,7 +124,10 @@ ollama run example "What is your favourite condiment?"
|
||||
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
|
||||
|
||||
1. Create [an account](https://ollama.com/signup)
|
||||
2. Run `cat ~/.ollama/id_ed25519.pub` (or `type %USERPROFILE%\.ollama\id_ed25519.pub` on Windows) to view your Ollama public key. Copy this to the clipboard.
|
||||
2. Copy your Ollama public key:
|
||||
- macOS: `cat ~/.ollama/id_ed25519.pub`
|
||||
- Windows: `type %USERPROFILE%\.ollama\id_ed25519.pub`
|
||||
- Linux: `cat /usr/share/ollama/.ollama/id_ed25519.pub`
|
||||
3. Add your public key to your [Ollama account](https://ollama.com/settings/keys)
|
||||
|
||||
Next, copy your model to your username's namespace:
|
||||
|
||||
@@ -10,6 +10,14 @@ Install Ollama running this one-liner:
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
```
|
||||
|
||||
## AMD Radeon GPU support
|
||||
|
||||
While AMD has contributed the `amdgpu` driver upstream to the official linux
|
||||
kernel source, the version is older and may not support all ROCm features. We
|
||||
recommend you install the latest driver from
|
||||
https://www.amd.com/en/support/linux-drivers for best support of your Radeon
|
||||
GPU.
|
||||
|
||||
## Manual install
|
||||
|
||||
### Download the `ollama` binary
|
||||
|
||||
@@ -67,6 +67,43 @@ You can see what features your CPU has with the following.
|
||||
cat /proc/cpuinfo| grep flags | head -1
|
||||
```
|
||||
|
||||
## AMD Radeon GPU Support
|
||||
|
||||
Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
|
||||
some cases you can force the system to try to use a close GPU type. For example
|
||||
The Radeon RX 5400 is `gfx1034` (also known as 10.3.4) however, ROCm does not
|
||||
support this patch-level, the closest support is `gfx1030`. You can use the
|
||||
environment variable `HSA_OVERRIDE_GFX_VERSION` with `x.y.z` syntax. So for
|
||||
example, to force the system to run on the RX 5400, you would set
|
||||
`HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the server.
|
||||
|
||||
At this time, the known supported GPU types are the following: (This may change from
|
||||
release to release)
|
||||
- gfx900
|
||||
- gfx906
|
||||
- gfx908
|
||||
- gfx90a
|
||||
- gfx940
|
||||
- gfx941
|
||||
- gfx942
|
||||
- gfx1030
|
||||
- gfx1100
|
||||
- gfx1101
|
||||
- gfx1102
|
||||
|
||||
This will not work for all unsupported GPUs. Reach out on [Discord](https://discord.gg/ollama)
|
||||
or file an [issue](https://github.com/ollama/ollama/issues) for additional help.
|
||||
|
||||
|
||||
## Installing older versions on Linux
|
||||
|
||||
If you run into problems on Linux and want to install an older version you can tell the install script
|
||||
which version to install.
|
||||
|
||||
```sh
|
||||
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.27" sh
|
||||
```
|
||||
|
||||
## Known issues
|
||||
|
||||
* N/A
|
||||
@@ -42,12 +42,12 @@ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
||||
all_splits = text_splitter.split_documents(data)
|
||||
```
|
||||
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install chromadb`
|
||||
|
||||
```python
|
||||
from langchain.embeddings import OllamaEmbeddings
|
||||
from langchain.vectorstores import Chroma
|
||||
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2")
|
||||
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
|
||||
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
|
||||
```
|
||||
|
||||
@@ -66,7 +66,7 @@ The next thing is to send the question and the relevant parts of the docs to the
|
||||
```python
|
||||
from langchain.chains import RetrievalQA
|
||||
qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
|
||||
qachain({"query": question})
|
||||
qachain.invoke({"query": question})
|
||||
```
|
||||
|
||||
The answer received from this chain was:
|
||||
|
||||
@@ -4,7 +4,7 @@ Welcome to the Ollama Windows preview.
|
||||
|
||||
No more WSL required!
|
||||
|
||||
Ollama now runs as a native Windows application, including NVIDIA GPU support.
|
||||
Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
|
||||
After installing Ollama Windows Preview, Ollama will run in the background and
|
||||
the `ollama` command line is available in `cmd`, `powershell` or your favorite
|
||||
terminal application. As usual the Ollama [api](./api.md) will be served on
|
||||
@@ -21,6 +21,7 @@ Logs will often be helpful in dianosing the problem (see
|
||||
|
||||
* Windows 10 or newer, Home or Pro
|
||||
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
|
||||
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
|
||||
|
||||
## API Access
|
||||
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
# Example Modelfile - Tweetwriter
|
||||
|
||||
This simple examples shows what you can do without any code, simply relying on a Modelfile. The file has two instructions:
|
||||
|
||||
1. FROM - The From instructions defines the parent model to use for this one. If you choose a model from the library, you can enter just the model name. For all other models, you need to specify the namespace as well. You could also use a local file. Just include the relative path to the converted, quantized model weights file. To learn more about creating that file, see the `import.md` file in the docs folder of this repository.
|
||||
2. SYSTEM - This defines the system prompt for the model and overrides the system prompt from the parent model.
|
||||
|
||||
## Running the Example
|
||||
|
||||
1. Create the model:
|
||||
|
||||
```bash
|
||||
ollama create tweetwriter
|
||||
```
|
||||
|
||||
2. Enter a topic to generate a tweet about.
|
||||
3. Show the Modelfile in the REPL.
|
||||
|
||||
```bash
|
||||
/show modelfile
|
||||
```
|
||||
|
||||
Notice that the FROM and SYSTEM match what was in the file. But there is also a TEMPLATE and PARAMETER. These are inherited from the parent model.
|
||||
@@ -1,102 +0,0 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Code originally from https://go-review.googlesource.com/c/crypto/+/218620
|
||||
|
||||
// TODO: replace with upstream once the above change is merged and released.
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
const privateKeyAuthMagic = "openssh-key-v1\x00"
|
||||
|
||||
type openSSHEncryptedPrivateKey struct {
|
||||
CipherName string
|
||||
KDFName string
|
||||
KDFOptions string
|
||||
KeysCount uint32
|
||||
PubKey []byte
|
||||
KeyBlocks []byte
|
||||
}
|
||||
|
||||
type openSSHPrivateKey struct {
|
||||
Check1 uint32
|
||||
Check2 uint32
|
||||
Keytype string
|
||||
Rest []byte `ssh:"rest"`
|
||||
}
|
||||
|
||||
type openSSHEd25519PrivateKey struct {
|
||||
Pub []byte
|
||||
Priv []byte
|
||||
Comment string
|
||||
Pad []byte `ssh:"rest"`
|
||||
}
|
||||
|
||||
func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error) {
|
||||
var check uint32
|
||||
if err := binary.Read(rand.Reader, binary.BigEndian, &check); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pk1 openSSHPrivateKey
|
||||
pk1.Check1 = check
|
||||
pk1.Check2 = check
|
||||
|
||||
var w openSSHEncryptedPrivateKey
|
||||
w.KeysCount = 1
|
||||
|
||||
if k, ok := key.(*ed25519.PrivateKey); ok {
|
||||
key = *k
|
||||
}
|
||||
|
||||
switch k := key.(type) {
|
||||
case ed25519.PrivateKey:
|
||||
pub, priv := k[32:], k
|
||||
key := openSSHEd25519PrivateKey{
|
||||
Pub: pub,
|
||||
Priv: priv,
|
||||
Comment: comment,
|
||||
}
|
||||
|
||||
pk1.Keytype = ssh.KeyAlgoED25519
|
||||
pk1.Rest = ssh.Marshal(key)
|
||||
|
||||
w.PubKey = ssh.Marshal(struct {
|
||||
KeyType string
|
||||
Pub []byte
|
||||
}{
|
||||
ssh.KeyAlgoED25519, pub,
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("ssh: unknown key type %T", k)
|
||||
}
|
||||
|
||||
w.KeyBlocks = openSSHPadding(ssh.Marshal(pk1), 8)
|
||||
|
||||
w.CipherName, w.KDFName, w.KDFOptions = "none", "none", ""
|
||||
|
||||
return &pem.Block{
|
||||
Type: "OPENSSH PRIVATE KEY",
|
||||
Bytes: append([]byte(privateKeyAuthMagic), ssh.Marshal(w)...),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func openSSHPadding(block []byte, blocksize int) []byte {
|
||||
for i, j := 0, len(block); (j+i)%blocksize != 0; i++ {
|
||||
block = append(block, byte(i+1))
|
||||
}
|
||||
|
||||
return block
|
||||
}
|
||||
41
go.mod
41
go.mod
@@ -1,37 +1,45 @@
|
||||
module github.com/jmorganca/ollama
|
||||
|
||||
go 1.21
|
||||
go 1.22
|
||||
|
||||
toolchain go1.22.0
|
||||
|
||||
require (
|
||||
github.com/containerd/console v1.0.3
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
|
||||
github.com/emirpasic/gods v1.18.1
|
||||
github.com/gin-gonic/gin v1.9.1
|
||||
github.com/golang/protobuf v1.5.0
|
||||
github.com/google/uuid v1.0.0
|
||||
github.com/mitchellh/mapstructure v1.5.0
|
||||
github.com/olekukonko/tablewriter v0.0.5
|
||||
github.com/spf13/cobra v1.7.0
|
||||
github.com/stretchr/testify v1.8.4
|
||||
github.com/x448/float16 v0.8.4
|
||||
golang.org/x/sync v0.3.0
|
||||
)
|
||||
|
||||
require github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
|
||||
|
||||
require (
|
||||
github.com/containerd/console v1.0.3 // indirect
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa // indirect
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc // indirect
|
||||
github.com/chewxy/hm v1.0.0 // indirect
|
||||
github.com/chewxy/math32 v1.0.8 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 // indirect
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 // indirect
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 // indirect
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 // indirect
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 // indirect
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f // indirect
|
||||
github.com/getlantern/systray v1.2.2 // indirect
|
||||
github.com/go-stack/stack v1.8.0 // indirect
|
||||
github.com/google/uuid v1.0.0 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/google/flatbuffers v1.12.0 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect
|
||||
github.com/pborman/uuid v1.2.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gonum.org/v1/gonum v0.8.2 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
gorgonia.org/vecf64 v0.9.0 // indirect
|
||||
)
|
||||
|
||||
|
||||
require (
|
||||
github.com/bytedance/sonic v1.9.1 // indirect
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
|
||||
@@ -50,7 +58,6 @@ require (
|
||||
github.com/mattn/go-isatty v0.0.19 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
|
||||
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
@@ -62,6 +69,6 @@ require (
|
||||
golang.org/x/sys v0.13.0
|
||||
golang.org/x/term v0.13.0
|
||||
golang.org/x/text v0.13.0 // indirect
|
||||
google.golang.org/protobuf v1.30.0 // indirect
|
||||
google.golang.org/protobuf v1.30.0
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
179
go.sum
179
go.sum
@@ -1,36 +1,40 @@
|
||||
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc h1:zvQ6w7KwtQWgMQiewOF9tFtundRMVZFSAksNV6ogzuY=
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ=
|
||||
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
|
||||
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
|
||||
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
|
||||
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
||||
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
||||
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
|
||||
github.com/chewxy/math32 v1.0.8 h1:fU5E4Ec4Z+5RtRAi3TovSxUjQPkgRh+HbP7tKB2OFbM=
|
||||
github.com/chewxy/math32 v1.0.8/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
|
||||
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa h1:Wg+722vs7a2zQH5lR9QWYsVbplKeffaQFIs5FTdfNNo=
|
||||
github.com/cratonica/2goarray v0.0.0-20190331194516-514510793eaa/go.mod h1:6Arca19mRx58CA7OWEd7Wu1NpC1rd3uDnNs6s1pj/DI=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 h1:cBzrdJPAFBsgCrDPnZxlp1dF2+k4r1kVpD7+1S1PVjY=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLcxEuYUlAd/EXyjc/v55nd3+47YAgWbSXVxPrNI=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
|
||||
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
|
||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
|
||||
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
||||
github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 h1:NRUJuo3v3WGC/g5YiyF790gut6oQr5f3FBI88Wv0dx4=
|
||||
github.com/getlantern/context v0.0.0-20190109183933-c447772a6520/go.mod h1:L+mq6/vvYHKjCX2oez0CgEAJmbq1fbb/oNJIWQkBybY=
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 h1:6uJ+sZ/e03gkbqZ0kUG6mfKoqDb4XMAzMIwlajq19So=
|
||||
github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7/go.mod h1:l+xpFBrCtDLpK9qNjxs+cHU6+BAdlBaxHqikB6Lku3A=
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 h1:guBYzEaLz0Vfc/jv0czrr2z7qyzTOGC9hiQ0VC+hKjk=
|
||||
github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7/go.mod h1:zx/1xUUeYPy3Pcmet8OSXLbF47l+3y6hIPpyLWoR9oc=
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 h1:micT5vkcr9tOVk1FiH8SWKID8ultN44Z+yzd2y/Vyb0=
|
||||
github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7/go.mod h1:dD3CgOrwlzca8ed61CsZouQS5h5jIzkK9ZWrTcf0s+o=
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 h1:XYzSdCbkzOC0FDNrgJqGRo8PCMFOBFL9py72DRs7bmc=
|
||||
github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55/go.mod h1:6mmzY2kW1TOOrVy+r41Za2MxXM+hhqTtY3oBKd2AgFA=
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f h1:wrYrQttPS8FHIRSlsrcuKazukx/xqO/PpLZzZXsF+EA=
|
||||
github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f/go.mod h1:D5ao98qkA6pxftxoqzibIBBrLSUli+kYnJqrgBf9cIA=
|
||||
github.com/getlantern/systray v1.2.2 h1:dCEHtfmvkJG7HZ8lS/sLklTH4RKUcIsKrAD9sThoEBE=
|
||||
github.com/getlantern/systray v1.2.2/go.mod h1:pXFOI1wwqwYXEhLPm9ZGjS2u/vVELeIgNMY5HvhHhcE=
|
||||
github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
|
||||
github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
@@ -40,6 +44,7 @@ github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
|
||||
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
|
||||
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
@@ -49,12 +54,34 @@ github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91
|
||||
github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos=
|
||||
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
|
||||
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
|
||||
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
||||
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
||||
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
||||
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
||||
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
||||
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
||||
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
||||
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/flatbuffers v1.12.0 h1:/PtAHvnBY4Kqnx/xCQ3OIV9uYcSFGScBsWI3Oogeh6w=
|
||||
github.com/google/flatbuffers v1.12.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
@@ -65,6 +92,9 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
|
||||
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
|
||||
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
|
||||
@@ -79,14 +109,14 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
|
||||
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
|
||||
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
|
||||
github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ=
|
||||
github.com/lxn/win v0.0.0-20210218163916-a377121e959e/go.mod h1:KxxjdtRkfNoYDCUP5ryK7XJJNTnpC8atvtmTheChOtk=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
|
||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
|
||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
@@ -94,25 +124,23 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw=
|
||||
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
|
||||
github.com/pborman/uuid v1.2.1 h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw=
|
||||
github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
|
||||
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9 h1:DV4iXjNn6fGeDl1AkZ1I0QB/0DBjrc7kPpxHrmuDzW4=
|
||||
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9/go.mod h1:nR7l3gM6ubiOm+mCkmmUyIBUcBAyiUmW6dQrDZhugFE=
|
||||
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
||||
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
|
||||
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
|
||||
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
|
||||
github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
@@ -120,6 +148,8 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
@@ -136,44 +166,127 @@ github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6
|
||||
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
|
||||
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
|
||||
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
|
||||
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
|
||||
github.com/xtgo/set v1.0.0 h1:6BCNBRv3ORNDQ7fyoJXRv+tstJz3m1JVFQErfeZz2pY=
|
||||
github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
|
||||
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
||||
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
||||
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
|
||||
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
|
||||
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
|
||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
|
||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
||||
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
||||
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
||||
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
|
||||
gonum.org/v1/gonum v0.8.2 h1:CCXrcPKiGGotvnN6jfUsKk4rRqm7q09/YbKb5xCEvtM=
|
||||
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
||||
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
||||
google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f h1:Yv4xsIx7HZOoyUGSJ2ksDyWE2qIBXROsZKt2ny3hCGM=
|
||||
google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
||||
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
|
||||
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.32.0 h1:zWTV+LMdc3kaiJMSTOFz2UgSBgx8RNQoTGiZu3fR9S0=
|
||||
google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
|
||||
google.golang.org/grpc/cmd/protoc-gen-go-grpc v0.0.0-20200910201057-6591123024b3/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
||||
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
||||
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
|
||||
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
|
||||
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||
gopkg.in/Knetic/govaluate.v3 v3.0.0/go.mod h1:csKLBORsPbafmSCGTEh3U7Ozmsuq8ZSIlKk1bcqph0E=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
@@ -184,4 +297,10 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gorgonia.org/vecf32 v0.9.0 h1:PClazic1r+JVJ1dEzRXgeiVl4g1/Hf/w+wUSqnco1Xg=
|
||||
gorgonia.org/vecf32 v0.9.0/go.mod h1:NCc+5D2oxddRL11hd+pCB1PEyXWOyiQxfZ/1wwhOXCA=
|
||||
gorgonia.org/vecf64 v0.9.0 h1:bgZDP5x0OzBF64PjMGC3EvTdOoMEcmfAh1VCUnZFm1A=
|
||||
gorgonia.org/vecf64 v0.9.0/go.mod h1:hp7IOWCnRiVQKON73kkC/AUMtEXyf9kGlVrtPQ9ccVA=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||
|
||||
101
gpu/amd.go
101
gpu/amd.go
@@ -1,101 +0,0 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TODO - windows vs. non-windows vs darwin
|
||||
|
||||
// Discovery logic for AMD/ROCm GPUs
|
||||
|
||||
const (
|
||||
DriverVersionFile = "/sys/module/amdgpu/version"
|
||||
GPUPropertiesFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/properties"
|
||||
// TODO probably break these down per GPU to make the logic simpler
|
||||
GPUTotalMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/properties" // size_in_bytes line
|
||||
GPUUsedMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/used_memory"
|
||||
)
|
||||
|
||||
func AMDDetected() bool {
|
||||
// Some driver versions (older?) don't have a version file, so just lookup the parent dir
|
||||
sysfsDir := filepath.Dir(DriverVersionFile)
|
||||
_, err := os.Stat(sysfsDir)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
slog.Debug("amd driver not detected " + sysfsDir)
|
||||
return false
|
||||
} else if err != nil {
|
||||
slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func AMDDriverVersion() (string, error) {
|
||||
_, err := os.Stat(DriverVersionFile)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("amdgpu file stat error: %s %w", DriverVersionFile, err)
|
||||
}
|
||||
fp, err := os.Open(DriverVersionFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fp.Close()
|
||||
verString, err := io.ReadAll(fp)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(string(verString)), nil
|
||||
}
|
||||
|
||||
func AMDGFXVersions() []Version {
|
||||
res := []Version{}
|
||||
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
|
||||
for _, match := range matches {
|
||||
fp, err := os.Open(match)
|
||||
if err != nil {
|
||||
slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
|
||||
continue
|
||||
}
|
||||
defer fp.Close()
|
||||
|
||||
scanner := bufio.NewScanner(fp)
|
||||
// optionally, resize scanner's capacity for lines over 64K, see next example
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, "gfx_target_version") {
|
||||
ver := strings.Fields(line)
|
||||
if len(ver) != 2 || len(ver[1]) < 5 {
|
||||
slog.Debug("malformed " + line)
|
||||
continue
|
||||
}
|
||||
l := len(ver[1])
|
||||
patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
|
||||
minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
|
||||
major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
|
||||
if err1 != nil || err2 != nil || err3 != nil {
|
||||
slog.Debug("malformed int " + line)
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, Version{
|
||||
Major: uint(major),
|
||||
Minor: uint(minor),
|
||||
Patch: uint(patch),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (v Version) ToGFXString() string {
|
||||
return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
|
||||
}
|
||||
58
gpu/amd_common.go
Normal file
58
gpu/amd_common.go
Normal file
@@ -0,0 +1,58 @@
|
||||
//go:build linux || windows
|
||||
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
|
||||
func rocmLibUsable(libDir string) bool {
|
||||
slog.Debug("evaluating potential rocm lib dir " + libDir)
|
||||
for _, g := range ROCmLibGlobs {
|
||||
res, _ := filepath.Glob(filepath.Join(libDir, g))
|
||||
if len(res) == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func GetSupportedGFX(libDir string) ([]string, error) {
|
||||
var ret []string
|
||||
files, err := filepath.Glob(filepath.Join(libDir, "rocblas", "library", "TensileLibrary_lazy_gfx*.dat"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, file := range files {
|
||||
ret = append(ret, strings.TrimSuffix(strings.TrimPrefix(filepath.Base(file), "TensileLibrary_lazy_"), ".dat"))
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
|
||||
// Set the visible devices if not already set
|
||||
// TODO - does sort order matter?
|
||||
devices := []string{}
|
||||
for i := range ids {
|
||||
slog.Debug(fmt.Sprintf("i=%d", i))
|
||||
if _, skipped := skip[i]; skipped {
|
||||
slog.Debug("skipped")
|
||||
continue
|
||||
}
|
||||
devices = append(devices, strconv.Itoa(i))
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("devices=%v", devices))
|
||||
|
||||
val := strings.Join(devices, ",")
|
||||
err := os.Setenv("HIP_VISIBLE_DEVICES", val)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to set env: %s", err))
|
||||
}
|
||||
slog.Debug("HIP_VISIBLE_DEVICES=" + val)
|
||||
}
|
||||
141
gpu/amd_hip_windows.go
Normal file
141
gpu/amd_hip_windows.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
const (
|
||||
hipSuccess = 0
|
||||
hipErrorNoDevice = 100
|
||||
)
|
||||
|
||||
type hipDevicePropMinimal struct {
|
||||
Name [256]byte
|
||||
unused1 [140]byte
|
||||
GcnArchName [256]byte // gfx####
|
||||
iGPU int // Doesn't seem to actually report correctly
|
||||
unused2 [128]byte
|
||||
}
|
||||
|
||||
// Wrap the amdhip64.dll library for GPU discovery
|
||||
type HipLib struct {
|
||||
dll windows.Handle
|
||||
hipGetDeviceCount uintptr
|
||||
hipGetDeviceProperties uintptr
|
||||
hipMemGetInfo uintptr
|
||||
hipSetDevice uintptr
|
||||
hipDriverGetVersion uintptr
|
||||
}
|
||||
|
||||
func NewHipLib() (*HipLib, error) {
|
||||
h, err := windows.LoadLibrary("amdhip64.dll")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
|
||||
}
|
||||
hl := &HipLib{}
|
||||
hl.dll = h
|
||||
hl.hipGetDeviceCount, err = windows.GetProcAddress(hl.dll, "hipGetDeviceCount")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hl.hipGetDeviceProperties, err = windows.GetProcAddress(hl.dll, "hipGetDeviceProperties")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hl.hipMemGetInfo, err = windows.GetProcAddress(hl.dll, "hipMemGetInfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hl.hipSetDevice, err = windows.GetProcAddress(hl.dll, "hipSetDevice")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hl.hipDriverGetVersion, err = windows.GetProcAddress(hl.dll, "hipDriverGetVersion")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return hl, nil
|
||||
}
|
||||
|
||||
// The hip library only evaluates the HIP_VISIBLE_DEVICES variable at startup
|
||||
// so we have to unload/reset the library after we do our initial discovery
|
||||
// to make sure our updates to that variable are processed by llama.cpp
|
||||
func (hl *HipLib) Release() {
|
||||
err := windows.FreeLibrary(hl.dll)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))
|
||||
}
|
||||
hl.dll = 0
|
||||
}
|
||||
|
||||
func (hl *HipLib) AMDDriverVersion() (string, error) {
|
||||
if hl.dll == 0 {
|
||||
return "", fmt.Errorf("dll has been unloaded")
|
||||
}
|
||||
var version int
|
||||
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
|
||||
if status != hipSuccess {
|
||||
return "", fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
|
||||
}
|
||||
return strconv.Itoa(version), nil
|
||||
}
|
||||
|
||||
func (hl *HipLib) HipGetDeviceCount() int {
|
||||
if hl.dll == 0 {
|
||||
slog.Error("dll has been unloaded")
|
||||
return 0
|
||||
}
|
||||
var count int
|
||||
status, _, err := syscall.SyscallN(hl.hipGetDeviceCount, uintptr(unsafe.Pointer(&count)))
|
||||
if status == hipErrorNoDevice {
|
||||
slog.Info("AMD ROCm reports no devices found")
|
||||
return 0
|
||||
}
|
||||
if status != hipSuccess {
|
||||
slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err))
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func (hl *HipLib) HipSetDevice(device int) error {
|
||||
if hl.dll == 0 {
|
||||
return fmt.Errorf("dll has been unloaded")
|
||||
}
|
||||
status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
|
||||
if status != hipSuccess {
|
||||
return fmt.Errorf("failed call to hipSetDevice: %d %s", status, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
|
||||
if hl.dll == 0 {
|
||||
return nil, fmt.Errorf("dll has been unloaded")
|
||||
}
|
||||
var props hipDevicePropMinimal
|
||||
status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
|
||||
if status != hipSuccess {
|
||||
return nil, fmt.Errorf("failed call to hipGetDeviceProperties: %d %s", status, err)
|
||||
}
|
||||
return &props, nil
|
||||
}
|
||||
|
||||
// free, total, err
|
||||
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
|
||||
if hl.dll == 0 {
|
||||
return 0, 0, fmt.Errorf("dll has been unloaded")
|
||||
}
|
||||
var totalMemory uint64
|
||||
var freeMemory uint64
|
||||
status, _, err := syscall.SyscallN(hl.hipMemGetInfo, uintptr(unsafe.Pointer(&freeMemory)), uintptr(unsafe.Pointer(&totalMemory)))
|
||||
if status != hipSuccess {
|
||||
return 0, 0, fmt.Errorf("failed call to hipMemGetInfo: %d %s", status, err)
|
||||
}
|
||||
return freeMemory, totalMemory, nil
|
||||
}
|
||||
411
gpu/amd_linux.go
Normal file
411
gpu/amd_linux.go
Normal file
@@ -0,0 +1,411 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/jmorganca/ollama/version"
|
||||
)
|
||||
|
||||
// Discovery logic for AMD/ROCm GPUs
|
||||
|
||||
const (
|
||||
curlMsg = "curl -fsSL https://github.com/ollama/ollama/releases/download/v%s/rocm-amd64-deps.tgz | tar -zxf - -C %s"
|
||||
DriverVersionFile = "/sys/module/amdgpu/version"
|
||||
AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/"
|
||||
GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
|
||||
|
||||
// Prefix with the node dir
|
||||
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
|
||||
GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
|
||||
RocmStandardLocation = "/opt/rocm/lib"
|
||||
)
|
||||
|
||||
var (
|
||||
// Used to validate if the given ROCm lib is usable
|
||||
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
|
||||
)
|
||||
|
||||
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
||||
// HIP_VISIBLE_DEVICES will be set if we detect a mix of unsupported and supported devices
|
||||
// and the user hasn't already set this variable
|
||||
func AMDGetGPUInfo(resp *GpuInfo) {
|
||||
// TODO - DRY this out with windows
|
||||
if !AMDDetected() {
|
||||
return
|
||||
}
|
||||
skip := map[int]interface{}{}
|
||||
|
||||
// Opportunistic logging of driver version to aid in troubleshooting
|
||||
ver, err := AMDDriverVersion()
|
||||
if err == nil {
|
||||
slog.Info("AMD Driver: " + ver)
|
||||
} else {
|
||||
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
|
||||
slog.Warn(fmt.Sprintf("ollama recommends running the https://www.amd.com/en/support/linux-drivers: %s", err))
|
||||
}
|
||||
|
||||
// If the user has specified exactly which GPUs to use, look up their memory
|
||||
visibleDevices := os.Getenv("HIP_VISIBLE_DEVICES")
|
||||
if visibleDevices != "" {
|
||||
ids := []int{}
|
||||
for _, idStr := range strings.Split(visibleDevices, ",") {
|
||||
id, err := strconv.Atoi(idStr)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("malformed HIP_VISIBLE_DEVICES=%s %s", visibleDevices, err))
|
||||
} else {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
}
|
||||
amdProcMemLookup(resp, nil, ids)
|
||||
return
|
||||
}
|
||||
|
||||
// Gather GFX version information from all detected cards
|
||||
gfx := AMDGFXVersions()
|
||||
verStrings := []string{}
|
||||
for i, v := range gfx {
|
||||
verStrings = append(verStrings, v.ToGFXString())
|
||||
if v.Major == 0 {
|
||||
// Silently skip CPUs
|
||||
skip[i] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if v.Major < 9 {
|
||||
// TODO consider this a build-time setting if we can support 8xx family GPUs
|
||||
slog.Warn(fmt.Sprintf("amdgpu [%d] too old %s", i, v.ToGFXString()))
|
||||
skip[i] = struct{}{}
|
||||
}
|
||||
}
|
||||
slog.Info(fmt.Sprintf("detected amdgpu versions %v", verStrings))
|
||||
|
||||
// Abort if all GPUs are skipped
|
||||
if len(skip) >= len(gfx) {
|
||||
slog.Info("all detected amdgpus are skipped, falling back to CPU")
|
||||
return
|
||||
}
|
||||
|
||||
// If we got this far, then we have at least 1 GPU that's a ROCm candidate, so make sure we have a lib
|
||||
libDir, err := AMDValidateLibDir()
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
|
||||
return
|
||||
}
|
||||
|
||||
gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
|
||||
if gfxOverride == "" {
|
||||
supported, err := GetSupportedGFX(libDir)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
|
||||
return
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("rocm supported GPU types %v", supported))
|
||||
|
||||
for i, v := range gfx {
|
||||
if !slices.Contains[[]string, string](supported, v.ToGFXString()) {
|
||||
slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, v.ToGFXString(), libDir, supported))
|
||||
// TODO - consider discrete markdown just for ROCM troubleshooting?
|
||||
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
|
||||
skip[i] = struct{}{}
|
||||
} else {
|
||||
slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, v.ToGFXString()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
|
||||
}
|
||||
|
||||
if len(skip) >= len(gfx) {
|
||||
slog.Info("all detected amdgpus are skipped, falling back to CPU")
|
||||
return
|
||||
}
|
||||
|
||||
ids := make([]int, len(gfx))
|
||||
i := 0
|
||||
for k := range gfx {
|
||||
ids[i] = k
|
||||
i++
|
||||
}
|
||||
amdProcMemLookup(resp, skip, ids)
|
||||
if resp.memInfo.DeviceCount == 0 {
|
||||
return
|
||||
}
|
||||
if len(skip) > 0 {
|
||||
amdSetVisibleDevices(ids, skip)
|
||||
}
|
||||
}
|
||||
|
||||
// Walk the sysfs nodes for the available GPUs and gather information from them
|
||||
// skipping over any devices in the skip map
|
||||
func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
resp.memInfo.DeviceCount = 0
|
||||
resp.memInfo.TotalMemory = 0
|
||||
resp.memInfo.FreeMemory = 0
|
||||
if len(ids) == 0 {
|
||||
slog.Debug("discovering all amdgpu devices")
|
||||
entries, err := os.ReadDir(AMDNodesSysfsDir)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err))
|
||||
return
|
||||
}
|
||||
for _, node := range entries {
|
||||
if !node.IsDir() {
|
||||
continue
|
||||
}
|
||||
id, err := strconv.Atoi(node.Name())
|
||||
if err != nil {
|
||||
slog.Warn("malformed amdgpu sysfs node id " + node.Name())
|
||||
continue
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("discovering amdgpu devices %v", ids))
|
||||
|
||||
for _, id := range ids {
|
||||
if _, skipped := skip[id]; skipped {
|
||||
continue
|
||||
}
|
||||
totalMemory := uint64(0)
|
||||
usedMemory := uint64(0)
|
||||
propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUTotalMemoryFileGlob)
|
||||
propFiles, err := filepath.Glob(propGlob)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err))
|
||||
}
|
||||
// 1 or more memory banks - sum the values of all of them
|
||||
for _, propFile := range propFiles {
|
||||
fp, err := os.Open(propFile)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", propFile, err))
|
||||
continue
|
||||
}
|
||||
defer fp.Close()
|
||||
scanner := bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, "size_in_bytes") {
|
||||
ver := strings.Fields(line)
|
||||
if len(ver) != 2 {
|
||||
slog.Warn("malformed " + line)
|
||||
continue
|
||||
}
|
||||
bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64)
|
||||
if err != nil {
|
||||
slog.Warn("malformed int " + line)
|
||||
continue
|
||||
}
|
||||
totalMemory += bankSizeInBytes
|
||||
}
|
||||
}
|
||||
}
|
||||
if totalMemory == 0 {
|
||||
continue
|
||||
}
|
||||
usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob)
|
||||
usedFiles, err := filepath.Glob(usedGlob)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("error looking up used GPU memory: %s %s", usedGlob, err))
|
||||
continue
|
||||
}
|
||||
for _, usedFile := range usedFiles {
|
||||
fp, err := os.Open(usedFile)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", usedFile, err))
|
||||
continue
|
||||
}
|
||||
defer fp.Close()
|
||||
data, err := io.ReadAll(fp)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to read sysfs node file %s: %s", usedFile, err))
|
||||
continue
|
||||
}
|
||||
used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("malformed used memory %s: %s", string(data), err))
|
||||
continue
|
||||
}
|
||||
usedMemory += used
|
||||
}
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %d", id, totalMemory))
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory %d", id, (totalMemory - usedMemory)))
|
||||
resp.memInfo.DeviceCount++
|
||||
resp.memInfo.TotalMemory += totalMemory
|
||||
resp.memInfo.FreeMemory += (totalMemory - usedMemory)
|
||||
}
|
||||
if resp.memInfo.DeviceCount > 0 {
|
||||
resp.Library = "rocm"
|
||||
}
|
||||
}
|
||||
|
||||
// Quick check for AMD driver so we can skip amdgpu discovery if not present
|
||||
func AMDDetected() bool {
|
||||
// Some driver versions (older?) don't have a version file, so just lookup the parent dir
|
||||
sysfsDir := filepath.Dir(DriverVersionFile)
|
||||
_, err := os.Stat(sysfsDir)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
slog.Debug("amdgpu driver not detected " + sysfsDir)
|
||||
return false
|
||||
} else if err != nil {
|
||||
slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setupLink(source, target string) error {
|
||||
if err := os.RemoveAll(target); err != nil {
|
||||
return fmt.Errorf("failed to remove old rocm directory %s %w", target, err)
|
||||
}
|
||||
if err := os.Symlink(source, target); err != nil {
|
||||
return fmt.Errorf("failed to create link %s => %s %w", source, target, err)
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("host rocm linked %s => %s", source, target))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ensure the AMD rocm lib dir is wired up
|
||||
// Prefer to use host installed ROCm, as long as it meets our minimum requirements
|
||||
// failing that, tell the user how to download it on their own
|
||||
func AMDValidateLibDir() (string, error) {
|
||||
// We rely on the rpath compiled into our library to find rocm
|
||||
// so we establish a symlink to wherever we find it on the system
|
||||
// to $AssetsDir/rocm
|
||||
|
||||
// If we already have a rocm dependency wired, nothing more to do
|
||||
assetsDir, err := AssetsDir()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to lookup lib dir: %w", err)
|
||||
}
|
||||
// Versioned directory
|
||||
rocmTargetDir := filepath.Join(assetsDir, "rocm")
|
||||
if rocmLibUsable(rocmTargetDir) {
|
||||
return rocmTargetDir, nil
|
||||
}
|
||||
// Parent dir (unversioned)
|
||||
commonRocmDir := filepath.Join(filepath.Dir(assetsDir), "rocm")
|
||||
if rocmLibUsable(commonRocmDir) {
|
||||
return rocmTargetDir, setupLink(commonRocmDir, rocmTargetDir)
|
||||
}
|
||||
|
||||
// Prefer explicit HIP env var
|
||||
hipPath := os.Getenv("HIP_PATH")
|
||||
if hipPath != "" {
|
||||
hipLibDir := filepath.Join(hipPath, "lib")
|
||||
if rocmLibUsable(hipLibDir) {
|
||||
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
|
||||
return rocmTargetDir, setupLink(hipLibDir, rocmTargetDir)
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the library path for potential matches
|
||||
ldPaths := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
|
||||
for _, ldPath := range ldPaths {
|
||||
d, err := filepath.Abs(ldPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if rocmLibUsable(d) {
|
||||
return rocmTargetDir, setupLink(d, rocmTargetDir)
|
||||
}
|
||||
}
|
||||
|
||||
// Well known location(s)
|
||||
if rocmLibUsable("/opt/rocm/lib") {
|
||||
return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir)
|
||||
}
|
||||
err = os.MkdirAll(rocmTargetDir, 0755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create empty rocm dir %s %w", rocmTargetDir, err)
|
||||
}
|
||||
|
||||
// If we still haven't found a usable rocm, the user will have to download it on their own
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or run the following")
|
||||
slog.Warn(fmt.Sprintf(curlMsg, version.Version, rocmTargetDir))
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
|
||||
func AMDDriverVersion() (string, error) {
|
||||
_, err := os.Stat(DriverVersionFile)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
|
||||
}
|
||||
fp, err := os.Open(DriverVersionFile)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fp.Close()
|
||||
verString, err := io.ReadAll(fp)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strings.TrimSpace(string(verString)), nil
|
||||
}
|
||||
|
||||
func AMDGFXVersions() map[int]Version {
|
||||
res := map[int]Version{}
|
||||
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
|
||||
for _, match := range matches {
|
||||
fp, err := os.Open(match)
|
||||
if err != nil {
|
||||
slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
|
||||
continue
|
||||
}
|
||||
defer fp.Close()
|
||||
i, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
|
||||
if err != nil {
|
||||
slog.Debug(fmt.Sprintf("failed to parse node ID %s", err))
|
||||
continue
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, "gfx_target_version") {
|
||||
ver := strings.Fields(line)
|
||||
if len(ver) != 2 || len(ver[1]) < 5 {
|
||||
|
||||
if ver[1] == "0" {
|
||||
// Silently skip the CPU
|
||||
continue
|
||||
} else {
|
||||
slog.Debug("malformed " + line)
|
||||
}
|
||||
res[i] = Version{
|
||||
Major: 0,
|
||||
Minor: 0,
|
||||
Patch: 0,
|
||||
}
|
||||
continue
|
||||
}
|
||||
l := len(ver[1])
|
||||
patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
|
||||
minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
|
||||
major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
|
||||
if err1 != nil || err2 != nil || err3 != nil {
|
||||
slog.Debug("malformed int " + line)
|
||||
continue
|
||||
}
|
||||
|
||||
res[i] = Version{
|
||||
Major: uint(major),
|
||||
Minor: uint(minor),
|
||||
Patch: uint(patch),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (v Version) ToGFXString() string {
|
||||
return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
|
||||
}
|
||||
190
gpu/amd_windows.go
Normal file
190
gpu/amd_windows.go
Normal file
@@ -0,0 +1,190 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
RocmStandardLocation = "C:\\Program Files\\AMD\\ROCm\\5.7\\bin" // TODO glob?
|
||||
|
||||
// TODO We're lookinng for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true
|
||||
iGPUName = "AMD Radeon(TM) Graphics"
|
||||
)
|
||||
|
||||
var (
|
||||
// Used to validate if the given ROCm lib is usable
|
||||
ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here...
|
||||
)
|
||||
|
||||
func AMDGetGPUInfo(resp *GpuInfo) {
|
||||
hl, err := NewHipLib()
|
||||
if err != nil {
|
||||
slog.Debug(err.Error())
|
||||
return
|
||||
}
|
||||
defer hl.Release()
|
||||
skip := map[int]interface{}{}
|
||||
ids := []int{}
|
||||
resp.memInfo.DeviceCount = 0
|
||||
resp.memInfo.TotalMemory = 0
|
||||
resp.memInfo.FreeMemory = 0
|
||||
|
||||
ver, err := hl.AMDDriverVersion()
|
||||
if err == nil {
|
||||
slog.Info("AMD Driver: " + ver)
|
||||
} else {
|
||||
// For now this is benign, but we may eventually need to fail compatibility checks
|
||||
slog.Debug(fmt.Sprintf("error looking up amd driver version: %s", err))
|
||||
}
|
||||
|
||||
// Note: the HIP library automatically handles HIP_VISIBLE_DEVICES
|
||||
count := hl.HipGetDeviceCount()
|
||||
if count == 0 {
|
||||
return
|
||||
}
|
||||
libDir, err := AMDValidateLibDir()
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
|
||||
return
|
||||
}
|
||||
|
||||
var supported []string
|
||||
gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
|
||||
if gfxOverride == "" {
|
||||
supported, err = GetSupportedGFX(libDir)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
|
||||
return
|
||||
}
|
||||
} else {
|
||||
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
|
||||
}
|
||||
|
||||
slog.Info(fmt.Sprintf("detected %d hip devices", count))
|
||||
for i := 0; i < count; i++ {
|
||||
ids = append(ids, i)
|
||||
err = hl.HipSetDevice(i)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("[%d] %s", i, err))
|
||||
skip[i] = struct{}{}
|
||||
continue
|
||||
}
|
||||
|
||||
props, err := hl.HipGetDeviceProperties(i)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("[%d] %s", i, err))
|
||||
skip[i] = struct{}{}
|
||||
continue
|
||||
}
|
||||
n := bytes.IndexByte(props.Name[:], 0)
|
||||
name := string(props.Name[:n])
|
||||
slog.Info(fmt.Sprintf("[%d] Name: %s", i, name))
|
||||
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
||||
gfx := string(props.GcnArchName[:n])
|
||||
slog.Info(fmt.Sprintf("[%d] GcnArchName: %s", i, gfx))
|
||||
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
||||
// TODO Why isn't props.iGPU accurate!?
|
||||
if strings.EqualFold(name, iGPUName) {
|
||||
slog.Info(fmt.Sprintf("iGPU detected [%d] skipping", i))
|
||||
skip[i] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if gfxOverride == "" {
|
||||
if !slices.Contains[[]string, string](supported, gfx) {
|
||||
slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, gfx, libDir, supported))
|
||||
// TODO - consider discrete markdown just for ROCM troubleshooting?
|
||||
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
|
||||
skip[i] = struct{}{}
|
||||
continue
|
||||
} else {
|
||||
slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, gfx))
|
||||
}
|
||||
}
|
||||
|
||||
totalMemory, freeMemory, err := hl.HipMemGetInfo()
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("[%d] %s", i, err))
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO according to docs, freeMem may lie on windows!
|
||||
slog.Info(fmt.Sprintf("[%d] Total Mem: %d", i, totalMemory))
|
||||
slog.Info(fmt.Sprintf("[%d] Free Mem: %d", i, freeMemory))
|
||||
resp.memInfo.DeviceCount++
|
||||
resp.memInfo.TotalMemory += totalMemory
|
||||
resp.memInfo.FreeMemory += freeMemory
|
||||
}
|
||||
if resp.memInfo.DeviceCount > 0 {
|
||||
resp.Library = "rocm"
|
||||
}
|
||||
// Abort if all GPUs are skipped
|
||||
if len(skip) >= count {
|
||||
slog.Info("all detected amdgpus are skipped, falling back to CPU")
|
||||
return
|
||||
}
|
||||
if len(skip) > 0 {
|
||||
amdSetVisibleDevices(ids, skip)
|
||||
}
|
||||
UpdatePath(libDir)
|
||||
}
|
||||
|
||||
func AMDValidateLibDir() (string, error) {
|
||||
// On windows non-admins typically can't create links
|
||||
// so instead of trying to rely on rpath and a link in
|
||||
// $LibDir/rocm, we instead rely on setting PATH to point
|
||||
// to the location of the ROCm library
|
||||
|
||||
// Installer payload location
|
||||
exe, err := os.Executable()
|
||||
if err == nil {
|
||||
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
|
||||
if rocmLibUsable(rocmTargetDir) {
|
||||
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
|
||||
return rocmTargetDir, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If we already have a rocm dependency wired, nothing more to do
|
||||
libDir, err := AssetsDir()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to lookup lib dir: %w", err)
|
||||
}
|
||||
rocmTargetDir := filepath.Join(libDir, "rocm")
|
||||
if rocmLibUsable(rocmTargetDir) {
|
||||
return rocmTargetDir, nil
|
||||
}
|
||||
|
||||
// Prefer explicit HIP env var
|
||||
hipPath := os.Getenv("HIP_PATH")
|
||||
if hipPath != "" {
|
||||
hipLibDir := filepath.Join(hipPath, "bin")
|
||||
if rocmLibUsable(hipLibDir) {
|
||||
slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
|
||||
return hipLibDir, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Well known location(s)
|
||||
if rocmLibUsable(RocmStandardLocation) {
|
||||
return RocmStandardLocation, nil
|
||||
}
|
||||
|
||||
// Installer payload (if we're running from some other location)
|
||||
localAppData := os.Getenv("LOCALAPPDATA")
|
||||
appDir := filepath.Join(localAppData, "Programs", "Ollama")
|
||||
rocmTargetDir = filepath.Join(appDir, "rocm")
|
||||
if rocmLibUsable(rocmTargetDir) {
|
||||
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
|
||||
return rocmTargetDir, nil
|
||||
}
|
||||
|
||||
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
||||
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm v6")
|
||||
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
|
||||
}
|
||||
60
gpu/assets.go
Normal file
60
gpu/assets.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/jmorganca/ollama/version"
|
||||
)
|
||||
|
||||
func AssetsDir() (string, error) {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
baseDir := filepath.Join(home, ".ollama", "assets")
|
||||
libDirs, err := os.ReadDir(baseDir)
|
||||
if err == nil {
|
||||
for _, d := range libDirs {
|
||||
if d.Name() == version.Version {
|
||||
continue
|
||||
}
|
||||
// Special case the rocm dependencies, which are handled by the installer
|
||||
if d.Name() == "rocm" {
|
||||
continue
|
||||
}
|
||||
slog.Debug("stale lib detected, cleaning up " + d.Name())
|
||||
err = os.RemoveAll(filepath.Join(baseDir, d.Name()))
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("unable to clean up stale library %s: %s", filepath.Join(baseDir, d.Name()), err))
|
||||
}
|
||||
}
|
||||
}
|
||||
return filepath.Join(baseDir, version.Version), nil
|
||||
}
|
||||
|
||||
func UpdatePath(dir string) {
|
||||
if runtime.GOOS == "windows" {
|
||||
tmpDir := filepath.Dir(dir)
|
||||
pathComponents := strings.Split(os.Getenv("PATH"), ";")
|
||||
i := 0
|
||||
for _, comp := range pathComponents {
|
||||
if strings.EqualFold(comp, dir) {
|
||||
return
|
||||
}
|
||||
// Remove any other prior paths to our temp dir
|
||||
if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
|
||||
pathComponents[i] = comp
|
||||
i++
|
||||
}
|
||||
}
|
||||
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
||||
slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
|
||||
os.Setenv("PATH", newPath)
|
||||
}
|
||||
// linux and darwin rely on rpath
|
||||
}
|
||||
117
gpu/gpu.go
117
gpu/gpu.go
@@ -24,7 +24,6 @@ import (
|
||||
|
||||
type handles struct {
|
||||
cuda *C.cuda_handle_t
|
||||
rocm *C.rocm_handle_t
|
||||
}
|
||||
|
||||
var gpuMutex sync.Mutex
|
||||
@@ -54,39 +53,23 @@ var CudaWindowsGlobs = []string{
|
||||
"c:\\Windows\\System32\\nvml.dll",
|
||||
}
|
||||
|
||||
var RocmLinuxGlobs = []string{
|
||||
"/opt/rocm*/lib*/librocm_smi64.so*",
|
||||
}
|
||||
|
||||
var RocmWindowsGlobs = []string{
|
||||
"c:\\Windows\\System32\\rocm_smi64.dll",
|
||||
}
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initGPUHandles() {
|
||||
|
||||
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
||||
|
||||
gpuHandles = &handles{nil, nil}
|
||||
gpuHandles = &handles{nil}
|
||||
var cudaMgmtName string
|
||||
var cudaMgmtPatterns []string
|
||||
var rocmMgmtName string
|
||||
var rocmMgmtPatterns []string
|
||||
switch runtime.GOOS {
|
||||
case "windows":
|
||||
cudaMgmtName = "nvml.dll"
|
||||
cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
|
||||
copy(cudaMgmtPatterns, CudaWindowsGlobs)
|
||||
rocmMgmtName = "rocm_smi64.dll"
|
||||
rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
|
||||
copy(rocmMgmtPatterns, RocmWindowsGlobs)
|
||||
case "linux":
|
||||
cudaMgmtName = "libnvidia-ml.so"
|
||||
cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
|
||||
copy(cudaMgmtPatterns, CudaLinuxGlobs)
|
||||
rocmMgmtName = "librocm_smi64.so"
|
||||
rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
|
||||
copy(rocmMgmtPatterns, RocmLinuxGlobs)
|
||||
default:
|
||||
return
|
||||
}
|
||||
@@ -101,16 +84,6 @@ func initGPUHandles() {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
|
||||
if len(rocmLibPaths) > 0 {
|
||||
rocm := LoadROCMMgmt(rocmLibPaths)
|
||||
if rocm != nil {
|
||||
slog.Info("Radeon GPU detected")
|
||||
gpuHandles.rocm = rocm
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
@@ -149,66 +122,10 @@ func GetGPUInfo() GpuInfo {
|
||||
slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
|
||||
}
|
||||
}
|
||||
} else if AMDDetected() && gpuHandles.rocm != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
|
||||
ver, err := AMDDriverVersion()
|
||||
if err == nil {
|
||||
slog.Info("AMD Driver: " + ver)
|
||||
} else {
|
||||
// For now this is benign, but we may eventually need to fail compatibility checks
|
||||
slog.Debug("error looking up amd driver version: %s", err)
|
||||
}
|
||||
gfx := AMDGFXVersions()
|
||||
tooOld := false
|
||||
for _, v := range gfx {
|
||||
if v.Major < 9 {
|
||||
slog.Info("AMD GPU too old, falling back to CPU " + v.ToGFXString())
|
||||
tooOld = true
|
||||
break
|
||||
}
|
||||
|
||||
// TODO - remap gfx strings for unsupporetd minor/patch versions to supported for the same major
|
||||
// e.g. gfx1034 works if we map it to gfx1030 at runtime
|
||||
|
||||
}
|
||||
if !tooOld {
|
||||
// TODO - this algo can be shifted over to use sysfs instead of the rocm info library...
|
||||
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
||||
if memInfo.err != nil {
|
||||
slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err)))
|
||||
C.free(unsafe.Pointer(memInfo.err))
|
||||
} else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
|
||||
// Only one GPU detected and it appears to be an integrated GPU - skip it
|
||||
slog.Info("ROCm unsupported integrated GPU detected")
|
||||
} else if memInfo.count > 0 {
|
||||
if memInfo.igpu_index >= 0 {
|
||||
// We have multiple GPUs reported, and one of them is an integrated GPU
|
||||
// so we have to set the env var to bypass it
|
||||
// If the user has specified their own ROCR_VISIBLE_DEVICES, don't clobber it
|
||||
val := os.Getenv("ROCR_VISIBLE_DEVICES")
|
||||
if val == "" {
|
||||
devices := []string{}
|
||||
for i := 0; i < int(memInfo.count); i++ {
|
||||
if i == int(memInfo.igpu_index) {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, strconv.Itoa(i))
|
||||
}
|
||||
val = strings.Join(devices, ",")
|
||||
os.Setenv("ROCR_VISIBLE_DEVICES", val)
|
||||
}
|
||||
slog.Info(fmt.Sprintf("ROCm integrated GPU detected - ROCR_VISIBLE_DEVICES=%s", val))
|
||||
}
|
||||
resp.Library = "rocm"
|
||||
var version C.rocm_version_resp_t
|
||||
C.rocm_get_version(*gpuHandles.rocm, &version)
|
||||
verString := C.GoString(version.str)
|
||||
if version.status == 0 {
|
||||
resp.Variant = "v" + verString
|
||||
} else {
|
||||
slog.Info(fmt.Sprintf("failed to look up ROCm version: %s", verString))
|
||||
}
|
||||
C.free(unsafe.Pointer(version.str))
|
||||
}
|
||||
AMDGetGPUInfo(&resp)
|
||||
if resp.Library != "" {
|
||||
return resp
|
||||
}
|
||||
}
|
||||
if resp.Library == "" {
|
||||
@@ -242,6 +159,15 @@ func getCPUMem() (memInfo, error) {
|
||||
}
|
||||
|
||||
func CheckVRAM() (int64, error) {
|
||||
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
|
||||
if userLimit != "" {
|
||||
avail, err := strconv.ParseInt(userLimit, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
|
||||
}
|
||||
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
|
||||
return avail, nil
|
||||
}
|
||||
gpuInfo := GetGPUInfo()
|
||||
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
|
||||
// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead
|
||||
@@ -329,23 +255,6 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
|
||||
return nil
|
||||
}
|
||||
|
||||
func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
|
||||
var resp C.rocm_init_resp_t
|
||||
resp.rh.verbose = getVerboseState()
|
||||
for _, libPath := range rocmLibPaths {
|
||||
lib := C.CString(libPath)
|
||||
defer C.free(unsafe.Pointer(lib))
|
||||
C.rocm_init(lib, &resp)
|
||||
if resp.err != nil {
|
||||
slog.Info(fmt.Sprintf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err)))
|
||||
C.free(unsafe.Pointer(resp.err))
|
||||
} else {
|
||||
return &resp.rh
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getVerboseState() C.uint16_t {
|
||||
if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
|
||||
return C.uint16_t(1)
|
||||
|
||||
@@ -2,32 +2,38 @@
|
||||
|
||||
package gpu
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -x objective-c
|
||||
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
|
||||
#include "gpu_info_darwin.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/pbnjay/memory"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
|
||||
func CheckVRAM() (int64, error) {
|
||||
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
|
||||
if userLimit != "" {
|
||||
avail, err := strconv.ParseInt(userLimit, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
|
||||
}
|
||||
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
|
||||
return avail, nil
|
||||
}
|
||||
|
||||
if runtime.GOARCH == "amd64" {
|
||||
// gpu not supported, this may not be metal
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// on macOS, there's already buffer for available vram (see below) so just return the total
|
||||
systemMemory := int64(memory.TotalMemory())
|
||||
|
||||
// macOS limits how much memory is available to the GPU based on the amount of system memory
|
||||
// TODO: handle case where iogpu.wired_limit_mb is set to a higher value
|
||||
if systemMemory <= 36*1024*1024*1024 {
|
||||
systemMemory = systemMemory * 2 / 3
|
||||
} else {
|
||||
systemMemory = systemMemory * 3 / 4
|
||||
}
|
||||
|
||||
return systemMemory, nil
|
||||
recommendedMaxVRAM := int64(C.getRecommendedMaxVRAM())
|
||||
return recommendedMaxVRAM, nil
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
|
||||
@@ -53,7 +53,6 @@ void cpu_check_ram(mem_info_t *resp);
|
||||
#endif
|
||||
|
||||
#include "gpu_info_cuda.h"
|
||||
#include "gpu_info_rocm.h"
|
||||
|
||||
#endif // __GPU_INFO_H__
|
||||
#endif // __APPLE__
|
||||
@@ -124,31 +124,31 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
// When in verbose mode, report more information about
|
||||
// the card we discover, but don't fail on error
|
||||
ret = (*h.nvmlDeviceGetName)(device, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(h.verbose, "nvmlDeviceGetName failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] CUDA device name: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.nvmlDeviceGetBoardPartNumber)(device, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(h.verbose, "nvmlDeviceGetBoardPartNumber failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] CUDA part number: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.nvmlDeviceGetSerial)(device, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(h.verbose, "nvmlDeviceGetSerial failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] CUDA S/N: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.nvmlDeviceGetVbiosVersion)(device, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(h.verbose, "nvmlDeviceGetVbiosVersion failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] CUDA vbios version: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.nvmlDeviceGetBrand)(device, &brand);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
if (ret != NVML_SUCCESS) {
|
||||
LOG(h.verbose, "nvmlDeviceGetBrand failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] CUDA brand: %d\n", i, brand);
|
||||
@@ -156,7 +156,7 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
}
|
||||
|
||||
LOG(h.verbose, "[%d] CUDA totalMem %ld\n", i, memInfo.total);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %ld\n", i, memInfo.free);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %ld\n", i, memInfo.used);
|
||||
|
||||
resp->total += memInfo.total;
|
||||
resp->free += memInfo.free;
|
||||
|
||||
3
gpu/gpu_info_darwin.h
Normal file
3
gpu/gpu_info_darwin.h
Normal file
@@ -0,0 +1,3 @@
|
||||
#import <Metal/Metal.h>
|
||||
#include <stdint.h>
|
||||
uint64_t getRecommendedMaxVRAM();
|
||||
11
gpu/gpu_info_darwin.m
Normal file
11
gpu/gpu_info_darwin.m
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build darwin
|
||||
#include "gpu_info_darwin.h"
|
||||
|
||||
uint64_t getRecommendedMaxVRAM()
|
||||
{
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
uint64_t result = device.recommendedMaxWorkingSetSize;
|
||||
CFRelease(device);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,198 +0,0 @@
|
||||
#ifndef __APPLE__
|
||||
|
||||
#include "gpu_info_rocm.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
|
||||
rsmi_status_t ret;
|
||||
resp->err = NULL;
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[] = {
|
||||
{"rsmi_init", (void *)&resp->rh.rsmi_init},
|
||||
{"rsmi_shut_down", (void *)&resp->rh.rsmi_shut_down},
|
||||
{"rsmi_dev_memory_total_get", (void *)&resp->rh.rsmi_dev_memory_total_get},
|
||||
{"rsmi_dev_memory_usage_get", (void *)&resp->rh.rsmi_dev_memory_usage_get},
|
||||
{"rsmi_version_get", (void *)&resp->rh.rsmi_version_get},
|
||||
{"rsmi_num_monitor_devices", (void*)&resp->rh.rsmi_num_monitor_devices},
|
||||
{"rsmi_dev_id_get", (void*)&resp->rh.rsmi_dev_id_get},
|
||||
{"rsmi_dev_name_get", (void *)&resp->rh.rsmi_dev_name_get},
|
||||
{"rsmi_dev_brand_get", (void *)&resp->rh.rsmi_dev_brand_get},
|
||||
{"rsmi_dev_vendor_name_get", (void *)&resp->rh.rsmi_dev_vendor_name_get},
|
||||
{"rsmi_dev_vram_vendor_get", (void *)&resp->rh.rsmi_dev_vram_vendor_get},
|
||||
{"rsmi_dev_serial_number_get", (void *)&resp->rh.rsmi_dev_serial_number_get},
|
||||
{"rsmi_dev_subsystem_name_get", (void *)&resp->rh.rsmi_dev_subsystem_name_get},
|
||||
{"rsmi_dev_vbios_version_get", (void *)&resp->rh.rsmi_dev_vbios_version_get},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
|
||||
if (!resp->rh.handle) {
|
||||
char *msg = LOAD_ERR();
|
||||
snprintf(buf, buflen,
|
||||
"Unable to load %s library to query for Radeon GPUs: %s\n",
|
||||
rocm_lib_path, msg);
|
||||
free(msg);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO once we've squashed the remaining corner cases remove this log
|
||||
LOG(resp->rh.verbose, "wiring rocm management library functions in %s\n", rocm_lib_path);
|
||||
|
||||
for (i = 0; l[i].s != NULL; i++) {
|
||||
// TODO once we've squashed the remaining corner cases remove this log
|
||||
LOG(resp->rh.verbose, "dlsym: %s\n", l[i].s);
|
||||
|
||||
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
resp->rh.handle = NULL;
|
||||
char *msg = LOAD_ERR();
|
||||
LOG(resp->rh.verbose, "dlerr: %s\n", msg);
|
||||
UNLOAD_LIBRARY(resp->rh.handle);
|
||||
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
||||
msg);
|
||||
free(msg);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ret = (*resp->rh.rsmi_init)(0);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(resp->rh.verbose, "rsmi_init err: %d\n", ret);
|
||||
UNLOAD_LIBRARY(resp->rh.handle);
|
||||
resp->rh.handle = NULL;
|
||||
snprintf(buf, buflen, "rocm vram init failure: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
|
||||
resp->err = NULL;
|
||||
resp->igpu_index = -1;
|
||||
uint64_t totalMem = 0;
|
||||
uint64_t usedMem = 0;
|
||||
rsmi_status_t ret;
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
|
||||
if (h.handle == NULL) {
|
||||
resp->err = strdup("rocm handle not initialized");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.rsmi_num_monitor_devices)(&resp->count);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
LOG(h.verbose, "discovered %d ROCm GPU Devices\n", resp->count);
|
||||
|
||||
resp->total = 0;
|
||||
resp->free = 0;
|
||||
for (i = 0; i < resp->count; i++) {
|
||||
if (h.verbose) {
|
||||
// When in verbose mode, report more information about
|
||||
// the card we discover, but don't fail on error
|
||||
ret = (*h.rsmi_dev_name_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_name_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm device name: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_brand_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_brand_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm brand: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_vendor_name_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_vendor_name_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm vendor: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_vram_vendor_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_vram_vendor_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm VRAM vendor: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_serial_number_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_serial_number_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm S/N: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_subsystem_name_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_subsystem_name_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm subsystem name: %s\n", i, buf);
|
||||
}
|
||||
ret = (*h.rsmi_dev_vbios_version_get)(i, buf, buflen);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
LOG(h.verbose, "rsmi_dev_vbios_version_get failed: %d\n", ret);
|
||||
} else {
|
||||
LOG(h.verbose, "[%d] ROCm vbios version: %s\n", i, buf);
|
||||
}
|
||||
}
|
||||
|
||||
// Get total memory - used memory for available memory
|
||||
ret = (*h.rsmi_dev_memory_total_get)(i, RSMI_MEM_TYPE_VRAM, &totalMem);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
ret = (*h.rsmi_dev_memory_usage_get)(i, RSMI_MEM_TYPE_VRAM, &usedMem);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
LOG(h.verbose, "[%d] ROCm totalMem %ld\n", i, totalMem);
|
||||
LOG(h.verbose, "[%d] ROCm usedMem %ld\n", i, usedMem);
|
||||
if (totalMem < 1024 * 1024 * 1024) {
|
||||
// Do not add up integrated GPU memory capacity, it's a bogus 512M, and actually uses system memory
|
||||
LOG(h.verbose, "[%d] ROCm integrated GPU\n", i);
|
||||
resp->igpu_index = i;
|
||||
} else {
|
||||
resp->total += totalMem;
|
||||
resp->free += totalMem - usedMem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) {
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
if (h.handle == NULL) {
|
||||
resp->str = strdup("rocm handle not initialized");
|
||||
resp->status = 1;
|
||||
return;
|
||||
}
|
||||
rsmi_version_t ver;
|
||||
rsmi_status_t ret;
|
||||
ret = h.rsmi_version_get(&ver);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
snprintf(buf, buflen, "unexpected response on version lookup %d", ret);
|
||||
resp->status = 1;
|
||||
} else {
|
||||
snprintf(buf, buflen, "%d", ver.major);
|
||||
resp->status = 0;
|
||||
}
|
||||
resp->str = strdup(buf);
|
||||
}
|
||||
|
||||
#endif // __APPLE__
|
||||
@@ -1,59 +0,0 @@
|
||||
#ifndef __APPLE__
|
||||
#ifndef __GPU_INFO_ROCM_H__
|
||||
#define __GPU_INFO_ROCM_H__
|
||||
#include "gpu_info.h"
|
||||
|
||||
// Just enough typedef's to dlopen/dlsym for memory information
|
||||
typedef enum rsmi_status_return {
|
||||
RSMI_STATUS_SUCCESS = 0,
|
||||
// Other values omitted for now...
|
||||
} rsmi_status_t;
|
||||
|
||||
typedef enum rsmi_memory_type {
|
||||
RSMI_MEM_TYPE_VRAM = 0,
|
||||
RSMI_MEM_TYPE_VIS_VRAM,
|
||||
RSMI_MEM_TYPE_GTT,
|
||||
} rsmi_memory_type_t;
|
||||
|
||||
typedef struct {
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
uint32_t patch;
|
||||
const char *build;
|
||||
} rsmi_version_t;
|
||||
|
||||
typedef struct rocm_handle {
|
||||
void *handle;
|
||||
uint16_t verbose;
|
||||
rsmi_status_t (*rsmi_init)(uint64_t);
|
||||
rsmi_status_t (*rsmi_shut_down)(void);
|
||||
rsmi_status_t (*rsmi_dev_memory_total_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
|
||||
rsmi_status_t (*rsmi_dev_memory_usage_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
|
||||
rsmi_status_t (*rsmi_version_get) (rsmi_version_t *version);
|
||||
rsmi_status_t (*rsmi_num_monitor_devices) (uint32_t *);
|
||||
rsmi_status_t (*rsmi_dev_id_get)(uint32_t, uint16_t *);
|
||||
rsmi_status_t (*rsmi_dev_name_get) (uint32_t,char *,size_t);
|
||||
rsmi_status_t (*rsmi_dev_brand_get) (uint32_t, char *, uint32_t);
|
||||
rsmi_status_t (*rsmi_dev_vendor_name_get) (uint32_t, char *, uint32_t);
|
||||
rsmi_status_t (*rsmi_dev_vram_vendor_get) (uint32_t, char *, uint32_t);
|
||||
rsmi_status_t (*rsmi_dev_serial_number_get) (uint32_t, char *, uint32_t);
|
||||
rsmi_status_t (*rsmi_dev_subsystem_name_get) (uint32_t, char *, uint32_t);
|
||||
rsmi_status_t (*rsmi_dev_vbios_version_get) (uint32_t, char *, uint32_t);
|
||||
} rocm_handle_t;
|
||||
|
||||
typedef struct rocm_init_resp {
|
||||
char *err; // If err is non-null handle is invalid
|
||||
rocm_handle_t rh;
|
||||
} rocm_init_resp_t;
|
||||
|
||||
typedef struct rocm_version_resp {
|
||||
rsmi_status_t status;
|
||||
char *str; // Contains version or error string if status != 0
|
||||
} rocm_version_resp_t;
|
||||
|
||||
void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp);
|
||||
void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
|
||||
void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp);
|
||||
|
||||
#endif // __GPU_INFO_ROCM_H__
|
||||
#endif // __APPLE__
|
||||
@@ -14,17 +14,14 @@
|
||||
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
|
||||
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
|
||||
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
|
||||
inline char *LOAD_ERR() {
|
||||
LPSTR messageBuffer = NULL;
|
||||
size_t size = FormatMessageA(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
(LPSTR)&messageBuffer, 0, NULL);
|
||||
char *resp = strdup(messageBuffer);
|
||||
LocalFree(messageBuffer);
|
||||
return resp;
|
||||
}
|
||||
#define LOAD_ERR() ({\
|
||||
LPSTR messageBuffer = NULL; \
|
||||
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \
|
||||
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \
|
||||
char *resp = strdup(messageBuffer); \
|
||||
LocalFree(messageBuffer); \
|
||||
resp; \
|
||||
})
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
|
||||
|
||||
@@ -28,13 +28,13 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/gpu"
|
||||
)
|
||||
|
||||
type dynExtServer struct {
|
||||
@@ -72,7 +72,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||
slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
|
||||
mutex.Lock()
|
||||
}
|
||||
updatePath(filepath.Dir(library))
|
||||
gpu.UpdatePath(filepath.Dir(library))
|
||||
libPath := C.CString(library)
|
||||
defer C.free(unsafe.Pointer(libPath))
|
||||
resp := newExtServerResp(512)
|
||||
@@ -106,7 +106,12 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||
sparams.memory_f16 = C.bool(opts.F16KV)
|
||||
sparams.use_mlock = C.bool(opts.UseMLock)
|
||||
sparams.use_mmap = C.bool(opts.UseMMap)
|
||||
sparams.numa = C.bool(opts.UseNUMA)
|
||||
|
||||
if opts.UseNUMA {
|
||||
sparams.numa = C.int(1)
|
||||
} else {
|
||||
sparams.numa = C.int(0)
|
||||
}
|
||||
|
||||
sparams.lora_adapters = nil
|
||||
for i := 0; i < len(adapters); i++ {
|
||||
@@ -143,6 +148,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||
}
|
||||
|
||||
slog.Info("Initializing llama server")
|
||||
slog.Debug(fmt.Sprintf("server params: %+v", sparams))
|
||||
initResp := newExtServerResp(128)
|
||||
defer freeExtServerResp(initResp)
|
||||
C.dyn_llama_server_init(llm.s, &sparams, &initResp)
|
||||
@@ -360,25 +366,3 @@ func (llm *dynExtServer) Close() {
|
||||
C.dyn_llama_server_stop(llm.s)
|
||||
mutex.Unlock()
|
||||
}
|
||||
|
||||
func updatePath(dir string) {
|
||||
if runtime.GOOS == "windows" {
|
||||
tmpDir := filepath.Dir(dir)
|
||||
pathComponents := strings.Split(os.Getenv("PATH"), ";")
|
||||
i := 0
|
||||
for _, comp := range pathComponents {
|
||||
if strings.EqualFold(comp, dir) {
|
||||
return
|
||||
}
|
||||
// Remove any other prior paths to our temp dir
|
||||
if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
|
||||
pathComponents[i] = comp
|
||||
i++
|
||||
}
|
||||
}
|
||||
newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
|
||||
slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
|
||||
os.Setenv("PATH", newPath)
|
||||
}
|
||||
// linux and darwin rely on rpath
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
params.main_gpu = sparams->main_gpu;
|
||||
params.use_mlock = sparams->use_mlock;
|
||||
params.use_mmap = sparams->use_mmap;
|
||||
params.numa = sparams->numa;
|
||||
params.numa = (ggml_numa_strategy)sparams->numa;
|
||||
params.embedding = sparams->embedding;
|
||||
if (sparams->model != NULL) {
|
||||
params.model = sparams->model;
|
||||
@@ -111,7 +111,8 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
}
|
||||
#endif
|
||||
|
||||
llama_backend_init(params.numa);
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
// load the model
|
||||
if (!llama->load_model(params)) {
|
||||
@@ -146,8 +147,8 @@ void llama_server_start() {
|
||||
&llama_server_context::process_single_task, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_finish_multitask(std::bind(
|
||||
&llama_server_context::on_finish_multitask, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_all_tasks_finished(std::bind(
|
||||
&llama_server_context::run_on_all_tasks_finished, llama));
|
||||
llama->queue_tasks.on_run_slots(std::bind(
|
||||
&llama_server_context::update_slots, llama));
|
||||
llama->queue_results.on_multitask_update(std::bind(
|
||||
&llama_server_queue::update_multitask,
|
||||
&llama->queue_tasks,
|
||||
@@ -208,7 +209,6 @@ void llama_server_completion(const char *json_req, ext_server_resp_t *resp) {
|
||||
void llama_server_completion_next_result(const int task_id,
|
||||
ext_server_task_result_t *resp) {
|
||||
assert(llama != NULL && resp != NULL);
|
||||
std::string msg;
|
||||
resp->id = -1;
|
||||
resp->stop = false;
|
||||
resp->error = false;
|
||||
|
||||
@@ -41,7 +41,7 @@ typedef struct ext_server_params {
|
||||
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool numa; // attempt optimizations that help on some NUMA systems
|
||||
int numa; // attempt optimizations that help on some NUMA systems
|
||||
bool embedding; // get only sentence embedding
|
||||
ext_server_lora_adapter_t *lora_adapters;
|
||||
char *mmproj;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# common logic accross linux and darwin
|
||||
# common logic across linux and darwin
|
||||
|
||||
init_vars() {
|
||||
case "${GOARCH}" in
|
||||
@@ -101,7 +101,7 @@ compress_libs() {
|
||||
pids=""
|
||||
rm -rf ${BUILD_DIR}/lib/*.${LIB_EXT}*.gz
|
||||
for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
|
||||
gzip --best -f ${lib} &
|
||||
gzip -n --best -f ${lib} &
|
||||
pids+=" $!"
|
||||
done
|
||||
echo
|
||||
|
||||
@@ -179,17 +179,21 @@ fi
|
||||
|
||||
if [ -d "${ROCM_PATH}" ]; then
|
||||
echo "ROCm libraries detected - building dynamic ROCm library"
|
||||
if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
|
||||
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
|
||||
if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
|
||||
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
|
||||
fi
|
||||
init_vars
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
|
||||
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
|
||||
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
|
||||
build
|
||||
|
||||
# Note: the ROCM libs and runtime library files are too large to embed, so we depend on
|
||||
# them being present at runtime on the host
|
||||
# Record the ROCM dependencies
|
||||
rm -f "${BUILD_DIR}/lib/deps.txt"
|
||||
touch "${BUILD_DIR}/lib/deps.txt"
|
||||
for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
|
||||
echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
|
||||
done
|
||||
compress_libs
|
||||
fi
|
||||
|
||||
|
||||
@@ -2,19 +2,52 @@
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function amdGPUs {
|
||||
if ($env:AMDGPU_TARGETS) {
|
||||
return $env:AMDGPU_TARGETS
|
||||
}
|
||||
# TODO - load from some common data file for linux + windows build consistency
|
||||
$GPU_LIST = @(
|
||||
"gfx900"
|
||||
"gfx906:xnack-"
|
||||
"gfx908:xnack-"
|
||||
"gfx90a:xnack+"
|
||||
"gfx90a:xnack-"
|
||||
"gfx1010"
|
||||
"gfx1012"
|
||||
"gfx1030"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102"
|
||||
)
|
||||
$GPU_LIST -join ';'
|
||||
}
|
||||
|
||||
function init_vars {
|
||||
# Verify the environment is a Developer Shell for MSVC 2019
|
||||
write-host $env:VSINSTALLDIR
|
||||
if (($env:VSINSTALLDIR -eq $null)) {
|
||||
Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
|
||||
exit 1
|
||||
}
|
||||
$script:SRC_DIR = $(resolve-path "..\..\")
|
||||
$script:llamacppDir = "../llama.cpp"
|
||||
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64")
|
||||
$script:cmakeDefs = @(
|
||||
"-DBUILD_SHARED_LIBS=on",
|
||||
"-DLLAMA_NATIVE=off"
|
||||
)
|
||||
$script:cmakeTargets = @("ext_server")
|
||||
$script:ARCH = "amd64" # arm not yet supported.
|
||||
if ($env:CGO_CFLAGS -contains "-g") {
|
||||
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
|
||||
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
|
||||
$script:config = "RelWithDebInfo"
|
||||
} else {
|
||||
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
|
||||
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
|
||||
$script:config = "Release"
|
||||
}
|
||||
if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
|
||||
$script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
|
||||
}
|
||||
# Try to find the CUDA dir
|
||||
if ($env:CUDA_LIB_DIR -eq $null) {
|
||||
$d=(get-command -ea 'silentlycontinue' nvcc).path
|
||||
@@ -154,9 +187,10 @@ apply_patches
|
||||
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
|
||||
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
|
||||
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
|
||||
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
|
||||
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
|
||||
write-host "Building LCD CPU"
|
||||
build
|
||||
@@ -164,7 +198,8 @@ install
|
||||
sign
|
||||
compress_libs
|
||||
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
|
||||
write-host "Building AVX CPU"
|
||||
build
|
||||
@@ -172,7 +207,8 @@ install
|
||||
sign
|
||||
compress_libs
|
||||
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
init_vars
|
||||
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
|
||||
write-host "Building AVX2 CPU"
|
||||
build
|
||||
@@ -189,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
|
||||
}
|
||||
init_vars
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
|
||||
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
$script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
|
||||
write-host "Building CUDA"
|
||||
build
|
||||
install
|
||||
sign
|
||||
compress_libs
|
||||
}
|
||||
# TODO - actually implement ROCm support on windows
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
|
||||
|
||||
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
||||
md "${script:buildDir}/lib" -ea 0 > $null
|
||||
echo $null >> "${script:buildDir}/lib/.generated"
|
||||
if ($null -ne $env:HIP_PATH) {
|
||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||
if ($null -ne $script:ROCM_VERSION) {
|
||||
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
|
||||
}
|
||||
|
||||
init_vars
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
|
||||
$script:cmakeDefs += @(
|
||||
"-G", "Ninja",
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DLLAMA_HIPBLAS=on",
|
||||
"-DLLAMA_AVX=on",
|
||||
"-DLLAMA_AVX2=off",
|
||||
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
|
||||
"-DAMDGPU_TARGETS=$(amdGPUs)",
|
||||
"-DGPU_TARGETS=$(amdGPUs)"
|
||||
)
|
||||
|
||||
# Make sure the ROCm binary dir is first in the path
|
||||
$env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
|
||||
|
||||
# We have to clobber the LIB var from the developer shell for clang to work properly
|
||||
$env:LIB=""
|
||||
|
||||
write-host "Building ROCm"
|
||||
build
|
||||
# Ninja doesn't prefix with config name
|
||||
${script:config}=""
|
||||
install
|
||||
if ($null -ne $script:DUMPBIN) {
|
||||
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
|
||||
}
|
||||
sign
|
||||
compress_libs
|
||||
}
|
||||
|
||||
cleanup
|
||||
write-host "`ngo generate completed"
|
||||
|
||||
19
llm/ggml.go
19
llm/ggml.go
@@ -31,6 +31,11 @@ const (
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeQ3_K_XS
|
||||
fileTypeIQ3_XXS
|
||||
)
|
||||
|
||||
func fileType(fileType uint32) string {
|
||||
@@ -69,6 +74,16 @@ func fileType(fileType uint32) string {
|
||||
return "Q5_K_M"
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case fileTypeIQ2_XXS:
|
||||
return "IQ2_XXS"
|
||||
case fileTypeIQ2_XS:
|
||||
return "IQ2_XS"
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case fileTypeQ3_K_XS:
|
||||
return "Q3_K_XS"
|
||||
case fileTypeIQ3_XXS:
|
||||
return "IQ3_XXS"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
@@ -148,9 +163,9 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
||||
case FILE_MAGIC_GGLA:
|
||||
c = &containerLORA{}
|
||||
case FILE_MAGIC_GGUF_LE:
|
||||
c = &containerGGUF{bo: binary.LittleEndian}
|
||||
c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
c = &containerGGUF{bo: binary.BigEndian}
|
||||
c = &ContainerGGUF{ByteOrder: binary.BigEndian}
|
||||
default:
|
||||
return nil, errors.New("invalid file magic")
|
||||
}
|
||||
|
||||
719
llm/gguf.go
719
llm/gguf.go
File diff suppressed because it is too large
Load Diff
Submodule llm/llama.cpp updated: 6c00a06692...c29af7e225
12
llm/llm.go
12
llm/llm.go
@@ -19,7 +19,7 @@ type LLM interface {
|
||||
Close()
|
||||
}
|
||||
|
||||
func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
||||
func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
||||
if _, err := os.Stat(model); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||
|
||||
opts.RopeFrequencyBase = 0.0
|
||||
opts.RopeFrequencyScale = 0.0
|
||||
return newLlmServer(info, workDir, model, adapters, projectors, opts)
|
||||
return newLlmServer(info, model, adapters, projectors, opts)
|
||||
}
|
||||
|
||||
// Give any native cgo implementations an opportunity to initialize
|
||||
func Init(workdir string) error {
|
||||
return nativeInit(workdir)
|
||||
func Init() error {
|
||||
return nativeInit()
|
||||
}
|
||||
|
||||
func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
||||
func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
||||
dynLibs := getDynLibs(gpuInfo)
|
||||
|
||||
// Check to see if the user has requested a specific library instead of auto-detecting
|
||||
@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
|
||||
_, err := os.Stat(dynLibs[0])
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
|
||||
err = nativeInit(workDir)
|
||||
err = nativeInit()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index d86d7e04..2694e92e 100644
|
||||
index 2b2f4a0f..afac49af 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -901,13 +901,15 @@ struct llama_server_context
|
||||
slot.sent_count += result.text_to_send.size();
|
||||
@@ -997,13 +997,15 @@ struct llama_server_context
|
||||
slot.n_sent_text += result.text_to_send.size();
|
||||
// add the token to slot queue and cache
|
||||
}
|
||||
- slot.add_token_string(result);
|
||||
|
||||
@@ -1,30 +1,29 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index 3102762c..568ac1d0 100644
|
||||
index 2b2f4a0f..25857bdd 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -307,6 +307,10 @@ struct llama_client_slot
|
||||
}
|
||||
};
|
||||
@@ -31,6 +31,10 @@
|
||||
#include <atomic>
|
||||
#include <signal.h>
|
||||
|
||||
+#ifdef GGML_USE_CUBLAS
|
||||
+extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
+#endif
|
||||
+
|
||||
struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
@@ -353,6 +357,10 @@ struct llama_server_context
|
||||
using json = nlohmann::json;
|
||||
|
||||
struct server_params {
|
||||
@@ -363,6 +367,9 @@ struct llama_server_context
|
||||
llama_free_model(model);
|
||||
model = nullptr;
|
||||
}
|
||||
+#ifdef GGML_USE_CUBLAS
|
||||
+ ggml_free_cublas();
|
||||
+#endif
|
||||
+
|
||||
}
|
||||
|
||||
bool load_model(const gpt_params ¶ms_)
|
||||
@@ -3093,6 +3101,7 @@ int main(int argc, char **argv)
|
||||
@@ -3494,6 +3501,7 @@ int main(int argc, char **argv)
|
||||
sigemptyset (&sigint_action.sa_mask);
|
||||
sigint_action.sa_flags = 0;
|
||||
sigaction(SIGINT, &sigint_action, NULL);
|
||||
@@ -32,16 +31,11 @@ index 3102762c..568ac1d0 100644
|
||||
#elif defined (_WIN32)
|
||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
@@ -3106,3 +3115,4 @@ int main(int argc, char **argv)
|
||||
llama_backend_free();
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
|
||||
index 96976f24..3543920e 100644
|
||||
index 0c6501e9..75c12723 100644
|
||||
--- a/ggml-cuda.cu
|
||||
+++ b/ggml-cuda.cu
|
||||
@@ -39,6 +39,7 @@
|
||||
@@ -43,6 +43,7 @@
|
||||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
|
||||
#define cublasCreate hipblasCreate
|
||||
@@ -49,30 +43,30 @@ index 96976f24..3543920e 100644
|
||||
#define cublasGemmEx hipblasGemmEx
|
||||
#define cublasGemmBatchedEx hipblasGemmBatchedEx
|
||||
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
|
||||
@@ -7928,10 +7929,11 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
@@ -8694,10 +8695,10 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
return g_cublas_loaded;
|
||||
}
|
||||
|
||||
+static bool g_cublas_initialized = false;
|
||||
+
|
||||
GGML_CALL void ggml_init_cublas() {
|
||||
-GGML_CALL void ggml_init_cublas() {
|
||||
- static bool initialized = false;
|
||||
+static bool g_cublas_initialized = false;
|
||||
|
||||
- if (!initialized) {
|
||||
+GGML_CALL void ggml_init_cublas() {
|
||||
+ if (!g_cublas_initialized) {
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -7941,7 +7943,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8707,7 +8708,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
#endif
|
||||
|
||||
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
|
||||
- initialized = true;
|
||||
+ g_cublas_initialized = true;
|
||||
g_cublas_loaded = false;
|
||||
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
|
||||
return;
|
||||
}
|
||||
@@ -8011,7 +8013,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8778,7 +8779,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
// configure logging to stdout
|
||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||
|
||||
@@ -81,7 +75,7 @@ index 96976f24..3543920e 100644
|
||||
g_cublas_loaded = true;
|
||||
}
|
||||
}
|
||||
@@ -11528,3 +11530,17 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
@@ -12345,3 +12346,22 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
}
|
||||
return device_count;
|
||||
}
|
||||
@@ -89,28 +83,32 @@ index 96976f24..3543920e 100644
|
||||
+extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
+GGML_CALL void ggml_free_cublas(void) {
|
||||
+ for (int id = 0; id < g_device_count; ++id) {
|
||||
+#if !defined(GGML_USE_HIPBLAS)
|
||||
+#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__))
|
||||
+ if (g_device_caps[id].vmm) {
|
||||
+ CU_CHECK(cuMemUnmap(g_cuda_pool_addr[id], g_cuda_pool_size[id]));
|
||||
+ g_cuda_pool_size[id] = 0;
|
||||
+ g_cuda_pool_addr[id] = 0;
|
||||
+ }
|
||||
+#endif
|
||||
+ // TODO: free legacy non-vmm memory
|
||||
+ // destroy cublas handle
|
||||
+ CUBLAS_CHECK(cublasDestroy(g_cublas_handles[id]));
|
||||
+ g_cublas_handles[id] = nullptr;
|
||||
+ }
|
||||
+
|
||||
+ g_cublas_initialized = false;
|
||||
+}
|
||||
\ No newline at end of file
|
||||
diff --git a/ggml-cuda.h b/ggml-cuda.h
|
||||
index b1ebd61d..b4c80c2c 100644
|
||||
index b1ebd61d..6dd58ddf 100644
|
||||
--- a/ggml-cuda.h
|
||||
+++ b/ggml-cuda.h
|
||||
@@ -20,6 +20,9 @@ extern "C" {
|
||||
// Always success. To check if CUDA is actually loaded, use `ggml_cublas_loaded`.
|
||||
GGML_API GGML_CALL void ggml_init_cublas(void);
|
||||
@@ -23,6 +23,9 @@ GGML_API GGML_CALL void ggml_init_cublas(void);
|
||||
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
|
||||
GGML_API GGML_CALL bool ggml_cublas_loaded(void);
|
||||
|
||||
+// Release CUDA resources
|
||||
+GGML_API GGML_CALL void ggml_free_cublas(void);
|
||||
+
|
||||
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
|
||||
GGML_API GGML_CALL bool ggml_cublas_loaded(void);
|
||||
GGML_API GGML_CALL void * ggml_cuda_host_malloc(size_t size);
|
||||
GGML_API GGML_CALL void ggml_cuda_host_free(void * ptr);
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index a0b46970..7800c6e7 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <atomic>
|
||||
+#include <signal.h>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
@@ -2511,6 +2512,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
|
||||
}
|
||||
}
|
||||
|
||||
+std::function<void(int)> shutdown_handler;
|
||||
+inline void signal_handler(int signal) { shutdown_handler(signal); }
|
||||
+
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if SERVER_VERBOSE != 1
|
||||
@@ -3128,8 +3132,25 @@ int main(int argc, char **argv)
|
||||
std::placeholders::_2,
|
||||
std::placeholders::_3
|
||||
));
|
||||
- llama.queue_tasks.start_loop();
|
||||
|
||||
+ shutdown_handler = [&](int) {
|
||||
+ llama.queue_tasks.terminate();
|
||||
+ };
|
||||
+
|
||||
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
+ struct sigaction sigint_action;
|
||||
+ sigint_action.sa_handler = signal_handler;
|
||||
+ sigemptyset (&sigint_action.sa_mask);
|
||||
+ sigint_action.sa_flags = 0;
|
||||
+ sigaction(SIGINT, &sigint_action, NULL);
|
||||
+#elif defined (_WIN32)
|
||||
+ auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
+ return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
+ };
|
||||
+ SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
+#endif
|
||||
+ llama.queue_tasks.start_loop();
|
||||
+ svr.stop();
|
||||
t.join();
|
||||
|
||||
llama_backend_free();
|
||||
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
|
||||
index 54854896..0ee670db 100644
|
||||
--- a/examples/server/utils.hpp
|
||||
+++ b/examples/server/utils.hpp
|
||||
@@ -220,6 +220,7 @@ inline std::string format_chatml(std::vector<json> messages)
|
||||
struct llama_server_queue {
|
||||
int id = 0;
|
||||
std::mutex mutex_tasks;
|
||||
+ bool running;
|
||||
// queues
|
||||
std::vector<task_server> queue_tasks;
|
||||
std::vector<task_server> queue_tasks_deferred;
|
||||
@@ -278,9 +279,18 @@ struct llama_server_queue {
|
||||
queue_tasks_deferred.clear();
|
||||
}
|
||||
|
||||
- // Start the main loop. This call is blocking
|
||||
- [[noreturn]]
|
||||
+ // end the start_loop routine
|
||||
+ void terminate() {
|
||||
+ {
|
||||
+ std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
+ running = false;
|
||||
+ }
|
||||
+ condition_tasks.notify_all();
|
||||
+ }
|
||||
+
|
||||
+ // Start the main loop.
|
||||
void start_loop() {
|
||||
+ running = true;
|
||||
while (true) {
|
||||
// new task arrived
|
||||
LOG_VERBOSE("have new task", {});
|
||||
@@ -324,8 +334,12 @@ struct llama_server_queue {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
if (queue_tasks.empty()) {
|
||||
+ if (!running) {
|
||||
+ LOG_VERBOSE("ending start_loop", {});
|
||||
+ return;
|
||||
+ }
|
||||
condition_tasks.wait(lock, [&]{
|
||||
- return !queue_tasks.empty();
|
||||
+ return (!queue_tasks.empty() || !running);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func nativeInit(workdir string) error {
|
||||
func nativeInit() error {
|
||||
slog.Info("Extracting dynamic libraries...")
|
||||
assetsDir, err := gpu.AssetsDir()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if runtime.GOOS == "darwin" {
|
||||
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
||||
err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
|
||||
if err != nil {
|
||||
if err == payloadMissing {
|
||||
// TODO perhaps consider this a hard failure on arm macs?
|
||||
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
|
||||
os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
|
||||
}
|
||||
|
||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
|
||||
libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
|
||||
if err != nil {
|
||||
if err == payloadMissing {
|
||||
slog.Info(fmt.Sprintf("%s", payloadMissing))
|
||||
@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||
func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
|
||||
files, err := fs.Glob(libEmbed, glob)
|
||||
if err != nil || len(files) == 0 {
|
||||
return nil, payloadMissing
|
||||
}
|
||||
libs := []string{}
|
||||
|
||||
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
|
||||
// and tracking by version so we don't reexpand the files every time
|
||||
// Also maybe consider lazy loading only what is needed
|
||||
|
||||
g := new(errgroup.Group)
|
||||
for _, file := range files {
|
||||
pathComps := strings.Split(file, "/")
|
||||
@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||
g.Go(func() error {
|
||||
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
|
||||
// Include the variant in the path to avoid conflicts between multiple server libs
|
||||
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
|
||||
targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
|
||||
srcFile, err := libEmbed.Open(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read payload %s: %v", file, err)
|
||||
}
|
||||
defer srcFile.Close()
|
||||
if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
||||
return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
|
||||
}
|
||||
src := io.Reader(srcFile)
|
||||
filename := file
|
||||
@@ -196,27 +196,21 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||
libs = append(libs, destFile)
|
||||
}
|
||||
|
||||
_, err = os.Stat(destFile)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write payload %s: %v", file, err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
if _, err := io.Copy(destFile, src); err != nil {
|
||||
defer destFp.Close()
|
||||
if _, err := io.Copy(destFp, src); err != nil {
|
||||
return fmt.Errorf("copy payload %s: %v", file, err)
|
||||
}
|
||||
case err != nil:
|
||||
return fmt.Errorf("stat payload %s: %v", file, err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
return libs, g.Wait()
|
||||
}
|
||||
|
||||
func extractPayloadFiles(workDir, glob string) error {
|
||||
func extractPayloadFiles(assetsDir, glob string) error {
|
||||
files, err := fs.Glob(libEmbed, glob)
|
||||
if err != nil || len(files) == 0 {
|
||||
return payloadMissing
|
||||
@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
|
||||
return fmt.Errorf("read payload %s: %v", file, err)
|
||||
}
|
||||
defer srcFile.Close()
|
||||
if err := os.MkdirAll(workDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
|
||||
if err := os.MkdirAll(assetsDir, 0o755); err != nil {
|
||||
return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
|
||||
}
|
||||
src := io.Reader(srcFile)
|
||||
filename := file
|
||||
@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
|
||||
filename = strings.TrimSuffix(filename, ".gz")
|
||||
}
|
||||
|
||||
destFile := filepath.Join(workDir, filepath.Base(filename))
|
||||
destFile := filepath.Join(assetsDir, filepath.Base(filename))
|
||||
_, err = os.Stat(destFile)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write payload %s: %v", file, err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
if _, err := io.Copy(destFile, src); err != nil {
|
||||
defer destFp.Close()
|
||||
if _, err := io.Copy(destFp, src); err != nil {
|
||||
return fmt.Errorf("copy payload %s: %v", file, err)
|
||||
}
|
||||
case err != nil:
|
||||
return fmt.Errorf("stat payload %s: %v", file, err)
|
||||
case err == nil:
|
||||
slog.Debug("payload already exists: " + destFile)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -4,5 +4,5 @@ import (
|
||||
"embed"
|
||||
)
|
||||
|
||||
//go:embed llama.cpp/build/linux/*/*/lib/*.so*
|
||||
//go:embed llama.cpp/build/linux/*/*/lib/*
|
||||
var libEmbed embed.FS
|
||||
|
||||
@@ -5,13 +5,15 @@ set -eu
|
||||
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
|
||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||
|
||||
IMAGE_NAME=${IMAGE_NAME:-"ollama/ollama"}
|
||||
BUILD_PLATFORM=${BUILD_PLATFORM:-"linux/arm64,linux/amd64"}
|
||||
docker build \
|
||||
--load \
|
||||
--platform=linux/arm64,linux/amd64 \
|
||||
--platform=${BUILD_PLATFORM} \
|
||||
--build-arg=VERSION \
|
||||
--build-arg=GOFLAGS \
|
||||
-f Dockerfile \
|
||||
-t ollama/ollama:$VERSION \
|
||||
-t ${IMAGE_NAME}:$VERSION \
|
||||
.
|
||||
|
||||
docker build \
|
||||
@@ -21,5 +23,12 @@ docker build \
|
||||
--build-arg=GOFLAGS \
|
||||
--target runtime-rocm \
|
||||
-f Dockerfile \
|
||||
-t ollama/ollama:$VERSION-rocm \
|
||||
-t ${IMAGE_NAME}:$VERSION-rocm \
|
||||
.
|
||||
|
||||
docker tag ${IMAGE_NAME}:$VERSION ${IMAGE_NAME}:latest
|
||||
docker tag ${IMAGE_NAME}:$VERSION-rocm ${IMAGE_NAME}:rocm
|
||||
|
||||
echo "To release, run:"
|
||||
echo " docker push ${IMAGE_NAME}:$VERSION && docker push ${IMAGE_NAME}:latest"
|
||||
echo " docker push ${IMAGE_NAME}:$VERSION-rocm && docker push ${IMAGE_NAME}:rocm"
|
||||
@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
|
||||
.
|
||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
|
||||
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
|
||||
docker rm builder-$TARGETARCH
|
||||
done
|
||||
|
||||
@@ -53,13 +53,14 @@ function buildOllama() {
|
||||
write-host "Building ollama CLI"
|
||||
& go generate ./...
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& go build "-ldflags=-w -s ""-X=github.com/jmorganca/ollama/version.Version=$script:VERSION"" ""-X=github.com/jmorganca/ollama/server.mode=release""" .
|
||||
& go build -ldflags "-s -w -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
New-Item -ItemType Directory -Path .\dist -Force
|
||||
cp .\ollama.exe .\dist\ollama-windows-amd64.exe
|
||||
}
|
||||
|
||||
@@ -67,7 +68,7 @@ function buildApp() {
|
||||
write-host "Building Ollama App"
|
||||
cd "${script:SRC_DIR}\app"
|
||||
& windres -l 0 -o ollama.syso ollama.rc
|
||||
& go build "-ldflags=-H windowsgui -w -s ""-X=github.com/jmorganca/ollama/version.Version=$script:VERSION"" ""-X=github.com/jmorganca/ollama/server.mode=release""" .
|
||||
& go build -ldflags "-s -w -H windowsgui -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
|
||||
@@ -72,7 +72,7 @@ $SUDO install -o0 -g0 -m755 -d $BINDIR
|
||||
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
|
||||
|
||||
install_success() {
|
||||
status 'The Ollama API is now available at 0.0.0.0:11434.'
|
||||
status 'The Ollama API is now available at 127.0.0.1:11434.'
|
||||
status 'Install complete. Run "ollama" from the command line.'
|
||||
}
|
||||
trap install_success EXIT
|
||||
@@ -88,6 +88,10 @@ configure_systemd() {
|
||||
status "Adding ollama user to render group..."
|
||||
$SUDO usermod -a -G render ollama
|
||||
fi
|
||||
if getent group video >/dev/null 2>&1; then
|
||||
status "Adding ollama user to video group..."
|
||||
$SUDO usermod -a -G video ollama
|
||||
fi
|
||||
|
||||
status "Adding current user to ollama group..."
|
||||
$SUDO usermod -a -G ollama $(whoami)
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -20,6 +21,7 @@ import (
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
"golang.org/x/sync/semaphore"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/format"
|
||||
@@ -138,30 +140,29 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *r
|
||||
}
|
||||
|
||||
func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *registryOptions) {
|
||||
b.err = b.run(ctx, requestURL, opts)
|
||||
}
|
||||
|
||||
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
|
||||
defer blobDownloadManager.Delete(b.Digest)
|
||||
ctx, b.CancelFunc = context.WithCancel(ctx)
|
||||
|
||||
file, err := os.OpenFile(b.Name+"-partial", os.O_CREATE|os.O_RDWR, 0o644)
|
||||
if err != nil {
|
||||
return err
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
_ = file.Truncate(b.Total)
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numDownloadParts)
|
||||
limit := int64(runtime.NumCPU())
|
||||
g, inner := NewLimitGroup(ctx, numDownloadParts, limit)
|
||||
go watchDelta(inner, g, &b.Completed, limit)
|
||||
|
||||
for i := range b.Parts {
|
||||
part := b.Parts[i]
|
||||
if part.Completed == part.Size {
|
||||
continue
|
||||
}
|
||||
|
||||
g.Go(func() error {
|
||||
g.Go(inner, func() error {
|
||||
var err error
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
w := io.NewOffsetWriter(file, part.StartsAt())
|
||||
@@ -188,26 +189,29 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
}
|
||||
|
||||
if err := g.Wait(); err != nil {
|
||||
return err
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
|
||||
// explicitly close the file so we can rename it
|
||||
if err := file.Close(); err != nil {
|
||||
return err
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
|
||||
for i := range b.Parts {
|
||||
if err := os.Remove(file.Name() + "-" + strconv.Itoa(i)); err != nil {
|
||||
return err
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err := os.Rename(file.Name(), b.Name); err != nil {
|
||||
return err
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
|
||||
b.done = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart, opts *registryOptions) error {
|
||||
@@ -377,3 +381,87 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
|
||||
|
||||
return download.Wait(ctx, opts.fn)
|
||||
}
|
||||
|
||||
type LimitGroup struct {
|
||||
*errgroup.Group
|
||||
*semaphore.Weighted
|
||||
size, limit int64
|
||||
}
|
||||
|
||||
func NewLimitGroup(ctx context.Context, size, limit int64) (*LimitGroup, context.Context) {
|
||||
g, ctx := errgroup.WithContext(ctx)
|
||||
return &LimitGroup{
|
||||
Group: g,
|
||||
Weighted: semaphore.NewWeighted(size),
|
||||
size: size,
|
||||
limit: limit,
|
||||
}, ctx
|
||||
}
|
||||
|
||||
func (g *LimitGroup) Go(ctx context.Context, fn func() error) {
|
||||
var weight int64 = 1
|
||||
if g.limit > 0 {
|
||||
weight = g.size / g.limit
|
||||
}
|
||||
|
||||
_ = g.Acquire(ctx, weight)
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
g.Group.Go(func() error {
|
||||
defer g.Release(weight)
|
||||
return fn()
|
||||
})
|
||||
}
|
||||
|
||||
func (g *LimitGroup) SetLimit(limit int64) {
|
||||
if limit > g.limit {
|
||||
g.limit = limit
|
||||
}
|
||||
}
|
||||
|
||||
func watchDelta(ctx context.Context, g *LimitGroup, c *atomic.Int64, limit int64) {
|
||||
var maxDelta float64
|
||||
var buckets []int64
|
||||
|
||||
// 3s ramp up period
|
||||
nextUpdate := time.Now().Add(3 * time.Second)
|
||||
|
||||
ticker := time.NewTicker(time.Second)
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
buckets = append(buckets, c.Load())
|
||||
if len(buckets) < 2 {
|
||||
continue
|
||||
} else if len(buckets) > 10 {
|
||||
buckets = buckets[1:]
|
||||
}
|
||||
|
||||
delta := float64((buckets[len(buckets)-1] - buckets[0])) / float64(len(buckets))
|
||||
slog.Debug("", "limit", limit, "delta", format.HumanBytes(int64(delta)), "max_delta", format.HumanBytes(int64(maxDelta)))
|
||||
|
||||
if time.Now().Before(nextUpdate) {
|
||||
// quiet period; do not update ccy if recently updated
|
||||
continue
|
||||
} else if maxDelta > 0 {
|
||||
x := delta / maxDelta
|
||||
if x < 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
limit += min(int64(x), int64(runtime.NumCPU()))
|
||||
slog.Debug("setting", "limit", limit)
|
||||
g.SetLimit(limit)
|
||||
}
|
||||
|
||||
// 3s cooldown period
|
||||
nextUpdate = time.Now().Add(2 * time.Second)
|
||||
maxDelta = delta
|
||||
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
105
server/images.go
105
server/images.go
@@ -1,6 +1,7 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
@@ -23,6 +24,7 @@ import (
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/convert"
|
||||
"github.com/jmorganca/ollama/llm"
|
||||
"github.com/jmorganca/ollama/parser"
|
||||
"github.com/jmorganca/ollama/version"
|
||||
@@ -52,6 +54,10 @@ type Model struct {
|
||||
Messages []Message
|
||||
}
|
||||
|
||||
func (m *Model) IsEmbedding() bool {
|
||||
return slices.Contains(m.Config.ModelFamilies, "bert") || slices.Contains(m.Config.ModelFamilies, "nomic-bert")
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
@@ -312,7 +318,24 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
c.Args = blobPath
|
||||
}
|
||||
|
||||
bin, err := os.Open(realpath(modelFileDir, c.Args))
|
||||
pathName := realpath(modelFileDir, c.Args)
|
||||
|
||||
ggufName, err := convertSafetensors(name, pathName)
|
||||
if err != nil {
|
||||
switch {
|
||||
case errors.Is(err, zip.ErrFormat):
|
||||
// it's not a safetensor archive
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if ggufName != "" {
|
||||
pathName = ggufName
|
||||
defer os.RemoveAll(ggufName)
|
||||
}
|
||||
|
||||
bin, err := os.Open(pathName)
|
||||
if err != nil {
|
||||
// not a file on disk so must be a model reference
|
||||
modelpath := ParseModelPath(c.Args)
|
||||
@@ -588,6 +611,73 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
return nil
|
||||
}
|
||||
|
||||
func convertSafetensors(name, fn string) (string, error) {
|
||||
r, err := zip.OpenReader(fn)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
tempDir, err := os.MkdirTemp("", "ollama-convert")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer os.RemoveAll(tempDir)
|
||||
|
||||
for _, f := range r.File {
|
||||
fpath := filepath.Join(tempDir, f.Name)
|
||||
outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
_, err = io.Copy(outFile, rc)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
outFile.Close()
|
||||
rc.Close()
|
||||
}
|
||||
|
||||
params, err := convert.GetParams(tempDir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
SupportedArchs := []string{
|
||||
"MistralForCausalLM",
|
||||
}
|
||||
|
||||
for _, arch := range params.Architectures {
|
||||
if !slices.Contains(SupportedArchs, arch) {
|
||||
return "", fmt.Errorf("this safetensors model is not yet supported")
|
||||
}
|
||||
}
|
||||
|
||||
t, err := convert.GetSafeTensors(tempDir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
vocab, err := convert.LoadTokens(tempDir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fn, err = convert.WriteGGUF(name, t, params, vocab)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fn, nil
|
||||
}
|
||||
|
||||
func CopyModel(src, dest string) error {
|
||||
srcModelPath := ParseModelPath(src)
|
||||
srcPath, err := srcModelPath.GetManifestPath()
|
||||
@@ -1103,18 +1193,7 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
|
||||
req.ContentLength = contentLength
|
||||
}
|
||||
|
||||
proxyURL, err := http.ProxyFromEnvironment(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(proxyURL),
|
||||
},
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -121,13 +121,15 @@ func ChatPrompt(tmpl string, messages []api.Message, window int, encode func(str
|
||||
p = prompt{}
|
||||
}
|
||||
|
||||
p.Prompt = msg.Content
|
||||
|
||||
var sb strings.Builder
|
||||
for range msg.Images {
|
||||
p.Prompt += fmt.Sprintf(" [img-%d]", imgId)
|
||||
fmt.Fprintf(&sb, "[img-%d] ", imgId)
|
||||
p.images = append(p.images, imgId)
|
||||
imgId += 1
|
||||
}
|
||||
|
||||
sb.WriteString(msg.Content)
|
||||
p.Prompt = sb.String()
|
||||
case "assistant":
|
||||
if p.Response != "" {
|
||||
prompts = append(prompts, p)
|
||||
|
||||
@@ -155,7 +155,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("base64")}},
|
||||
},
|
||||
window: 1024,
|
||||
want: "You are a Wizard. Hello [img-0]",
|
||||
want: "You are a Wizard. [img-0] Hello",
|
||||
},
|
||||
{
|
||||
name: "images truncated",
|
||||
@@ -165,7 +165,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
{Role: "user", Content: "Hello", Images: []api.ImageData{[]byte("img1"), []byte("img2")}},
|
||||
},
|
||||
window: 1024,
|
||||
want: "You are a Wizard. Hello [img-1]",
|
||||
want: "You are a Wizard. [img-0] [img-1] Hello",
|
||||
},
|
||||
{
|
||||
name: "empty list",
|
||||
@@ -198,7 +198,7 @@ func TestChatPrompt(t *testing.T) {
|
||||
}
|
||||
|
||||
if got != tc.want {
|
||||
t.Errorf("got = %v, want %v", got, tc.want)
|
||||
t.Errorf("got: %q, want: %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
|
||||
|
||||
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
|
||||
func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
|
||||
workDir := c.GetString("workDir")
|
||||
|
||||
needLoad := loaded.runner == nil || // is there a model loaded?
|
||||
loaded.ModelPath != model.ModelPath || // has the base model changed?
|
||||
!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
|
||||
@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
|
||||
loaded.Options = nil
|
||||
}
|
||||
|
||||
llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
|
||||
llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
|
||||
if err != nil {
|
||||
// some older models are not compatible with newer versions of llama.cpp
|
||||
// show a generalized compatibility error until there is a better way to
|
||||
@@ -191,6 +189,11 @@ func GenerateHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if model.IsEmbedding() {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support generate"})
|
||||
return
|
||||
}
|
||||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
@@ -245,6 +248,19 @@ func GenerateHandler(c *gin.Context) {
|
||||
slog.Debug("generate handler", "system", req.System)
|
||||
|
||||
var sb strings.Builder
|
||||
for i := range req.Images {
|
||||
fmt.Fprintf(&sb, "[img-%d] ", i)
|
||||
}
|
||||
|
||||
sb.WriteString(req.Prompt)
|
||||
|
||||
p, err := Prompt(req.Template, req.System, sb.String(), "", true)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
sb.Reset()
|
||||
if req.Context != nil {
|
||||
prev, err := loaded.runner.Decode(c.Request.Context(), req.Context)
|
||||
if err != nil {
|
||||
@@ -255,18 +271,6 @@ func GenerateHandler(c *gin.Context) {
|
||||
sb.WriteString(prev)
|
||||
}
|
||||
|
||||
// write image tags
|
||||
// TODO: limit the number of images to fit in the context similar to the chat endpoint
|
||||
for i := range req.Images {
|
||||
req.Prompt += fmt.Sprintf(" [img-%d]", i)
|
||||
}
|
||||
|
||||
p, err := Prompt(req.Template, req.System, req.Prompt, "", true)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
sb.WriteString(p)
|
||||
|
||||
prompt = sb.String()
|
||||
@@ -379,7 +383,7 @@ func GenerateHandler(c *gin.Context) {
|
||||
streamResponse(c, ch)
|
||||
}
|
||||
|
||||
func EmbeddingHandler(c *gin.Context) {
|
||||
func EmbeddingsHandler(c *gin.Context) {
|
||||
loaded.mu.Lock()
|
||||
defer loaded.mu.Unlock()
|
||||
|
||||
@@ -432,8 +436,9 @@ func EmbeddingHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if !loaded.Options.EmbeddingOnly {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
|
||||
// an empty request loads the model
|
||||
if req.Prompt == "" {
|
||||
c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -942,7 +947,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||
r.POST("/api/pull", PullModelHandler)
|
||||
r.POST("/api/generate", GenerateHandler)
|
||||
r.POST("/api/chat", ChatHandler)
|
||||
r.POST("/api/embeddings", EmbeddingHandler)
|
||||
r.POST("/api/embeddings", EmbeddingsHandler)
|
||||
r.POST("/api/create", CreateModelHandler)
|
||||
r.POST("/api/push", PushModelHandler)
|
||||
r.POST("/api/copy", CopyModelHandler)
|
||||
@@ -1028,7 +1033,7 @@ func Serve(ln net.Listener) error {
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
if err := llm.Init(s.WorkDir); err != nil {
|
||||
if err := llm.Init(); err != nil {
|
||||
return fmt.Errorf("unable to initialize llm library %w", err)
|
||||
}
|
||||
if runtime.GOOS == "linux" { // TODO - windows too
|
||||
@@ -1143,6 +1148,11 @@ func ChatHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if model.IsEmbedding() {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support chat"})
|
||||
return
|
||||
}
|
||||
|
||||
opts, err := modelOptions(model, req.Options)
|
||||
if err != nil {
|
||||
if errors.Is(err, api.ErrInvalidOpts) {
|
||||
|
||||
@@ -12,14 +12,12 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/format"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
var blobUploadManager sync.Map
|
||||
@@ -138,14 +136,17 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
|
||||
}
|
||||
defer b.file.Close()
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numUploadParts)
|
||||
var limit int64 = 2
|
||||
g, inner := NewLimitGroup(ctx, numUploadParts, limit)
|
||||
go watchDelta(inner, g, &b.Completed, limit)
|
||||
|
||||
for i := range b.Parts {
|
||||
part := &b.Parts[i]
|
||||
select {
|
||||
case <-inner.Done():
|
||||
break
|
||||
case requestURL := <-b.nextURL:
|
||||
g.Go(func() error {
|
||||
g.Go(inner, func() error {
|
||||
var err error
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
err = b.uploadPart(inner, http.MethodPatch, requestURL, part, opts)
|
||||
@@ -177,16 +178,14 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
|
||||
requestURL := <-b.nextURL
|
||||
|
||||
// calculate md5 checksum and add it to the commit request
|
||||
var sb strings.Builder
|
||||
md5sum := md5.New()
|
||||
for _, part := range b.Parts {
|
||||
sb.Write(part.Sum(nil))
|
||||
md5sum.Write(part.Sum(nil))
|
||||
}
|
||||
|
||||
md5sum := md5.Sum([]byte(sb.String()))
|
||||
|
||||
values := requestURL.Query()
|
||||
values.Add("digest", b.Digest)
|
||||
values.Add("etag", fmt.Sprintf("%x-%d", md5sum, len(b.Parts)))
|
||||
values.Add("etag", fmt.Sprintf("%x-%d", md5sum.Sum(nil), len(b.Parts)))
|
||||
requestURL.RawQuery = values.Encode()
|
||||
|
||||
headers := make(http.Header)
|
||||
|
||||
Reference in New Issue
Block a user