build: recursive make ggml-cuda
This commit is contained in:
parent
9f7d3ff2a8
commit
054e31146e
112
Makefile2
Normal file
112
Makefile2
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
export GOOS?=$(shell go env GOOS)
|
||||||
|
export GOARCH?=$(shell go env GOARCH)
|
||||||
|
|
||||||
|
build: llama/build/$(GOOS)-$(GOARCH)
|
||||||
|
|
||||||
|
export GOFLAGS=-trimpath
|
||||||
|
|
||||||
|
llama/build/%/runners/metal: GOFLAGS+=-tags=metal
|
||||||
|
llama/build/%/runners/cpu_avx: GOFLAGS+=-tags=avx
|
||||||
|
llama/build/%/runners/cpu_avx2: GOFLAGS+=-tags=avx2
|
||||||
|
llama/build/%/runners/cuda_v11: GOFLAGS+=-tags=cuda,cuda_v11
|
||||||
|
llama/build/%/runners/cuda_v12: GOFLAGS+=-tags=cuda,cuda_v12
|
||||||
|
llama/build/%/runners/rocm: GOFLAGS+=-tags=cuda,rocm
|
||||||
|
|
||||||
|
.PHONY: llama/build/darwin-amd64 llama/build/darwin-arm64
|
||||||
|
llama/build/darwin-amd64: llama/build/darwin-amd64/runners/cpu_avx
|
||||||
|
llama/build/darwin-arm64: llama/build/darwin-arm64/runners/metal
|
||||||
|
|
||||||
|
.PHONY: llama/build/linux-amd64 llama/build/linux-arm64
|
||||||
|
llama/build/linux-amd64: llama/build/linux-amd64/runners/cpu_avx
|
||||||
|
llama/build/linux-amd64: llama/build/linux-amd64/runners/cpu_avx2
|
||||||
|
llama/build/linux-arm64: llama/build/linux-arm64/runners/cpu_avx
|
||||||
|
llama/build/linux-arm64: llama/build/linux-arm64/runners/cpu_avx2
|
||||||
|
|
||||||
|
.PHONY: llama/build/windows-amd64 linux/build/windows-arm64
|
||||||
|
llama/build/windows-amd64: llama/build/windows-amd64/runners/cpu_avx
|
||||||
|
llama/build/windows-amd64: llama/build/windows-amd64/runners/cpu_avx2
|
||||||
|
llama/build/windows-arm64: llama/build/windows-arm64/runners/cpu_avx
|
||||||
|
llama/build/windows-arm64: llama/build/windows-arm64/runners/cpu_avx2
|
||||||
|
|
||||||
|
.PHONY: cuda_v11 cuda_v12
|
||||||
|
cuda_v11 cuda_v12 rocm:
|
||||||
|
$(MAKE) -C ml/backend/ggml/ggml/ggml-cuda $@
|
||||||
|
|
||||||
|
ifeq ($(GOOS),linux)
|
||||||
|
NVCC11=$(shell command -v /usr/local/cuda-11/bin/nvcc)
|
||||||
|
NVCC12=$(shell command -v /usr/local/cuda-12/bin/nvcc)
|
||||||
|
HIPCC=$(shell command -v hipcc)
|
||||||
|
else ifeq ($(GOOS),windows)
|
||||||
|
NVCC11=$(shell ls "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.?\\bin\\nvcc.exe")
|
||||||
|
NVCC12=$(shell ls "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.?\\bin\\nvcc.exe")
|
||||||
|
HIPCC=$(shell command -v hipcc)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(NVCC11),)
|
||||||
|
ifeq ($(OLLAMA_SKIP_GENERATE_CUDA_11),)
|
||||||
|
CUDA_V11_TARGETS= \
|
||||||
|
llama/build/linux-amd64/runners/cuda_v11 \
|
||||||
|
llama/build/linux-arm64/runners/cuda_v11 \
|
||||||
|
llama/build/windows-amd64/runners/cuda_v11 \
|
||||||
|
llama/build/windows-arm64/runners/cuda_v11 \
|
||||||
|
|
||||||
|
$(CUDA_V11_TARGETS): cuda_v11
|
||||||
|
cuda_v11: export NVCC=$(NVCC11)
|
||||||
|
|
||||||
|
llama/build/linux-amd64: llama/build/linux-amd64/runners/cuda_v11
|
||||||
|
llama/build/linux-arm64: llama/build/linux-arm64/runners/cuda_v11
|
||||||
|
llama/build/windows-amd64: llama/build/windows-amd64/runners/cuda_v11
|
||||||
|
llama/build/windows-arm64: llama/build/windows-arm64/runners/cuda_v11
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(NVCC12),)
|
||||||
|
ifeq ($(OLLAMA_SKIP_GENERATE_CUDA_12),)
|
||||||
|
CUDA_V12_TARGETS= \
|
||||||
|
llama/build/linux-amd64/runners/cuda_v12 \
|
||||||
|
llama/build/linux-arm64/runners/cuda_v12 \
|
||||||
|
llama/build/windows-amd64/runners/cuda_v12 \
|
||||||
|
llama/build/windows-arm64/runners/cuda_v12 \
|
||||||
|
|
||||||
|
$(CUDA_V12_TARGETS): cuda_v12
|
||||||
|
cuda_v12: export NVCC=$(NVCC12)
|
||||||
|
|
||||||
|
llama/build/linux-amd64: llama/build/linux-amd64/runners/cuda_v12
|
||||||
|
llama/build/linux-arm64: llama/build/linux-arm64/runners/cuda_v12
|
||||||
|
llama/build/windows-amd64: llama/build/windows-amd64/runners/cuda_v12
|
||||||
|
llama/build/windows-arm64: llama/build/windows-arm64/runners/cuda_v12
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(HIPCC),)
|
||||||
|
ifeq ($(OLLAMA_SKIP_GENERATE_ROCM),)
|
||||||
|
ROCM_TARGETS= \
|
||||||
|
llama/build/linux-amd64/runners/rocm \
|
||||||
|
llama/build/linux-arm64/runners/rocm \
|
||||||
|
llama/build/windows-amd64/runners/rocm \
|
||||||
|
llama/build/windows-arm64/runners/rocm \
|
||||||
|
|
||||||
|
$(ROCM_TARGETS): rocm
|
||||||
|
rocm: export NVCC=$(HIPCC)
|
||||||
|
|
||||||
|
llama/build/linux-amd64: llama/build/linux-amd64/runners/rocm
|
||||||
|
llama/build/linux-arm64: llama/build/linux-arm64/runners/rocm
|
||||||
|
llama/build/windows-amd64: llama/build/windows-amd64/runners/rocm
|
||||||
|
llama/build/windows-arm64: llama/build/windows-arm64/runners/rocm
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
export CGO_ENABLED=1
|
||||||
|
export CGO_CPPFLAGS_ALLOW=-mfma|-mf16c
|
||||||
|
|
||||||
|
llama/build/%: cmd/runner always
|
||||||
|
mkdir -p $@; go build -o $@ ./$<
|
||||||
|
|
||||||
|
.PHONY: always
|
||||||
|
always:
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) -r llama/build
|
||||||
|
|
||||||
|
realclean: clean
|
||||||
|
$(MAKE) -C ml/backend/ggml/ggml/ggml-cuda $<
|
@ -37,8 +37,7 @@ go build -tags avx .
|
|||||||
```shell
|
```shell
|
||||||
# go doesn't recognize `-mfma` as a valid compiler flag
|
# go doesn't recognize `-mfma` as a valid compiler flag
|
||||||
# see https://github.com/golang/go/issues/17895
|
# see https://github.com/golang/go/issues/17895
|
||||||
go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
|
go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
|
||||||
go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
|
|
||||||
go build -tags=avx,avx2 .
|
go build -tags=avx,avx2 .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -94,4 +94,3 @@ CPPFLAGS += \
|
|||||||
-I../ml/backend/ggml/ggml/include \
|
-I../ml/backend/ggml/ggml/include \
|
||||||
-I../ml/backend/ggml/ggml/ggml-cpu \
|
-I../ml/backend/ggml/ggml/ggml-cpu \
|
||||||
-I../ml/backend/ggml/ggml/ggml-cpu/amx \
|
-I../ml/backend/ggml/ggml/ggml-cpu/amx \
|
||||||
>>>>>>> 22320f09 (preserve vendor directory structure):llama/make/common-defs.make
|
|
||||||
|
@ -11,74 +11,31 @@ GPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERS
|
|||||||
# today, cuda is bundled, but rocm is split out. Should split them each out by runner
|
# today, cuda is bundled, but rocm is split out. Should split them each out by runner
|
||||||
DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
|
DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
|
||||||
|
|
||||||
|
|
||||||
GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
|
|
||||||
|
|
||||||
GPU_RUNNER_SRCS := \
|
|
||||||
$(filter-out $(wildcard llama/ggml-cuda/fattn*.cu),$(wildcard llama/ggml-cuda/*.cu)) \
|
|
||||||
$(wildcard llama/ggml-cuda/template-instances/mmq*.cu) \
|
|
||||||
llama/ggml.c llama/ggml-backend.cpp llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c llama/ggml-threading.cpp
|
|
||||||
GPU_RUNNER_HDRS := \
|
|
||||||
$(wildcard llama/ggml-cuda/*.cuh)
|
|
||||||
|
|
||||||
|
|
||||||
# Conditional flags and components to speed up developer builds
|
|
||||||
ifneq ($(OLLAMA_FAST_BUILD),)
|
|
||||||
GPU_COMPILER_CUFLAGS += \
|
|
||||||
-DGGML_DISABLE_FLASH_ATTN
|
|
||||||
else
|
|
||||||
GPU_RUNNER_SRCS += \
|
|
||||||
$(wildcard llama/ggml-cuda/fattn*.cu) \
|
|
||||||
$(wildcard llama/ggml-cuda/template-instances/fattn-wmma*.cu) \
|
|
||||||
$(wildcard llama/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) \
|
|
||||||
$(wildcard llama/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) \
|
|
||||||
$(wildcard llama/ggml-cuda/template-instances/fattn-vec*f16-f16.cu)
|
|
||||||
endif
|
|
||||||
|
|
||||||
GPU_RUNNER_OBJS := $(GPU_RUNNER_SRCS:.cu=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
|
|
||||||
GPU_RUNNER_OBJS := $(GPU_RUNNER_OBJS:.c=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
|
|
||||||
GPU_RUNNER_OBJS := $(addprefix $(BUILD_DIR)/,$(GPU_RUNNER_OBJS:.cpp=.$(GPU_RUNNER_NAME).$(OBJ_EXT)))
|
|
||||||
|
|
||||||
DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
||||||
BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
||||||
|
|
||||||
|
$(GPU_RUNNER_NAME): $(BUILD_RUNNERS)
|
||||||
$(GPU_RUNNER_NAME): $(BUILD_RUNNERS)
|
|
||||||
|
|
||||||
dist: $(DIST_RUNNERS)
|
dist: $(DIST_RUNNERS)
|
||||||
|
|
||||||
# Build targets
|
# Build targets
|
||||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
|
|
||||||
@-mkdir -p $(dir $@)
|
|
||||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
|
|
||||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
|
|
||||||
@-mkdir -p $(dir $@)
|
|
||||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
|
|
||||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
|
|
||||||
@-mkdir -p $(dir $@)
|
|
||||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CXXFLAGS) -o $@ $<
|
|
||||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = $(CGO_EXTRA_LDFLAGS) -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/"
|
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = $(CGO_EXTRA_LDFLAGS) -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/"
|
||||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) ./llama/*.go ./llama/runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): ./llama/*.go ./llama/runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||||
@-mkdir -p $(dir $@)
|
@-mkdir -p $(@D)
|
||||||
|
$(MAKE) -C ml/backend/ggml/ggml/ggml-cuda $(GPU_RUNNER_NAME) CXX=$(GPU_COMPILER)
|
||||||
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./cmd/runner
|
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./cmd/runner
|
||||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
|
|
||||||
@-mkdir -p $(dir $@)
|
|
||||||
$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
|
|
||||||
|
|
||||||
# Distribution targets
|
# Distribution targets
|
||||||
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
||||||
@-mkdir -p $(dir $@)
|
@-mkdir -p $(@D)
|
||||||
$(CP) $< $@
|
$(CP) $< $@
|
||||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_LIB_DEPS)
|
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_LIB_DEPS)
|
||||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
|
||||||
@-mkdir -p $(dir $@)
|
|
||||||
$(CP) $< $@
|
|
||||||
$(GPU_DIST_LIB_DEPS):
|
$(GPU_DIST_LIB_DEPS):
|
||||||
@-mkdir -p $(dir $@)
|
@-mkdir -p $(@D)
|
||||||
$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
|
$(CP) $(GPU_LIB_DIR)/$(@F) $(@D)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(GPU_RUNNER_OBJS) $(BUILD_RUNNERS) $(DIST_RUNNERS)
|
$(RM) $(BUILD_RUNNERS) $(DIST_RUNNERS)
|
||||||
|
|
||||||
.PHONY: clean $(GPU_RUNNER_NAME)
|
.PHONY: clean $(GPU_RUNNER_NAME)
|
||||||
|
|
||||||
|
@ -4,8 +4,7 @@ package cpu
|
|||||||
// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../include -I${SRCDIR}/amx
|
// #cgo CPPFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../include -I${SRCDIR}/amx
|
||||||
// #cgo CPPFLAGS: -D_GNU_SOURCE
|
// #cgo CPPFLAGS: -D_GNU_SOURCE
|
||||||
// #cgo amd64,avx CPPFLAGS: -mavx
|
// #cgo amd64,avx CPPFLAGS: -mavx
|
||||||
// #cgo amd64,avx2 CPPFLAGS: -mavx2 -mfma
|
// #cgo amd64,avx2 CPPFLAGS: -mavx2 -mfma -mf16c
|
||||||
// #cgo amd64,f16c CPPFLAGS: -mf16c
|
|
||||||
// #cgo arm64 CPPFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
// #cgo arm64 CPPFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||||
import "C"
|
import "C"
|
||||||
import (
|
import (
|
||||||
|
1
ml/backend/ggml/ggml/ggml-cuda/.gitignore
vendored
Normal file
1
ml/backend/ggml/ggml/ggml-cuda/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
*.o
|
64
ml/backend/ggml/ggml/ggml-cuda/Makefile
Normal file
64
ml/backend/ggml/ggml/ggml-cuda/Makefile
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
NVCC?=nvcc
|
||||||
|
|
||||||
|
NVCC_PREPEND_FLAGS= \
|
||||||
|
-t 2 \
|
||||||
|
-split-compile 0 \
|
||||||
|
-std=c++17 \
|
||||||
|
-I.. \
|
||||||
|
-I../include \
|
||||||
|
$(foreach ARCH,$(subst ;, ,$(CUDA_ARCHS)),--generate-code=arch=compute_$(ARCH),code=sm_$(ARCH)) \
|
||||||
|
|
||||||
|
NVCC_APPEND_FLAGS= \
|
||||||
|
# -DGGML_CUDA_USE_GRAPHS=1 \
|
||||||
|
|
||||||
|
ALL_SOURCES=$(wildcard *.cu)
|
||||||
|
FATTN_SOURCES=$(wildcard fattn*.cu)
|
||||||
|
|
||||||
|
SOURCES= \
|
||||||
|
$(filter-out $(FATTN_SOURCES),$(ALL_SOURCES)) \
|
||||||
|
$(wildcard template-instances/mmq*.cu) \
|
||||||
|
|
||||||
|
ifneq ($(OLLAMA_FAST_BUILD),)
|
||||||
|
NVCC_APPEND_FLAGS+=-DGGML_DISABLE_FLASH_ATTN
|
||||||
|
else
|
||||||
|
SOURCES+= \
|
||||||
|
$(FATTN_SOURCES) \
|
||||||
|
$(wildcard template-instances/fattn-wmma*.cu) \
|
||||||
|
$(wildcard template-instances/fattn-vec*q4_0-q4_0.cu) \
|
||||||
|
$(wildcard template-instances/fattn-vec*q8_0-q8_0.cu) \
|
||||||
|
$(wildcard template-instances/fattn-vec*f16-f16.cu)
|
||||||
|
endif
|
||||||
|
|
||||||
|
all: cuda_v11 cuda_v12
|
||||||
|
|
||||||
|
cuda_v11: CUDA_ARCHS?=50;52;53;60;61;62;70;72;75;80;86
|
||||||
|
cuda_v11: OBJECTS=$(patsubst %.cu,%.v11.o,$(SOURCES))
|
||||||
|
|
||||||
|
cuda_v12: CUDA_ARCHS?=60;61;62;70;72;75;80;86;87;89;90;90a
|
||||||
|
cuda_v12: OBJECTS=$(patsubst %.cu,%.v12.o,$(SOURCES))
|
||||||
|
|
||||||
|
rocm: CPPFLAGS+=-DGGML_USE_HIP
|
||||||
|
rocm: OBJECTS=$(patsubst %.cu,%.rocm.o,$(SOURCES))
|
||||||
|
|
||||||
|
ifeq ($(OS),Windows_NT)
|
||||||
|
cuda_v11: libggml_cuda_v11.dll
|
||||||
|
cuda_v12: libggml_cuda_v12.dll
|
||||||
|
rocm: libggml_rocm.dll
|
||||||
|
else
|
||||||
|
cuda_v11: libggml_cuda_v11.a
|
||||||
|
cuda_v12: libggml_cuda_v12.a
|
||||||
|
rocm: libggml_rocm.a
|
||||||
|
endif
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) *.a *.o template-instances/*.o
|
||||||
|
|
||||||
|
%.v11.o %.v12.o %.rocm.o: %.cu
|
||||||
|
$(NVCC) $(NVCC_PREPEND_FLAGS) -c $< -o $@ $(NVCC_APPEND_FLAGS)
|
||||||
|
|
||||||
|
.SECONDEXPANSION:
|
||||||
|
%.a: $$(OBJECTS)
|
||||||
|
$(AR) rcs $@ $^
|
||||||
|
|
||||||
|
%.dll: $$(OBJECTS)
|
||||||
|
$(NVCC) -shared -o $@ $^
|
@ -1,3 +1,7 @@
|
|||||||
package cuda
|
package cuda
|
||||||
|
|
||||||
|
// #cgo cuda_v11 LDFLAGS: -L. -lggml_cuda_v11
|
||||||
|
// #cgo cuda_v12 LDFLAGS: -L. -lggml_cuda_v12
|
||||||
|
// #cgo cuda_v11 cuda_v12 LDFLAGS: -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcublasLt
|
||||||
|
// #cgo rocm LDFLAGS: -L. -lggml_rocm -L/opt/rocm/lib -lhipblas -lamdhip64 -lrocblas
|
||||||
import "C"
|
import "C"
|
||||||
|
@ -5,8 +5,7 @@ package ggml
|
|||||||
// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
|
// #cgo CPPFLAGS: -DNDEBUG -DGGML_USE_CPU
|
||||||
// #cgo darwin LDFLAGS: -framework Foundation
|
// #cgo darwin LDFLAGS: -framework Foundation
|
||||||
// #cgo amd64,avx CPPFLAGS: -mavx
|
// #cgo amd64,avx CPPFLAGS: -mavx
|
||||||
// #cgo amd64,avx2 CPPFLAGS: -mavx2 -mfma
|
// #cgo amd64,avx2 CPPFLAGS: -mavx2 -mfma -mf16c
|
||||||
// #cgo amd64,f16c CPPFLAGS: -mf16c
|
|
||||||
// #cgo arm64 CPPFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
// #cgo arm64 CPPFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||||
import "C"
|
import "C"
|
||||||
import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/ggml-cpu"
|
import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/ggml-cpu"
|
||||||
|
@ -3,5 +3,6 @@
|
|||||||
package ggml
|
package ggml
|
||||||
|
|
||||||
// #cgo CPPFLAGS: -DGGML_USE_CUDA
|
// #cgo CPPFLAGS: -DGGML_USE_CUDA
|
||||||
|
// #cgo rocm CPPFLAGS: -DGGML_USE_HIP
|
||||||
import "C"
|
import "C"
|
||||||
import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/ggml-cuda"
|
import _ "github.com/ollama/ollama/ml/backend/ggml/ggml/ggml-cuda"
|
||||||
|
@ -10,9 +10,7 @@ mkdir -p dist
|
|||||||
# If installed to an alternate location use the following to enable
|
# If installed to an alternate location use the following to enable
|
||||||
# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
|
# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
|
||||||
# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
|
# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
|
||||||
export CGO_CFLAGS=-mmacosx-version-min=11.3
|
export CGO_CPPFLAGS=-mmacosx-version-min=11.3
|
||||||
export CGO_CXXFLAGS=-mmacosx-version-min=11.3
|
|
||||||
export CGO_LDFLAGS=-mmacosx-version-min=11.3
|
|
||||||
|
|
||||||
rm -rf llama/build dist/darwin-*
|
rm -rf llama/build dist/darwin-*
|
||||||
echo "Building darwin arm64"
|
echo "Building darwin arm64"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user