Skip to content

Commit 7d11961

Browse files
committed
remove force DMMV
1 parent cd61aa0 commit 7d11961

File tree

1 file changed

+1
-5
lines changed

1 file changed

+1
-5
lines changed

Makefile

+1 -5
Original file line number | Diff line number | Diff line change
@@ -199,28 +199,24 @@ ifdef LLAMA_HIPBLAS
199199
CXX := $(ROCM_PATH)/llvm/bin/clang++
200200
GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
201201
LLAMA_CUDA_DMMV_X ?= 128
202-
LLAMA_CUDA_MMV_Y ?= 1
202+
LLAMA_CUDA_MMV_Y ?= 2
203203
LLAMA_CUDA_KQUANTS_ITER ?= 1
204-
LLAMA_CUDA_FORCE_DMMV ?= true
205204
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
206205
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
207206
HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
208207
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
209208
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
210209
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
211-
-DGGML_CUDA_FORCE_DMMV \
212210
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
213211
-DCC_TURING=1000000000
214212
ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
215213
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
216214
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
217-
-DGGML_CUDA_FORCE_DMMV \
218215
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
219216
-DCC_TURING=1000000000
220217
ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
221218
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
222219
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
223-
-DGGML_CUDA_FORCE_DMMV \
224220
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
225221
-DCC_TURING=1000000000 # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
226222
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h

0 commit comments

Comments (0)