Skip to content

Commit 2ec4466

Browse files
SlyEcho and YellowRoseCx committed
Update build flags.
GGML_CUDA_DMMV_Y is now GGML_CUDA_MMV_Y, so update your build instructions.
GGML_CUDA_FORCE_DMMV is always enabled.

---------

Co-authored-by: YellowRoseCx <[email protected]>
1 parent cd36b18 commit 2ec4466

File tree

2 files changed

+8
-12
lines changed

2 files changed

+8
-12
lines changed

CMakeLists.txt

+2 -4
Original file line number | Diff line number | Diff line change
@@ -365,11 +365,9 @@ if (LLAMA_HIPBLAS)
365365
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
366366
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
367367
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
368-
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
368+
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
369369
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
370-
if (LLAMA_CUDA_FORCE_DMMV)
371-
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
372-
endif()
370+
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
373371
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
374372
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
375373

Makefile

+6 -8
Original file line number | Diff line number | Diff line change
@@ -226,20 +226,18 @@ ifdef LLAMA_HIPBLAS
226226
CC := $(ROCM_PATH)/llvm/bin/clang
227227
CXX := $(ROCM_PATH)/llvm/bin/clang++
228228
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100
229-
LLAMA_CUDA_DMMV_X ?= 32
230-
LLAMA_CUDA_DMMV_Y ?= 1
229+
LLAMA_CUDA_DMMV_X ?= 32
230+
LLAMA_CUDA_MMV_Y ?= 1
231+
LLAMA_CUDA_KQUANTS_ITER ?= 2
231232
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
232233
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
233234
LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
234235
OBJS += ggml-cuda.o
235-
ifdef LLAMA_CUDA_KQUANTS_ITER
236-
CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
237-
else
238-
CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
239-
endif
240236
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
241237
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
242-
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
238+
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
239+
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_FORCE_DMMV
240+
ggml-cuda.o: CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
243241
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
244242
$(CXX) $(CXXFLAGS) -x hip -c -o $@ $<
245243
endif # LLAMA_HIPBLAS

0 commit comments

Comments
 (0)