Skip to content

Commit 8913bc6

Browse files
SlyEchoYellowRoseCx
authored and committed
Allow overriding CC_TURING
1 parent e77a4c3 commit 8913bc6

File tree

3 files changed

+10
-4
lines changed

3 files changed

+10
-4
lines changed

CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ if (LLAMA_HIPBLAS)
178178
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
179179
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
180180
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
181+
target_compile_definitions(ggml-rocm PRIVATE CC_TURING=1000000000)
181182
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
182183
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
183184

Makefile

+6-3
Original file line number | Diff line number | Diff line change
@@ -209,17 +209,20 @@ ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
209209
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
210210
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
211211
-DGGML_CUDA_FORCE_DMMV \
212-
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
212+
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
213+
-DCC_TURING=1000000000
213214
ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
214215
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
215216
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
216217
-DGGML_CUDA_FORCE_DMMV \
217-
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
218+
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
219+
-DCC_TURING=1000000000
218220
ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
219221
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
220222
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
221223
-DGGML_CUDA_FORCE_DMMV \
222-
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
224+
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
225+
-DCC_TURING=1000000000 # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
223226
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
224227
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
225228
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h

ggml-cuda.cu

+3-1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,9 @@
7272
#include "ggml.h"
7373

7474
#define MIN_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
75-
#define CC_TURING 1000000000
75+
#ifndef CC_TURING
76+
#define CC_TURING 700
77+
#endif
7678

7779
#if defined(GGML_USE_HIPBLAS)
7880
#define __CUDA_ARCH__ 1300

0 commit comments

Comments
 (0)