Skip to content

Commit 4a042f3

Browse files
SlyEcho, ardfork, jammm, and jdecourval
authored and committed
gfx1100 support
---------

Co-authored-by: ardfork <[email protected]>
Co-authored-by: jammm <[email protected]>
Co-authored-by: jdecourval <[email protected]>
1 parent 8913bc6 commit 4a042f3

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

ggml-cuda.cu

+2
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,8 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
90 90
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
91 91
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
92 92
c = __builtin_amdgcn_sdot4(a, b, c, false);
93+
#elif defined(__gfx1100__)
94+
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
93 95
#elif defined(__gfx1010__) || defined(__gfx900__)
94 96
int tmp1;
95 97
int tmp2;

0 commit comments

Comments
 (0)