We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8913bc6 · commit 4a042f3 · Copy full SHA for 4a042f3
ggml-cuda.cu
@@ -90,6 +90,8 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
90
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
91
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
92
c = __builtin_amdgcn_sdot4(a, b, c, false);
93
+#elif defined(__gfx1100__)
94
+ c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
95
#elif defined(__gfx1010__) || defined(__gfx900__)
96
int tmp1;
97
int tmp2;
0 commit comments