
Commit 808aba3

CUDA: optimize and refactor MMQ (ggml-org#8416)

* CUDA: optimize and refactor MMQ
* explicit q8_1 memory layouts, add documentation
1 parent a977c11 commit 808aba3

File tree: 5 files changed, +844 −664 lines

ggml/src/ggml-cuda/mma.cuh (+4 lines)
@@ -70,6 +70,10 @@ struct mma_int_A_I16K8 {
         }
 #endif // defined(INT8_MMA_AVAILABLE)
     }
+
+    __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
+        ((mma_int_A_I16K4 *) x)[0].load(xs0, stride);
+    }
 };
 
 struct mma_int_B_J8K4 {
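
The new load_low() helper reuses the smaller fragment type's loader: the first half of the I16K8 fragment's register array is reinterpreted as an I16K4 fragment, so only the low K4 columns of the A tile are filled and no data is copied or shuffled. The sketch below illustrates that reinterpret-and-delegate pattern; the simplified fragment structs, register counts, and index mapping are assumptions for illustration, not the actual mma.cuh definitions.

// Minimal CUDA sketch of the reinterpret-and-delegate pattern behind load_low().
// The struct names, register counts, and index mapping are simplified assumptions.
#include <cstdio>

struct frag_A_I16K4 {
    static constexpr int ne = 2;   // assumed: 2 registers per thread for a 16x4 int tile
    int x[ne];

    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
        const int lane = threadIdx.x % 32;
#pragma unroll
        for (int l = 0; l < ne; ++l) {
            // Hypothetical row/column mapping (the real code uses get_i()/get_k() helpers).
            const int i = lane % 16;        // row within the 16-row tile
            const int k = 2*l + lane/16;    // column within the 4-column half
            x[l] = xs0[i*stride + k];
        }
    }
};

struct frag_A_I16K8 {
    static constexpr int ne = 4;   // assumed: twice the registers of the K4 fragment
    int x[ne];

    // Load only the low K4 half of the K8 tile: view the first half of the
    // register array as a K4 fragment and reuse its load() unchanged.
    __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
        ((frag_A_I16K4 *) x)[0].load(xs0, stride);
    }
};

__global__ void demo(const int * __restrict__ tile, const int stride) {
    frag_A_I16K8 a;
    a.load_low(tile, stride);      // fills a.x[0..1]; a.x[2..3] stay untouched
    if (threadIdx.x == 0) {
        printf("x[0] = %d\n", a.x[0]);
    }
}

The cast is valid only if the K8 fragment's register layout begins with exactly the registers a K4 fragment would hold, which is the assumption the original one-liner relies on; under that layout a half-load costs no extra register moves.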
