
Commit 808aba3

CUDA: optimize and refactor MMQ (ggml-org#8416)

* CUDA: optimize and refactor MMQ
* explicit q8_1 memory layouts, add documentation
1 parent a977c11 commit 808aba3

File tree: 5 files changed, +844 −664 lines

ggml/src/ggml-cuda/mma.cuh (+4 lines)
@@ -70,6 +70,10 @@ struct mma_int_A_I16K8 {
         }
 #endif // defined(INT8_MMA_AVAILABLE)
     }
+
+    __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
+        ((mma_int_A_I16K4 *) x)[0].load(xs0, stride);
+    }
 };
 
 struct mma_int_B_J8K4 {
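
The new load_low() helper reuses the smaller fragment type's loader: the first half of the I16K8 fragment's register array is reinterpreted as an I16K4 fragment, so only the low K4 columns of the A tile are filled and no data is copied or shuffled. The sketch below illustrates that reinterpret-and-delegate pattern; the simplified fragment structs, register counts, and index mapping are assumptions for illustration, not the actual mma.cuh definitions.

// Minimal CUDA sketch of the reinterpret-and-delegate pattern behind load_low().
// The struct names, register counts, and index mapping are simplified assumptions.
#include <cstdio>

struct frag_A_I16K4 {
    static constexpr int ne = 2;   // assumed: 2 registers per thread for a 16x4 int tile
    int x[ne];

    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
        const int lane = threadIdx.x % 32;
#pragma unroll
        for (int l = 0; l < ne; ++l) {
            // Hypothetical row/column mapping (the real code uses get_i()/get_k() helpers).
            const int i = lane % 16;        // row within the 16-row tile
            const int k = 2*l + lane/16;    // column within the 4-column half
            x[l] = xs0[i*stride + k];
        }
    }
};

struct frag_A_I16K8 {
    static constexpr int ne = 4;   // assumed: twice the registers of the K4 fragment
    int x[ne];

    // Load only the low K4 half of the K8 tile: view the first half of the
    // register array as a K4 fragment and reuse its load() unchanged.
    __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
        ((frag_A_I16K4 *) x)[0].load(xs0, stride);
    }
};

__global__ void demo(const int * __restrict__ tile, const int stride) {
    frag_A_I16K8 a;
    a.load_low(tile, stride);      // fills a.x[0..1]; a.x[2..3] stay untouched
    if (threadIdx.x == 0) {
        printf("x[0] = %d\n", a.x[0]);
    }
}

The cast is valid only if the K8 fragment's register layout begins with exactly the registers a K4 fragment would hold, which is the assumption the original one-liner relies on; under that layout a half-load costs no extra register moves.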
