We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ade68d0, commit 05c792e (Copy full SHA for 05c792e)
ggml-cuda.cu
@@ -9,6 +9,7 @@
9
#include <hip/hip_runtime.h>
10
#include <hipblas/hipblas.h>
11
#include <hip/hip_fp16.h>
12
+#include "rocblas/rocblas.h"
13
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
14
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
15
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2554,6 +2555,11 @@ void ggml_init_cublas() {
2554
2555
static bool initialized = false;
2556
2557
if (!initialized) {
2558
+#ifdef GGML_USE_HIPBLAS
2559
+ rocblas_initialize();
2560
+ hipDeviceSynchronize();
2561
+ fprintf(stderr, "hipBLAS INITIALIZED\n");
2562
+#endif
2563
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
2564
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
2565
int64_t total_vram = 0;
0 commit comments