Skip to content

Commit e3eba61

Browse files
committed
GPU update: add superlu_gpu_utils.cu;
Call free_LUstruct_gpu only if (superlu_acc_offload) is set; optionally allow round-robin binding of MPI ranks to GPUs.
1 parent 9fa4276 commit e3eba61

File tree

9 files changed

+30
-18
lines changed

9 files changed

+30
-18
lines changed

EXAMPLE/pddrive.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ int main(int argc, char *argv[])
6969
nprow = 1; /* Default process rows. */
7070
npcol = 1; /* Default process columns. */
7171
nrhs = 1; /* Number of right-hand side. */
72-
printf("MAIN ...\n"); fflush(stdout);
7372

7473
/* ------------------------------------------------------------
7574
INITIALIZE MPI ENVIRONMENT.

SRC/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ set(sources
5656
sec_structs.c
5757
)
5858
if (HAVE_CUDA)
59-
list(APPEND sources cublas_utils.c)
59+
list(APPEND sources cublas_utils.c superlu_gpu_utils.cu)
6060
endif()
6161

6262
if (MSVC)

SRC/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ ZPLUSRC += pzgssvx3d.o pzgstrf3d.o ztreeFactorization.o zscatter3d.o \
8585
znrformat_loc3d.o ztreeFactorizationGPU.o ##$(FACT3D)
8686

8787
ifeq ($(HAVE_CUDA),TRUE)
88-
ALLAUX += cublas_utils.o
88+
ALLAUX += cublas_utils.o superlu_gpu_utils.o
8989
DPLUSRC += dsuperlu_gpu.o
9090
ZPLUSRC += zsuperlu_gpu.o
9191
endif

SRC/pdgstrf3d.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,9 @@ int_t pdgstrf3d(superlu_dist_options_t *options, int m, int n, double anorm,
344344

345345
#ifdef GPU_ACC
346346
/* This frees the GPU storage allocated in initSluGPU3D_t() */
347-
dfree_LUstruct_gpu (sluGPU->A_gpu);
347+
if (superlu_acc_offload) {
348+
dfree_LUstruct_gpu (sluGPU->A_gpu);
349+
}
348350
#endif
349351

350352
MPI_Barrier( grid3d->comm);

SRC/psgstrf3d.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,9 @@ int_t psgstrf3d(superlu_dist_options_t *options, int m, int n, float anorm,
344344

345345
#ifdef GPU_ACC
346346
/* This frees the GPU storage allocated in initSluGPU3D_t() */
347-
sfree_LUstruct_gpu (sluGPU->A_gpu);
347+
if (superlu_acc_offload) {
348+
sfree_LUstruct_gpu (sluGPU->A_gpu);
349+
}
348350
#endif
349351

350352
MPI_Barrier( grid3d->comm);

SRC/pzgstrf3d.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,9 @@ int_t pzgstrf3d(superlu_dist_options_t *options, int m, int n, double anorm,
343343

344344
#ifdef GPU_ACC
345345
/* This frees the GPU storage allocated in initSluGPU3D_t() */
346-
zfree_LUstruct_gpu (sluGPU->A_gpu);
346+
if (superlu_acc_offload) {
347+
zfree_LUstruct_gpu (sluGPU->A_gpu);
348+
}
347349
#endif
348350

349351
MPI_Barrier( grid3d->comm);

SRC/sp_ienv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ sp_ienv_dist(int ispec)
116116
case 8:
117117
ttemp = getenv ("MAX_BUFFER_SIZE");
118118
if (ttemp) return atoi (ttemp);
119-
else return 1000000000; // 256000000 = 16000^2
119+
else return 256000000; // 256000000 = 16000^2
120120
}
121121

122122
/* Invalid value for ISPEC */

SRC/superlu_grid.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,15 @@ void superlu_gridinit(MPI_Comm Bcomm, /* The base communicator upon which
6464

6565
#ifdef GPU_ACC
6666
/* Binding each MPI to a CUDA device */
67-
int devs, rank;
68-
MPI_Comm_rank(Bcomm, &rank); // MPI_COMM_WORLD??
69-
cudaGetDeviceCount(&devs); // Returns the number of compute-capable devices
70-
cudaSetDevice(rank % devs); // Set device to be used for GPU executions
71-
////
67+
char *ttemp;
68+
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");
69+
70+
if (ttemp) {
71+
int devs, rank;
72+
MPI_Comm_rank(Bcomm, &rank); // MPI_COMM_WORLD??
73+
cudaGetDeviceCount(&devs); // Returns the number of compute-capable devices
74+
cudaSetDevice(rank % devs); // Set device to be used for GPU executions
75+
}
7276
#endif
7377
}
7478

SRC/superlu_grid3d.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,16 @@ void superlu_gridinit3d(MPI_Comm Bcomm, /* The base communicator upon which
4949

5050
#ifdef GPU_ACC
5151
/* Binding each MPI to a CUDA device */
52-
int devs, rank;
53-
MPI_Comm_rank(Bcomm, &rank); // MPI_COMM_WORLD??
54-
cudaGetDeviceCount(&devs); // Returns the number of compute-capable devices
55-
cudaSetDevice(rank % devs); // Set device to be used for GPU executions
56-
////
52+
char *ttemp;
53+
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");
54+
55+
if (ttemp) {
56+
int devs, rank;
57+
MPI_Comm_rank(Bcomm, &rank); // MPI_COMM_WORLD??
58+
cudaGetDeviceCount(&devs); // Returns the number of compute-capable devices
59+
cudaSetDevice(rank % devs); // Set device to be used for GPU executions
60+
}
5761
#endif
58-
5962
}
6063

6164

0 commit comments

Comments
 (0)