Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit b40615e

Browse files
committed
Factorize CUDA_KERNEL_LOOP used in CUDA kernels
Signed-off-by: Serge Panev <[email protected]>
1 parent 3dacabe commit b40615e

File tree

5 files changed

6 additions and 23 deletions (+6 / -23 lines changed)

5 files changed

6 additions and 23 deletions (+6 / -23 lines changed)

src/operator/contrib/count_sketch.cu

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,6 @@
3333
#define WARPS_PER_BLOCK 1
3434
#define THREADS_PER_BLOCK 512
3535

36-
#define CUDA_KERNEL_LOOP(i, n) \
37-
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
38-
i < (n); \
39-
i += blockDim.x * gridDim.x)
4036
namespace mshadow {
4137
namespace cuda {
4238
// wrappers to deal with atomic add

src/operator/contrib/deformable_psroi_pooling.cu

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,6 @@
3838
cudaError_t error = condition; \
3939
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
4040
} while (0)
41-
#define CUDA_KERNEL_LOOP(i, n) \
42-
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
43-
i < (n); \
44-
i += blockDim.x * gridDim.x)
4541

4642
namespace mshadow {
4743
namespace cuda {

src/operator/contrib/psroi_pooling.cu

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,10 +39,6 @@
3939
cudaError_t error = condition; \
4040
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
4141
} while (0)
42-
#define CUDA_KERNEL_LOOP(i, n) \
43-
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
44-
i < (n); \
45-
i += blockDim.x * gridDim.x)
4642

4743
namespace mshadow {
4844
namespace cuda {

src/operator/contrib/roi_align.cu

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,15 +24,12 @@
2424
* Adapted from Caffe2
2525
*/
2626
#include "./roi_align-inl.h"
27+
#include "../mxnet_op.h"
2728

2829

2930
namespace mxnet {
3031
namespace op {
3132

32-
#define CUDA_1D_KERNEL_LOOP(i, n) \
33-
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
34-
i += blockDim.x * gridDim.x)
35-
3633
using namespace mshadow::cuda;
3734

3835
// The maximum number of blocks to use in the default kernel call.
@@ -120,7 +117,7 @@ __global__ void RoIAlignForwardKernel(
120117
const int sampling_ratio,
121118
const T* bottom_rois,
122119
T* top_data) {
123-
CUDA_1D_KERNEL_LOOP(index, nthreads) {
120+
CUDA_KERNEL_LOOP(index, nthreads) {
124121
// (n, c, ph, pw) is an element in the pooled output
125122
int pw = index % pooled_width;
126123
int ph = (index / pooled_width) % pooled_height;
@@ -259,7 +256,7 @@ __global__ void RoIAlignBackwardKernel(
259256
const int sampling_ratio,
260257
T* bottom_diff,
261258
const T* bottom_rois) {
262-
CUDA_1D_KERNEL_LOOP(index, nthreads) {
259+
CUDA_KERNEL_LOOP(index, nthreads) {
263260
// (n, c, ph, pw) is an element in the pooled output
264261
int pw = index % pooled_width;
265262
int ph = (index / pooled_width) % pooled_height;
@@ -353,7 +350,7 @@ __global__ void RoIAlignBackwardKernel(
353350
} // if
354351
} // ix
355352
} // iy
356-
} // CUDA_1D_KERNEL_LOOP
353+
} // CUDA_KERNEL_LOOP
357354
} // RoIAlignBackward
358355

359356
template<typename xpu>

src/operator/correlation.cu

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include <mshadow/cuda/reduce.cuh>
2929
#include <algorithm>
3030
#include <vector>
31+
#include "./mxnet_op.h"
3132

3233
#define ROUND_OFF 50000
3334
#define WARPS_PER_BLOCK 1
@@ -38,10 +39,7 @@
3839
cudaError_t error = condition; \
3940
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
4041
} while (0)
41-
#define CUDA_KERNEL_LOOP(i, n) \
42-
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
43-
i < (n); \
44-
i += blockDim.x * gridDim.x)
42+
4543
namespace mshadow {
4644
namespace cuda {
4745
// == Correlation Kernel

0 commit comments

Comments
 (0)