Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 104074a

Browse files
committed
Support platforms without rand_r
1 parent 8f648c3 commit 104074a

File tree

8 files changed

+106
-56
lines changed

example/multi_threaded_inference/multi_threaded_inference.cc

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
#include <opencv2/opencv.hpp>
3535
#include <mxnet/c_predict_api.h>
3636
#include "mxnet-cpp/MxNetCpp.h"
37+
#include <random>
3738

3839
const float DEFAULT_MEAN = 117.0;
3940

@@ -248,7 +249,9 @@ void run_inference(const std::string& model_name, const std::vector<mxnet::cpp::
248249
auto func = [&](int num) {
249250
unsigned next = num;
250251
if (random_sleep) {
251-
int sleep_time = rand_r(&next) % 5;
252+
static thread_local std::mt19937 generator;
253+
std::uniform_int_distribution<int> distribution(0, 5);
254+
int sleep_time = distribution(generator);
252255
std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
253256
}
254257
int num_output = 0;

src/operator/contrib/dgl_graph.cc

Lines changed: 52 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@
2424
#include <mxnet/operator_util.h>
2525
#include <dmlc/logging.h>
2626
#include <dmlc/optional.h>
27+
#include <algorithm>
28+
#include <random>
29+
2730
#include "../elemwise_op_common.h"
2831
#include "../../imperative/imperative_utils.h"
2932
#include "../subgraph_op_common.h"
@@ -41,7 +44,9 @@ typedef int64_t dgl_id_t;
4144
*/
4245
class ArrayHeap {
4346
public:
44-
explicit ArrayHeap(const std::vector<float>& prob) {
47+
explicit ArrayHeap(const std::vector<float>& prob, unsigned int seed) {
48+
generator_ = std::mt19937(seed);
49+
distribution_ = std::uniform_real_distribution<float>(0.0, 1.0);
4550
vec_size_ = prob.size();
4651
bit_len_ = ceil(log2(vec_size_));
4752
limit_ = 1 << bit_len_;
@@ -86,8 +91,8 @@ class ArrayHeap {
8691
/*
8792
* Sample from arrayHeap
8893
*/
89-
size_t Sample(unsigned int* seed) {
90-
float xi = heap_[1] * (rand_r(seed)%100/101.0);
94+
size_t Sample() {
95+
float xi = heap_[1] * distribution_(generator_);
9196
int i = 1;
9297
while (i < limit_) {
9398
i = i << 1;
@@ -102,10 +107,10 @@ class ArrayHeap {
102107
/*
103108
* Sample a vector by given the size n
104109
*/
105-
void SampleWithoutReplacement(size_t n, std::vector<size_t>* samples, unsigned int* seed) {
110+
void SampleWithoutReplacement(size_t n, std::vector<size_t>* samples) {
106111
// sample n elements
107112
for (size_t i = 0; i < n; ++i) {
108-
samples->at(i) = this->Sample(seed);
113+
samples->at(i) = this->Sample();
109114
this->Delete(samples->at(i));
110115
}
111116
}
@@ -115,6 +120,8 @@ class ArrayHeap {
115120
int bit_len_; // bit size
116121
int limit_;
117122
std::vector<float> heap_;
123+
std::mt19937 generator_;
124+
std::uniform_real_distribution<float> distribution_;
118125
};
119126

120127
struct NeighborSampleParam : public dmlc::Parameter<NeighborSampleParam> {
@@ -402,10 +409,12 @@ static bool CSRNeighborNonUniformSampleType(const nnvm::NodeAttrs& attrs,
402409
static void RandomSample(size_t set_size,
403410
size_t num,
404411
std::vector<size_t>* out,
405-
unsigned int* seed) {
412+
unsigned int seed) {
413+
std::mt19937 generator(seed);
406414
std::unordered_set<size_t> sampled_idxs;
415+
std::uniform_int_distribution<size_t> distribution(0, set_size - 1);
407416
while (sampled_idxs.size() < num) {
408-
sampled_idxs.insert(rand_r(seed) % set_size);
417+
sampled_idxs.insert(distribution(generator));
409418
}
410419
out->clear();
411420
for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) {
@@ -441,7 +450,7 @@ static void GetUniformSample(const dgl_id_t* val_list,
441450
const size_t max_num_neighbor,
442451
std::vector<dgl_id_t>* out_ver,
443452
std::vector<dgl_id_t>* out_edge,
444-
unsigned int* seed) {
453+
unsigned int seed) {
445454
// Copy ver_list to output
446455
if (ver_len <= max_num_neighbor) {
447456
for (size_t i = 0; i < ver_len; ++i) {
@@ -485,7 +494,7 @@ static void GetNonUniformSample(const float* probability,
485494
const size_t max_num_neighbor,
486495
std::vector<dgl_id_t>* out_ver,
487496
std::vector<dgl_id_t>* out_edge,
488-
unsigned int* seed) {
497+
unsigned int seed) {
489498
// Copy ver_list to output
490499
if (ver_len <= max_num_neighbor) {
491500
for (size_t i = 0; i < ver_len; ++i) {
@@ -500,8 +509,8 @@ static void GetNonUniformSample(const float* probability,
500509
for (size_t i = 0; i < ver_len; ++i) {
501510
sp_prob[i] = probability[col_list[i]];
502511
}
503-
ArrayHeap arrayHeap(sp_prob);
504-
arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed);
512+
ArrayHeap arrayHeap(sp_prob, seed);
513+
arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index);
505514
out_ver->resize(max_num_neighbor);
506515
out_edge->resize(max_num_neighbor);
507516
for (size_t i = 0; i < max_num_neighbor; ++i) {
@@ -536,8 +545,8 @@ static void SampleSubgraph(const NDArray &csr,
536545
const float* probability,
537546
int num_hops,
538547
size_t num_neighbor,
539-
size_t max_num_vertices) {
540-
unsigned int time_seed = time(nullptr);
548+
size_t max_num_vertices,
549+
unsigned int random_seed) {
541550
size_t num_seeds = seed_arr.shape().Size();
542551
CHECK_GE(max_num_vertices, num_seeds);
543552

@@ -594,7 +603,7 @@ static void SampleSubgraph(const NDArray &csr,
594603
num_neighbor,
595604
&tmp_sampled_src_list,
596605
&tmp_sampled_edge_list,
597-
&time_seed);
606+
random_seed);
598607
} else { // non-uniform-sample
599608
GetNonUniformSample(probability,
600609
val_list + *(indptr + dst_id),
@@ -603,7 +612,7 @@ static void SampleSubgraph(const NDArray &csr,
603612
num_neighbor,
604613
&tmp_sampled_src_list,
605614
&tmp_sampled_edge_list,
606-
&time_seed);
615+
random_seed);
607616
}
608617
CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size());
609618
size_t pos = neighbor_list.size();
@@ -720,12 +729,15 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
720729
const std::vector<NDArray>& inputs,
721730
const std::vector<OpReqType>& req,
722731
const std::vector<NDArray>& outputs) {
723-
const NeighborSampleParam& params =
724-
nnvm::get<NeighborSampleParam>(attrs.parsed);
732+
const NeighborSampleParam& params = nnvm::get<NeighborSampleParam>(attrs.parsed);
725733

726734
int num_subgraphs = inputs.size() - 1;
727735
CHECK_EQ(outputs.size(), 3 * num_subgraphs);
728736

737+
mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
738+
mshadow::Random<cpu, unsigned int> *prnd = ctx.requested[0].get_random<cpu, unsigned int>(s);
739+
unsigned int seed = prnd->GetRandInt();
740+
729741
#pragma omp parallel for
730742
for (int i = 0; i < num_subgraphs; i++) {
731743
SampleSubgraph(inputs[0], // graph_csr
@@ -737,7 +749,12 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
737749
nullptr, // probability
738750
params.num_hops,
739751
params.num_neighbor,
740-
params.max_num_vertices);
752+
params.max_num_vertices,
753+
#if defined(_OPENMP)
754+
seed + omp_get_thread_num());
755+
#else
756+
seed);
757+
#endif
741758
}
742759
}
743760

@@ -798,6 +815,9 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in
798815
.set_attr<mxnet::FInferShape>("FInferShape", CSRNeighborUniformSampleShape)
799816
.set_attr<nnvm::FInferType>("FInferType", CSRNeighborUniformSampleType)
800817
.set_attr<FComputeEx>("FComputeEx<cpu>", CSRNeighborUniformSampleComputeExCPU)
818+
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
819+
return std::vector<ResourceRequest>{ResourceRequest::kRandom};
820+
})
801821
.add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix")
802822
.add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices")
803823
.set_attr<std::string>("key_var_num_args", "num_args")
@@ -811,14 +831,17 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs
811831
const std::vector<NDArray>& inputs,
812832
const std::vector<OpReqType>& req,
813833
const std::vector<NDArray>& outputs) {
814-
const NeighborSampleParam& params =
815-
nnvm::get<NeighborSampleParam>(attrs.parsed);
834+
const NeighborSampleParam& params = nnvm::get<NeighborSampleParam>(attrs.parsed);
816835

817836
int num_subgraphs = inputs.size() - 2;
818837
CHECK_EQ(outputs.size(), 4 * num_subgraphs);
819838

820839
const float* probability = inputs[1].data().dptr<float>();
821840

841+
mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
842+
mshadow::Random<cpu, unsigned int> *prnd = ctx.requested[0].get_random<cpu, unsigned int>(s);
843+
unsigned int seed = prnd->GetRandInt();
844+
822845
#pragma omp parallel for
823846
for (int i = 0; i < num_subgraphs; i++) {
824847
float* sub_prob = outputs[i+2*num_subgraphs].data().dptr<float>();
@@ -831,7 +854,12 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs
831854
probability,
832855
params.num_hops,
833856
params.num_neighbor,
834-
params.max_num_vertices);
857+
params.max_num_vertices,
858+
#if defined(_OPENMP)
859+
seed + omp_get_thread_num());
860+
#else
861+
seed);
862+
#endif
835863
}
836864
}
837865

@@ -897,6 +925,9 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in
897925
.set_attr<mxnet::FInferShape>("FInferShape", CSRNeighborNonUniformSampleShape)
898926
.set_attr<nnvm::FInferType>("FInferType", CSRNeighborNonUniformSampleType)
899927
.set_attr<FComputeEx>("FComputeEx<cpu>", CSRNeighborNonUniformSampleComputeExCPU)
928+
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
929+
return std::vector<ResourceRequest>{ResourceRequest::kRandom};
930+
})
900931
.add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix")
901932
.add_argument("probability", "NDArray-or-Symbol", "probability vector")
902933
.add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices")

src/operator/nn/mkldnn/mkldnn_rnn.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -995,7 +995,7 @@ void MKLDNNRnnOp::Forward(const OpContext &ctx,
995995
const std::vector<NDArray> &inputs,
996996
const std::vector<OpReqType> &req,
997997
const std::vector<NDArray> &outputs) {
998-
TmpMemMgr::Get()->Init(ctx.requested[0]);
998+
TmpMemMgr::Get()->Init(ctx.requested[1]);
999999
// In the `autograd.record()` context, RNNOp is required to run into
10001000
// forward_training mode.
10011001
const bool is_training = (ctx.is_train || ctx.need_grad);
@@ -1132,7 +1132,7 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx,
11321132
const std::vector<OpReqType>& req,
11331133
const std::vector<NDArray>& outputs) {
11341134
using tag = mkldnn::memory::format_tag;
1135-
TmpMemMgr::Get()->Init(ctx.requested[0]);
1135+
TmpMemMgr::Get()->Init(ctx.requested[1]);
11361136
const RNNParam& default_param = full_param_.default_param;
11371137
const int data_dtype = inputs[rnn_enum::kData].dtype();
11381138
const int w_dtype = inputs[rnn_enum::kParams].dtype();

src/operator/rnn-inl.h

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include <mxnet/operator.h>
3232
#include <mxnet/storage.h>
3333
#include <algorithm>
34+
#include <random>
3435
#include <map>
3536
#include <vector>
3637
#include <string>
@@ -293,23 +294,24 @@ void RNNForwardTraining(DType* ws,
293294
DType* hy_ptr,
294295
DType* cy_ptr,
295296
const float dropout,
296-
int mode) {
297+
int mode,
298+
std::mt19937 &rnd_engine) { // NOLINT(runtime/references)
297299
switch (mode) {
298300
case rnn_enum::kLstm:
299301
LstmForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
300302
batch_size, input_size, state_size, x_ptr, hx_ptr, cx_ptr,
301-
w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout);
303+
w_ptr, b_ptr, y_ptr, hy_ptr, cy_ptr, dropout, rnd_engine);
302304
break;
303305
case rnn_enum::kGru:
304306
GruForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
305307
batch_size, input_size, state_size, x_ptr, hx_ptr,
306-
w_ptr, y_ptr, hy_ptr, dropout);
308+
w_ptr, y_ptr, hy_ptr, dropout, rnd_engine);
307309
break;
308310
case rnn_enum::kRnnTanh:
309311
case rnn_enum::kRnnRelu:
310312
VanillaRNNForwardTraining<DType>(ws, rs, state_outputs, num_layers, direction, seq_length,
311313
batch_size, input_size, state_size, x_ptr, hx_ptr,
312-
w_ptr, y_ptr, hy_ptr, dropout, mode);
314+
w_ptr, y_ptr, hy_ptr, dropout, mode, rnd_engine);
313315
break;
314316
default:
315317
LOG(FATAL) << "unknown RNN mode " << mode;
@@ -842,7 +844,8 @@ class RNNOp {
842844
}
843845
#endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__)
844846

845-
if (ctx_.dev_type == kCPU) {
847+
#if !defined(__CUDACC__) // cuda doesn't support C++17
848+
if constexpr (std::is_same<xpu, cpu>::value) {
846849
int projection_size = 0;
847850
if (param_.projection_size.has_value()) {
848851
projection_size = param_.projection_size.value();
@@ -860,6 +863,9 @@ class RNNOp {
860863
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
861864

862865
if (ctx.is_train || ctx.need_grad) {
866+
mshadow::Random<cpu, unsigned> *prnd = ctx.requested[0].get_random<xpu, unsigned int>(s);
867+
std::mt19937 &rnd_engine = prnd->GetRndEngine();
868+
863869
// allocate reserve space
864870
if (param_.projection_size.has_value()) {
865871
LOG(FATAL) << "No training support for LSTM with projection on CPU currently.";
@@ -894,7 +900,8 @@ class RNNOp {
894900
hy_ptr,
895901
cy_ptr,
896902
param_.p,
897-
param_.mode);
903+
param_.mode,
904+
rnd_engine);
898905
} else {
899906
RNNForwardInference<DType>(work_cpu_space,
900907
param_.state_outputs,
@@ -916,6 +923,7 @@ class RNNOp {
916923
param_.mode);
917924
}
918925
}
926+
#endif
919927
}
920928

921929
void Backward(const OpContext &ctx,

src/operator/rnn.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,7 @@ static std::vector<ResourceRequest> RNNResourceEx(const NodeAttrs& attrs, const
184184
}
185185
#endif
186186
} else {
187+
request.emplace_back(ResourceRequest::kRandom);
187188
#if MXNET_USE_MKLDNN == 1
188189
request.emplace_back(ResourceRequest::kTempSpace);
189190
#endif

0 commit comments

Comments
 (0)