@@ -409,10 +409,11 @@ class RNNOp {
   std::vector<mkldnn::memory> bias_memory;
   std::vector<mkldnn::memory> y_memory;
   std::vector<mkldnn::memory> hcy_memory;
+  size_t weights_version;
   bool has_cache;
   bool init_mem_;
   size_t reserve_mem_size_;
-  Storage::Handle mem_space_;
+  NDArray mem_space_;
 #endif
   explicit RNNOp(RNNParam param, Context ctx) {
     this->param_ = param;
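Note: this hunk only declares `weights_version` alongside the existing `has_cache` flag; the diff never shows its consumer. A plausible reading, sketched below with stand-in types (`WeightArray`, `RnnCache`, and `Prepare` are illustrative, not MXNet API), is version-based invalidation of the cached MKLDNN weight memory: repack weights only when the source array's write-version has moved.

```cpp
#include <cstddef>
#include <cstdio>

// Illustrative stand-in for a weight array with a write-version counter.
// (Not MXNet API; MXNet's NDArray tracks writes with a similar counter.)
struct WeightArray {
  size_t version_ = 0;
  void Write() { ++version_; }               // every mutation bumps the counter
  size_t version() const { return version_; }
};

// Sketch of version-based cache invalidation: rebuild the repacked weights
// only when the source array has been written since the last forward pass.
struct RnnCache {
  bool has_cache = false;
  size_t weights_version = 0;                // mirrors the new member above

  void Prepare(const WeightArray& w) {
    if (!has_cache || weights_version != w.version()) {
      // ... repack weights into the backend's preferred layout ...
      weights_version = w.version();
      has_cache = true;
      std::puts("cache rebuilt");
    } else {
      std::puts("cache reused");
    }
  }
};

int main() {
  WeightArray w;
  RnnCache cache;
  cache.Prepare(w);   // first call: rebuilds
  cache.Prepare(w);   // unchanged weights: reused
  w.Write();          // e.g. an optimizer step updates the weights
  cache.Prepare(w);   // version moved: rebuilds
}
```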
@@ -522,12 +523,6 @@ class RNNOp {
   }

   ~RNNOp() {
-#if MXNET_USE_MKLDNN == 1
-    if (init_mem_) {
-      Storage::Get()->Free(mem_space_);
-      init_mem_ = false;
-    }
-#endif  // MXNET_USE_MKLDNN
 #if MXNET_USE_CUDNN == 1
     CUDNN_CALL(cudnnDestroyTensorDescriptor(hx_desc_));
     CUDNN_CALL(cudnnDestroyTensorDescriptor(cx_desc_));
@@ -560,17 +555,6 @@ class RNNOp {
     CUDNN_CALL(cudnnDestroyRNNDataDescriptor(dy_data_desc_));
 #endif  // MXNET_USE_CUDNN_GE_7200
 #endif  // MXNET_USE_CUDNN
-
-    if (ctx_.dev_type == kCPU) {
-      if (init_space_) {
-        Storage::Get()->Free(reserve_cpu_space_);
-        init_space_ = false;
-      }
-      if (temp_init_space_) {
-        Storage::Get()->Free(temp_cpu_space_);
-        temp_init_space_ = false;
-      }
-    }
   }

   void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
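The destructor edits above follow from the `Storage::Handle` → `NDArray` swap: a `Storage::Handle` does not own its allocation, so every handle needed a matching `Storage::Get()->Free()` in `~RNNOp()`, while an `NDArray` releases its buffer through reference counting when it is reassigned or destroyed. A minimal standalone sketch of that ownership difference, with `std::shared_ptr` standing in for `NDArray`'s refcounted storage (`OldOp`/`NewOp` are illustrative):

```cpp
#include <memory>

// Before: a non-owning handle records an allocation, so the destructor had
// to free it explicitly or the buffer leaked. Modeled with a raw pointer.
struct OldOp {
  float* mem_space_ = nullptr;
  ~OldOp() { delete[] mem_space_; }   // manual cleanup, easy to get wrong
};

// After: an owning member releases its buffer when the op is destroyed, so
// the compiler-generated destructor needs no cleanup code at all.
struct NewOp {
  std::shared_ptr<float[]> mem_space_;   // stand-in for NDArray ownership
};

int main() {
  { OldOp op; op.mem_space_ = new float[64]; }       // freed by ~OldOp
  { NewOp op; op.mem_space_.reset(new float[64]); }  // freed by RAII
  { NewOp op; op.mem_space_.reset(new float[64]);
    op.mem_space_.reset(new float[128]); }  // reassigning frees the old buffer
}
```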
@@ -855,37 +839,30 @@ class RNNOp {
 #endif  // MXNET_USE_CUDNN == 1 && defined(__CUDACC__)

     if (ctx_.dev_type == kCPU) {
+      // allocate temp space
+      const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
+                                                             param_.state_size, direction, param_.mode);
+      if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) {
+        temp_cpu_space_size_ = work_cpu_space_size;
+        temp_cpu_space_ = NDArray(TShape({static_cast<dim_t>(temp_cpu_space_size_)}), ctx_,
+                                  false, in_data[rnn_enum::kData].type_flag_);
+        temp_init_space_ = true;
+      }
+      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
+
       if (ctx.is_train) {
-        // allocate temp space
-        const size_t work_cpu_space_size =
-            GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
-                                param_.state_size, direction, param_.mode);
-        if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
-          Storage::Get()->Free(temp_cpu_space_);
-          temp_init_space_ = false;
-        }
-        if (!temp_init_space_) {
-          temp_cpu_space_ = Storage::Get()->Alloc
-              (work_cpu_space_size * sizeof(DType), Context::CPU());
-          temp_cpu_space_size_ = work_cpu_space_size;
-          temp_init_space_ = true;
-        }
-        DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
+        // allocate reserve space

         const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
                                                      param_.seq_length_, param_.batch_size_,
                                                      param_.state_size, param_.mode);
-        if (init_space_ && reserve_cpu_space_size_ < r_size) {
-          Storage::Get()->Free(reserve_cpu_space_);
-          init_space_ = false;
-        }
-        if (!init_space_) {
-          reserve_cpu_space_ = Storage::Get()->Alloc(r_size * sizeof(DType), Context::CPU());
+        if (!init_space_ || reserve_cpu_space_size_ < r_size) {
           reserve_cpu_space_size_ = r_size;
+          reserve_cpu_space_ = NDArray(TShape({static_cast<dim_t>(reserve_cpu_space_size_)}), ctx_,
+                                       false, in_data[rnn_enum::kData].type_flag_);
           init_space_ = true;
         }
-
-        DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
+        DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);

         RNNForwardTraining<DType>(work_cpu_space,
                                   reserve_space_ptr,
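Two things happen in this hunk: the temp-workspace allocation is hoisted above the `is_train` branch so training and inference share one buffer (which is why the duplicated block in the inference hunk below can be deleted outright), and the old two-step "free if too small, then alloc if uninitialized" sequence collapses into a single grow-if-needed check, since assigning a fresh `NDArray` drops the previous buffer automatically. A condensed sketch of that check, with `std::shared_ptr` as a stand-in for the owning array:

```cpp
#include <cstddef>
#include <cstdio>
#include <memory>

using Buffer = std::shared_ptr<float[]>;  // stand-in for an owning NDArray

// Single grow-if-needed check replacing the old two-step
// "Free if too small, then Alloc if uninitialized" sequence: reassigning
// an owning buffer drops the previous allocation automatically.
void EnsureCapacity(bool* init, size_t* size, Buffer* buf, size_t needed) {
  if (!*init || *size < needed) {
    *size = needed;
    buf->reset(new float[needed]);   // old buffer (if any) freed here
    *init = true;
  }
}

int main() {
  bool temp_init_space = false;
  size_t temp_cpu_space_size = 0;
  Buffer temp_cpu_space;
  EnsureCapacity(&temp_init_space, &temp_cpu_space_size, &temp_cpu_space, 1024);
  EnsureCapacity(&temp_init_space, &temp_cpu_space_size, &temp_cpu_space, 512);   // reuse
  EnsureCapacity(&temp_init_space, &temp_cpu_space_size, &temp_cpu_space, 2048);  // grow
  std::printf("capacity: %zu floats\n", temp_cpu_space_size);
}
```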
@@ -945,20 +922,6 @@ class RNNOp {
 #endif  // MXNET_USE_MKLDNN == 1
       // Before integrating MKLDNN GRU fp32 inference
       // using below code for keep func being OK
-      const size_t work_cpu_space_size =
-          GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
-                              param_.state_size, direction, param_.mode);
-      if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
-        Storage::Get()->Free(temp_cpu_space_);
-        temp_init_space_ = false;
-      }
-      if (!temp_init_space_) {
-        temp_cpu_space_ = Storage::Get()->Alloc
-            (work_cpu_space_size * sizeof(DType), Context::CPU());
-        temp_cpu_space_size_ = work_cpu_space_size;
-        temp_init_space_ = true;
-      }
-      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
       RNNForwardInference<DType>(work_cpu_space,
                                  param_.state_outputs,
                                  param_.num_layers,
@@ -1171,7 +1134,7 @@ class RNNOp {
       if (!temp_init_space_ || temp_cpu_space_size_ != work_cpu_space_size) {
        LOG(FATAL) << "Check temp init error";
       }
-      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
+      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
       size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
                                              param_.seq_length_, param_.batch_size_,
                                              param_.state_size, param_.mode);
@@ -1180,7 +1143,7 @@ class RNNOp {
         LOG(FATAL) << "Check forward init error";
       }

-      DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
+      DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);
       RNNBackward<DType>(work_cpu_space,
                          reserve_space_ptr,
                          param_.num_layers,
@@ -1551,7 +1514,7 @@ class RNNOp {
 #endif  // MXNET_USE_CUDNN
   bool init_space_, temp_init_space_;
   size_t reserve_cpu_space_size_, temp_cpu_space_size_;
-  Storage::Handle reserve_cpu_space_, temp_cpu_space_;
+  NDArray reserve_cpu_space_, temp_cpu_space_;
 };  // class RNNOp

 static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,