@@ -7,10 +7,10 @@ namespace anakin {
7
7
8
8
template <typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
9
9
Net<Ttype, Dtype, Ptype, RunType>::~Net () {
10
- if (_graph_p) {
11
- delete _graph_p;
12
- _graph_p = nullptr ;
13
- }
10
+ if (_graph_p) {
11
+ delete _graph_p;
12
+ _graph_p = nullptr ;
13
+ }
14
14
}
15
15
16
16
template <typename Ttype, DataType Dtype>
@@ -24,7 +24,7 @@ double tensor_average(Tensor4dPtr<Ttype, Dtype>& out_tensor_p) {
24
24
tensorptr.h_tensor ().copy_from (*out_tensor_p);
25
25
hptr = tensorptr.h_tensor ().data ();
26
26
for (int i=0 ; i<out_tensor_p->valid_size (); i++) {
27
- sum += hptr[i];
27
+ sum += hptr[i];
28
28
}
29
29
return sum/out_tensor_p->valid_size ();
30
30
}
@@ -138,8 +138,8 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
138
138
init_env (graph);
139
139
// shallow copy
140
140
_graph_p->CopyFrom (graph);
141
-
142
- double curr_mem_in_mb_start = MemoryInfo<Ttype>::Global ().get_used_mem_in_mb ();
141
+
142
+ double curr_mem_in_mb_start = MemoryInfo<Ttype>::Global ().get_used_mem_in_mb ();
143
143
144
144
auto node_names_in_exec_order = graph.get_nodes_in_order ();
145
145
// infer basic shape and parsing parameter from graph
@@ -190,18 +190,24 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
190
190
if (node_ptr->get_op_name () == " ConvBatchnormScale" ||
191
191
node_ptr->get_op_name () == " ConvBatchnormScaleRelu" || node_ptr->get_op_name () == " ConvRelu" ||
192
192
node_ptr->get_op_name () == " Convolution" ) {
193
- std::string group = " group" ;
193
+ std::string group = " group" ;
194
194
auto group_val = node_ptr->template get_attr <int >(group);
195
+ std::string dilation = " dilation_rate" ;
196
+ auto dilation_rate_val = node_ptr->template get_attr <PTuple<int > >(dilation);
195
197
using pblock_type = PBlock<typename DataTypeWarpper<Dtype>::type, Ttype>;
196
198
std::string weight_name = " weight_1" ;
197
199
auto weights = node_ptr->template get_attr <pblock_type>(weight_name);
198
- // int c = weights.d_tensor().channel();
199
-
200
- if ((group_val == 1 )) {
201
- node_ptr->set_op (OpFactory<Ttype, Dtype, Ptype>::Global ()[" Sass" + node_ptr->get_op_name ()]);
202
- node_ptr->get_op_name () = " Sass" + node_ptr->get_op_name ();
203
- } else {
204
- LOG (ERROR) << " node_ptr->get_op_name() sass not support yet." ;
200
+
201
+ int k_w = weights.d_tensor ().width ();
202
+ int k_h = weights.d_tensor ().height ();
203
+ int dil_h = dilation_rate_val.vector ()[0 ];
204
+ int dil_w = dilation_rate_val.vector ()[1 ];
205
+
206
+ if ((group_val == 1 ) && (k_w == 3 && k_h == 3 && dil_h == 1 && dil_w == 1 )) {
207
+ node_ptr->set_op (OpFactory<Ttype, Dtype, Ptype>::Global ()[" Sass" +node_ptr->get_op_name ()]);
208
+ node_ptr->get_op_name () = " Sass" + node_ptr->get_op_name ();
209
+ } else {
210
+ LOG (ERROR) << " node_ptr->get_op_name() sass not support yet." ;
205
211
auto *op_pointer = OpFactory<Ttype, Dtype, Ptype>::Global ()[node_ptr->get_op_name ()];
206
212
node_ptr->set_op (op_pointer);
207
213
}
@@ -285,16 +291,16 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
285
291
#endif
286
292
}
287
293
288
- double curr_mem_in_mb_end = MemoryInfo<Ttype>::Global ().get_used_mem_in_mb ();
289
- this ->_graph_p ->statistics .template set_info <graph::SYSTEM_MEM>(curr_mem_in_mb_end - curr_mem_in_mb_start);
294
+ double curr_mem_in_mb_end = MemoryInfo<Ttype>::Global ().get_used_mem_in_mb ();
295
+ this ->_graph_p ->statistics .template set_info <graph::SYSTEM_MEM>(curr_mem_in_mb_end - curr_mem_in_mb_start);
290
296
// init memory of _graph_p
291
297
init_memory ();
292
-
293
- graph.statistics = _graph_p->statistics ; // copy statistic back
294
- LOG (INFO) << " Temp mem used: " << this ->_graph_p ->statistics .template get_info <graph::TEMP_MEM>() << " MB" ;
295
- LOG (INFO) << " Original mem used: " << this ->_graph_p ->statistics .template get_info <graph::ORI_TEMP_MEM>() << " MB" ;
296
- LOG (INFO) << " Model mem used: " << this ->_graph_p ->statistics .template get_info <graph::MODEL_MEM>() << " MB" ;
297
- LOG (INFO) << " System mem used: " << this ->_graph_p ->statistics .template get_info <graph::SYSTEM_MEM>() << " MB" ;
298
+
299
+ graph.statistics = _graph_p->statistics ; // copy statistic back
300
+ LOG (INFO) << " Temp mem used: " << this ->_graph_p ->statistics .template get_info <graph::TEMP_MEM>() << " MB" ;
301
+ LOG (INFO) << " Original mem used: " << this ->_graph_p ->statistics .template get_info <graph::ORI_TEMP_MEM>() << " MB" ;
302
+ LOG (INFO) << " Model mem used: " << this ->_graph_p ->statistics .template get_info <graph::MODEL_MEM>() << " MB" ;
303
+ LOG (INFO) << " System mem used: " << this ->_graph_p ->statistics .template get_info <graph::SYSTEM_MEM>() << " MB" ;
298
304
#ifdef ENABLE_OP_TIMER
299
305
_op_time = std::vector<float >(_exec_funcs.size (), 0 .0f );
300
306
#endif
@@ -312,11 +318,11 @@ void Net<Ttype, Dtype, Ptype, RunType>::init(graph::Graph<Ttype, Dtype, Ptype>&
312
318
LOG (WARNING) << " Inspect memory of " << executer.name << " (" << executer.op_name << " ) " ;
313
319
executer.infer_shape ();
314
320
315
- for (auto out : executer.outs ) {
316
- LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
317
- }
321
+ for (auto out : executer.outs ) {
322
+ LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
323
+ }
318
324
#ifdef USE_CUDA
319
- CUDA_CHECK (cudaDeviceSynchronize ());
325
+ CUDA_CHECK (cudaDeviceSynchronize ());
320
326
CUDA_CHECK (cudaPeekAtLastError ());
321
327
#endif
322
328
}
@@ -344,15 +350,15 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
344
350
<< " " << in->valid_shape ()[1 ]
345
351
<< " " << in->valid_shape ()[2 ]
346
352
<< " " << in->valid_shape ()[3 ]
347
- << " valid_size: " << in->valid_size ()
348
- << " realsize: " << in->size ()
353
+ << " valid_size: " << in->valid_size ()
354
+ << " realsize: " << in->size ()
349
355
<< " offset_size " <<in->get_seq_offset ().size ();
350
356
}
351
357
#endif
352
358
#ifdef ENABLE_OP_TIMER
353
- Context<Ttype> ctx (0 , 0 , 0 );
354
- saber::SaberTimer<Ttype> my_time;
355
- my_time.start (ctx);
359
+ Context<Ttype> ctx (0 , 0 , 0 );
360
+ saber::SaberTimer<Ttype> my_time;
361
+ my_time.start (ctx);
356
362
#endif
357
363
if (executer.op_name != " Input" ) {
358
364
executer.infer_shape ();
@@ -368,35 +374,35 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
368
374
executer.outs [i]->record_event (executer.ctx_p ->get_compute_stream ());
369
375
executer.outs [i]->sync ();
370
376
}
371
- my_time.end (ctx);
377
+ my_time.end (ctx);
372
378
_op_time[op_id++] += my_time.get_average_ms ();
373
379
#endif
374
- // LOG(INFO)<< "op: " << executer.name<<"(" << executer.op_name <<") === infer+launch time "<<my_time.get_average_ms() << " ms";
380
+ // LOG(INFO)<< "op: " << executer.name<<"(" << executer.op_name <<") === infer+launch time "<<my_time.get_average_ms() << " ms";
375
381
#ifdef ENABLE_DEBUG
376
382
#ifdef USE_CUDA
377
383
CUDA_CHECK (cudaDeviceSynchronize ());
378
384
CUDA_CHECK (cudaPeekAtLastError ());
379
385
#endif
380
- for (auto out : executer.outs ) {
381
- std::vector<int > offset=out->get_seq_offset ();
382
- LOG (INFO)<<" print offset of " <<executer.name <<" ,size = " <<offset.size ();
383
- for (int i=0 ;i<offset.size ();++i){
384
- LOG (INFO)<<offset[i]<<" ," ;
385
- }
386
- LOG (INFO)<<" end print offset of " <<executer.name ;
386
+ for (auto out : executer.outs ) {
387
+ std::vector<int > offset=out->get_seq_offset ();
388
+ LOG (INFO)<<" print offset of " <<executer.name <<" ,size = " <<offset.size ();
389
+ for (int i=0 ;i<offset.size ();++i){
390
+ LOG (INFO)<<offset[i]<<" ," ;
391
+ }
392
+ LOG (INFO)<<" end print offset of " <<executer.name ;
387
393
#define RECORD_INNER
388
394
#if defined(RECORD_INNER) && defined(USE_X86_PLACE)
389
- record_tensor_to_file (*out,(" record_" +executer.name ).c_str ());
390
- if (executer.name ==" " )
395
+ record_tensor_to_file (*out,(" record_" +executer.name ).c_str ());
396
+ if (executer.name ==" " )
391
397
#endif
392
398
LOG (INFO) <<executer.name <<" d_tensor_out_p :" <<out->data ();
393
399
#ifdef USE_X86_PLACE
394
400
// for (int i = 0; i < 10; ++i) {
395
401
// std::cout << out->data()[i]<<" ";
396
402
// }
397
403
#endif
398
- LOG (ERROR) << " |---out avg " << tensor_average (out);
399
- }
404
+ LOG (ERROR) << " |---out avg " << tensor_average (out);
405
+ }
400
406
401
407
#ifdef USE_ARM_PLACE
402
408
int idx = 0 ;
@@ -468,15 +474,15 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
468
474
469
475
template <typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
470
476
void Net<Ttype, Dtype, Ptype, RunType>::execute_stop_at_node(std::string node_name) {
471
- if (_suspended_point==-1 ) {
472
- for (int i=0 ; i<_exec_funcs.size (); i++) {
473
- if (_exec_funcs[i].name == node_name) {
474
- _suspended_point = i;
475
- }
476
- }
477
- }
478
- for (int i=0 ; i<_suspended_point; i++) {
479
- auto & executer = _exec_funcs[i];
477
+ if (_suspended_point==-1 ) {
478
+ for (int i=0 ; i<_exec_funcs.size (); i++) {
479
+ if (_exec_funcs[i].name == node_name) {
480
+ _suspended_point = i;
481
+ }
482
+ }
483
+ }
484
+ for (int i=0 ; i<_suspended_point; i++) {
485
+ auto & executer = _exec_funcs[i];
480
486
if (RunType == OpRunType::SYNC || executer.need_sync ) {
481
487
for (int i = 0 ; i < executer.ins .size (); i++) {
482
488
// record
@@ -491,37 +497,37 @@ void Net<Ttype, Dtype, Ptype, RunType>::execute_stop_at_node(std::string node_na
491
497
<< " " << in->valid_shape ()[1 ]
492
498
<< " " << in->valid_shape ()[2 ]
493
499
<< " " << in->valid_shape ()[3 ]
494
- << " valid_size: " << in->valid_size ()
495
- << " realsize: " << in->size ()
496
- << " offset_size " <<in->get_seq_offset ().size ();
500
+ << " valid_size: " << in->valid_size ()
501
+ << " realsize: " << in->size ()
502
+ << " offset_size " <<in->get_seq_offset ().size ();
503
+ }
504
+ for (auto out : executer.outs ) {
505
+ LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
497
506
}
498
- for (auto out : executer.outs ) {
499
- LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
500
- }
501
507
502
508
#endif
503
- if (executer.op_name != " Input" ) {
504
- executer.infer_shape ();
505
- executer.launch ();
506
- }
509
+ if (executer.op_name != " Input" ) {
510
+ executer.infer_shape ();
511
+ executer.launch ();
512
+ }
507
513
508
- for (int i = 0 ; i < executer.outs .size (); i++) {
509
- executer.outs [i]->record_event (executer.ctx_p ->get_compute_stream ());
510
- }
511
- }
514
+ for (int i = 0 ; i < executer.outs .size (); i++) {
515
+ executer.outs [i]->record_event (executer.ctx_p ->get_compute_stream ());
516
+ }
517
+ }
512
518
}
513
519
514
520
template <typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
515
521
void Net<Ttype, Dtype, Ptype, RunType>::execute_start_from_node(std::string node_name) {
516
- if (_start_point == -1 ) {
517
- for (int i=0 ; i<_exec_funcs.size (); i++) {
518
- if (_exec_funcs[i].name == node_name) {
519
- _start_point = i;
520
- }
521
- }
522
- }
523
- for (int i=_start_point; i<_exec_funcs.size (); i++) {
524
- auto & executer = _exec_funcs[i];
522
+ if (_start_point == -1 ) {
523
+ for (int i=0 ; i<_exec_funcs.size (); i++) {
524
+ if (_exec_funcs[i].name == node_name) {
525
+ _start_point = i;
526
+ }
527
+ }
528
+ }
529
+ for (int i=_start_point; i<_exec_funcs.size (); i++) {
530
+ auto & executer = _exec_funcs[i];
525
531
if (RunType == OpRunType::SYNC || executer.need_sync ) {
526
532
for (int i = 0 ; i < executer.ins .size (); i++) {
527
533
// record
@@ -536,24 +542,24 @@ void Net<Ttype, Dtype, Ptype, RunType>::execute_start_from_node(std::string node
536
542
<< " " << in->valid_shape ()[1 ]
537
543
<< " " << in->valid_shape ()[2 ]
538
544
<< " " << in->valid_shape ()[3 ]
539
- << " valid_size: " << in->valid_size ()
540
- << " realsize: " << in->size ()
541
- << " offset_size " <<in->get_seq_offset ().size ();
545
+ << " valid_size: " << in->valid_size ()
546
+ << " realsize: " << in->size ()
547
+ << " offset_size " <<in->get_seq_offset ().size ();
548
+ }
549
+ for (auto out : executer.outs ) {
550
+ LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
542
551
}
543
- for (auto out : executer.outs ) {
544
- LOG (INFO) << " |-- out tensor avg " << tensor_average (out);
545
- }
546
552
547
553
#endif
548
- if (executer.op_name != " Input" ) {
549
- executer.infer_shape ();
550
- executer.launch ();
551
- }
554
+ if (executer.op_name != " Input" ) {
555
+ executer.infer_shape ();
556
+ executer.launch ();
557
+ }
552
558
553
- for (int i = 0 ; i < executer.outs .size (); i++) {
554
- executer.outs [i]->record_event (executer.ctx_p ->get_compute_stream ());
555
- }
556
- }
559
+ for (int i = 0 ; i < executer.outs .size (); i++) {
560
+ executer.outs [i]->record_event (executer.ctx_p ->get_compute_stream ());
561
+ }
562
+ }
557
563
}
558
564
559
565
template <typename Ttype, DataType Dtype, Precision Ptype, OpRunType RunType>
@@ -607,27 +613,27 @@ Status Net<Ttype, Dtype, Ptype, RunType>::init_memory() {
607
613
auto share_memory = [this ](graph::Edge<Ttype, Dtype>& edge) {
608
614
if (edge.shared ()) {
609
615
auto & edge_name = edge.share_from ();
610
- bool continue_search = true ;
611
- while (continue_search) {
612
- auto match_edge = [&](graph::Edge<Ttype, Dtype>& inner_edge) {
613
- if (inner_edge.name () == edge_name) {
614
- if (inner_edge.shared ()) {
615
- edge_name = inner_edge.share_from ();
616
- return Status::EXIT (" Continue to find next . " );
617
- }
618
- if (inner_edge.weight ()->size () < edge.weight ()->valid_size ()) {
619
- auto inner_original_shape = inner_edge.weight ()->valid_shape ();
620
- inner_edge.weight ()->re_alloc (edge.weight ()->valid_shape ());
621
- inner_edge.weight ()->set_shape (inner_original_shape, inner_edge.weight ()->shape ());
622
- }
623
- edge.weight ()->share_from (*(inner_edge.weight ()));
624
- continue_search = false ;
625
- return Status::EXIT (" Find the matched target edge. " );
626
- }
627
- return Status::OK ();
628
- };
629
- this ->_graph_p ->Scanner ->BFS_Edge (match_edge);
630
- }
616
+ bool continue_search = true ;
617
+ while (continue_search) {
618
+ auto match_edge = [&](graph::Edge<Ttype, Dtype>& inner_edge) {
619
+ if (inner_edge.name () == edge_name) {
620
+ if (inner_edge.shared ()) {
621
+ edge_name = inner_edge.share_from ();
622
+ return Status::EXIT (" Continue to find next . " );
623
+ }
624
+ if (inner_edge.weight ()->size () < edge.weight ()->valid_size ()) {
625
+ auto inner_original_shape = inner_edge.weight ()->valid_shape ();
626
+ inner_edge.weight ()->re_alloc (edge.weight ()->valid_shape ());
627
+ inner_edge.weight ()->set_shape (inner_original_shape, inner_edge.weight ()->shape ());
628
+ }
629
+ edge.weight ()->share_from (*(inner_edge.weight ()));
630
+ continue_search = false ;
631
+ return Status::EXIT (" Find the matched target edge. " );
632
+ }
633
+ return Status::OK ();
634
+ };
635
+ this ->_graph_p ->Scanner ->BFS_Edge (match_edge);
636
+ }
631
637
}
632
638
};
633
639
_graph_p->Scanner ->BFS_Edge (share_memory);
@@ -644,8 +650,8 @@ Status Net<Ttype, Dtype, Ptype, RunType>::init_memory() {
644
650
};
645
651
this ->_graph_p ->Scanner ->BFS_Edge (analysis_used_of_temp_mem);
646
652
647
- this ->_graph_p ->statistics .template set_info <graph::TEMP_MEM>(temp_mem_in_mbytes / 1e6 );
648
- this ->_graph_p ->statistics .template set_info <graph::ORI_TEMP_MEM>(ori_temp_mem_in_mbytes / 1e6 );
653
+ this ->_graph_p ->statistics .template set_info <graph::TEMP_MEM>(temp_mem_in_mbytes / 1e6 );
654
+ this ->_graph_p ->statistics .template set_info <graph::ORI_TEMP_MEM>(ori_temp_mem_in_mbytes / 1e6 );
649
655
}
650
656
return Status::OK ();
651
657
}
@@ -700,4 +706,3 @@ template class Net<ARM, AK_FLOAT, Precision::INT8, OpRunType::SYNC>;
700
706
#endif // arm
701
707
702
708
} /* namespace anakin */
703
-
0 commit comments