@@ -198,7 +198,8 @@ def __init__(self, features, top_features, classes, box_features=None,
198
198
self ._batch_size = per_device_batch_size
199
199
self ._num_sample = num_sample
200
200
self ._rpn_test_post_nms = rpn_test_post_nms
201
- self ._target_generator = RCNNTargetGenerator (self .num_class )
201
+ self ._target_generator = RCNNTargetGenerator (self .num_class , int (num_sample * pos_ratio ),
202
+ self ._batch_size )
202
203
self ._additional_output = additional_output
203
204
with self .name_scope ():
204
205
self .rpn = RPN (
@@ -207,7 +208,7 @@ def __init__(self, features, top_features, classes, box_features=None,
207
208
clip = clip , nms_thresh = rpn_nms_thresh , train_pre_nms = rpn_train_pre_nms ,
208
209
train_post_nms = rpn_train_post_nms , test_pre_nms = rpn_test_pre_nms ,
209
210
test_post_nms = rpn_test_post_nms , min_size = rpn_min_size ,
210
- multi_level = self .num_stages > 1 )
211
+ multi_level = self .num_stages > 1 , per_level_nms = False )
211
212
self .sampler = RCNNTargetSampler (num_image = self ._batch_size ,
212
213
num_proposal = rpn_train_post_nms , num_sample = num_sample ,
213
214
pos_iou_thresh = pos_iou_thresh , pos_ratio = pos_ratio ,
@@ -252,7 +253,8 @@ def reset_class(self, classes, reuse_weights=None):
252
253
253
254
"""
254
255
super (FasterRCNN , self ).reset_class (classes , reuse_weights )
255
- self ._target_generator = RCNNTargetGenerator (self .num_class )
256
+ self ._target_generator = RCNNTargetGenerator (self .num_class , self .sampler ._max_pos ,
257
+ self ._batch_size )
256
258
257
259
def _pyramid_roi_feats (self , F , features , rpn_rois , roi_size , strides , roi_mode = 'align' ,
258
260
roi_canonical_scale = 224.0 , eps = 1e-6 ):
@@ -292,16 +294,25 @@ def _pyramid_roi_feats(self, F, features, rpn_rois, roi_size, strides, roi_mode=
292
294
# rpn_rois = F.take(rpn_rois, roi_level_sorted_args, axis=0)
293
295
pooled_roi_feats = []
294
296
for i , l in enumerate (range (self ._min_stage , max_stage + 1 )):
295
- # Pool features with all rois first, and then set invalid pooled features to zero,
296
- # at last ele-wise add together to aggregate all features.
297
297
if roi_mode == 'pool' :
298
+ # Pool features with all rois first, and then set invalid pooled features to zero,
299
+ # and finally add them element-wise to aggregate all features.
298
300
pooled_feature = F .ROIPooling (features [i ], rpn_rois , roi_size , 1. / strides [i ])
301
+ pooled_feature = F .where (roi_level == l , pooled_feature ,
302
+ F .zeros_like (pooled_feature ))
299
303
elif roi_mode == 'align' :
300
- pooled_feature = F .contrib .ROIAlign (features [i ], rpn_rois , roi_size ,
301
- 1. / strides [i ], sample_ratio = 2 )
304
+ if 'box_encode' in F .contrib .__dict__ and 'box_decode' in F .contrib .__dict__ :
305
+ # TODO(jerryzcn): clean this up once mx 1.6 is released.
306
+ masked_rpn_rois = F .where (roi_level == l , rpn_rois , F .ones_like (rpn_rois ) * - 1. )
307
+ pooled_feature = F .contrib .ROIAlign (features [i ], masked_rpn_rois , roi_size ,
308
+ 1. / strides [i ], sample_ratio = 2 )
309
+ else :
310
+ pooled_feature = F .contrib .ROIAlign (features [i ], rpn_rois , roi_size ,
311
+ 1. / strides [i ], sample_ratio = 2 )
312
+ pooled_feature = F .where (roi_level == l , pooled_feature ,
313
+ F .zeros_like (pooled_feature ))
302
314
else :
303
315
raise ValueError ("Invalid roi mode: {}" .format (roi_mode ))
304
- pooled_feature = F .where (roi_level == l , pooled_feature , F .zeros_like (pooled_feature ))
305
316
pooled_roi_feats .append (pooled_feature )
306
317
# Ele-wise add to aggregate all pooled features
307
318
pooled_roi_feats = F .ElementWiseSum (* pooled_roi_feats )
@@ -312,7 +323,7 @@ def _pyramid_roi_feats(self, F, features, rpn_rois, roi_size, strides, roi_mode=
312
323
return pooled_roi_feats
313
324
314
325
# pylint: disable=arguments-differ
315
- def hybrid_forward (self , F , x , gt_box = None ):
326
+ def hybrid_forward (self , F , x , gt_box = None , gt_label = None ):
316
327
"""Forward Faster-RCNN network.
317
328
318
329
The behavior during training and inference is different.
@@ -322,7 +333,9 @@ def hybrid_forward(self, F, x, gt_box=None):
322
333
x : mxnet.nd.NDArray or mxnet.symbol
323
334
The network input tensor.
324
335
gt_box : type, only required during training
325
- The ground-truth bbox tensor with shape (1, N, 4).
336
+ The ground-truth bbox tensor with shape (B, N, 4).
337
+ gt_label : type, only required during training
338
+ The ground-truth label tensor with shape (B, N, 1).
326
339
327
340
Returns
328
341
-------
@@ -385,20 +398,29 @@ def _split(x, axis, num_outputs, squeeze_axis):
385
398
else :
386
399
box_feat = self .box_features (top_feat )
387
400
cls_pred = self .class_predictor (box_feat )
388
- box_pred = self .box_predictor (box_feat )
389
401
# cls_pred (B * N, C) -> (B, N, C)
390
402
cls_pred = cls_pred .reshape ((batch_size , num_roi , self .num_class + 1 ))
391
- # box_pred (B * N, C * 4) -> (B, N, C, 4)
392
- box_pred = box_pred .reshape ((batch_size , num_roi , self .num_class , 4 ))
393
403
394
404
# no need to convert bounding boxes in training, just return
395
405
if autograd .is_training ():
406
+ cls_targets , box_targets , box_masks , indices = \
407
+ self ._target_generator (rpn_box , samples , matches , gt_label , gt_box )
408
+ box_feat = F .reshape (box_feat .expand_dims (0 ), (batch_size , - 1 , 0 ))
409
+ box_pred = self .box_predictor (F .concat (
410
+ * [F .take (F .slice_axis (box_feat , axis = 0 , begin = i , end = i + 1 ).squeeze (),
411
+ F .slice_axis (indices , axis = 0 , begin = i , end = i + 1 ).squeeze ())
412
+ for i in range (batch_size )], dim = 0 ))
413
+ # box_pred (B * N, C * 4) -> (B, N, C, 4)
414
+ box_pred = box_pred .reshape ((batch_size , - 1 , self .num_class , 4 ))
396
415
if self ._additional_output :
397
- return (cls_pred , box_pred , rpn_box , samples , matches ,
398
- raw_rpn_score , raw_rpn_box , anchors , top_feat )
399
- return (cls_pred , box_pred , rpn_box , samples , matches ,
400
- raw_rpn_score , raw_rpn_box , anchors )
416
+ return (cls_pred , box_pred , rpn_box , samples , matches , raw_rpn_score , raw_rpn_box ,
417
+ anchors , cls_targets , box_targets , box_masks , top_feat , indices )
418
+ return (cls_pred , box_pred , rpn_box , samples , matches , raw_rpn_score , raw_rpn_box ,
419
+ anchors , cls_targets , box_targets , box_masks , indices )
401
420
421
+ box_pred = self .box_predictor (box_feat )
422
+ # box_pred (B * N, C * 4) -> (B, N, C, 4)
423
+ box_pred = box_pred .reshape ((batch_size , num_roi , self .num_class , 4 ))
402
424
# cls_ids (B, N, C), scores (B, N, C)
403
425
cls_ids , scores = self .cls_decoder (F .softmax (cls_pred , axis = - 1 ))
404
426
# cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
@@ -419,7 +441,7 @@ def _split(x, axis, num_outputs, squeeze_axis):
419
441
results = []
420
442
for rpn_box , cls_id , score , box_pred in zip (rpn_boxes , cls_ids , scores , box_preds ):
421
443
# box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
422
- bbox = self .box_decoder (box_pred , self . box_to_center ( rpn_box ) )
444
+ bbox = self .box_decoder (box_pred , rpn_box )
423
445
# res (C, N, 6)
424
446
res = F .concat (* [cls_id , score , bbox ], dim = - 1 )
425
447
if self .force_nms :
@@ -683,7 +705,7 @@ def faster_rcnn_fpn_bn_resnet50_v1b_coco(pretrained=False, pretrained_base=True,
683
705
top_features = None
684
706
# 1 Conv 1 FC layer before RCNN cls and reg
685
707
box_features = nn .HybridSequential ()
686
- box_features .add (nn .Conv2D (256 , 3 , padding = 1 ),
708
+ box_features .add (nn .Conv2D (256 , 3 , padding = 1 , use_bias = False ),
687
709
SyncBatchNorm (** gluon_norm_kwargs ),
688
710
nn .Activation ('relu' ),
689
711
nn .Dense (1024 , weight_initializer = mx .init .Normal (0.01 )),
0 commit comments