This repository was archived by the owner on Nov 17, 2023. It is now read-only.

NightlyTestsForBinaries tutorials test broken #15374


Description

@lebeg

The nightly test for the tutorials is broken; in particular, test_tutorials.test_amp fails.

http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/NightlyTestsForBinaries/detail/master/355/pipeline
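
To reproduce locally (assuming an MXNet source checkout with the same dependencies as the nightly Docker image, and nose installed), the single failing test can be run directly. The file path is taken from the traceback below; the invocation itself is just the usual nose syntax:

nosetests --verbose tests/tutorials/test_tutorials.py:test_amp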

ERROR:root:An error occurred while executing the following cell:
------------------
mbox_loss = gcv.loss.SSDMultiBoxLoss()

for epoch in range(1):
    ce_metric.reset()
    smoothl1_metric.reset()
    tic = time.time()
    btic = time.time()

    for i, batch in enumerate(train_data):
        batch_size = batch[0].shape[0]
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
        with autograd.record():
            cls_preds = []
            box_preds = []
            for x in data:
                cls_pred, box_pred, _ = net(x)
                cls_preds.append(cls_pred)
                box_preds.append(box_pred)
            sum_loss, cls_loss, box_loss = mbox_loss(
                cls_preds, box_preds, cls_targets, box_targets)
            autograd.backward(sum_loss)
        trainer.step(1)
        ce_metric.update(0, [l * batch_size for l in cls_loss])
        smoothl1_metric.update(0, [l * batch_size for l in box_loss])
        if not (i + 1) % 50:
            name1, loss1 = ce_metric.get()
            name2, loss2 = smoothl1_metric.get()
            logger.info('[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'.format(
                epoch, i, batch_size/(time.time()-btic), name1, loss1, name2, loss2))
        btic = time.time()
------------------

---------------------------------------------------------------------------
MXNetError                                Traceback (most recent call last)
<ipython-input-6-67de7c0b11cc> in <module>
     20                 box_preds.append(box_pred)
     21             sum_loss, cls_loss, box_loss = mbox_loss(
---> 22                 cls_preds, box_preds, cls_targets, box_targets)
     23             autograd.backward(sum_loss)
     24         trainer.step(1)

/work/mxnet/python/mxnet/gluon/block.py in __call__(self, *args)
    546             hook(self, args)
    547 
--> 548         out = self.forward(*args)
    549 
    550         for hook in self._forward_hooks.values():

/usr/local/lib/python3.5/dist-packages/gluoncv/loss.py in forward(self, cls_pred, box_pred, cls_target, box_target)
    157             rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
    158             hard_negative = rank < nd.maximum(self._min_hard_negatives, pos.sum(axis=1)
--> 159                                               * self._negative_mining_ratio).expand_dims(-1)
    160             # mask out if not positive or negative
    161             cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))

/work/mxnet/python/mxnet/ndarray/ndarray.py in __lt__(self, other)
    342     def __lt__(self, other):
    343         """x.__lt__(y) <=> x<y <=> mx.nd.lesser(x, y) """
--> 344         return lesser(self, other)
    345 
    346     def __le__(self, other):

/work/mxnet/python/mxnet/ndarray/ndarray.py in lesser(lhs, rhs)
   3506         lambda x, y: 1 if x < y else 0,
   3507         _internal._lesser_scalar,
-> 3508         _internal._greater_scalar)
   3509     # pylint: enable= no-member, protected-access
   3510 

/work/mxnet/python/mxnet/ndarray/ndarray.py in _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar)
   2706         return lfn_scalar(lhs, float(rhs))
   2707     elif isinstance(rhs, NDArray):
-> 2708         return fn_array(lhs, rhs)
   2709     else:
   2710         raise TypeError('type %s not supported' % str(type(rhs)))

/work/mxnet/python/mxnet/ndarray/register.py in broadcast_lesser(lhs, rhs, out, name, **kwargs)

/work/mxnet/python/mxnet/_ctypes/ndarray.py in _imperative_invoke(handle, ndargs, keys, vals, out)
     90         c_str_array(keys),
     91         c_str_array([str(s) for s in vals]),
---> 92         ctypes.byref(out_stypes)))
     93 
     94     if original_output is not None:

/work/mxnet/python/mxnet/base.py in check_call(ret)
    251     """
    252     if ret != 0:
--> 253         raise MXNetError(py_str(_LIB.MXGetLastError()))
    254 
    255 

MXNetError: [20:54:33] /work/mxnet/3rdparty/mshadow/../../src/operator/tensor/../elemwise_op_common.h:135: Check failed: assign(&dattr, vec.at(i)): Incompatible attr in node  at 1-th input: expected int32, got float32
Stack trace:
  [bt] (0) /work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x3c) [0x7f465e8070bc]
  [bt] (1) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)::{lambda(std::vector<int, std::allocator<int> > const&, unsigned long, char const*)#1}::operator()(std::vector<int, std::allocator<int> > const&, unsigned long, char const*) const+0x346) [0x7f465e953636]
  [bt] (2) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseAttr<int, &mxnet::op::type_is_none, &mxnet::op::type_assign, true, &mxnet::op::type_string[abi:cxx11], -1, -1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*, int const&)+0x25d) [0x7f465e954a0d]
  [bt] (3) /work/mxnet/python/mxnet/../../lib/libmxnet.so(bool mxnet::op::ElemwiseType<2, 1>(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)+0x34f) [0x7f465eb9bc5f]
  [bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<bool (nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*), bool (*)(nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*, std::vector<int, std::allocator<int> >*)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, std::vector<int, std::allocator<int> >*&&, std::vector<int, std::allocator<int> >*&&)+0x1d) [0x7f465e8adfcd]
  [bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::SetShapeType(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, mxnet::DispatchMode*)+0x22d5) [0x7f4661792ba5]
  [bt] (6) /work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::Imperative::Invoke(mxnet::Context const&, nnvm::NodeAttrs const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&)+0x110) [0x7f466179e1b0]
  [bt] (7) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeImpl(void*, int, void**, int*, void***, int, char const**, char const**)+0x1c9) [0x7f466228acf9]
  [bt] (8) /work/mxnet/python/mxnet/../../lib/libmxnet.so(MXImperativeInvokeEx+0x8f) [0x7f466228b1ff]


======================================================================
FAIL: test_tutorials.test_amp
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in runTest
    self.test(*self.arg)
  File "/work/mxnet/tests/tutorials/test_tutorials.py", line 209, in test_amp
    assert _test_tutorial_nb('amp/amp_tutorial')
AssertionError
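
The check that fails is broadcast_lesser's type inference: both operands of the < comparison in gluoncv/loss.py (line 159 in the traceback) must share a dtype, but here input 0 is int32 and input 1 is float32. Below is a minimal sketch of that failing pattern and a cast-based workaround; the exact dtypes produced inside SSDMultiBoxLoss are an assumption based on the error message, not verified against gluoncv.

import mxnet as mx

# Assumed failing pattern: an int32 rank tensor compared against a float32 threshold,
# as in gluoncv's hard-negative mining (rank < max(min_hard_negatives, pos_sum * ratio)).
rank = mx.nd.array([[0, 2, 1]], dtype='int32')
threshold = mx.nd.array([[1.5]], dtype='float32')

# rank < threshold  # raises the same MXNetError: broadcast_lesser requires matching dtypes
hard_negative = rank.astype('float32') < threshold  # casting either side makes the comparison well-typed
print(hard_negative)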
