diff --git a/src/nnvm/node_op_util.h b/src/nnvm/node_op_util.h
new file mode 100644
index 000000000000..8d5916aafff9
--- /dev/null
+++ b/src/nnvm/node_op_util.h
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file node_op_util.h
+ * \brief abstraction for commonly used nnvm::Node operations.
+ */
+#ifndef MXNET_NNVM_NODE_OP_UTIL_H_
+#define MXNET_NNVM_NODE_OP_UTIL_H_
+#include <mxnet/base.h>
+#include <string>
+#include <unordered_map>
+#include "../operator/elemwise_op_common.h"
+
+namespace mxnet {
+namespace util {
+
+class NodeOpGen {
+ private:
+    const nnvm::NodePtr &dependent_node;
+
+ public:
+    explicit NodeOpGen(const nnvm::NodePtr &dependent_node) : dependent_node{dependent_node} {}
+
+    nnvm::NodeEntry mul(const nnvm::NodeEntry &lhs, const nnvm::NodeEntry &rhs) {
+        return nnvm::NodeEntry{mxnet::op::MakeNode("elemwise_mul",
+                                                   dependent_node->attrs.name + "_mul",
+                                                   {lhs, rhs}, nullptr, &dependent_node)};
+    }
+
+    nnvm::NodeEntry mul(const nnvm::NodeEntry &x, double scalar) {
+        const std::unordered_map<std::string, std::string> scalar_dict =
+            {{"scalar", std::to_string(scalar)}};
+        return nnvm::NodeEntry{mxnet::op::MakeNode("_mul_scalar",
+                                                   dependent_node->attrs.name + "_mul_scalar",
+                                                   {x}, &scalar_dict, &dependent_node)};
+    }
+
+    nnvm::NodeEntry mul(double scalar, const nnvm::NodeEntry &x) {
+        return NodeOpGen::mul(x, scalar);
+    }
+
+    nnvm::NodeEntry div(const nnvm::NodeEntry &lhs, const nnvm::NodeEntry &rhs) {
+        return nnvm::NodeEntry{mxnet::op::MakeNode("elemwise_div",
+                                                   dependent_node->attrs.name + "_div",
+                                                   {lhs, rhs}, nullptr, &dependent_node)};
+    }
+
+    nnvm::NodeEntry square(const nnvm::NodeEntry &x) {
+        return nnvm::NodeEntry{mxnet::op::MakeNode("square",
+                                                   dependent_node->attrs.name + "_square",
+                                                   {x}, nullptr, &dependent_node)};
+    }
+};
+
+}  // namespace util
+}  // namespace mxnet
+
+#endif  // MXNET_NNVM_NODE_OP_UTIL_H_
diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index ff210040d552..bd0c8973ae07 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -24,6 +24,7 @@
 #include <mxnet/base.h>
 #include "elemwise_unary_op.h"
 #include "./elemwise_binary_op-inl.h"
+#include "../../nnvm/node_op_util.h"
 
 namespace mxnet {
 namespace op {
@@ -227,7 +228,35 @@ The storage type of ``arctan`` output depends upon the input storage type:
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arctan" });
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctan,
-                                                  unary_bwd<mshadow_op::arctan_grad>);
+                                                  unary_bwd<mshadow_op::arctan_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+      // ograds[0]: head_grad_grads (dL/dxgrad)
+      // inputs[0]: dL/dy
+      // inputs[1]: x (ElemwiseGradUseIn)
+      // n: dL/dy * f'(x)
+      // f(x) = arctan(x)
+      // dydx = f'(x) = 1/(1+x^2)
+      // f''(x) = f'(x) * f'(x) * -2 * x = (-2 * x) / (1 + x^2)^2
+      // return:
+      //     0: dL/dy_grad * dy/dx
+      //     1: dL/dy_grad * dL/dy * f''(x)
+      auto dldy = n->inputs[0];
+      auto x = n->inputs[1];
+      auto dldy_mul_dydx = nnvm::NodeEntry{n};
+      auto op = mxnet::util::NodeOpGen{n};
+
+      auto x_grad = op.div(dldy_mul_dydx, dldy);
+      auto x_grad_square = op.square(x_grad);
+      auto x_grad_square_mul_x = op.mul(x_grad_square, x);
+      auto x_grad_square_mul_2_x = op.mul(-2.0, x_grad_square_mul_x);
+      auto grad_grad_x = op.mul(dldy, x_grad_square_mul_2_x);
+
+      std::vector<nnvm::NodeEntry> ret;
+      ret.emplace_back(op.mul(ograds[0], x_grad));
+      ret.emplace_back(op.mul(ograds[0], grad_grad_x));
+      return ret;
+    });
 
 // degrees
 MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(degrees, cpu, mshadow_op::degrees)
@@ -265,7 +294,8 @@ The storage type of ``radians`` output depends upon the input storage type:
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_radians" });
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_radians,
-                                                  unary_bwd<mshadow_op::radians_grad>);
+                                                  unary_bwd<mshadow_op::radians_grad>)
+.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
 
 // sinh
 MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(sinh, cpu, mshadow_op::sinh)
@@ -391,8 +421,35 @@ The storage type of ``arctanh`` output depends upon the input storage type:
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arctanh" });
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctanh,
-                                                  unary_bwd<mshadow_op::arctanh_grad>);
+                                                  unary_bwd<mshadow_op::arctanh_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+      // ograds[0]: head_grad_grads (dL/dxgrad)
+      // inputs[0]: dL/dy
+      // inputs[1]: x (ElemwiseGradUseIn)
+      // n: dL/dy * dy/dx
+      // f(x) = arctanh(x)
+      // dy/dx = f'(x) = 1/(1-x^2)
+      // f''(x) = f'(x) * f'(x) * 2 * x = (2 * x) / (1 - x^2)^2
+      // return:
+      //     0: dL/dy_grad * dy/dx
+      //     1: dL/dy_grad * dL/dy * f''(x)
+      auto dldy = n->inputs[0];
+      auto x = n->inputs[1];
+      auto dldy_mul_dydx = nnvm::NodeEntry{n};
+      auto op = mxnet::util::NodeOpGen{n};
+
+      auto x_grad = op.div(dldy_mul_dydx, dldy);
+      auto x_grad_square = op.square(x_grad);
+      auto x_grad_square_mul_x = op.mul(x_grad_square, x);
+      auto x_grad_square_mul_2_x = op.mul(2.0, x_grad_square_mul_x);
+      auto grad_grad_x = op.mul(dldy, x_grad_square_mul_2_x);
+      std::vector<nnvm::NodeEntry> ret;
+      ret.emplace_back(op.mul(ograds[0], x_grad));
+      ret.emplace_back(op.mul(ograds[0], grad_grad_x));
+      return ret;
+    });
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py
index 429070de5896..c70c747411b8 100644
--- a/tests/python/unittest/test_higher_order_grad.py
+++ b/tests/python/unittest/test_higher_order_grad.py
@@ -17,6 +17,7 @@
 
 import math
+import random
 from mxnet import nd, autograd
 from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd
 from common import with_seed
@@ -85,6 +86,51 @@ def grad_grad_op(x):
         array, tanh, grad_grad_op, rtol=1e-6, atol=1e-6)
 
 
+@with_seed()
+def test_arctan():
+    def arctan(x):
+        return nd.arctan(x)
+
+    def grad_grad_op(x):
+        return (-2 * x)/((1 + x**2)**2)
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        array = random_arrays(shape)
+        # Domain of arctan is all real numbers.
+        # Scale std_dev
+        array *= random.randint(500, 10000)
+        check_second_order_unary(array, arctan, grad_grad_op)
+
+
+@with_seed()
+def test_arctanh():
+    def arctanh(x):
+        return nd.arctanh(x)
+
+    def grad_grad_op(x):
+        return (2 * x)/((1 - x**2)**2)
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        array = random_arrays(shape)
+        check_second_order_unary(array, arctanh, grad_grad_op)
+
+
+@with_seed()
+def test_radians():
+    def radians(x):
+        return nd.radians(x)
+
+    def grad_grad_op(x):
+        return nd.zeros_like(x)
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        array = random_arrays(shape)
+        check_second_order_unary(array, radians, grad_grad_op)
+
+
 @with_seed()
 def test_relu():
     def relu(x):