diff --git a/src/nnvm/node_op_util.h b/src/nnvm/node_op_util.h
index 54a96336fb94..4ce6f217bd87 100644
--- a/src/nnvm/node_op_util.h
+++ b/src/nnvm/node_op_util.h
@@ -80,6 +80,12 @@ class NodeOpGen {
                                                    dependent_node->attrs.name + "_negative",
                                                    {x}, nullptr, &dependent_node)};
     }
+
+    nnvm::NodeEntry abs(const nnvm::NodeEntry &x) {
+        return nnvm::NodeEntry{mxnet::op::MakeNode("abs",
+                                                   dependent_node->attrs.name + "_abs",
+                                                   {x}, nullptr, &dependent_node)};
+    }
 };
 
 }  // namespace util
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index c8f597393ff8..6a920fcd231e 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -25,6 +25,7 @@
 #include "elemwise_unary_op.h"
 #include "./elemwise_binary_op-inl.h"
 #include "../nn/mkldnn/mkldnn_ops-inl.h"
+#include "../../nnvm/node_op_util.h"
 
 namespace mxnet {
 namespace op {
@@ -177,7 +178,8 @@ NNVM_REGISTER_OP(_backward_hard_sigmoid)
   [](const NodeAttrs& attrs){
     return std::vector<bool>{true};
   })
-.set_attr<FCompute>("FCompute<cpu>", HardSigmoidBackward<cpu>);
+.set_attr<FCompute>("FCompute<cpu>", HardSigmoidBackward<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
 
 // softsign
 MXNET_OPERATOR_REGISTER_UNARY(softsign)
@@ -194,7 +196,35 @@ The storage type of ``softsign`` output is always dense
 
 MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
 .set_attr<FCompute>("FCompute<cpu>", ElemwiseBinaryOp::Compute<
-  cpu, unary_bwd<mshadow_op::softsign_grad> >);
+  cpu, unary_bwd<mshadow_op::softsign_grad> >)
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+      // NodeEntry{n} : dL/dy * f'(x)
+      // n->inputs[0] : dL/dy
+      // n->inputs[1] : x (ElemwiseGradUseIn)
+      // ograds[0] : head_grads (dL/dx_grad)
+      // y = f(x) = softsign(x)
+      // f'(x) = dy/dx = 1/(1 + |x|)^2
+      // f''(x) = (-2/|x|) * softsign(x) * f'(x) = -2*x/(|x|*(1 + |x|)^3)
+      auto dldy = n->inputs[0];
+      auto x = n->inputs[1];
+      auto dldy_mul_dydx = nnvm::NodeEntry{n};
+      auto op = mxnet::util::NodeOpGen{n};
+
+      auto dydx = op.div(dldy_mul_dydx, dldy);
+      auto abs_x = op.abs(x);
+      auto r_abs_x = op.reciprocal(abs_x);
+      auto neg_two_r_abs_x = op.mul(-2.0, r_abs_x);
+      auto softsign_x = MakeNode("softsign", n->attrs.name + "_softsign_x",
+                                 {nnvm::NodeEntry{x}}, nullptr, &n);
+      auto softsign_mul_dydx = op.mul(nnvm::NodeEntry{softsign_x}, dldy_mul_dydx);
+      auto grad_grad_x = op.mul(softsign_mul_dydx, neg_two_r_abs_x);
+
+      std::vector<nnvm::NodeEntry> ret;
+      ret.emplace_back(op.mul(ograds[0], dydx));
+      ret.emplace_back(op.mul(ograds[0], grad_grad_x));
+      return ret;
+    });
 
 // copy
 static void CopyEx(const nnvm::NodeAttrs& attrs,
diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py
index 4a906aa696a2..12c6fff5bf79 100644
--- a/tests/python/unittest/test_higher_order_grad.py
+++ b/tests/python/unittest/test_higher_order_grad.py
@@ -502,6 +502,34 @@ def grad_grad_op(x):
         check_second_order_unary(array, rcbrt, grad_grad_op)
 
 
+@with_seed()
+def test_softsign():
+    def softsign(x):
+        return nd.softsign(x)
+
+    def grad_grad_op(x):
+        return -2 * x / (nd.abs(x) * (1 + nd.abs(x))**3)
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        array = random_arrays(shape)
+        check_second_order_unary(array, softsign, grad_grad_op)
+
+
+@with_seed()
+def test_hard_sigmoid():
+    def hard_sigmoid(x):
+        return nd.hard_sigmoid(x)
+
+    def grad_grad_op(x):
+        return nd.zeros_like(x)
+
+    for dim in range(1, 5):
+        shape = rand_shape_nd(dim)
+        array = random_arrays(shape)
+        check_second_order_unary(array, hard_sigmoid, grad_grad_op)
+
+
 def check_second_order_unary(x, op, grad_grad_op, rtol=None,
                              atol=None):
     check_nth_order_unary(x, op, grad_grad_op, 2, rtol, atol)
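
For readers tracing the algebra inside the new FGradient lambda, the identity it relies on, restated here as a display (my paraphrase of the code comments above, not text from the patch), is

    f(x)   = \frac{x}{1+|x|}, \qquad
    f'(x)  = \frac{1}{(1+|x|)^{2}}, \qquad
    f''(x) = \frac{-2x}{|x|\,(1+|x|)^{3}} = \frac{-2}{|x|}\, f(x)\, f'(x).

Writing \bar{y} for dL/dy, the node's output is g = \bar{y}\, f'(x). Its gradient w.r.t. \bar{y} is f'(x), recovered as g / \bar{y} (the op.div call), and its gradient w.r.t. x is \bar{y}\, f''(x) = (-2/|x|)\, f(x)\, g (the neg_two_r_abs_x / softsign_x / grad_grad_x chain); both are then contracted with ograds[0].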
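
As a quick end-to-end sanity check outside the test harness, the same double-backward pattern that check_nth_order_unary uses can be applied directly. The sketch below is not part of the patch: it assumes an MXNet build that includes this change, and it evaluates both operators at points away from x = 0, where softsign's second derivative is undefined.

from mxnet import autograd, nd


def second_derivative(op, x):
    """Differentiate sum(op(x)) twice w.r.t. x via double backward."""
    x = x.copy()
    x.attach_grad()
    with autograd.record():
        y = op(x)
        # Keep the first-order gradient in the recorded graph
        # (create_graph=True) so it can be differentiated again.
        x_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
    x_grad.backward()  # second backward pass
    return x.grad      # f''(x) elementwise


# Points away from x = 0, where softsign's f'' is undefined.
x = nd.array([-2.0, -0.5, 0.5, 2.0])

# softsign: compare against f''(x) = -2x / (|x| * (1 + |x|)**3).
got = second_derivative(nd.softsign, x)
want = -2 * x / (nd.abs(x) * (1 + nd.abs(x)) ** 3)
print(got.asnumpy(), want.asnumpy())

# hard_sigmoid is piecewise linear, so f'' is zero wherever it is
# defined, which is what the MakeZeroGradNodes registration encodes.
print(second_derivative(nd.hard_sigmoid, x).asnumpy())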