
[MXNET-978] Higher Order Gradient Support for softsign, hard_sigmoid. #15679

6 changes: 6 additions & 0 deletions src/nnvm/node_op_util.h
@@ -80,6 +80,12 @@ class NodeOpGen {
dependent_node->attrs.name + "_negative",
{x}, nullptr, &dependent_node)};
}

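  // Builds an "abs" node (|x|) on top of x, named after the dependent node,
  // mirroring the existing negative()/reciprocal() helpers.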
  nnvm::NodeEntry abs(const nnvm::NodeEntry &x) {
    return nnvm::NodeEntry{mxnet::op::MakeNode("abs",
                                               dependent_node->attrs.name + "_abs",
                                               {x}, nullptr, &dependent_node)};
  }
};

} // namespace util
34 changes: 32 additions & 2 deletions src/operator/tensor/elemwise_unary_op_basic.cc
@@ -25,6 +25,7 @@
#include "elemwise_unary_op.h"
#include "./elemwise_binary_op-inl.h"
#include "../nn/mkldnn/mkldnn_ops-inl.h"
#include "../../nnvm/node_op_util.h"

namespace mxnet {
namespace op {
@@ -177,7 +178,8 @@ NNVM_REGISTER_OP(_backward_hard_sigmoid)
[](const NodeAttrs& attrs){
return std::vector<bool>{true};
})
.set_attr<FCompute>("FCompute<cpu>", HardSigmoidBackward<cpu>);
.set_attr<FCompute>("FCompute<cpu>", HardSigmoidBackward<cpu>)
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);

// softsign
MXNET_OPERATOR_REGISTER_UNARY(softsign)
@@ -194,7 +196,35 @@ The storage type of ``softsign`` output is always dense

MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
.set_attr<FCompute>("FCompute<cpu>", ElemwiseBinaryOp::Compute<cpu,
unary_bwd<mshadow_op::softsign_grad> >);
unary_bwd<mshadow_op::softsign_grad> >)
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// NodeEntry{n} : dL/dy * f'(x)
// n->inputs[0] : dL/dy
// n->inputs[1] : x (ElemwiseGradUseIn)
// ograds[0] : head_grads (dL/dx_grad)
// y = f(x) = softsign(x)
// f'(x) = dy/dx = 1/(1 + |x|)^2
// f''(x) = (-2/|x|) * softsign(x) * f'(x) = -2*x/(|x|*(1 + |x|)^3)
auto dydx = n->inputs[0];
auto x = n->inputs[1];
auto dldy_mul_dydx = nnvm::NodeEntry{n};
auto op = mxnet::util::NodeOpGen{n};

auto dldy = op.div(dldy_mul_dydx, dydx);
auto abs_x = op.abs(x);
auto r_abs_x = op.reciprocal(abs_x);
auto neg_two_r_abs_x = op.mul(-2.0, r_abs_x);
auto softsign_x = MakeNode("softsign", n->attrs.name + "_softsign_x",
{nnvm::NodeEntry{x}}, nullptr, &n);
auto softsign_mul_dydx = op.mul(nnvm::NodeEntry{softsign_x}, dldy_mul_dydx);
auto grad_grad_x = op.mul(softsign_mul_dydx, neg_two_r_abs_x);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], dldy));
ret.emplace_back(op.mul(ograds[0], grad_grad_x));
return ret;
});

// copy
static void CopyEx(const nnvm::NodeAttrs& attrs,
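For a quick sanity check outside the unit-test harness, the closed-form f''(x) used in the FGradient above can be compared against a second pass of mxnet.autograd. The snippet below is only an illustrative sketch (the sample points and variable names are not part of the change); it differentiates the first-order gradient of softsign a second time and prints both results:

```python
from mxnet import nd, autograd

x = nd.array([-2.0, -0.5, 0.5, 2.0])  # keep away from 0, where |x| is not differentiable
x.attach_grad()
with autograd.record():
    y = nd.softsign(x)
    # first-order gradient, kept in the graph so it can be differentiated again
    x_grad = autograd.grad(heads=y, variables=x,
                           create_graph=True, retain_graph=True)[0]
x_grad.backward()

analytic = -2 * x / (nd.abs(x) * (1 + nd.abs(x)) ** 3)
print(x.grad)    # second-order gradient from the new FGradient
print(analytic)  # closed-form f''(x)
```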
28 changes: 28 additions & 0 deletions tests/python/unittest/test_higher_order_grad.py
@@ -502,6 +502,34 @@ def grad_grad_op(x):
        check_second_order_unary(array, rcbrt, grad_grad_op)


@with_seed()
def test_softsign():
    def softsign(x):
        return nd.softsign(x)

    def grad_grad_op(x):
        return -2 * x / (nd.abs(x) * (1 + nd.abs(x))**3)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, softsign, grad_grad_op)


@with_seed()
def test_hard_sigmoid():
    def hard_sigmoid(x):
        return nd.hard_sigmoid(x)

    def grad_grad_op(x):
        # hard_sigmoid is piecewise linear, so its second derivative is zero
        # wherever it is defined.
        return nd.zeros_like(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, hard_sigmoid, grad_grad_op)


def check_second_order_unary(x, op, grad_grad_op, rtol=None, atol=None):
    check_nth_order_unary(x, op, grad_grad_op, 2, rtol, atol)

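The MakeZeroGradNodes path for hard_sigmoid can be exercised the same way; again an illustrative sketch rather than part of the change:

```python
from mxnet import nd, autograd

x = nd.array([-4.0, -1.0, 0.0, 1.0, 4.0])
x.attach_grad()
with autograd.record():
    y = nd.hard_sigmoid(x)
    x_grad = autograd.grad(heads=y, variables=x,
                           create_graph=True, retain_graph=True)[0]
x_grad.backward()
print(x.grad)  # expected: all zeros, since hard_sigmoid is piecewise linear
```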