diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index bd9af2c148a9..191319a9edbd 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -746,6 +746,10 @@ MXNET_BINARY_MATH_OP_NC(minus_sign, a - b > DType(0) ? DType(1) : -DType(1)); MXNET_BINARY_MATH_OP(rminus, b - a); +MXNET_BINARY_MATH_OP_NC(posone, 1); + +MXNET_BINARY_MATH_OP_NC(negone, -1); + MXNET_BINARY_MATH_OP(div_grad, 1.0f / math::id(b)); MXNET_BINARY_MATH_OP(div_rgrad, -math::id(a) / math::sqr(b)); @@ -799,6 +803,54 @@ struct mod : public mxnet_op::tunable { } }; +#ifndef _WIN32 +struct mixed_mod { + template::value, int>::type = 0> + MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { + return mod::Map(static_cast(a), b); + } + + template::value || + std::is_integral::value, int>::type = 0> + MSHADOW_XINLINE static float Map(DType a, float b) { + return mod::Map(static_cast(a), b); + } + + template::value || + std::is_same::value || + std::is_integral::value, int>::type = 0> + MSHADOW_XINLINE static double Map(DType a, double b) { + return mod::Map(static_cast(a), b); + } +}; + +struct mixed_rmod { + template::value, int>::type = 0> + MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { + return mod::Map(b, static_cast(a)); + } + + template::value || + std::is_integral::value, int>::type = 0> + MSHADOW_XINLINE static float Map(DType a, float b) { + return mod::Map(b, static_cast(a)); + } + + template::value || + std::is_same::value || + std::is_integral::value, int>::type = 0> + MSHADOW_XINLINE static double Map(DType a, double b) { + return mod::Map(b, static_cast(a)); + } +}; +#endif + struct fmod : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cc b/src/operator/numpy/np_elemwise_broadcast_op.cc index 41c9ea70eb61..6ec880e0ba8b 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op.cc @@ -115,7 +115,22 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add) "FCompute", NumpyBinaryBroadcastComputeWithBool) #endif -.set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_add"}); +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_add"}); + +NNVM_REGISTER_OP(_backward_npi_broadcast_add) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr("TIsBackward", true) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 0}, {0, 1}}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract) #ifndef _WIN32 @@ -128,7 +143,22 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract) "FCompute", NumpyBinaryBroadcastCompute) #endif -.set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"}); +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_sub"}); + +NNVM_REGISTER_OP(_backward_npi_broadcast_sub) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr("TIsBackward", true) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 0}, {0, 1}}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply) #ifndef _WIN32 @@ -158,9 +188,33 @@ NNVM_REGISTER_OP(_backward_npi_broadcast_mul) .set_attr("FCompute", NumpyBinaryBackwardUseIn); -MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod) -.set_attr("FCompute", BinaryBroadcastCompute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mod"}); +MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_mod) +#ifndef _WIN32 +.set_attr( + "FCompute", + NumpyBinaryBroadcastCompute) +#else +.set_attr( + "FCompute", + NumpyBinaryBroadcastCompute) +#endif +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mod"}); + +NNVM_REGISTER_OP(_backward_npi_broadcast_mod) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr("TIsBackward", true) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 1}}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_power) #ifndef _WIN32 diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cu b/src/operator/numpy/np_elemwise_broadcast_op.cu index 1e0130494469..8a13b42e4846 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op.cu @@ -40,6 +40,10 @@ NNVM_REGISTER_OP(_npi_add) NumpyBinaryBroadcastComputeWithBool); #endif +NNVM_REGISTER_OP(_backward_npi_broadcast_add) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); + NNVM_REGISTER_OP(_npi_subtract) #ifndef _WIN32 .set_attr( @@ -52,6 +56,10 @@ NNVM_REGISTER_OP(_npi_subtract) NumpyBinaryBroadcastCompute); #endif +NNVM_REGISTER_OP(_backward_npi_broadcast_sub) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); + NNVM_REGISTER_OP(_npi_multiply) #ifndef _WIN32 .set_attr( @@ -69,7 +77,20 @@ NNVM_REGISTER_OP(_backward_npi_broadcast_mul) mshadow_op::left>); NNVM_REGISTER_OP(_npi_mod) -.set_attr("FCompute", BinaryBroadcastCompute); +#ifndef _WIN32 +.set_attr( + "FCompute", + NumpyBinaryBroadcastCompute); +#else +.set_attr( + "FCompute", + NumpyBinaryBroadcastCompute); +#endif + +NNVM_REGISTER_OP(_backward_npi_broadcast_mod) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); NNVM_REGISTER_OP(_npi_power) #ifndef _WIN32 diff --git a/src/operator/numpy/np_true_divide-inl.h b/src/operator/numpy/np_true_divide-inl.h index 0bc60a08803e..c0edc8cad47a 100644 --- a/src/operator/numpy/np_true_divide-inl.h +++ b/src/operator/numpy/np_true_divide-inl.h @@ -29,6 +29,7 @@ #include #include "../../common/utils.h" #include "../tensor/elemwise_binary_broadcast_op.h" +#include "../numpy/np_elemwise_broadcast_op.h" namespace mxnet { namespace op { diff --git a/src/operator/numpy/np_true_divide.cc b/src/operator/numpy/np_true_divide.cc index 1e46cc9d13b5..bee8a80bab7f 100644 --- a/src/operator/numpy/np_true_divide.cc +++ b/src/operator/numpy/np_true_divide.cc @@ -80,10 +80,26 @@ NNVM_REGISTER_OP(_npi_true_divide) }) #endif .set_attr("FCompute", TrueDivideBroadcastCompute) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_div"}) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_div"}) .add_argument("lhs", "NDArray-or-Symbol", "Dividend array") .add_argument("rhs", "NDArray-or-Symbol", "Divisor array"); + +NNVM_REGISTER_OP(_backward_npi_broadcast_div) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr("TIsBackward", true) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 1}}; + }) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); + NNVM_REGISTER_OP(_npi_true_divide_scalar) .set_num_inputs(1) .set_num_outputs(1) diff --git a/src/operator/numpy/np_true_divide.cu b/src/operator/numpy/np_true_divide.cu index 7211f4a0a006..c8eccfe140b4 100644 --- a/src/operator/numpy/np_true_divide.cu +++ b/src/operator/numpy/np_true_divide.cu @@ -31,6 +31,10 @@ namespace op { NNVM_REGISTER_OP(_npi_true_divide) .set_attr("FCompute", TrueDivideBroadcastCompute); +NNVM_REGISTER_OP(_backward_npi_broadcast_div) +.set_attr("FCompute", NumpyBinaryBackwardUseIn); + NNVM_REGISTER_OP(_npi_true_divide_scalar) .set_attr("FCompute", TrueDivideScalarCompute); diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc index 4077ce137463..b5e253a1872e 100644 --- a/src/operator/operator_tune.cc +++ b/src/operator/operator_tune.cc @@ -430,6 +430,8 @@ IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rldexp); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ldexp_grad); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ldexp_rgrad); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rldexp_grad); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::posone); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::negone); // NOLINT() /*! * \brief Tuner objects, *not* automatically generated */ diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index be759f6fa3d5..88b7de79fab5 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -2618,10 +2618,13 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): use_broadcast=False, equal_nan=True) funcs = { - 'add': (-1.0, 1.0, None, None), - 'subtract': (-1.0, 1.0, None, None), + 'add': (-1.0, 1.0, lambda y, x1, x2: _np.ones(y.shape), + lambda y, x1, x2: _np.ones(y.shape)), + 'subtract': (-1.0, 1.0, lambda y, x1, x2: _np.ones(y.shape), + lambda y, x1, x2: _np.ones(y.shape) * -1), 'multiply': (-1.0, 1.0, lambda y, x1, x2: _np.broadcast_to(x2, y.shape), lambda y, x1, x2: _np.broadcast_to(x1, y.shape)), + 'mod': (1.0, 5.0, None, None), 'power': (1.0, 3.0, lambda y, x1, x2: _np.power(x1, x2 - 1.0) * x2, lambda y, x1, x2: _np.power(x1, x2) * _np.log(x1)), } @@ -2649,7 +2652,7 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): continue check_mixed_precision_binary_func(func, low, high, lshape, rshape, lgrad, rgrad, type1, type2) - if func == 'subtract': + if func == 'subtract' or func == 'mod': continue for type1, type2 in itertools.product(itypes, itypes): if type1 == type2: