diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py
index a83447f64952..f4787be559a1 100644
--- a/python/mxnet/_numpy_op_doc.py
+++ b/python/mxnet/_numpy_op_doc.py
@@ -530,22 +530,22 @@ def _np_roll(a, shift, axis=None):
     Parameters
     ----------
     a : ndarray
-        Input array.
+        Input array.
     shift : int or tuple of ints
-        The number of places by which elements are shifted. If a tuple,
-        then `axis` must be a tuple of the same size, and each of the
-        given axes is shifted by the corresponding number. If an int
-        while `axis` is a tuple of ints, then the same value is used for
-        all given axes.
+        The number of places by which elements are shifted. If a tuple,
+        then `axis` must be a tuple of the same size, and each of the
+        given axes is shifted by the corresponding number. If an int
+        while `axis` is a tuple of ints, then the same value is used for
+        all given axes.
     axis : int or tuple of ints, optional
-        Axis or axes along which elements are shifted. By default, the
-        array is flattened before shifting, after which the original
-        shape is restored.
+        Axis or axes along which elements are shifted. By default, the
+        array is flattened before shifting, after which the original
+        shape is restored.
 
     Returns
     -------
     res : ndarray
-        Output array, with the same shape as `a`.
+        Output array, with the same shape as `a`.
 
     Notes
     -----
@@ -581,5 +581,51 @@ def _np_roll(a, shift, axis=None):
     >>> np.roll(x2, -1, axis=1)
     array([[1., 2., 3., 4., 0.],
            [6., 7., 8., 9., 5.]])
+    """
+
+
+def _np_trace(a, offset=0, axis1=0, axis2=1, out=None):
+    """trace(a, offset=0, axis1=0, axis2=1, out=None)
+
+    Return the sum along diagonals of the array.
+    If `a` is 2-D, the sum along its diagonal with the given offset
+    is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i.
+    If `a` has more than two dimensions, then the axes specified by axis1 and
+    axis2 are used to determine the 2-D sub-arrays whose traces are returned.
+    The shape of the resulting array is the same as that of `a` with `axis1`
+    and `axis2` removed.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input array, from which the diagonals are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal. Can be both positive
+        and negative. Defaults to 0.
+    axis1, axis2 : int, optional
+        Axes to be used as the first and second axis of the 2-D sub-arrays
+        from which the diagonals should be taken. Defaults are the first two
+        axes of `a`.
+    out : ndarray, optional
+        Array into which the output is placed. It must be of the right shape
+        and right type to hold the output.
+
+    Returns
+    -------
+    sum_along_diagonals : ndarray
+        If `a` is 2-D, the sum along the diagonal is returned. If `a` has
+        larger dimensions, then an array of sums along diagonals is returned.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+    >>> np.trace(a)
+    array(3.)
+    >>> a = np.arange(8).reshape((2, 2, 2))
+    >>> np.trace(a)
+    array([6., 8.])
+    >>> a = np.arange(24).reshape((2, 2, 2, 3))
+    >>> np.trace(a).shape
+    (2, 3)
     """
     pass
diff --git a/src/operator/numpy/np_trace_op-inl.h b/src/operator/numpy/np_trace_op-inl.h
new file mode 100644
index 000000000000..741c20b61d80
--- /dev/null
+++ b/src/operator/numpy/np_trace_op-inl.h
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_trace_op-inl.h
+ * \brief Function definition of matrix numpy-compatible trace operator
+ */
+
+#ifndef MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_
+#define MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_
+
+#include <dmlc/parameter.h>
+#include <mxnet/operator_util.h>
+#include <vector>
+#include <algorithm>
+#include <utility>
+#include "../mxnet_op.h"
+#include "../operator_common.h"
+#include "../elemwise_op_common.h"
+#include "../tensor/broadcast_reduce_op.h"
+
+namespace mxnet {
+namespace op {
+
+struct NumpyTraceParam: public dmlc::Parameter<NumpyTraceParam> {
+  int offset, axis1, axis2;
+  DMLC_DECLARE_PARAMETER(NumpyTraceParam) {
+    DMLC_DECLARE_FIELD(offset)
+    .set_default(0)
+    .describe("Offset of the diagonal from the main diagonal. "
+              "Can be both positive and negative. Defaults to 0.");
+    DMLC_DECLARE_FIELD(axis1)
+    .set_default(0)
+    .describe("Axes to be used as the first axis of the 2-D sub-arrays "
+              "from which the diagonals should be taken. Defaults to 0.");
+    DMLC_DECLARE_FIELD(axis2)
+    .set_default(1)
+    .describe("Axes to be used as the second axis of the 2-D sub-arrays "
+              "from which the diagonals should be taken. Defaults to 1.");
+  }
+};
+
+template<int req, bool back>
+struct numpy_trace {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* a,
+                                  mshadow::Shape<3> oshape,
+                                  mshadow::Shape<3> ishape,
+                                  index_t stride, index_t offset, int dlength) {
+    using namespace mxnet_op;
+    using namespace mshadow;
+    index_t j = ravel(unravel(i, oshape), ishape) + offset;
+    if (back) {
+      for (index_t k = 0; k < dlength; ++k) {
+        KERNEL_ASSIGN(out[j], req, a[i]);
+        j += stride;
+      }
+    } else {
+      if (req == kWriteTo) {
+        out[i] = 0;
+        for (index_t k = 0; k < dlength; ++k) {
+          out[i] += a[j];
+          j += stride;
+        }
+      } else if (req == kAddTo) {
+        for (index_t k = 0; k < dlength; ++k) {
+          out[i] += a[j];
+          j += stride;
+        }
+      }
+    }
+  }
+};
+
+template<typename xpu, bool back>
+void NumpyTraceOpProcess(const TBlob& in_data,
+                         const TBlob& out_data,
+                         const mxnet::TShape& ishape,
+                         const mxnet::TShape& oshape,
+                         index_t dsize,
+                         const NumpyTraceParam& param,
+                         mxnet_op::Stream<xpu> *s,
+                         const std::vector<OpReqType>& req) {
+  using namespace mxnet_op;
+  using namespace mshadow;
+  if (dsize == 0) {
+    if (back) {
+      if (out_data.Size() != 0) {
+        MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, {
+          MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+            if (req_type == kWriteTo) {
+              out_data.FlatTo1D<xpu, DType>(s) = 0;
+            }
+          });
+        });
+      }
+    }
+    return;
+  } else if (ishape.Size() == 0) {
+    if (!back) {
+      MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, {
+        MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+          if (req_type == kWriteTo) {
+            out_data.FlatTo1D<xpu, DType>(s) = 0;
+          }
+        });
+      });
+    }
+    return;
+  }
+  uint32_t x1 = CheckAxis(param.axis1, ishape.ndim());
+  uint32_t x2 = CheckAxis(param.axis2, ishape.ndim());
+
+  uint32_t idim = ishape.ndim();
+
+  uint32_t minx = x1, maxx = x2;
+  if (minx > maxx) {
+    std::swap(minx, maxx);
+  }
+
+  // merges contiguous axes that are not separated
+  // by axis1 or axis2 since they can be directly
+  // mapped to the output and there is no need
+  // to distinguish them
+  // (After this the input will have no more than
+  // three axes, hence improving the ravel and
+  // unravel efficiency)
+
+  index_t oleading = 1,
+          obody = 1,
+          otrailing = 1;
+
+  for (uint32_t i = 0; i < minx; ++i) {
+    oleading *= ishape[i];
+  }
+  for (uint32_t i = minx + 1; i < maxx; ++i) {
+    obody *= ishape[i];
+  }
+  for (uint32_t i = maxx + 1; i < idim; ++i) {
+    otrailing *= ishape[i];
+  }
+
+  index_t ileading = oleading,
+          ibody = obody * ishape[minx],
+          itrailing = otrailing * ishape[maxx];
+
+  index_t stride1 = itrailing * obody,
+          stride2 = otrailing;
+  // stride1 + stride2 is the stride for
+  // iterating over the diagonal in question
+
+  if (x1 == maxx) {
+    std::swap(stride1, stride2);
+  }
+
+  // the extra index offset introduced by offset
+  index_t offset;
+  if (param.offset > 0) {
+    offset = stride2 * param.offset;
+  } else if (param.offset < 0) {
+    offset = stride1 * -param.offset;
+  } else {
+    offset = 0;
+  }
+
+  // number of elements in the offset diagonal
+  // may be negative
+  int dlength;
+  if (param.offset > 0) {
+    dlength = std::min(ishape[x1], ishape[x2] - param.offset);
+  } else if (param.offset < 0) {
+    dlength = std::min(ishape[x1] - (-param.offset), ishape[x2]);
+  } else {
+    dlength = std::min(ishape[x1], ishape[x2]);
+  }
+
+  MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+      if (back) {
+        out_data.FlatTo1D<xpu, DType>(s) = 0;
+      }
+      Kernel<numpy_trace<req_type, back>, xpu>::Launch(s, dsize, out_data.dptr<DType>(),
+                                                       in_data.dptr<DType>(),
+                                                       Shape3(oleading, obody, otrailing),
+                                                       Shape3(ileading, ibody, itrailing),
+                                                       stride1 + stride2, offset, dlength);
+    });
+  });
+}
+
+template<typename xpu>
+void NumpyTraceOpForward(const nnvm::NodeAttrs& attrs,
+                         const OpContext& ctx,
+                         const std::vector<TBlob>& inputs,
+                         const std::vector<OpReqType>& req,
+                         const std::vector<TBlob>& outputs) {
+  using namespace mxnet_op;
+  using namespace mshadow;
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  CHECK_EQ(req.size(), 1U);
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  const TBlob& in_data = inputs[0];
+  const TBlob& out_data = outputs[0];
+  const mxnet::TShape& ishape = inputs[0].shape_;
+  const mxnet::TShape& oshape = outputs[0].shape_;
+  const NumpyTraceParam& param = nnvm::get<NumpyTraceParam>(attrs.parsed);
+
+  NumpyTraceOpProcess<xpu, false>(in_data, out_data, ishape, oshape,
+                                  out_data.Size(), param, s, req);
+}
+
+template<typename xpu>
+void NumpyTraceOpBackward(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<TBlob>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<TBlob>& outputs) {
+  using namespace mxnet_op;
+  using namespace mshadow;
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  CHECK_EQ(req.size(), 1U);
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+
+  const TBlob& in_data = inputs[0];
+  const TBlob& out_data = outputs[0];
+  const mxnet::TShape& ishape = inputs[0].shape_;
+  const mxnet::TShape& oshape = outputs[0].shape_;
+  const NumpyTraceParam& param = nnvm::get<NumpyTraceParam>(attrs.parsed);
+
+  NumpyTraceOpProcess<xpu, true>(in_data, out_data, oshape, ishape,
+                                 in_data.Size(), param, s, req);
+}
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_
diff --git a/src/operator/numpy/np_trace_op.cc b/src/operator/numpy/np_trace_op.cc
new file mode 100644
index 000000000000..d97ac3040384
--- /dev/null
+++ b/src/operator/numpy/np_trace_op.cc
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file np_trace_op.cc
+ * \brief CPU Implementation of numpy-compatible trace operator
+ */
+
+#include "./np_trace_op-inl.h"
+
+namespace mxnet {
+namespace op {
+
+inline bool NumpyTraceOpShape(const nnvm::NodeAttrs& attrs,
+                              mxnet::ShapeVector* in_attrs,
+                              mxnet::ShapeVector* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1);
+  CHECK_EQ(out_attrs->size(), 1);
+  const int ndim((*in_attrs)[0].ndim());
+  if (ndim < 2) {
+    return false;
+  }
+  std::vector<int> oshape(ndim - 2);
+  const NumpyTraceParam& param = nnvm::get<NumpyTraceParam>(attrs.parsed);
+  int x1 = CheckAxis(param.axis1, (*in_attrs)[0].ndim());
+  int x2 = CheckAxis(param.axis2, (*in_attrs)[0].ndim());
+  CHECK_NE(x1, x2) << "axis1 and axis2 cannot refer to the same axis " << x1;
+  for (int i = 0, j = 0; i < ndim; ++i) {
+    if (i != x1 && i != x2) {
+      oshape[j++] = (*in_attrs)[0][i];
+    }
+  }
+  mxnet::TShape tshape(oshape.begin(), oshape.end());
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape);
+  return true;
+}
+
+DMLC_REGISTER_PARAMETER(NumpyTraceParam);
+
+NNVM_REGISTER_OP(_np_trace)
+.describe(R"code(Computes the sum of the diagonal elements of a matrix.
+Input is a tensor *A* of dimension *n >= 2*.
+
+If *n=2*, we sum the diagonal elements. The result has shape ().
+
+If *n>2*, *trace* is performed separately on the matrix defined by *axis1* and *axis2* for all
+inputs (batch mode).
+
+Examples::
+
+   // Single matrix reduction
+   A = [[1.0, 1.0], [1.0, 7.0]]
+   trace(A) = 8.0
+
+   // Batch matrix reduction
+   A = [[[1.0, 1.0], [1.0, 7.0]], [[3.0, 0], [0, 17.0]]]
+   trace(A) = [8.0, 20.0]
+)code" ADD_FILELINE)
+.set_attr_parser(ParamParser<NumpyTraceParam>)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::FListInputNames>("FListInputNames",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::string>{"data"};
+  })
+.set_attr<mxnet::FInferShape>("FInferShape", NumpyTraceOpShape)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute", NumpyTraceOpForward<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_np_trace"})
+.add_argument("data", "NDArray-or-Symbol", "Input ndarray")
+.add_arguments(NumpyTraceParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_np_trace)
+.set_attr_parser(ParamParser<NumpyTraceParam>)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute", NumpyTraceOpBackward<cpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_trace_op.cu b/src/operator/numpy/np_trace_op.cu
new file mode 100644
index 000000000000..220e4ae62a59
--- /dev/null
+++ b/src/operator/numpy/np_trace_op.cu
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_trace_op.cu
+ * \brief GPU Implementation of numpy-compatible trace operator
+ */
+#include "./np_trace_op-inl.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_np_trace)
+.set_attr<FCompute>("FCompute", NumpyTraceOpForward<gpu>);
+
+NNVM_REGISTER_OP(_backward_np_trace)
+.set_attr<FCompute>("FCompute", NumpyTraceOpBackward<gpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 42d90434516e..8846d57dbe32 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -2189,6 +2189,88 @@ def hybrid_forward(self, F, x):
                           numeric_eps=1e-3, rtol=1e-3, atol=1e-5, dtype=i_dtype[dtype])
 
 
+@with_seed()
+@use_np
+def test_np_trace():
+    class TestTrace(HybridBlock):
+        def __init__(self, axis1, axis2, offset):
+            super(TestTrace, self).__init__()
+            self._axis1 = axis1
+            self._axis2 = axis2
+            self._offset = offset
+
+        def hybrid_forward(self, F, data):
+            return F.np.trace(data, axis1=self._axis1, axis2=self._axis2, offset=self._offset)
+
+    def g(data, axis1, axis2, offset):
+        idx = _np.indices(data.shape)
+        ret = _np.zeros_like(data)
+        ret[idx[axis1] + offset == idx[axis2]] = 1.0
+        return ret
+
+    shapes = [
+        (3, 3),
+        (3, 4),
+        (0, 0),
+        (3, 3, 3),
+        (0, 0, 0),
+        (2, 2, 4, 3),
+        (2, 2, 4, 3),
+        (2, 0, 3, 0),
+        (2, 0, 2, 3)
+    ]
+    offsets = range(-5, 5)
+    dtypes = ['int32', 'float16', 'float32', 'float64']
+    for hybridize in [True, False]:
+        for shape in shapes:
+            ndim = len(shape)
+            for axis1 in range(-ndim, ndim):
+                for axis2 in range(-ndim, ndim):
+                    if (axis1 + ndim) % ndim != (axis2 + ndim) % ndim:
+                        for offset in offsets:
+                            for dtype in dtypes:
+                                if dtype == 'float16':
+                                    rtol = atol = 1e-2
+                                else:
+                                    rtol = atol = 1e-5
+                                test_trace = TestTrace(axis1, axis2, offset)
+                                if hybridize:
+                                    test_trace.hybridize()
+                                data_np = _np.random.uniform(-10.0, 10.0, shape)
+                                data = mx.nd.array(data_np, dtype=dtype)
+                                data_np = data.asnumpy()
+                                data.attach_grad()
+                                expected_np = _np.trace(data_np, axis1=axis1, axis2=axis2, offset=offset)
+                                with mx.autograd.record():
+                                    out_mx = test_trace(data.as_np_ndarray())
+                                assert out_mx.shape == expected_np.shape
+                                assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)
+                                out_mx.backward()
+                                backward_expected = g(data_np, axis1=axis1, axis2=axis2, offset=offset)
+                                assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol)
+
+                                # Test imperative once again
+                                data = mx.nd.array(data_np, dtype=dtype)
+                                out_mx = np.trace(data.as_np_ndarray(), axis1=axis1, axis2=axis2, offset=offset)
+                                assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol)
+
+    # bad params
+    params = [
+        ([], 0, 1, 0),
+        ([2], 0, 1, 0),
+        ([3, 2, 2], 1, 1, 1),
+        ([3, 2, 2], 0, -4, 1)
+    ]
+    for shape, axis1, axis2, offset in params:
+        data_np = _np.random.uniform(-1.0, 1.0, shape)
+        data_mx = mx.nd.array(data_np)
+        try:
+            output = np.trace(data_mx.as_np_ndarray(), axis1=axis1, axis2=axis2, offset=offset)
+        except mx.base.MXNetError:
+            continue
+        assert False
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
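
A minimal front-end usage sketch of the operator added above (not part of the patch itself), mirroring the docstring examples; it assumes the NumPy-compatible mode is enabled via mxnet.npx.set_np(), and the exact front-end setup may differ by MXNet version.

    # illustrative sketch only, based on the _np_trace docstring examples above
    import mxnet as mx
    from mxnet import np, npx
    npx.set_np()  # assumed: switches MXNet to NumPy-compatible array semantics

    a = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    print(np.trace(a))       # sum of the main diagonal; expected: array(3.)

    b = np.arange(8).reshape((2, 2, 2))
    print(np.trace(b))       # trace over the first two axes (axis1=0, axis2=1); expected: array([6., 8.])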