diff --git a/python/mxnet/ndarray/sparse_ndarray.py b/python/mxnet/ndarray/sparse_ndarray.py
index 720d44586a74..d0f25322f8ce 100644
--- a/python/mxnet/ndarray/sparse_ndarray.py
+++ b/python/mxnet/ndarray/sparse_ndarray.py
@@ -387,7 +387,7 @@ class RowSparseNDArray(SparseNDArray):
     ``dense[rsp.indices[i], :, :, :, ...] = rsp.values[i, :, :, :, ...]``
 
     RowSparseNDArray is used principally in the definition of gradients for operations
-    that have sparse gradients (e.g. SparseEmbedding).
+    that have sparse gradients (e.g. dot with sparse inputs).
 
     Examples
     --------
diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index f55f7d8cf563..b709aba4a9e1 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -86,49 +86,6 @@ NNVM_REGISTER_OP(_backward_Embedding)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute", EmbeddingOpBackward<cpu>);
 
-NNVM_REGISTER_OP(SparseEmbedding)
-.describe(R"doc(Represents words or other sparse inputs by dense continuous vectors.
-It assumes that the input is in one-hot form. E.g., for a vocabulary size of 10,000,
- each input vector is expected to have dimension 10,000.
-The index of the non-zero entry is the index of the word or item it represents.
-
-The corresponding embedding vectors are stored as rows of a matrix.
-Hence, mapping an input word to its embedding is implemented as a matrix product.
-
-The gradient of an embedding matrix has the form of gradient vectors that are only
- non-zero for words seen in a minibatch.
-)doc" ADD_FILELINE)
-.set_num_inputs(2)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<EmbeddingParam>)
-.set_attr<nnvm::FListInputNames>("FListInputNames",
-  [](const NodeAttrs& attrs) {
-    return std::vector<std::string>{"data", "weight"};
-  })
-.set_attr<nnvm::FInferShape>("FInferShape", SparseEmbeddingShape)
-.set_attr<nnvm::FInferType>("FInferType", EmbeddingOpType)
-.set_attr<FInferStorageType>("FInferStorageType", SparseEmbeddingForwardStorageType)
-.set_attr<FResourceRequest>("FResourceRequest",
-  [](const NodeAttrs& attrs) {
-    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-  })
-.set_attr<FComputeEx>("FComputeEx", SparseEmbeddingForwardEx<cpu>)
-.set_attr<nnvm::FGradient>("FGradient",
-  [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
-    return MakeNonlossGradNode("_backward_SparseEmbedding", n, ograds,
-                               {n->inputs[0]}, n->attrs.dict);
-  })
-.add_argument("data", "NDArray-or-Symbol",
-              "The input array to the sparse embedding operator.")
-.add_argument("weight", "NDArray-or-Symbol", "The embedding weight matrix.")
-.add_arguments(EmbeddingParam::__FIELDS__());
-
-NNVM_REGISTER_OP(_backward_SparseEmbedding)
-.set_num_inputs(2)
-.set_num_outputs(2)
-.set_attr<nnvm::TIsBackward>("TIsBackward", true)
-.set_attr<FComputeEx>("FComputeEx", SparseEmbeddingBackwardEx<cpu>);
-
 NNVM_REGISTER_OP(take)
 .describe(R"code(Takes elements from an input array along the given axis.
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 6e4b380f893b..8a73ebe1a138 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -204,79 +204,6 @@ void EmbeddingOpForward(const nnvm::NodeAttrs& attrs,
   });
 }
 
-template<typename xpu>
-void SparseEmbeddingForwardRspImpl(const nnvm::NodeAttrs& attrs,
-                                   const OpContext& ctx,
-                                   const NDArray& data,
-                                   const NDArray& weight,
-                                   const OpReqType req,
-                                   NDArray *out) {
-  CHECK_RSP_ALL_ROWS_NON_ZERO(weight, "SparseEmbedding", "weight");
-  TBlob out_blob = out->data();
-  // forward to dns implementation when storage_shape equals shape
-  bool transpose_a = false;
-  DotCsrRspDnsImpl(ctx.get_stream<xpu>(), data, weight, req, transpose_a, &out_blob);
-}
-
-template<typename xpu>
-void SparseEmbeddingForwardEx(const nnvm::NodeAttrs& attrs,
-                              const OpContext& ctx,
-                              const std::vector<NDArray>& inputs,
-                              const std::vector<OpReqType>& req,
-                              const std::vector<NDArray>& outputs) {
-  CHECK_EQ(req[embedding::kOut], kWriteTo);
-  CHECK_EQ(inputs.size(), 2U);
-  CHECK_EQ(outputs.size(), 1U);
-  CHECK_EQ(req.size(), 1U);
-
-  NDArray output = outputs[embedding::kOut];
-  auto data_stype = inputs[embedding::kData].storage_type();
-  auto weight_stype = inputs[embedding::kWeight].storage_type();
-  auto out_stype = outputs[embedding::kOut].storage_type();
-  if (data_stype == kCSRStorage && weight_stype == kRowSparseStorage &&
-      out_stype == kDefaultStorage) {
-    NDArray ret = outputs[embedding::kOut];
-    SparseEmbeddingForwardRspImpl<xpu>(attrs, ctx, inputs[embedding::kData],
-                                       inputs[embedding::kWeight],
-                                       req[embedding::kOut], &ret);
-  } else {
-    LOG(FATAL) << "Not supported SparseEmbedding operation for data.storage_type = "
-               << data_stype << ", weight.storage_type = " << weight_stype
-               << ", out.storage_type = " << out_stype;
-  }
-}
-
-inline bool SparseEmbeddingForwardStorageType(const nnvm::NodeAttrs& attrs,
-                                              const Context& ctx,
-                                              std::vector<int> *in_attrs,
-                                              std::vector<int> *out_attrs) {
-  CHECK_EQ(in_attrs->size(), 2U);
-  CHECK_EQ(out_attrs->size(), 1U);
-  STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, embedding::kData, kCSRStorage);
-  STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, embedding::kOut, kDefaultStorage);
-  // override the default storage type generated in nnvm
-  in_attrs->at(embedding::kWeight) = kRowSparseStorage;
-  return true;
-}
-
-inline bool SparseEmbeddingShape(const nnvm::NodeAttrs& attrs,
-                                 std::vector<TShape> *in_attrs,
-                                 std::vector<TShape> *out_attrs) {
-  using namespace mshadow;
-  const EmbeddingParam& param = nnvm::get<EmbeddingParam>(attrs.parsed);
-  const TShape &dshape = (*in_attrs)[embedding::kData];
-  CHECK_EQ(dshape.ndim(), 2)
-      << "SparseEmbedding shape error: data is expected to be 2D.";
-  SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight,
-                     Shape2(param.input_dim, param.output_dim));
-  out_attrs->clear();
-  std::vector<index_t> buf(2);
-  buf[0] = dshape[0];
-  buf[1] = param.output_dim;
-  out_attrs->emplace_back(buf.begin(), buf.end());
-  return true;
-}
-
 // Returns integer log2(a) rounded up
 inline int ilog2(unsigned int a) {
   int k = 1;
@@ -389,31 +316,6 @@ void EmbeddingOpBackward(const nnvm::NodeAttrs& attrs,
   });
 }
 
-template<typename xpu>
-void SparseEmbeddingBackwardEx(const nnvm::NodeAttrs& attrs,
-                               const OpContext& ctx,
-                               const std::vector<NDArray>& inputs,
-                               const std::vector<OpReqType>& req,
-                               const std::vector<NDArray>& outputs) {
-  CHECK_EQ(inputs.size(), 2U);
-  CHECK_EQ(outputs.size(), 2U);
-  CHECK_EQ(req.size(), 2U);
-  // CHECK_EQ(req[embedding::kData], kNullOp)
-  //   << "Embedding layer doesn't support calculate data gradient" << req[0] << " " << req[1];
-  // CHECK_NE(req[1], kWriteInplace) << "DotBackwardEx does not support WriteInplace";
-
-  auto data_stype = inputs[1].storage_type();
-  auto grad_stype = inputs[0].storage_type();
-  auto output_stype = outputs[1].storage_type();
-  if (data_stype == kCSRStorage && grad_stype == kDefaultStorage &&
-      output_stype == kDefaultStorage) {
-    TBlob ret = outputs[1].data();
-    DotCsrDnsDnsImpl(ctx.get_stream<xpu>(), inputs[1], inputs[0].data(), req[1], true, &ret);
-  } else {
-    LOG(FATAL) << "Not supported dot backward for sparse input(s) with sparse gradients";
-  }
-}
-
 namespace take_ {  // to avoid name conflict
 enum TakeOpInputs {kArr, kIdx};
 enum TakeOpOutputs {kOut};
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index d0064a9265f8..b95ae1384943 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -142,36 +142,6 @@ def test_dot_csr(lhs_shape, rhs_shape, rhs_stype, trans_lhs, density=1):
     test_dot_csr(lhs_shape, (lhs_shape[0], rnd.randint(1, 10)), 'row_sparse', True, 0.05)
 
 
-def test_sparse_embedding():
-    in_dim = 10
-    out_dim = 4
-    batch = 24
-
-    data = mx.sym.Variable("data", stype='csr')
-    embed = mx.sym.SparseEmbedding(data=data, input_dim=in_dim, output_dim=out_dim, name="embed")
-    exe_test = embed.simple_bind(default_context(), grad_req={'data': 'null', 'embed_weight': 'write'},
-                                 data=(batch, in_dim))
-
-    arg_map = dict(zip(embed.list_arguments(), exe_test.arg_arrays))
-    grad_map = dict(zip(embed.list_arguments(), exe_test.grad_arrays))
-    np_data = np.random.randint(low=0, high=in_dim, size=batch)
-    np_weight = np.random.uniform(-0.01, 0.01, arg_map["embed_weight"].shape)
-    np_onehot = np.zeros((batch, in_dim))
-    np_onehot[np.arange(batch), np_data] = 1.0
-    nd_onehot = mx.nd.array(np_onehot)._to_csr()
-    # forward
-    arg_map["data"][:] = nd_onehot
-    arg_map["embed_weight"][:] = np_weight
-    exe_test.forward(is_train=True)
-    assert_almost_equal(exe_test.outputs[0].asnumpy(), np.dot(np_onehot, np_weight))
-    # backward
-    np_grad = np.random.uniform(-1, 1, exe_test.outputs[0].shape)
-    grad = mx.nd.zeros(np_grad.shape)
-    grad[:] = np_grad
-    exe_test.backward([grad])
-    assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(np_onehot.T, np_grad), atol=1e-5)
-
-
 def test_sparse_slice():
     def check_csr_slice(shape, slice_input):
         storage_type = 'csr'
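
Note (illustrative sketch, not part of the patch): the computation that SparseEmbedding provided
maps onto the sparse dot path referenced by the updated RowSparseNDArray docstring. The snippet
below assumes that on this branch mx.nd.dot dispatches to the sparse kernels for a CSR lhs and
supports transpose_a for CSR inputs (as exercised by test_dot_csr), and it reuses the _to_csr()
conversion helper seen in the removed test; names and values are only for illustration.

    import numpy as np
    import mxnet as mx

    in_dim, out_dim, batch = 10, 4, 24

    # One-hot "word" indices stored as a CSR matrix, plus a dense embedding weight.
    np_data = np.random.randint(low=0, high=in_dim, size=batch)
    np_onehot = np.zeros((batch, in_dim))
    np_onehot[np.arange(batch), np_data] = 1.0
    csr_onehot = mx.nd.array(np_onehot)._to_csr()  # assumption: branch-era conversion helper
    weight = mx.nd.array(np.random.uniform(-0.01, 0.01, (in_dim, out_dim)))

    # Forward: embedding lookup expressed as a sparse-dense matrix product,
    # i.e. what SparseEmbeddingForwardRspImpl computed via DotCsrRspDnsImpl.
    embedded = mx.nd.dot(csr_onehot, weight)

    # Weight gradient: dot(data^T, ograd), i.e. what SparseEmbeddingBackwardEx
    # computed via DotCsrDnsDnsImpl with the lhs transposed.
    ograd = mx.nd.ones((batch, out_dim))
    weight_grad = mx.nd.dot(csr_onehot, ograd, transpose_a=True)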