Skip to content

Commit

Permalink
remove sparse embedding (apache#7165)
Browse files Browse the repository at this point in the history
  • Loading branch information
eric-haibin-lin committed Jul 25, 2017
1 parent 16527f7 commit 39c967b
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 172 deletions.
2 changes: 1 addition & 1 deletion python/mxnet/ndarray/sparse_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ class RowSparseNDArray(SparseNDArray):
``dense[rsp.indices[i], :, :, :, ...] = rsp.values[i, :, :, :, ...]``
RowSparseNDArray is used principally in the definition of gradients for operations
that have sparse gradients (e.g. SparseEmbedding).
that have sparse gradients (e.g. dot with sparse inputs).
Examples
--------
Expand Down
43 changes: 0 additions & 43 deletions src/operator/tensor/indexing_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,49 +86,6 @@ NNVM_REGISTER_OP(_backward_Embedding)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", EmbeddingOpBackward<cpu>);

// Registration of the SparseEmbedding forward operator with the nnvm op registry.
// Takes a one-hot csr `data` matrix and a `weight` matrix; the forward pass is a
// sparse matrix product (see SparseEmbeddingForwardEx).
NNVM_REGISTER_OP(SparseEmbedding)
.describe(R"doc(Represents words or other sparse inputs by dense continuous vectors.
It assumes that the input is in one-hot form. E.g., for a vocabulary size of 10,000,
each input vector is expected to have dimension 10,000.
The index of the non-zero entry is the index of the word or item it represents.
The corresponding embedding vectors are stored as rows of a matrix.
Hence, mapping an input word to its embedding is implemented as a matrix product.
The gradient of an embedding matrix has the form of gradient vectors that are only
non-zero for words seen in a minibatch.
)doc" ADD_FILELINE)
// Two inputs (data, weight), one output.
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr_parser(ParamParser<EmbeddingParam>)
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"data", "weight"};
})
.set_attr<nnvm::FInferShape>("FInferShape", SparseEmbeddingShape)
// Dtype inference is shared with the dense Embedding op.
.set_attr<nnvm::FInferType>("FInferType", EmbeddingOpType)
.set_attr<FInferStorageType>("FInferStorageType", SparseEmbeddingForwardStorageType)
.set_attr<FResourceRequest>("FResourceRequest",
[](const NodeAttrs& attrs) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseEmbeddingForwardEx<cpu>)
// The backward node receives the output gradient plus the forward `data`
// input (needed to compute the weight gradient); weight itself is not needed.
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
return MakeNonlossGradNode("_backward_SparseEmbedding", n, ograds,
{n->inputs[0]}, n->attrs.dict);
})
.add_argument("data", "NDArray-or-Symbol",
"The input array to the sparse embedding operator.")
.add_argument("weight", "NDArray-or-Symbol", "The embedding weight matrix.")
.add_arguments(EmbeddingParam::__FIELDS__());

// Registration of the SparseEmbedding backward operator.
// Inputs: output gradient and the forward `data` (per the FGradient rule of the
// forward op). Outputs: gradients w.r.t. data and weight.
NNVM_REGISTER_OP(_backward_SparseEmbedding)
.set_num_inputs(2)
.set_num_outputs(2)
// Marked as a backward node so the executor pairs it with its forward op.
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseEmbeddingBackwardEx<cpu>);

NNVM_REGISTER_OP(take)
.describe(R"code(Takes elements from an input array along the given axis.
Expand Down
98 changes: 0 additions & 98 deletions src/operator/tensor/indexing_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,79 +204,6 @@ void EmbeddingOpForward(const nnvm::NodeAttrs& attrs,
});
}

// Forward implementation for csr `data` with a row_sparse `weight`:
// the embedding lookup reduces to out = data (csr) dot weight (rsp).
template<typename xpu>
void SparseEmbeddingForwardRspImpl(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const NDArray& data,
                                   const NDArray& weight,
                                   const OpReqType req,
                                   NDArray *out) {
  // The dense-style dot kernel requires the rsp weight to store every row.
  CHECK_RSP_ALL_ROWS_NON_ZERO(weight, "SparseEmbedding", "weight");
  TBlob dense_out = out->data();
  // No transpose on the csr lhs; forward to the dot implementation.
  DotCsrRspDnsImpl<xpu>(ctx.get_stream<xpu>(), data, weight, req, false, &dense_out);
}

// FComputeEx entry point for the SparseEmbedding forward pass.
// Dispatches to SparseEmbeddingForwardRspImpl for the only supported storage
// combination (csr data, row_sparse weight, dense output); anything else aborts.
// Fix: removed a dead local (`NDArray output = outputs[embedding::kOut];`) that
// was never used — a second handle `ret` is created below for the actual call.
template<typename xpu>
void SparseEmbeddingForwardEx(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<NDArray>& inputs,
                              const std::vector<OpReqType>& req,
                              const std::vector<NDArray>& outputs) {
  // Only full writes are supported by the underlying dot kernel.
  CHECK_EQ(req[embedding::kOut], kWriteTo);
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 1U);
  CHECK_EQ(req.size(), 1U);

  const auto data_stype = inputs[embedding::kData].storage_type();
  const auto weight_stype = inputs[embedding::kWeight].storage_type();
  const auto out_stype = outputs[embedding::kOut].storage_type();
  if (data_stype == kCSRStorage && weight_stype == kRowSparseStorage &&
      out_stype == kDefaultStorage) {
    // csr data * row_sparse weight -> dense output.
    NDArray ret = outputs[embedding::kOut];
    SparseEmbeddingForwardRspImpl<xpu>(attrs, ctx, inputs[embedding::kData],
                                       inputs[embedding::kWeight],
                                       req[embedding::kOut], &ret);
  } else {
    LOG(FATAL) << "Not supported SparseEmbedding operation for data.storage_type = "
               << data_stype << ", weight.storage_type = " << weight_stype
               << ", out.storage_type = " << out_stype;
  }
}

// Storage-type inference for the SparseEmbedding forward op:
// data must be csr, weight is forced to row_sparse, output is dense.
inline bool SparseEmbeddingForwardStorageType(const nnvm::NodeAttrs& attrs,
                                              const Context& ctx,
                                              std::vector<int> *in_attrs,
                                              std::vector<int> *out_attrs) {
  CHECK_EQ(in_attrs->size(), 2U);
  CHECK_EQ(out_attrs->size(), 1U);
  STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, embedding::kData, kCSRStorage);
  // nnvm generates a default storage type for the weight; overwrite it
  // unconditionally with row_sparse.
  in_attrs->at(embedding::kWeight) = kRowSparseStorage;
  STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, embedding::kOut, kDefaultStorage);
  return true;
}

// Shape inference for SparseEmbedding: data is a 2-D one-hot matrix
// (batch, input_dim), weight is (input_dim, output_dim), and the inferred
// output is (batch, output_dim).
inline bool SparseEmbeddingShape(const nnvm::NodeAttrs& attrs,
                                 std::vector<TShape> *in_attrs,
                                 std::vector<TShape> *out_attrs) {
  using namespace mshadow;
  const EmbeddingParam& param = nnvm::get<EmbeddingParam>(attrs.parsed);
  const TShape &dshape = (*in_attrs)[embedding::kData];
  CHECK_EQ(dshape.ndim(), 2)
    << "SparseEmbedding shape error: data is expected to be 2D.";
  SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight,
                     Shape2(param.input_dim, param.output_dim));
  // Output shape is (batch, output_dim); rebuild out_attrs from scratch.
  const index_t oshape[2] = {dshape[0], static_cast<index_t>(param.output_dim)};
  out_attrs->clear();
  out_attrs->emplace_back(oshape, oshape + 2);
  return true;
}

// Returns integer log2(a) rounded up
inline int ilog2(unsigned int a) {
int k = 1;
Expand Down Expand Up @@ -389,31 +316,6 @@ void EmbeddingOpBackward(const nnvm::NodeAttrs& attrs,
});
}

// FComputeEx backward pass for SparseEmbedding.
// Per the forward op's FGradient rule, inputs[0] is the output gradient
// (dense) and inputs[1] is the forward csr `data`; outputs[1] is the weight
// gradient (dense). outputs[0] (the data gradient) is not written here.
// Fix: removed three lines of dead commented-out CHECK code left over from an
// earlier revision.
template<typename xpu>
void SparseEmbeddingBackwardEx(const nnvm::NodeAttrs& attrs,
                               const OpContext& ctx,
                               const std::vector<NDArray>& inputs,
                               const std::vector<OpReqType>& req,
                               const std::vector<NDArray>& outputs) {
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 2U);
  CHECK_EQ(req.size(), 2U);

  const auto data_stype = inputs[1].storage_type();
  const auto grad_stype = inputs[0].storage_type();
  const auto output_stype = outputs[1].storage_type();
  if (data_stype == kCSRStorage && grad_stype == kDefaultStorage &&
      output_stype == kDefaultStorage) {
    // grad_weight = data^T (csr, transposed via the `true` flag) dot ograd.
    TBlob ret = outputs[1].data();
    DotCsrDnsDnsImpl(ctx.get_stream<xpu>(), inputs[1], inputs[0].data(), req[1], true, &ret);
  } else {
    LOG(FATAL) << "Not supported dot backward for sparse input(s) with sparse gradients";
  }
}

namespace take_ { // to avoid name conflict
enum TakeOpInputs {kArr, kIdx};
enum TakeOpOutputs {kOut};
Expand Down
30 changes: 0 additions & 30 deletions tests/python/unittest/test_sparse_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,36 +142,6 @@ def test_dot_csr(lhs_shape, rhs_shape, rhs_stype, trans_lhs, density=1):
test_dot_csr(lhs_shape, (lhs_shape[0], rnd.randint(1, 10)), 'row_sparse', True, 0.05)


def test_sparse_embedding():
    # Check SparseEmbedding forward/backward against dense numpy dot products.
    in_dim, out_dim, batch = 10, 4, 24

    data = mx.sym.Variable("data", stype='csr')
    embed = mx.sym.SparseEmbedding(data=data, input_dim=in_dim, output_dim=out_dim, name="embed")
    exe_test = embed.simple_bind(default_context(), grad_req={'data': 'null', 'embed_weight': 'write'},
                                 data=(batch, in_dim))

    arg_map = dict(zip(embed.list_arguments(), exe_test.arg_arrays))
    grad_map = dict(zip(embed.list_arguments(), exe_test.grad_arrays))
    # Build a random one-hot batch and a random weight matrix.
    word_ids = np.random.randint(low=0, high=in_dim, size=batch)
    weight_np = np.random.uniform(-0.01, 0.01, arg_map["embed_weight"].shape)
    one_hot = np.zeros((batch, in_dim))
    one_hot[np.arange(batch), word_ids] = 1.0
    csr_data = mx.nd.array(one_hot)._to_csr()
    # forward: output must equal one_hot.dot(weight)
    arg_map["data"][:] = csr_data
    arg_map["embed_weight"][:] = weight_np
    exe_test.forward(is_train=True)
    assert_almost_equal(exe_test.outputs[0].asnumpy(), np.dot(one_hot, weight_np))
    # backward: weight gradient must equal one_hot.T.dot(ograd)
    ograd_np = np.random.uniform(-1, 1, exe_test.outputs[0].shape)
    ograd = mx.nd.zeros(ograd_np.shape)
    ograd[:] = ograd_np
    exe_test.backward([ograd])
    assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(one_hot.T, ograd_np), atol=1e-5)


def test_sparse_slice():
def check_csr_slice(shape, slice_input):
storage_type = 'csr'
Expand Down

0 comments on commit 39c967b

Please sign in to comment.