From be009fe4c5a2a321aa92e99ac6e9cc511198c742 Mon Sep 17 00:00:00 2001
From: reminisce
Date: Tue, 27 Jun 2017 20:38:13 -0700
Subject: [PATCH] Add benchmark code

---
 src/operator/nn/cast_storage-inl.h  | 10 +++++++++
 src/operator/tensor/matrix_op-inl.h | 35 ++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/operator/nn/cast_storage-inl.h b/src/operator/nn/cast_storage-inl.h
index 1fb32045b9a0..f9cd161bd4d7 100644
--- a/src/operator/nn/cast_storage-inl.h
+++ b/src/operator/nn/cast_storage-inl.h
@@ -60,13 +60,18 @@ inline void CastStorageDnsRspImpl(mshadow::Stream* s, const TBlob& dns, NDA
       mxnet_op::Kernel::Launch(s, num_rows, row_idx,
           dns.dptr(), num_cols);
       index_t nnr = 0;
+      double start = dmlc::GetTime();
       nnr = mxnet::common::ParallelAccumulate(row_idx, num_rows, nnr);
+      double elapsed = dmlc::GetTime() - start;
+      LOG(INFO) << "CastStorageDnsRspImpl: ParallelAccumulate time cost " << elapsed * 1000 << " ms";
+      LOG(INFO) << "CastStorageDnsRspImpl: nnr = " << nnr;
       rsp->set_aux_shape(rowsparse::kIdx, mshadow::Shape1(nnr));
       if (0 == nnr) return;
       rsp->CheckAndAllocData(mshadow::Shape2(nnr, num_cols));
       mshadow::Tensor dns_data = dns.FlatTo2D(s);
       mshadow::Tensor rsp_data = rsp->data().FlatTo2D(s);
       size_t idx = 0;
+      start = dmlc::GetTime();
       for (index_t i = 0; i < num_rows; ++i) {
         if (row_idx[i] > 0) {
           row_idx[idx] = i;
@@ -74,6 +79,8 @@ inline void CastStorageDnsRspImpl(mshadow::Stream* s, const TBlob& dns, NDA
           ++idx;
         }
       }
+      elapsed = dmlc::GetTime() - start;
+      LOG(INFO) << "CastStorageDnsRspImpl: copy rows time cost " << elapsed * 1000 << " ms";
     });
   });
 }
@@ -278,7 +285,10 @@ void CastStorageComputeImpl(mshadow::Stream* s,
     CastStorageRspDnsImpl(s, input, &ret);
   } else if (src_stype == kDefaultStorage && dst_stype == kRowSparseStorage) {
     NDArray ret = output;  // get rid of the const qualifer
+    double start = dmlc::GetTime();
     CastStorageDnsRspImpl(s, input.data(), &ret);
+    double elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "CastStorageDnsRspImpl: time cost " << elapsed * 1000 << " ms";
   } else if (src_stype == kDefaultStorage && dst_stype == kCSRStorage) {
     NDArray ret = output;  // get rid of the const qualifer
     CastStorageDnsCsrImpl(s, input.data(), &ret);
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 376cb0ba6b56..62022262d0c8 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -7,6 +7,7 @@
 #define MXNET_OPERATOR_TENSOR_MATRIX_OP_INL_H_
 
 #include
+#include <dmlc/timer.h>
 #include
 #include
 #include
@@ -323,6 +324,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
 struct DotParam : public dmlc::Parameter<DotParam> {
   bool transpose_a;
   bool transpose_b;
+  int _out_stype;
   DMLC_DECLARE_PARAMETER(DotParam) {
     DMLC_DECLARE_FIELD(transpose_a)
       .describe("If true then transpose the first input before dot.")
@@ -330,6 +332,10 @@ struct DotParam : public dmlc::Parameter<DotParam> {
       .set_default(false);
     DMLC_DECLARE_FIELD(transpose_b)
       .describe("If true then transpose the second input before dot.")
       .set_default(false);
+    DMLC_DECLARE_FIELD(_out_stype)
+      .add_enum("dns", kDefaultStorage)
+      .add_enum("csr", kCSRStorage)
+      .add_enum("rsp", kRowSparseStorage);
   }
 };
@@ -483,12 +489,15 @@ inline bool DotForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 2U);
   CHECK_EQ(out_attrs->size(), 1U);
   const DotParam& param = nnvm::get<DotParam>(attrs.parsed);
+  (*out_attrs)[0] = param._out_stype;
+#if 0
   if (param.transpose_a && kCSRStorage == (*in_attrs)[0]
       && kDefaultStorage == (*in_attrs)[1]) {
     STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, 0, kRowSparseStorage);
   } else {
     STORAGE_TYPE_ASSIGN_CHECK(*out_attrs, 0, kDefaultStorage);
   }
+#endif
   return true;
 }
@@ -720,10 +729,14 @@ void DotCsrDnsDnsImpl(const OpContext& ctx,
   int num_threads = mxnet_op::get_num_threads(data_out.shape_[0]);
   size_t seg_len = (data_out.shape_[0] + num_threads - 1) / num_threads;
   if (trans_lhs) {
+    double start = dmlc::GetTime();
     mxnet_op::Kernel::Launch(s, num_threads, data_out.dptr(),
         data_l.dptr(), indptr_l.dptr(), col_idx_l.dptr(), data_r.dptr(),
         seg_len, lhs.shape()[0], data_out.shape_[0], data_out.shape_[1]);
+    double elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsDnsImpl: Kernel time cost "
+              << elapsed * 1000 << " ms";
   } else {
     mxnet_op::Kernel::Launch(s, num_threads, data_out.dptr(),
         data_l.dptr(), indptr_l.dptr(),
@@ -789,23 +802,37 @@ void DotCsrDnsRspImpl(const OpContext& ctx,
   int num_threads = mxnet_op::get_num_threads(data_out.shape_[0]);
   size_t seg_len = (data_out.shape_[0] + num_threads - 1) / num_threads;
   if (trans_lhs) {
+    double start = dmlc::GetTime();
     mxnet_op::Kernel::Launch(s, num_threads, data_out.dptr(), row_idx,
         data_l.dptr(), indptr_l.dptr(), col_idx_l.dptr(), data_r.dptr(),
         seg_len, lhs.shape()[0], data_out.shape_[0], data_out.shape_[1]);
+    double elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsRspImpl: Kernel time cost "
+              << elapsed * 1000 << " ms";
     index_t nnr = 0;
+
+    start = dmlc::GetTime();
     nnr = mxnet::common::ParallelAccumulate(row_idx, ret->shape()[0], nnr);
+    elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsRspImpl: ParallelAccumulate time cost " << elapsed * 1000 << " ms";
+    LOG(INFO) << "DotCsrDnsRspImpl: nnr = " << nnr;
     ret->set_aux_shape(rowsparse::kIdx, mshadow::Shape1(nnr));
+    ret->set_storage_shape(mshadow::Shape2(nnr, ret->shape()[1]));
     if (0 == nnr) return;
     mshadow::Tensor rsp_data = data_out.FlatTo2D(s);
     size_t idx = 0;
+
+    start = dmlc::GetTime();
     for (index_t i = 0; i < ret->shape()[0]; ++i) {
-      if (row_idx > 0) {
+      if (row_idx[i] > 0) {
         row_idx[idx] = i;
         mshadow::Copy(rsp_data[idx], rsp_data[i], s);
         ++idx;
       }
     }
+    elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsRspImpl: copy rows time cost " << elapsed * 1000 << " ms";
   } else {
     LOG(FATAL) << "DotCsrDnsRspImpl has not implemented dot(csr, dns)=rsp yet."
                   " Only the cpu version of dot(csr.T, dns)=rsp is supported now";
@@ -915,7 +942,10 @@ void DotForwardEx(const nnvm::NodeAttrs& attrs,
   auto out_stype = outputs[0].storage_type();
   if (lhs_stype == kCSRStorage && rhs_stype == kDefaultStorage && out_stype == kDefaultStorage) {
     TBlob ret = outputs[0].data();
+    double start = dmlc::GetTime();
     DotCsrDnsDnsImpl(ctx, inputs[0], inputs[1].data(), req[0], param.transpose_a, &ret);
+    double elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsDnsImpl: time cost " << elapsed * 1000 << " ms";
   } else if (lhs_stype == kCSRStorage && rhs_stype == kRowSparseStorage &&
              out_stype == kDefaultStorage) {
     TBlob ret = outputs[0].data();
@@ -923,7 +953,10 @@ void DotForwardEx(const nnvm::NodeAttrs& attrs,
   } else if (lhs_stype == kCSRStorage && rhs_stype == kDefaultStorage &&
              out_stype == kRowSparseStorage) {
     NDArray out = outputs[0];
+    double start = dmlc::GetTime();
     DotCsrDnsRspImpl(ctx, inputs[0], inputs[1].data(), req[0], param.transpose_a, &out);
+    double elapsed = dmlc::GetTime() - start;
+    LOG(INFO) << "DotCsrDnsRspImpl: time cost " << elapsed * 1000 << " ms";
   } else {
     FCompExFallback(attrs, ctx, inputs, req, outputs, DotForward_, "DotForward_");
   }
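
Note on the instrumentation pattern used throughout this patch: every timed region is bracketed with dmlc::GetTime() (wall-clock seconds) and reported in milliseconds via LOG(INFO), so the log lines from the kernel launch, ParallelAccumulate, and the row-copy loop can be compared directly. The sketch below is a minimal, self-contained illustration of that pattern; it is not part of the patch, and BenchmarkedStep is a hypothetical stand-in for whichever call is being measured. It only assumes dmlc-core's <dmlc/timer.h> and <dmlc/logging.h> are on the include path.

#include <dmlc/logging.h>
#include <dmlc/timer.h>

// Hypothetical stand-in for the work being benchmarked
// (e.g. a kernel launch or a row-copy loop).
void BenchmarkedStep() {
  volatile double sink = 0.0;
  for (int i = 0; i < 1000000; ++i) sink += i * 1e-6;
}

int main() {
  const double start = dmlc::GetTime();            // wall-clock time in seconds
  BenchmarkedStep();
  const double elapsed = dmlc::GetTime() - start;  // elapsed seconds
  // Same reporting convention as the patch: milliseconds via LOG(INFO).
  LOG(INFO) << "BenchmarkedStep: time cost " << elapsed * 1000 << " ms";
  return 0;
}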