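Pass SpGEMM results as out parameters: the `spgemm` and `advanced_spgemm` kernels now take `c` as a const input and write the result into caller-supplied row-pointer, column-index and value arrays, from which `Csr::apply_impl` builds the output matrix.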

ginkgo-project · Nov 18, 2019 · 5bb5531 · 5bb5531
1 parent 367481f
commit 5bb5531
Show file tree

Hide file tree

Showing 6 changed files with 89 additions and 70 deletions.
diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp
@@ -87,8 +87,17 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const
     using Dense = Dense<ValueType>;
     using TCsr = Csr<ValueType, IndexType>;
     if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
+        auto exec = this->get_executor();
+        Array<IndexType> x_rows(exec);
+        Array<IndexType> x_cols(exec);
+        Array<ValueType> x_vals(exec);
         auto x_csr = as<TCsr>(x);
-        this->get_executor()->run(csr::make_spgemm(this, b_csr, x_csr));
+        this->get_executor()->run(
+            csr::make_spgemm(this, b_csr, x_csr, x_rows, x_cols, x_vals));
+        auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
+                                  std::move(x_cols), std::move(x_rows),
+                                  x_csr->get_strategy());
+        new_x->move_to(x_csr);
     } else {
         this->get_executor()->run(
             csr::make_spmv(this, as<Dense>(b), as<Dense>(x)));
@@ -103,9 +112,18 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *alpha, const LinOp *b,
     using Dense = Dense<ValueType>;
     using TCsr = Csr<ValueType, IndexType>;
     if (auto b_csr = dynamic_cast<const TCsr *>(b)) {
+        auto exec = this->get_executor();
+        Array<IndexType> x_rows(exec);
+        Array<IndexType> x_cols(exec);
+        Array<ValueType> x_vals(exec);
         auto x_csr = as<TCsr>(x);
         this->get_executor()->run(csr::make_advanced_spgemm(
-            as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr));
+            as<Dense>(alpha), this, b_csr, as<Dense>(beta), x_csr, x_rows,
+            x_cols, x_vals));
+        auto new_x = TCsr::create(exec, x->get_size(), std::move(x_vals),
+                                  std::move(x_cols), std::move(x_rows),
+                                  x_csr->get_strategy());
+        new_x->move_to(x_csr);
     } else {
         this->get_executor()->run(
             csr::make_advanced_spmv(as<Dense>(alpha), this, as<Dense>(b),

diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp
@@ -62,19 +62,24 @@ namespace kernels {
                        const matrix::Dense<ValueType> *beta,        \
                        matrix::Dense<ValueType> *c)
 
-#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType)  \
-    void spgemm(std::shared_ptr<const DefaultExecutor> exec, \
-                const matrix::Csr<ValueType, IndexType> *a,  \
-                const matrix::Csr<ValueType, IndexType> *b,  \
-                matrix::Csr<ValueType, IndexType> *c)
+#define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType)                 \
+    void spgemm(std::shared_ptr<const DefaultExecutor> exec,                \
+                const matrix::Csr<ValueType, IndexType> *a,                 \
+                const matrix::Csr<ValueType, IndexType> *b,                 \
+                const matrix::Csr<ValueType, IndexType> *c,                 \
+                Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs, \
+                Array<ValueType> &c_vals)
 
 #define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType)  \
     void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec, \
                          const matrix::Dense<ValueType> *alpha,       \
                          const matrix::Csr<ValueType, IndexType> *a,  \
                          const matrix::Csr<ValueType, IndexType> *b,  \
                          const matrix::Dense<ValueType> *beta,        \
-                         matrix::Csr<ValueType, IndexType> *c)
+                         const matrix::Csr<ValueType, IndexType> *c,  \
+                         Array<IndexType> &c_row_ptrs,                \
+                         Array<IndexType> &c_col_idxs,                \
+                         Array<ValueType> &c_vals)
 
 #define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType)  \
     void convert_to_dense(std::shared_ptr<const DefaultExecutor> exec, \

diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu
@@ -359,7 +359,9 @@ template <typename ValueType, typename IndexType>
 void spgemm(std::shared_ptr<const CudaExecutor> exec,
             const matrix::Csr<ValueType, IndexType> *a,
             const matrix::Csr<ValueType, IndexType> *b,
-            matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
+            const matrix::Csr<ValueType, IndexType> *c,
+            Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
+            Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
 
@@ -370,7 +372,9 @@ void advanced_spgemm(std::shared_ptr<const CudaExecutor> exec,
                      const matrix::Csr<ValueType, IndexType> *a,
                      const matrix::Csr<ValueType, IndexType> *b,
                      const matrix::Dense<ValueType> *beta,
-                     matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
+                     const matrix::Csr<ValueType, IndexType> *c,
+                     Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
+                     Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
     GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);

diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp
@@ -388,7 +388,9 @@ template <typename ValueType, typename IndexType>
 void spgemm(std::shared_ptr<const HipExecutor> exec,
             const matrix::Csr<ValueType, IndexType> *a,
             const matrix::Csr<ValueType, IndexType> *b,
-            matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
+            const matrix::Csr<ValueType, IndexType> *c,
+            Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
+            Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
 
@@ -399,7 +401,9 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
                      const matrix::Csr<ValueType, IndexType> *a,
                      const matrix::Csr<ValueType, IndexType> *b,
                      const matrix::Dense<ValueType> *beta,
-                     matrix::Csr<ValueType, IndexType> *c) GKO_NOT_IMPLEMENTED;
+                     const matrix::Csr<ValueType, IndexType> *c,
+                     Array<IndexType> &c_row_ptrs, Array<IndexType> &c_col_idxs,
+                     Array<ValueType> &c_vals) GKO_NOT_IMPLEMENTED;
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
     GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);

diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp
@@ -208,37 +208,38 @@ template <typename ValueType, typename IndexType>
 void spgemm(std::shared_ptr<const OmpExecutor> exec,
             const matrix::Csr<ValueType, IndexType> *a,
             const matrix::Csr<ValueType, IndexType> *b,
-            matrix::Csr<ValueType, IndexType> *c)
+            const matrix::Csr<ValueType, IndexType> *c,
+            Array<IndexType> &c_row_ptrs_array,
+            Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
 {
-    auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
-    auto c_rows = c_size[0];
+    auto rows = a->get_size()[0];
 
     // first sweep: count nnz for each row
-    Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
+    c_row_ptrs_array.resize_and_reset(rows + 1);
     auto c_row_ptrs = c_row_ptrs_array.get_data();
 
     std::unordered_set<IndexType> local_col_idxs;
 #pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_col_idxs.clear();
         spgemm_insert_row2(local_col_idxs, a, b, a_row);
         c_row_ptrs[a_row + 1] = local_col_idxs.size();
     }
 
     // build row pointers: exclusive scan (thus the + 1)
     c_row_ptrs[0] = 0;
-    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
+    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);
 
     // second sweep: accumulate non-zeros
-    auto new_nnz = c_row_ptrs[c_rows];
-    Array<IndexType> c_col_idxs_array(exec, new_nnz);
-    Array<ValueType> c_vals_array(exec, new_nnz);
+    auto new_nnz = c_row_ptrs[rows];
+    c_col_idxs_array.resize_and_reset(new_nnz);
+    c_vals_array.resize_and_reset(new_nnz);
     auto c_col_idxs = c_col_idxs_array.get_data();
     auto c_vals = c_vals_array.get_data();
 
     std::unordered_map<IndexType, ValueType> local_row_nzs;
 #pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_row_nzs.clear();
         spgemm_accumulate_row2(local_row_nzs, a, b, one<ValueType>(), a_row);
         // store result
@@ -249,11 +250,6 @@ void spgemm(std::shared_ptr<const OmpExecutor> exec,
             ++c_nz;
         }
     }
-
-    auto new_c = matrix::Csr<ValueType, IndexType>::create(
-        exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
-        std::move(c_row_ptrs_array), c->get_strategy());
-    new_c->move_to(c);
 }
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
@@ -265,20 +261,22 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
                      const matrix::Csr<ValueType, IndexType> *a,
                      const matrix::Csr<ValueType, IndexType> *b,
                      const matrix::Dense<ValueType> *beta,
-                     matrix::Csr<ValueType, IndexType> *c)
+                     const matrix::Csr<ValueType, IndexType> *c,
+                     Array<IndexType> &c_row_ptrs_array,
+                     Array<IndexType> &c_col_idxs_array,
+                     Array<ValueType> &c_vals_array)
 {
-    auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
-    auto c_rows = c_size[0];
+    auto rows = a->get_size()[0];
     auto valpha = alpha->at(0, 0);
     auto vbeta = beta->at(0, 0);
 
     // first sweep: count nnz for each row
-    Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
+    c_row_ptrs_array.resize_and_reset(rows + 1);
     auto c_row_ptrs = c_row_ptrs_array.get_data();
 
     std::unordered_set<IndexType> local_col_idxs;
 #pragma omp parallel for schedule(dynamic, 256) firstprivate(local_col_idxs)
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_col_idxs.clear();
         if (vbeta != zero(vbeta)) {
             spgemm_insert_row(local_col_idxs, c, a_row);
@@ -291,17 +289,18 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
 
     // build row pointers: exclusive scan (thus the + 1)
     c_row_ptrs[0] = 0;
-    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
+    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);
 
     // second sweep: accumulate non-zeros
-    Array<IndexType> c_col_idxs_array(exec, c_row_ptrs[c_rows]);
-    Array<ValueType> c_vals_array(exec, c_row_ptrs[c_rows]);
+    auto new_nnz = c_row_ptrs[rows];
+    c_col_idxs_array.resize_and_reset(new_nnz);
+    c_vals_array.resize_and_reset(new_nnz);
     auto c_col_idxs = c_col_idxs_array.get_data();
     auto c_vals = c_vals_array.get_data();
 
     std::unordered_map<IndexType, ValueType> local_row_nzs;
 #pragma omp parallel for schedule(dynamic, 256) firstprivate(local_row_nzs)
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_row_nzs.clear();
         if (vbeta != zero(vbeta)) {
             spgemm_accumulate_row(local_row_nzs, c, vbeta, a_row);
@@ -317,11 +316,6 @@ void advanced_spgemm(std::shared_ptr<const OmpExecutor> exec,
             ++c_nz;
         }
     }
-
-    auto new_c = matrix::Csr<ValueType, IndexType>::create(
-        exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
-        std::move(c_row_ptrs_array), c->get_strategy());
-    new_c->move_to(c);
 }
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(

diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp
@@ -206,35 +206,36 @@ template <typename ValueType, typename IndexType>
 void spgemm(std::shared_ptr<const ReferenceExecutor> exec,
             const matrix::Csr<ValueType, IndexType> *a,
             const matrix::Csr<ValueType, IndexType> *b,
-            matrix::Csr<ValueType, IndexType> *c)
+            const matrix::Csr<ValueType, IndexType> *c,
+            Array<IndexType> &c_row_ptrs_array,
+            Array<IndexType> &c_col_idxs_array, Array<ValueType> &c_vals_array)
 {
-    auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
-    auto c_rows = c_size[0];
+    auto rows = a->get_size()[0];
 
     // first sweep: count nnz for each row
-    Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
+    c_row_ptrs_array.resize_and_reset(rows + 1);
     auto c_row_ptrs = c_row_ptrs_array.get_data();
 
     std::unordered_set<IndexType> local_col_idxs;
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_col_idxs.clear();
         spgemm_insert_row2(local_col_idxs, a, b, a_row);
         c_row_ptrs[a_row + 1] = local_col_idxs.size();
     }
 
     // build row pointers: exclusive scan (thus the + 1)
     c_row_ptrs[0] = 0;
-    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
+    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);
 
     // second sweep: accumulate non-zeros
-    auto new_nnz = c_row_ptrs[c_rows];
-    Array<IndexType> c_col_idxs_array(exec, new_nnz);
-    Array<ValueType> c_vals_array(exec, new_nnz);
+    auto new_nnz = c_row_ptrs[rows];
+    c_col_idxs_array.resize_and_reset(new_nnz);
+    c_vals_array.resize_and_reset(new_nnz);
     auto c_col_idxs = c_col_idxs_array.get_data();
     auto c_vals = c_vals_array.get_data();
 
     std::unordered_map<IndexType, ValueType> local_row_nzs;
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_row_nzs.clear();
         spgemm_accumulate_row2(local_row_nzs, a, b, one<ValueType>(), a_row);
         // store result
@@ -245,11 +246,6 @@ void spgemm(std::shared_ptr<const ReferenceExecutor> exec,
             ++c_nz;
         }
     }
-
-    auto new_c = matrix::Csr<ValueType, IndexType>::create(
-        exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
-        std::move(c_row_ptrs_array), c->get_strategy());
-    new_c->move_to(c);
 }
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
@@ -261,19 +257,21 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
                      const matrix::Csr<ValueType, IndexType> *a,
                      const matrix::Csr<ValueType, IndexType> *b,
                      const matrix::Dense<ValueType> *beta,
-                     matrix::Csr<ValueType, IndexType> *c)
+                     const matrix::Csr<ValueType, IndexType> *c,
+                     Array<IndexType> &c_row_ptrs_array,
+                     Array<IndexType> &c_col_idxs_array,
+                     Array<ValueType> &c_vals_array)
 {
-    auto c_size = dim<2>{a->get_size()[0], b->get_size()[1]};
-    auto c_rows = c_size[0];
+    auto rows = a->get_size()[0];
     auto valpha = alpha->at(0, 0);
     auto vbeta = beta->at(0, 0);
 
     // first sweep: count nnz for each row
-    Array<IndexType> c_row_ptrs_array(exec, c_rows + 1);
+    c_row_ptrs_array.resize_and_reset(rows + 1);
     auto c_row_ptrs = c_row_ptrs_array.get_data();
 
     std::unordered_set<IndexType> local_col_idxs;
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_col_idxs.clear();
         if (vbeta != zero(vbeta)) {
             spgemm_insert_row(local_col_idxs, c, a_row);
@@ -286,16 +284,17 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
 
     // build row pointers: exclusive scan (thus the + 1)
     c_row_ptrs[0] = 0;
-    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + c_rows + 1, c_row_ptrs + 1);
+    std::partial_sum(c_row_ptrs + 1, c_row_ptrs + rows + 1, c_row_ptrs + 1);
 
     // second sweep: accumulate non-zeros
-    Array<IndexType> c_col_idxs_array(exec, c_row_ptrs[c_rows]);
-    Array<ValueType> c_vals_array(exec, c_row_ptrs[c_rows]);
+    auto new_nnz = c_row_ptrs[rows];
+    c_col_idxs_array.resize_and_reset(new_nnz);
+    c_vals_array.resize_and_reset(new_nnz);
     auto c_col_idxs = c_col_idxs_array.get_data();
     auto c_vals = c_vals_array.get_data();
 
     std::unordered_map<IndexType, ValueType> local_row_nzs;
-    for (size_type a_row = 0; a_row < c_rows; ++a_row) {
+    for (size_type a_row = 0; a_row < rows; ++a_row) {
         local_row_nzs.clear();
         if (vbeta != zero(vbeta)) {
             spgemm_accumulate_row(local_row_nzs, c, vbeta, a_row);
@@ -311,11 +310,6 @@ void advanced_spgemm(std::shared_ptr<const ReferenceExecutor> exec,
             ++c_nz;
         }
     }
-
-    auto new_c = matrix::Csr<ValueType, IndexType>::create(
-        exec, c_size, std::move(c_vals_array), std::move(c_col_idxs_array),
-        std::move(c_row_ptrs_array), c->get_strategy());
-    new_c->move_to(c);
 }
 
 GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(